# Notes
## base url 
##### https://world-rowing-api.soticcloud.net/stats/api/

### Endpoints
#### /race
Every element of `data` has the following fields:
{('Date', 'DateString', 'DisplayName', 'IsStarted', 'Progression', 'RaceNr', 'Rescheduled', 'RescheduledFrom', 'RscCode', 'eventId', 'genderId', 'id', 'racePhaseId', 'raceStatusId')}

#### /event



# code

In [31]:
from IPython.display import display, HTML
# Inject a CSS rule so that elements with class "container" use the full width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [26]:
import pandas as pd 
import numpy as np

import datetime
from collections.abc import Mapping
import logging

In [17]:
#from js import XMLHttpRequest
import requests
import json
from urllib import parse

# Since Python 3.9 the built-in list, set, dict and tuple can be used directly
# as generic type annotations (e.g. list[int]); the typing aliases below are
# only needed for older versions.
from typing import List, Set, Dict, Tuple, Optional 


In [28]:
!python --version

Python 3.10.0


In [27]:
# Scratch check of set.issubset: every element of l2 is also in l1.
l1 = [1,2]
l2 = [1,2]
l3 = [1,2, 3]  # not used by the check below

set(l2).issubset(set(l1))

True

In [32]:
# HELPER FUNCTIONS FOR UTILS
def _get_date_columns(str_list: list) -> list: 
    """
    Select the column names that look like date columns.

    The match is case-insensitive and the original spelling and order of the
    names are preserved. Non-string labels are compared via their `str()` form.

    str_list: column names to inspect
    return: list of columns that contain 'date' in their name
    """
    # A comprehension yields a real list (a `filter` object has no len()) and
    # keeps each original name, even when two names differ only in case.
    return [name for name in str_list if "date" in str(name).lower()]

def _get_binary_columns(df: pd.DataFrame) -> list: 
    """
    Find the columns whose values are all 0, 1 or missing.

    A column that contains only missing values is included as well, since
    nothing in it contradicts a binary interpretation.

    df: DataFrame to inspect
    return: list of columns with potentially binary data
    """
    # DataFrame.isin takes a single collection of values; missing values are
    # tested separately with isna() so the check does not rely on NaN matching.
    allowed = df.isin([0, 1]) | df.isna()
    # Reduce per column: a column qualifies only if every cell is allowed.
    column_is_binary = allowed.all(axis=0)
    return [col for col, ok in zip(df.columns, column_is_binary) if ok]

def _alter_dataframe_column_types(df: pd.DataFrame, type_mapping: dict[str, str]) -> pd.DataFrame:
    """
    Convert columns of `df` to the types named in `type_mapping`.

    Supported type names are "str", "int", "float", "bool" and "date"
    ("date" is parsed with `pd.to_datetime`). A column mapped to any other
    type name is left unchanged and a warning is logged.

    The columns are reassigned on the passed-in frame, which is also returned.

    df: DataFrame whose columns are converted
    type_mapping: column name -> type name
    raises: KeyError if `type_mapping` names a column that is not in `df`
    return: DataFrame with altered types, according to `type_mapping`
    """
    # Validate explicitly instead of with `assert`, which is removed under -O.
    known_columns = set(df.columns.to_list())
    missing = [col for col in type_mapping if col not in known_columns]
    if missing:
        raise KeyError(f"columns not present in DataFrame: {missing}")

    # Dispatch table: type name -> conversion applied to the column.
    converters = {
        "str": lambda col: col.astype(str),
        "int": lambda col: col.astype(int),
        "float": lambda col: col.astype(float),
        "bool": lambda col: col.astype(bool),
        "date": pd.to_datetime,
    }

    for column, type_name in type_mapping.items():
        convert = converters.get(type_name)
        if convert is None:
            # Best effort: keep going, but say which column/type was skipped.
            logging.warning(
                "unsupported type %r for column %r; column left unchanged",
                type_name,
                column,
            )
            continue
        df[column] = convert(df[column])

    return df

In [34]:
# Scratch check of the dict union operator `|` (Python >= 3.9).
d1 = {1:1, 2:2}
d2 = {2:2, 3:3}

# `|` binds tighter than `==`, so the merged dict is compared with the literal.
d1 | d2 == {1: 1, 2: 2, 3: 3}

True

# code scratches

In [49]:
# Fetch the race table and show it as the cell output.
race = get_races()
race

Unnamed: 0,Date,DateString,DisplayName,IsStarted,Progression,RaceNr,Rescheduled,RescheduledFrom,RscCode,eventId,genderId,id,racePhaseId,raceStatusId
0,2002-06-14 00:00:00,,H2,,,14,0.0,,ROWWSCULL2------------HEAT000200--,35787d65-1c05-40aa-b568-88f26ad84beb,dae73748-90c9-4a49-9d94-4d4810fa457e,00014c9d-f22b-4bb2-9e62-02ed5bde2c65,cd3d5ca1-5aed-4146-b39b-a192ae6533f1,182f6f15-8e78-41c3-95b3-8b006af2c6a1
1,2003-07-25 00:00:00,,H2,,,6,0.0,,ROWWSCULL1-LU23-------HEAT000200--,49564f91-d4a5-4a39-a31d-036a622e1f2d,dae73748-90c9-4a49-9d94-4d4810fa457e,00025d82-8f5c-47e7-b150-99877ed99fb4,cd3d5ca1-5aed-4146-b39b-a192ae6533f1,182f6f15-8e78-41c3-95b3-8b006af2c6a1
2,2013-08-28 14:40:00,,FB,,,124,0.0,,ROWXSCULL2--TA--------FNL-000200--,728997f3-df52-4dbd-9257-568a363cc2e3,1c403f88-2b4f-4c7e-82d9-21a4c25cc009,0004edaa-8b4a-441e-a29c-0ce18faf60d6,e0fc3320-cd66-43af-a5b5-97afd55b2971,182f6f15-8e78-41c3-95b3-8b006af2c6a1
3,2009-08-08 14:00:00,,FA,,,186,0.0,,ROWMSCULL2--J---------FNL-000100--,708c21e2-4330-4440-ae44-8582892b0031,5beae5a3-10e4-4d33-96e5-c1a9f612dd54,0006f523-1f10-48cc-af84-1887f29dde19,e0fc3320-cd66-43af-a5b5-97afd55b2971,182f6f15-8e78-41c3-95b3-8b006af2c6a1
4,2006-07-08 18:46:00,,FB,,,123,0.0,,ROWMSCULL1-L----------FNL-000200--,445ab098-a2d3-467a-9218-36b62c503e16,5beae5a3-10e4-4d33-96e5-c1a9f612dd54,00073646-8dfd-41b5-8e36-7fe040e2f253,e0fc3320-cd66-43af-a5b5-97afd55b2971,182f6f15-8e78-41c3-95b3-8b006af2c6a1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,2016-08-09 12:00:00,,R1,,1-3->SA/B,64,0.0,,ROWMNOCOX4------------REP-000100--,08da2c72-33f3-4602-8fe1-b57e31494027,5beae5a3-10e4-4d33-96e5-c1a9f612dd54,0942444d-f07f-4192-8365-cff8b81e7fe2,0959f5e8-f85a-40fb-93ab-b6c477f6aade,182f6f15-8e78-41c3-95b3-8b006af2c6a1
996,2006-07-20 09:55:00,,H2,,"1-2->SA/B, 3..->R",6,0.0,,ROWWSCULL1-LU23-------HEAT000200--,26627ba9-50f4-4f1a-9f1a-ea768b123e63,dae73748-90c9-4a49-9d94-4d4810fa457e,09441744-b09b-4c92-9151-058b13f76cea,cd3d5ca1-5aed-4146-b39b-a192ae6533f1,182f6f15-8e78-41c3-95b3-8b006af2c6a1
997,2003-07-26 00:00:00,,R1,,,58,0.0,,ROWMSCULL1--U23-------REP-005100--,a2987f9c-9c95-4d25-90b0-23ccb4e8dca9,5beae5a3-10e4-4d33-96e5-c1a9f612dd54,09443294-0cd4-422e-af94-182fb5dd370f,0959f5e8-f85a-40fb-93ab-b6c477f6aade,182f6f15-8e78-41c3-95b3-8b006af2c6a1
998,2011-08-28 10:30:00,,H4,,"1->SA/B, 2..->R",11,0.0,,ROWMNOCOX4-L----------HEAT000400--,c4c5233f-1900-429c-a9a1-81e32209a5e5,5beae5a3-10e4-4d33-96e5-c1a9f612dd54,094c820c-6698-4b5c-a3e3-0d824dcb1ee6,cd3d5ca1-5aed-4146-b39b-a192ae6533f1,182f6f15-8e78-41c3-95b3-8b006af2c6a1


In [38]:
# CONSTANTS
# Root URL of the API; the endpoint paths below are appended to it.
WR_BASE_URL = "https://world-rowing-api.soticcloud.net/stats/api/"
# ENDPOINTS FOR THE BASE-URL
# Each path is relative to WR_BASE_URL and ends with a slash.
WR_ENDPOINT_RACE = "race/"
WR_ENDPOINT_EVENT = "event/"
WR_ENDPOINT_COMPETITION = "competition/"



def load_json(url: str, params=None, timeout=20., **kwargs):
    """
    Send a GET request and decode the JSON body of the response.

    url: address to request
    params: optional query parameters, forwarded to `requests.get`
    timeout: request timeout, forwarded to `requests.get`
    kwargs: any further keyword arguments for `requests.get`
    raises: whatever `raise_for_status` raises for an error status
    return: the decoded JSON, or an empty dict if the response body is empty
    """
    response = requests.get(url, params=params, timeout=timeout, **kwargs)
    response.raise_for_status()
    # An empty body cannot be decoded as JSON, so it maps to an empty dict.
    return response.json() if response.text else {}

def pre_process_dataframe(df: pd.DataFrame) -> pd.DataFrame: 
    """
    Identifies date- and binary-columns and transforms their types.

    Columns whose name contains "date" are mapped to the "date" conversion and
    columns reported as potentially binary are mapped to "bool". If a column
    is reported by both helpers, the "date" conversion is used.

    df: DataFrame to convert
    return: the DataFrame returned by `_alter_dataframe_column_types`
    """
    date_cols = _get_date_columns(df.columns.to_list())
    binary_cols = _get_binary_columns(df)

    type_mapping = {col: "bool" for col in binary_cols}
    # Date entries are added last so they override a "bool" entry for the same
    # column. The binary check accepts missing values, so an empty date column
    # can show up in both lists, and casting NaN to bool would yield True.
    type_mapping.update({col: "date" for col in date_cols})

    return _alter_dataframe_column_types(df, type_mapping)
    
    
    
def get_dataframe_from_dict(dictionary: dict) -> pd.DataFrame: 
    """
    Build a DataFrame from the value stored under the "data" key.

    dictionary: mapping that must contain a "data" entry
    raises: AssertionError if there is no "data" key
    return: DataFrame created from `dictionary["data"]`
    """
    available_keys = dictionary.keys()
    assert "data" in available_keys
    records = dictionary["data"]
    return pd.DataFrame.from_dict(records)


def get_competitions(year: int = None, kind: str = None): 
    """
    Load the competition table from the competition endpoint.

    year: if given, keep only the rows whose `Year` column equals it. If the
          loaded frame has no `Year` column, the frame is returned unfiltered.
    kind: accepted but currently not applied
    return: DataFrame built from the `data` entry of the response
    """
    _json_dict = load_json(url=f'{WR_BASE_URL}{WR_ENDPOINT_COMPETITION}')
    df = get_dataframe_from_dict(_json_dict)

    if year is None or "Year" not in df.columns:
        return df

    # NOTE(review): whether the API delivers `Year` as number or string is not
    # verified; to_numeric makes the comparison work for both.
    competition_years = pd.to_numeric(df["Year"], errors="coerce")
    return df[competition_years == year]
    
def get_races(year: int = None, kind: str = None): 
    """
    Load the race table from the race endpoint.

    year: if given, keep only the rows whose `Date` column falls in that year.
          If the loaded frame has no `Date` column, the frame is returned
          unfiltered.
    kind: accepted but currently not applied
    return: DataFrame built from the `data` entry of the response
    """
    _json_dict = load_json(url=f'{WR_BASE_URL}{WR_ENDPOINT_RACE}')
    df = get_dataframe_from_dict(_json_dict)

    if year is None or "Date" not in df.columns:
        return df

    # Values that cannot be parsed become NaT and therefore never match.
    race_years = pd.to_datetime(df["Date"], errors="coerce").dt.year
    return df[race_years == year]
    
def get_events(year: int = None, kind: str = None): 
    """
    Load the event table from the event endpoint.

    year: accepted but currently not applied
    kind: accepted but currently not applied
    return: DataFrame built from the `data` entry of the response
    """
    payload = load_json(url=f'{WR_BASE_URL}{WR_ENDPOINT_EVENT}')
    # No filtering is implemented yet, so the full table is returned
    # regardless of `year`.
    return get_dataframe_from_dict(payload)
    
    
    

# testing 


In [4]:
# Fetch the raw /race payload and inspect its structure.
race = load_json(url='https://world-rowing-api.soticcloud.net/stats/api/race')
print(race.keys())  # top-level keys of the response
d = race['data']
# One key tuple per record; the set below shows the distinct record layouts.
data_keys = [tuple(el.keys()) for el in d]
print(set(data_keys))

dict_keys(['data', 'meta'])
{('Date', 'DateString', 'DisplayName', 'IsStarted', 'Progression', 'RaceNr', 'Rescheduled', 'RescheduledFrom', 'RscCode', 'eventId', 'genderId', 'id', 'racePhaseId', 'raceStatusId')}


In [5]:
# Fetch the raw /event payload and inspect its structure.
event = load_json(url='https://world-rowing-api.soticcloud.net/stats/api/event')
print(event.keys())  # top-level keys of the response
d = event['data']
# One key tuple per record; the set below shows the distinct record layouts.
data_keys = [tuple(el.keys()) for el in d]
print(set(data_keys))

dict_keys(['data', 'meta'])
{('DisplayName', 'RscCode', 'boatClassId', 'competitionId', 'competitionTypeId', 'id')}


In [8]:
# Fetch the raw /competition payload and inspect its structure.
competition = load_json(url='https://world-rowing-api.soticcloud.net/stats/api/competition')
print(competition.keys())  # top-level keys of the response
d = competition['data']
# One key tuple per record; the set below shows the distinct record layouts.
data_keys = [tuple(el.keys()) for el in d]
print(set(data_keys))

dict_keys(['data', 'meta'])
{('CompetitionCode', 'DisplayName', 'EndDate', 'EntryDeadlineDate', 'HasResults', 'IsFisa', 'StartDate', 'Year', 'competitionTypeId', 'id', 'venueId')}
