# Notes
## base url 
##### https://world-rowing-api.soticcloud.net/stats/api/

### Endpoints
#### /race
Every element of the response's `data` list has the following fields:
{('Date', 'DateString', 'DisplayName', 'IsStarted', 'Progression', 'RaceNr', 'Rescheduled', 'RescheduledFrom', 'RscCode', 'eventId', 'genderId', 'id', 'racePhaseId', 'raceStatusId')}

#### /event



# code

In [1]:
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import pandas as pd 
import numpy as np

import datetime
from collections.abc import Mapping
import logging

ModuleNotFoundError: No module named 'pandas'

In [None]:
#from js import XMLHttpRequest
import requests
import json
from urllib import parse

# since Python 3.9, the built-in types list, set, dict and tuple can be used directly as generic type annotations (e.g. dict[str, str])
from typing import List, Set, Dict, Tuple, Optional 


In [None]:
!python --version

In [None]:
# Scratch check of the set.issubset idiom used in _alter_dataframe_column_types.
l1 = [1,2]
l2 = [1,2]
l3 = [1,2, 3]  # defined but not used by the expression below

# Evaluates to True: every element of l2 also appears in l1.
set(l2).issubset(set(l1))

In [None]:
# HELPER FUNCTIONS FOR UTILS
def _get_date_columns(str_list: list) -> list: 
    """
    return: list of columns that contain 'date' in their name
    """
    lower = [s.lower() for s in str_list]
    filtered = filter(lambda x: "date" in x, lower)
    
    if len(filtered) > 0: 
        #return {k: str_list[lower.index(k)] for k in filtered}
        return [str_list[lower.index(k)] for k in filtered]
    else: 
        #return {}
        return []

def _get_binary_columns(df: pd.DataFrame) -> list: 
    """
    return: list of columns with potentially binary data
    """
    return df.loc[:, df.isin(0, 1, np.nan)].columns.to_list()

def _alter_dataframe_column_types(df: pd.DataFrame, type_mapping: dict[str, str]) -> pd.DataFrame:
    """
    return: DataFrame with altered types, according to `type_mapping`
    """
    # switch/case would be the call here. That is only present in python >= 3.10
    # TODO: adjust me if we use python >= 3.10
    assert(set(type_mapping.keys()).issubset(set(df.columns.to_list())))
    
    
    for k, v in type_mapping.items(): 
        if v == "str":
            df[k] = df[k].astype(str)
        elif v == "int":
            df[k] = df[k].astype(int)
        elif v == "float":
            df[k] = df[k].astype(float)
        elif v == "bool":
            df[k] = df[k].astype(bool)
        elif v == "date":
            df[k] = pd.to_datetime(df[k])
        else: 
            # log / error
            print("PANIC!!!")
            
    return df

In [None]:
# Scratch check of the dict-merge operator `|` (Python >= 3.9) used in pre_process_dataframe.
d1 = {1:1, 2:2}
d2 = {2:2, 3:3}

# `|` binds tighter than `==`, so the merged dict is compared; evaluates to True.
d1 | d2 == {1: 1, 2: 2, 3: 3}

# code scratches

In [None]:
# get_races() is defined in a later cell; that cell must have been run before this one.
race = get_races()
race

In [None]:
# CONSTANTS
# Root URL of the stats API; it ends with a slash so endpoint paths can be appended directly.
WR_BASE_URL = "https://world-rowing-api.soticcloud.net/stats/api/"
# ENDPOINTS FOR THE BASE-URL
# Relative paths that the get_* functions concatenate onto WR_BASE_URL.
WR_ENDPOINT_RACE = "race/"
WR_ENDPOINT_EVENT = "event/"
WR_ENDPOINT_COMPETITION = "competition/"



def load_json(url: str, params=None, timeout=20., **kwargs):
    """
    GET `url` and return the decoded JSON body.

    url: address to request
    params: optional query parameters, passed to `requests.get`
    timeout: request timeout, passed to `requests.get`
    kwargs: any further keyword arguments, passed to `requests.get`
    return: the parsed JSON, or an empty dict if the response body is empty
    raises: whatever `raise_for_status()` raises for an unsuccessful response
    """
    response = requests.get(url, params=params, timeout=timeout, **kwargs)
    response.raise_for_status()
    # An empty body cannot be parsed as JSON, so it maps to an empty dict.
    return response.json() if response.text else {}

def pre_process_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """
    Detect date columns and binary columns and convert their types.

    Date columns are mapped to "date" and binary columns to "bool"; the
    combined mapping is handed to `_alter_dataframe_column_types`. A column
    detected as both ends up as "bool", because the binary entries are
    applied last.

    df: DataFrame to convert
    return: the DataFrame returned by `_alter_dataframe_column_types`
    """
    column_names = df.columns.to_list()

    # Date entries first, then binary entries on top (same result as
    # merging the two dicts with `|`).
    type_mapping = dict.fromkeys(_get_date_columns(column_names), "date")
    type_mapping.update(dict.fromkeys(_get_binary_columns(df), "bool"))

    return _alter_dataframe_column_types(df, type_mapping)
    
    
    
def get_dataframe_from_dict(dictionary: dict) -> pd.DataFrame:
    """
    Build a DataFrame from the 'data' entry of `dictionary`.

    dictionary: mapping that must contain the key 'data'
    return: DataFrame created from dictionary['data']
    raises: AssertionError if the key 'data' is absent
    """
    assert "data" in dictionary
    records = dictionary["data"]
    return pd.DataFrame.from_dict(records)


def get_competitions(year: int = None, kind: str = None):
    """
    Load the competition endpoint into a DataFrame.

    year: accepted but not applied yet; the result is the same with or without it
    kind: accepted but not used
    return: DataFrame built from the 'data' entry of the endpoint's response
    """
    # TODO: once the date column is known, filter by `year`,
    # e.g. df = df[df['date'].year == year]
    payload = load_json(url=WR_BASE_URL + WR_ENDPOINT_COMPETITION)
    return get_dataframe_from_dict(payload)
    
def get_races(year: int = None, kind: str = None):
    """
    Load the race endpoint into a DataFrame.

    year: accepted but not applied yet; the result is the same with or without it
    kind: accepted but not used
    return: DataFrame built from the 'data' entry of the endpoint's response
    """
    # TODO: once the date column is known, filter by `year`,
    # e.g. df = df[df['date'].year == year]
    payload = load_json(url=WR_BASE_URL + WR_ENDPOINT_RACE)
    return get_dataframe_from_dict(payload)
    
def get_events(year: int = None, kind: str = None):
    """
    Load the event endpoint into a DataFrame.

    year: accepted but not applied yet; the result is the same with or without it
    kind: accepted but not used
    return: DataFrame built from the 'data' entry of the endpoint's response
    """
    # TODO: once the date column is known, filter by `year`,
    # e.g. df = df[df['date'].year == year]
    payload = load_json(url=WR_BASE_URL + WR_ENDPOINT_EVENT)
    return get_dataframe_from_dict(payload)
    
    
    

# testing 


In [None]:
# Probe the race endpoint: print the top-level keys of the response.
race = load_json(url='https://world-rowing-api.soticcloud.net/stats/api/race')
print(race.keys())
d = race['data']
# Collect each element's field names as a tuple; the set shows the distinct field layouts.
data_keys = [tuple(el.keys()) for el in d]
print(set(data_keys))

In [None]:
# Probe the event endpoint: print the top-level keys of the response.
event = load_json(url='https://world-rowing-api.soticcloud.net/stats/api/event')
print(event.keys())
d = event['data']
# Collect each element's field names as a tuple; the set shows the distinct field layouts.
data_keys = [tuple(el.keys()) for el in d]
print(set(data_keys))

In [None]:
# Probe the competition endpoint: print the top-level keys of the response.
competition = load_json(url='https://world-rowing-api.soticcloud.net/stats/api/competition')
print(competition.keys())
d = competition['data']
# Collect each element's field names as a tuple; the set shows the distinct field layouts.
data_keys = [tuple(el.keys()) for el in d]
print(set(data_keys))