Cyclingrace results

Cyclingrace is an amateur road race series.
Each race is a separate event that awards points towards the season standings.

In [1]:
import requests
import pandas as pd
import yaml

from collections import defaultdict

In [2]:
# Load the race configuration. Later cells read race_config['groups'] and
# race_config['time-trials'], whose entries carry 'name', 'results' and
# 'order' keys.
# The encoding is pinned to UTF-8 (the encoding YAML streams normally use) so
# that non-ASCII race names decode the same way regardless of the OS locale.
with open('races-config.yaml', encoding='utf-8') as fp:
    race_config = yaml.safe_load(fp)

In [3]:
# from typing import Union


# Columns kept when the raw result items are turned into a DataFrame.
COLUMNS = ['bib', 'name', 'gender', 'category', 'status', 'rank_abs', 'result', 'result_time', 'team', 'club']

def get_results(results_url: str, full: bool = False, timeout: float = 30.0) -> pd.DataFrame | list[dict]:
    """Download every page of a paginated results endpoint.

    The endpoint is queried with a 1-based ``page`` query parameter. Each
    response is expected to be JSON with an ``items`` list; the first response
    must also carry ``page_info.totalPages``.

    Args:
        results_url: URL of the JSON results endpoint.
        full: When true, return the raw list of items instead of a DataFrame.
        timeout: Seconds to wait for each HTTP request before giving up.
            Without a timeout a stalled server would block forever.

    Returns:
        The raw list of result items when ``full`` is true, otherwise a
        DataFrame restricted to ``COLUMNS``.

    Raises:
        requests.HTTPError: If any page responds with an error status.
        requests.Timeout: If a request exceeds ``timeout``.
    """
    # One session for all pages so the connection can be reused.
    with requests.Session() as session:

        def fetch_page(page: int) -> dict:
            response = session.get(results_url, params={'page': page}, timeout=timeout)
            response.raise_for_status()
            return response.json()

        first_page = fetch_page(1)
        # Copy the list so that extending it does not mutate the decoded payload.
        full_results = list(first_page['items'])
        total_pages = first_page['page_info']['totalPages']
        for page in range(2, total_pages + 1):
            full_results.extend(fetch_page(page)['items'])

    if full:
        return full_results
    return pd.DataFrame(full_results, columns=COLUMNS)

In [4]:
# r = get_results(race_config['groups'][-1]['results']['B'] + '.json', True)

In [5]:
from itertools import chain, repeat, islice

# Points per finishing position: 100 for first place, two fewer for each
# following place down to 22 (40 places), then a flat 20 for the next 10.
points = [*range(100, 20, -2), *([20] * 10)]

def _pad_infinite(iterable, padding=None):
    return chain(iterable, repeat(padding))

def _pad(iterable, size, padding=None):
    """Return an iterator over exactly *size* items.

    Items come from *iterable* first; if it runs out early the remainder is
    filled with *padding*. Longer iterables are cut off after *size* items.
    """
    endless = _pad_infinite(iterable, padding)
    return islice(endless, size)

def generate_points(n_places: int) -> list:
    """Return the points for the first *n_places* finishing positions.

    Values are taken from the module-level ``points`` table; positions beyond
    the end of the table receive 0.
    """
    return [*_pad(points, n_places, 0)]

In [6]:
from pathlib import Path


# Maps a cluster label to a tuple of racer-name prefixes read from
# data/clusters/. A missing cluster yields an empty tuple.
official_clusters = defaultdict(tuple)

clusters_dir = Path('data/clusters/')
for file in clusters_dir.iterdir():
    # Skip hidden files.
    if file.name.startswith('.'):
        continue
    # '+' is stripped from both ends of the file name, so several files can
    # feed the same cluster.
    # NOTE(review): presumably files such as 'A+' extend cluster 'A' - confirm.
    cluster = file.name.strip('+')
    # UTF-8 is pinned so that non-ASCII names do not depend on the OS locale.
    with open(file, encoding='utf-8') as fp:
        # Blank lines (including the one produced by a trailing newline) are
        # dropped: an empty prefix would match every racer in str.startswith.
        official_clusters[cluster] += tuple(
            line for line in fp.read().splitlines() if line.strip()
        )

In [7]:
def set_tt_cluster(tt_results: pd.DataFrame, clusters: dict[str, tuple[str, ...]]) -> None:
    """Add a ``cluster`` column to time-trial results, in place.

    Every row starts in cluster ``'C'``; rows whose ``gender`` is ``'female'``
    are moved to ``'F'``. After that, each entry of *clusters* overrides the
    label of every racer whose ``name`` starts with one of its prefixes. Later
    entries win when several clusters match the same racer.

    Args:
        tt_results: Results table with ``name`` and ``gender`` columns.
            It is modified in place.
        clusters: Mapping of cluster label to racer-name prefixes.
    """
    tt_results['cluster'] = 'C'
    tt_results.loc[tt_results['gender'] == 'female', 'cluster'] = 'F'
    names = tt_results['name']
    # Use the mapping that was passed in rather than a module-level global.
    for cluster, racers in clusters.items():
        # An empty prefix matches every name, so drop empty entries.
        prefixes = tuple(racer for racer in racers if racer)
        if not prefixes:
            continue
        # na=False keeps the mask strictly boolean when a name is missing.
        tt_results.loc[names.str.startswith(prefixes, na=False), 'cluster'] = cluster

In [10]:
# Scratch cell: shows the path of a 'test.csv' file inside race_results_dir.
# NOTE(review): `os` and `race_results_dir` are only bound by the cell below
# (In [11]), so this expression fails when the cells are run top to bottom.
os.path.join(race_results_dir, 'test.csv')

'data/Садовое кольцо/test.csv'

In [11]:
import os
from pathlib import Path
from copy import deepcopy

# Downloaded results, keyed by race type. Each entry is a copy of the race's
# config with its 'results' replaced by the downloaded table(s).
race_results = {'groups': [], 'time-trials': []}

for race in race_config['groups']:
    # Each group race gets its own directory under data/ for the CSV copies.
    race_results_dir = Path("data") / race['name']
    race_results_dir.mkdir(exist_ok=True)

    # Deep copy so the config entry keeps its result links.
    cluster_results = deepcopy(race)
    for cluster, result_link in race['results'].items():
        results_table = get_results(result_link + '.json')
        results_table.to_csv(race_results_dir / f'{cluster}.csv')
        cluster_results['results'][cluster] = results_table
    race_results['groups'].append(cluster_results)

In [9]:
def shorten_name(name: str) -> str:
    """Abbreviate the last space-separated word of *name* to its first letter.

    ``"Ivanov Ivan"`` becomes ``"Ivanov I"``. All words before the last one,
    and the spacing between them, are kept unchanged.

    Args:
        name: Racer name made of words separated by single spaces.

    Returns:
        The name with its last word reduced to one character.

    Raises:
        IndexError: If *name* is empty or consists only of spaces.
    """
    # Trailing spaces are dropped first: they would otherwise leave an empty
    # last word and make the initial lookup fail.
    *leading_words, last_word = name.rstrip(" ").split(" ")
    return " ".join([*leading_words, last_word[0]])

# For every cluster, gather the shortened names of all racers that appear in
# that cluster's results of any group race.
group_clusters = defaultdict(set)
for race in race_results['groups']:
    for cluster, results in race['results'].items():
        shortened = {shorten_name(full_name) for full_name in results.name.unique()}
        group_clusters[cluster] |= shortened

# Add the racers listed in the cluster files on top of the observed ones.
for cluster, members in group_clusters.items():
    members.update(official_clusters[cluster])

In [10]:
# Give every racer exactly one cluster: walking the clusters in dictionary
# order, a name stays only in the first cluster it is seen in.
has_cluster = set()

for cluster, group in group_clusters.items():
    unclaimed = group - has_cluster
    official_clusters[cluster] = tuple(unclaimed)
    # Prints: names kept for this cluster, names seen for this cluster.
    print(len(unclaimed), len(group))
    has_cluster |= group

282 282
440 474
591 653
305 309


In [15]:
# Download every time trial, label each racer with a cluster and store the
# table next to a copy of the race's config.
for race in race_config['time-trials']:
    tt_table = get_results(race['results'] + '.json')
    set_tt_cluster(tt_table, official_clusters)

    tt_results = deepcopy(race)
    tt_results['results'] = tt_table
    race_results['time-trials'].append(tt_results)
    # tt_results['results'].to_csv(f"data/tt-results/{tt_results['name']}.csv")

In [12]:
def get_points_for_race(race_results, race_name: str | None = None) -> pd.DataFrame:
    """Award points to the ranked racers of one race.

    Rows without a ``rank_abs`` value are dropped. The remaining rows are
    ordered by ``rank_abs`` and receive points by their position in that
    order, as produced by ``generate_points``.

    Args:
        race_results: Results table with ``name`` and ``rank_abs`` columns.
        race_name: Name of the points column; ``'points'`` when empty or None.

    Returns:
        A table indexed by ``rank_abs`` with the ``name`` and points columns.
    """
    points_column = race_name or 'points'

    ranked = race_results.dropna(subset=['rank_abs'])
    ranked = ranked.astype({'rank_abs': int})
    ranked = ranked.sort_values('rank_abs').set_index('rank_abs')

    ranked[points_column] = generate_points(len(ranked))
    return ranked[['name', points_column]]

In [13]:
# One table per cluster: a 'name' column plus one points column per race.
cluster_standing = {}

# Group races: each cluster has its own results table.
for race in race_results['groups']:
    for cluster, results in race['results'].items():
        race_points = get_points_for_race(results, race['name'])
        if cluster not in cluster_standing:
            cluster_standing[cluster] = race_points
            continue
        cluster_standing[cluster] = cluster_standing[cluster].merge(
            race_points, how='outer', on='name'
        )

# Time trials: one shared table, split by the 'cluster' column. Only clusters
# that already have a standing from the group races are considered.
for race in race_results['time-trials']:
    tt_table = race['results']
    for cluster, standing in cluster_standing.items():
        cluster_results = tt_table[tt_table['cluster'] == cluster]
        race_points = get_points_for_race(cluster_results, race['name'])
        cluster_standing[cluster] = standing.merge(race_points, how='outer', on='name')

In [14]:
from itertools import chain
# Race names of both race types, sorted by their configured 'order' value
# (ties are broken by name).
ordered_races = sorted(
    (race['order'], race['name'])
    for race in chain(race_config['groups'], race_config['time-trials'])
)
race_order = [race_name for _, race_name in ordered_races]

In [15]:
# Column headers used when the standings are exported.
renamed_columns = {"name": "Гонщик", "total": "Очки"}

# Order the race columns, add a per-racer total and rank racers by it.
for cluster in cluster_standing:
    all_results = cluster_standing[cluster].set_index('name')[race_order]
    all_results = all_results.assign(total=all_results.sum(axis='columns'))
    cluster_standing[cluster] = all_results.sort_values('total', ascending=False)



In [16]:
# Export the standings: one Excel workbook with a sheet per cluster, plus one
# CSV file per cluster with renamed headers and a 1-based row index.
# The context manager closes the workbook even if one of the writes fails.
with pd.ExcelWriter('data/current_standing.xlsx', engine='xlsxwriter') as writer:
    for cluster in cluster_standing:
        standing = cluster_standing[cluster].reset_index()
        standing.to_excel(writer, sheet_name=cluster)

        data = standing.rename(renamed_columns, axis='columns')
        data.index += 1
        data.to_csv(f'data/cluster_{cluster}.csv')