Cyclingrace results

Cyclingrace is an amateur road race series.
Each race is a separate event that awards points towards the season standings.

In [1]:
import requests
import pandas as pd
import yaml

In [2]:
# Read the race configuration. The encoding is pinned to UTF-8 because the
# file holds Cyrillic race names; relying on the platform default encoding
# can garble them or fail to decode the file altogether.
with open('races-config.yaml', encoding='utf-8') as fp:
    race_config = yaml.safe_load(fp)

In [3]:
# Show the parsed configuration to check what was loaded.
race_config

{'groups': [{'name': 'Садовое кольцо',
   'order': 1,
   'results': {'A': 'https://results.zone/ring-2024/races/7257/results',
    'B': 'https://results.zone/ring-2024/races/7258/results',
    'C': 'https://results.zone/ring-2024/races/7259/results',
    'F': 'https://results.zone/ring-2024/races/7261/results'}},
  {'name': 'Верея',
   'order': 4,
   'results': {'A': 'https://results.zone/cyclingrace-verey-2024/races/7306/results',
    'B': 'https://results.zone/cyclingrace-verey-2024/races/7307/results',
    'C': 'https://results.zone/cyclingrace-verey-2024/races/7308/results',
    'F': 'https://results.zone/cyclingrace-verey-2024/races/7309/results'}}],
 'time-trials': [{'name': 'Крылатские холмы',
   'order': 2,
   'results': 'https://results.zone/cyclingrace-tt-2024/races/7265/results'},
  {'name': 'Верея ITT',
   'order': 3,
   'results': 'https://results.zone/cyclingrace-vereyitt-2024/races/7305/results'}]}

In [4]:
COLUMNS = ['bib', 'name', 'category', 'status', 'rank_abs', 'result', 'result_time', 'team', 'club']

# Seconds to wait for the results server before giving up. Without a timeout
# `requests` waits indefinitely, which would hang the whole notebook run.
REQUEST_TIMEOUT = 30


def get_results(results_url: str, timeout: float = REQUEST_TIMEOUT) -> pd.DataFrame:
    """Download every page of a paginated results endpoint into one frame.

    The endpoint is expected to answer with a JSON object that has an
    ``items`` list and a ``page_info.totalPages`` count. Page 1 is fetched
    first to learn the page count, then pages 2..totalPages are fetched in
    order and their items appended.

    Args:
        results_url: URL of the JSON results endpoint.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        A DataFrame with exactly the columns listed in ``COLUMNS``; keys
        missing from an item become NaN, extra keys are dropped.

    Raises:
        requests.HTTPError: If any page answers with an error status.
    """
    def fetch_page(page: int) -> dict:
        response = requests.get(results_url, params=dict(page=page), timeout=timeout)
        response.raise_for_status()
        return response.json()

    first_page = fetch_page(1)
    total_pages = first_page['page_info']['totalPages']

    # Copy the first page's items so the decoded payload is not extended in place.
    rows = list(first_page['items'])
    for page in range(2, total_pages + 1):
        rows.extend(fetch_page(page)['items'])

    return pd.DataFrame(rows, columns=COLUMNS)

In [6]:
from itertools import chain, repeat, islice

# Points by finishing position: 100 for first place, two fewer for each
# following place down to 22 for 40th, then a flat 20 for places 41-50.
points = list(range(100, 20, -2)) + [20] * 10


def _pad_infinite(iterable, padding=None):
    """Yield every element of ``iterable``, then ``padding`` without end."""
    yield from iterable
    while True:
        yield padding


def _pad(iterable, size, padding=None):
    """Return an iterator over exactly ``size`` elements.

    Elements come from ``iterable``; once it is exhausted ``padding`` fills
    the remaining slots. A longer ``iterable`` is cut off at ``size``.
    """
    padded = _pad_infinite(iterable, padding)
    return islice(padded, size)


def generate_points(n_places: int):
    """Return the points for places 1..``n_places`` as a list.

    Places beyond the end of the ``points`` table score 0.
    """
    return [*_pad(points, n_places, 0)]

In [9]:
from typing import Dict


def get_race_standings(group_race_urls: Dict[str, str]) -> pd.DataFrame:
    """Build a per-rider points table from the group races of one cluster.

    For every race the results are downloaded, rows without ``rank_abs`` are
    dropped, the rest are ordered by ``rank_abs`` and given points by their
    position in that order. The per-race tables are then outer-merged on
    ``name``, so a rider who missed a race has NaN in that race's column.

    Args:
        group_race_urls: Mapping of race name to results URL.

    Returns:
        A DataFrame with a ``name`` column and one points column per race,
        named after the race. For an empty mapping an empty DataFrame with
        only the ``name`` column is returned, so callers always get a frame.
    """
    standings = None
    for race_name, group_race_url in group_race_urls.items():
        ranked = (
            get_results(group_race_url)
            .dropna(subset=['rank_abs'])  # rows without a rank get no points
            .astype({'rank_abs': int})
            .sort_values('rank_abs')
            .set_index('rank_abs')
        )
        ranked['points'] = generate_points(ranked.shape[0])
        race_points = ranked.loc[:, ['name', 'points']]

        if standings is None:
            standings = race_points
        else:
            standings = standings.merge(race_points, how='outer', on='name', suffixes=('', '_' + race_name))
        # Rename right away so the next merge can bring in a fresh 'points' column.
        standings = standings.rename({'points': race_name}, axis='columns')

    if standings is None:
        # No races configured: honour the declared return type instead of returning None.
        return pd.DataFrame(columns=['name'])
    return standings

In [10]:
def add_tt_to_standing(group_race_standing: pd.DataFrame, itt_results: pd.DataFrame, race_name: str) -> pd.DataFrame:
    """Add one time trial's points as a new column of a cluster standing.

    Only riders already listed in ``group_race_standing`` are scored: the
    time-trial results are restricted to those names, ordered by
    ``rank_abs``, and points are handed out by position within that subset.

    Args:
        group_race_standing: Standing with a ``name`` column.
        itt_results: Time-trial results with ``name`` and ``rank_abs`` columns.
        race_name: Name given to the new points column.

    Returns:
        The standing with an extra ``race_name`` column; riders of the
        standing without a ranked time-trial result get NaN there.
    """
    cluster_names = group_race_standing[['name']]

    ranked_tt = itt_results.dropna(subset=['rank_abs'])[['name', 'rank_abs']]
    cluster_tt = ranked_tt.merge(cluster_names, how='inner', on='name')
    cluster_tt = cluster_tt.astype({'rank_abs': int}).sort_values('rank_abs')
    # With rank_abs moved to the index, duplicates are judged on the name alone.
    cluster_tt = cluster_tt.set_index('rank_abs').drop_duplicates()
    cluster_tt['points'] = generate_points(len(cluster_tt))

    merged = group_race_standing.merge(
        cluster_tt[['name', 'points']],
        how='outer',
        on='name',
        suffixes=('', '_' + race_name),
    )
    return merged.rename(columns={'points': race_name})

In [29]:
from collections import defaultdict
# Group-race result URLs regrouped as {cluster: {race name: JSON URL}}.
# NOTE(review): the "с" in `cluster_raсes` is a Cyrillic letter, not a Latin
# "c". The name is kept as is because other cells refer to it; rename it
# everywhere in one go if this is ever cleaned up.
cluster_raсes = defaultdict(dict)

for group_race in race_config['groups']:
    race_title = group_race['name']
    for cluster, results_url in group_race['results'].items():
        # '.json' is appended to the configured page URL before it is fetched.
        cluster_raсes[cluster][race_title] = results_url + '.json'

In [None]:
from itertools import chain
# Names of all races (group races and time trials) sorted by their
# configured 'order' value; ties fall back to the race name.
ordered_races = sorted(
    (race['order'], race['name'])
    for race in chain(race_config['groups'], race_config['time-trials'])
)
race_order = [race_name for _order, race_name in ordered_races]

In [49]:
# Points from the group races, one table per cluster.
standings = {
    cluster: get_race_standings(race_urls)
    for cluster, race_urls in cluster_raсes.items()
}

# Each time trial is downloaded once and then scored separately per cluster.
for time_trial in race_config['time-trials']:
    tt_results = get_results(time_trial['results'] + '.json')
    for cluster, cluster_standing in standings.items():
        standings[cluster] = add_tt_to_standing(cluster_standing, tt_results, time_trial['name'])

# Display names applied to the columns when the standings are exported.
renamed_columns = {"name": "Гонщик", "total": "Очки"}

# Put the race columns in `race_order`, add the total, and sort best-first.
for cluster, cluster_standing in standings.items():
    standings[cluster] = (
        cluster_standing
        .set_index('name')
        .loc[:, race_order]
        .assign(total=lambda frame: frame.sum(axis='columns'))
        .sort_values('total', ascending=False)
    )


In [50]:
# Show the resulting order of the race columns.
race_order

['Садовое кольцо', 'Крылатские холмы', 'Верея ITT', 'Верея']

In [51]:
# Export the standings: one Excel sheet and one CSV file per cluster.
# The context manager saves and closes the workbook even when an export step
# raises, so a failed run no longer leaves the file handle open.
with pd.ExcelWriter('data/current_standing.xlsx', engine='xlsxwriter') as writer:
    for cluster in standings:
        cluster_table = standings[cluster].reset_index()
        cluster_table.to_excel(writer, sheet_name=cluster)

        # The CSV uses the display column names and a 1-based row index.
        data = cluster_table.rename(renamed_columns, axis='columns')
        data.index += 1
        data.to_csv(f'data/cluster_{cluster}.csv')

In [40]:
# all_results.sort_values('total', ascending=False).to_excel('current_standing.xlsx')

In [23]:
# Cluster A standing with a 1-based row index, dumped as plain Python rows.
data = standings['A'].reset_index()
data.index = data.index + 1

data.reset_index().to_numpy().tolist()

[[1, 'Разумов Никита', 96.0, 82.0, 100.0, 96.0, 374.0],
 [2, 'Ильин Роман', 100.0, 88.0, 92.0, 90.0, 370.0],
 [3, 'Фокин Михаил', 88.0, 86.0, 94.0, 98.0, 366.0],
 [4, 'Пузанов Дмитрий', 92.0, 96.0, 84.0, 94.0, 366.0],
 [5, 'Тихонин Евгений', 72.0, 94.0, 98.0, 100.0, 364.0],
 [6, 'Анисимов Иван', 94.0, 84.0, 76.0, 86.0, 340.0],
 [7, 'Жданов Александр', 66.0, 80.0, 88.0, 84.0, 318.0],
 [8, 'Шеваров Дмитрий', 38.0, 62.0, 82.0, 76.0, 258.0],
 [9, 'Новиков Савва', 80.0, 72.0, 96.0, 0.0, 248.0],
 [10, 'Соловьев Павел', 44.0, 70.0, 52.0, 60.0, 226.0],
 [11, 'Христенко Максим', 0.0, 60.0, 80.0, 80.0, 220.0],
 [12, 'Герасимов Иван', 28.0, 52.0, 60.0, 68.0, 208.0],
 [13, 'Кузнецов Владимир', 20.0, 40.0, 72.0, 74.0, 206.0],
 [14, 'Трубецкой Сергей', 90.0, 100.0, 0.0, 0.0, 190.0],
 [15, 'Хилькович Денис', 56.0, 38.0, 38.0, 56.0, 188.0],
 [16, 'Курьянов Степан', 0.0, 92.0, 90.0, 0.0, 182.0],
 [17, 'Степанов Андрей', 0.0, 98.0, 0.0, 82.0, 180.0],
 [18, 'Соколов Евгений', 0.0, 0.0, 86.0, 88.0, 174.0]

In [46]:
# Fetch the cluster A results of the «Верея» group race for inspection.
# The URL is taken from `cluster_raсes` because `group_a_races` is not defined
# in any visible cell, so the previous lookup raised NameError on a fresh kernel.
results = get_results(cluster_raсes['A']['Верея'])

In [50]:
# Rows whose rider name starts with «Вилен».
results[results['name'].str.startswith('Вилен')]

Unnamed: 0,bib,name,category,status,rank_abs,result,result_time,team,club
64,35,Виленский Максим,М5 50-54,Q,65.0,02:56:48.56,10608.562,Кластер А. HBFS Black,Кластер А. HBFS Black
