Cyclingrace results

Cyclingrace is an amateur road race series.
Each race is a separate event that awards points towards the season standings.

In [1]:
import requests
import pandas as pd
import yaml

from collections import defaultdict

In [2]:
from cr_racing_results import (
    RaceResults,
    RaceType,
    update_clusters
)

In [3]:
# Load the race definitions; later cells read the 'groups' and 'time-trials' keys.
# The encoding is pinned to UTF-8 so the file decodes the same way on every
# platform (the race names used further down the notebook are Cyrillic, and the
# locale default encoding on e.g. Windows would garble them).
with open('races-config.yaml', encoding='utf-8') as fp:
    race_config = yaml.safe_load(fp)

In [4]:
from itertools import chain, repeat, islice

POINTS = [100,  98,  96,  94,  92,  90,  88,  86,  84,  82,  80,  78,  76,
        74,  72,  70,  68,  66,  64,  62,  60,  58,  56,  54,  52,  50,
        48,  46,  44,  42,  40,  38,  36,  34,  32,  30,  28,  26,  24,
        22,  20,  20,  20,  20,  20,  20,  20,  20,  20,  20]

def _pad_infinite(iterable, padding=None):
    return chain(iterable, repeat(padding))

def _pad(iterable, size, padding=None):
    return islice(_pad_infinite(iterable, padding), size)

def generate_points(n_places: int):
    return list(_pad(POINTS, n_places, 0))

In [5]:
from pathlib import Path


# Official cluster membership: cluster label -> tuple of rider-name prefixes.
# Missing labels default to an empty tuple.
official_clusters = defaultdict(tuple)

clusters_dir = Path('data/clusters/')
# sorted() makes the concatenation order independent of the filesystem.
for file in sorted(clusters_dir.iterdir()):
    if file.name.startswith('.'):
        continue  # hidden files such as .DS_Store / .gitkeep
    # '+' characters around the file name are dropped, so several files can
    # feed the same cluster label.
    cluster = file.name.strip('+')
    with open(file, encoding='utf-8') as fp:
        # Skip blank lines (including the one produced by a trailing newline):
        # an empty string is a prefix of every name, so keeping it would make
        # str.startswith() assign this cluster to every rider.
        racers = tuple(line for line in fp.read().splitlines() if line.strip())
    official_clusters[cluster] += racers

In [6]:
def set_tt_cluster(tt_results: pd.DataFrame, clusters: dict[str, tuple[str, ...]], column_name: str = 'cluster') -> None:
    """Assign a cluster label to every row of `tt_results`, in place.

    Every rider starts in cluster 'C', riders whose `gender` is 'female' are
    moved to 'F', and finally any rider whose `name` starts with one of the
    prefixes listed for a cluster in `clusters` gets that cluster's label.
    Later entries of `clusters` win over earlier ones.

    Args:
        tt_results: frame with at least `name` and `gender` columns; it is
            modified in place.
        clusters: mapping of cluster label -> tuple of rider-name prefixes.
        column_name: name of the column that receives the labels.
    """
    tt_results[column_name] = 'C'
    tt_results.loc[tt_results['gender'] == 'female', column_name] = 'F'
    # Use the mapping that was passed in, not the notebook-level global, so
    # callers can label against different cluster sets.
    for cluster, racers in clusters.items():
        # An empty prefix matches every name; drop such entries.
        prefixes = tuple(prefix for prefix in racers if prefix)
        if not prefixes:
            continue
        # na=False keeps rows with a missing name out of the boolean mask.
        matches = tt_results['name'].str.startswith(prefixes, na=False)
        tt_results.loc[matches, column_name] = cluster

In [7]:
import os
from pathlib import Path
from copy import deepcopy

# Parsed results per race type; the time-trial list is filled by a later cell.
race_results = {'groups': [], 'time-trials': []}

# One RaceResults object per group race listed in the config.
race_results['groups'].extend(
    map(RaceResults.group_from_config, race_config['groups'])
)

In [8]:
def shorten_name(name: str) -> str:
    """Abbreviate the last space-separated word of `name` to its first letter.

    "Иванов Иван" -> "Иванов И". The result is always a prefix of `name`.

    Trailing whitespace is ignored so it cannot produce an empty last word
    (which used to raise IndexError). Names with fewer than two words are
    returned without the trailing whitespace instead of being collapsed to a
    single letter, which would match far too many riders when used as a prefix.
    """
    trimmed = name.rstrip()
    parts = trimmed.split(" ")
    if sum(1 for part in parts if part) < 2:
        return trimmed
    *leading, last = parts
    return " ".join([*leading, last[0]])

# cluster label -> shortened names of everyone who rode a group race in it
group_clusters = defaultdict(set)
for race in race_results['groups']:
    for cluster, results in race.results.items():
        short_names = {shorten_name(full_name) for full_name in results.name.unique()}
        group_clusters[cluster] |= short_names

# Every shortened name seen in any group race, regardless of cluster.
# NOTE(review): an earlier revision also merged official_clusters[cluster]
# into group_clusters here; that step is disabled.
cluster_from_group = set().union(*group_clusters.values())

In [9]:
# Names already claimed by a cluster processed earlier in the loop below.
has_cluster = set()
# Snapshot of the clusters before they are overwritten below.
# NOTE(review): re-running this cell snapshots the already-modified mapping.
original_off_clusters = deepcopy(official_clusters)

for cluster, group in group_clusters.items():
    # Official entries survive only if the rider never appeared in a group race...
    kept_official = set(official_clusters[cluster]) - cluster_from_group
    # ...and group-race riders join the first cluster they are seen in.
    newly_assigned = group - has_cluster
    official_clusters[cluster] = tuple(kept_official | newly_assigned)
    # sizes: original official list, rebuilt list, group-race riders
    print(len(original_off_clusters[cluster]), len(official_clusters[cluster]), len(group))
    has_cluster |= group

250 279 217
438 444 248
603 601 290
288 309 150


In [49]:
# Parse every time trial from the config, labelling riders with the clusters
# in `official_clusters`.
# The list is rebuilt rather than appended to, so re-running this cell does not
# add each time trial a second time (which would duplicate the race columns in
# the standings merged further down).
race_results['time-trials'] = [
    RaceResults.tt_from_config(race, official_clusters)
    for race in race_config['time-trials']
]

In [50]:
def get_points_for_race(race_results, race_name: str | None = None) -> pd.DataFrame:
    """Return rider names with the points earned in one race.

    Rows without a `rank_abs` are dropped, the rest are ordered by `rank_abs`
    (which becomes the index) and receive points from `generate_points` in that
    order. The points column is called `race_name`, or 'points' when no name is
    given.
    """
    points_column = race_name or 'points'
    ranked = race_results.dropna(subset=['rank_abs'])
    ranked = ranked.astype({'rank_abs': int})
    ranked = ranked.sort_values('rank_abs').set_index('rank_abs')
    ranked[points_column] = generate_points(len(ranked))
    return ranked[['name', points_column]]

In [52]:
# cluster label -> frame with one row per rider and one points column per race
cluster_standing = {}

# Group races already come split by cluster.
for race in race_results['groups']:
    for cluster, results in race.results.items():
        race_points = get_points_for_race(results, race.name)
        previous = cluster_standing.get(cluster)
        if previous is None:
            cluster_standing[cluster] = race_points
        else:
            cluster_standing[cluster] = previous.merge(race_points, how='outer', on='name')

# Time trials are one frame per race; slice it by the 'cluster' column.
# Only clusters that already appeared in a group race are scored.
for race in race_results['time-trials']:
    tt_frame = race.results
    for cluster, standing in list(cluster_standing.items()):
        in_cluster = tt_frame[tt_frame['cluster'] == cluster]
        race_points = get_points_for_race(in_cluster, race.name)
        cluster_standing[cluster] = standing.merge(race_points, how='outer', on='name')

In [54]:
from itertools import chain
# Race names sorted by their `order` attribute (ties broken by name),
# group races and time trials together.
ordered_races = sorted(
    (race.order, race.name)
    for race in chain(race_results['groups'], race_results['time-trials'])
)
race_order = [race_name for _, race_name in ordered_races]

In [55]:
# Column headers used in the exported CSV files.
renamed_columns = {
    "name": "Гонщик",
    "total": "Очки"
}

for cluster, standing in list(cluster_standing.items()):
    # After a first run the frame is already indexed by rider name; only move
    # 'name' into the index when it is still a column, so the cell can be
    # re-run without a KeyError.
    if 'name' in standing.columns:
        standing = standing.set_index('name')
    # reindex() puts the race columns in calendar order, drops a stale 'total'
    # from a previous run, and adds an empty column for a race this cluster
    # has no results in (instead of raising KeyError).
    points_by_race = standing.reindex(columns=race_order)
    points_by_race['total'] = points_by_race.sum(axis='columns')
    cluster_standing[cluster] = points_by_race.sort_values('total', ascending=False)


In [56]:
# Export the standings: one sheet per cluster in a single workbook plus one CSV
# per cluster. The context manager saves and closes the workbook even if one of
# the exports raises.
with pd.ExcelWriter('data/current_standing.xlsx', engine='xlsxwriter') as writer:
    for cluster, standing in cluster_standing.items():
        flat = standing.reset_index()
        flat.to_excel(writer, sheet_name=cluster)

        # The CSV uses the translated headers and a 1-based row index.
        data = flat.rename(renamed_columns, axis='columns')
        data.index += 1
        data.to_csv(f'data/cluster_{cluster}.csv')

# Менялись ли кластеры между гонками

In [16]:
from functools import reduce
# One row per rider, one column per group race holding the cluster ridden in.
# NOTE(review): `all_results` must be the {race name: [per-cluster frames]}
# mapping built in the cell directly below (In [22]); on a fresh kernel that
# cell has to run before this one.
per_race_frames = (pd.concat(cluster_frames) for cluster_frames in all_results.values())
all_dfs = reduce(
    lambda merged, race_frame: merged.merge(race_frame, on=['name'], how='outer'),
    per_race_frames,
)

In [22]:
# race name -> list of frames (one per cluster) with columns ['name', <race name>],
# where the second column holds the cluster the rider actually raced in.
all_results = defaultdict(list)
for race in race_results['groups']:
    for cluster, results in race.results.items():
        # These calls add label columns to the race frame itself (in place);
        # the columns are not part of what is collected below.
        set_tt_cluster(results, official_clusters, 'fixed_cluster')
        set_tt_cluster(results, original_off_clusters, 'official_cluster')
        results['real_cluster'] = cluster
        raced_in = results[['name', 'real_cluster']].rename(columns={'real_cluster': race.name})
        all_results[race.name].append(raced_in)

In [23]:
def _cluster_changed(frame: pd.DataFrame, first: str, second: str) -> pd.DataFrame:
    """Rows of `frame` where both race columns are filled in but differ."""
    raced_both = frame[first].notna() & frame[second].notna()
    return frame.loc[raced_both & (frame[first] != frame[second])]


# Riders whose cluster differs between any two of the three group races.
# The third comparison used to repeat the first one, so the
# 'Садовое кольцо' vs 'Дубна' pair was never checked.
pd.concat([
    _cluster_changed(all_dfs, 'Дубна', 'Верея'),
    _cluster_changed(all_dfs, 'Садовое кольцо', 'Верея'),
    _cluster_changed(all_dfs, 'Садовое кольцо', 'Дубна'),
]).drop_duplicates()

Unnamed: 0,name,Садовое кольцо,Верея,Дубна
78,Сальников Роман,A,A,B
156,Карпов Антон,A,A,B
370,Дмитриев Андрей,C,C,B
712,Худиев Омар,,B,A
188,Попов Сергей,C,B,B
362,Нечаев Виктор,C,B,B
548,Руез Фабьен,C,A,
