In [None]:
import pandas as pd

import vendee_globe_api.constants as c
from vendee_globe_api.utils import data_prep_web

In [6]:
# Load the three 2024 parquet inputs; their locations are defined in the
# project constants module (imported as `c`).
df_race, df_web, df_wiki = (
    pd.read_parquet(path)
    for path in (c.race_2024_path, c.web_2024_path, c.wiki_2024_path)
)

In [7]:
# Skipper-name corrections handed to data_prep_web as pairs.
# NOTE(review): presumably (spelling in the web data, spelling to use instead)
# so that the "skipper" merge key matches the other tables — confirm against
# data_prep_web's implementation.
skipper_corrections = [("Kojiro Shiraishi", "Kōjirō Shiraishi")]
df_web = data_prep_web(df_web, skipper_corrections=skipper_corrections)

In [8]:
# Attach the web and wiki tables to every race row, keyed on "skipper".
# Left joins keep all race rows; columns that exist on both sides of a merge
# receive pandas' default "_x" / "_y" suffixes.
df = df_race.merge(df_web, on="skipper", how="left").merge(
    df_wiki, on="skipper", how="left"
)

In [9]:
# Split the merged frame into per-report (dynamic) and static columns.
dynamic_cols = [
    "rang",
    "heure",
    "latitude",
    "longitude",
    "cap_30min",
    "vitesse_30min",
    "VMG_30min",
    "distance_30min",
    "cap_last",
    "vitesse_last",
    "VMG_last",
    "distance_last",
    "cap_24h",
    "vitesse_24h",
    "VMG_24h",
    "distance_24h",
    "DTF",
    "DTL",
    "date",
]

# Every column that is not listed above is treated as static information.
static_cols = [col for col in df.columns if col not in dynamic_cols]

# NOTE(review): "skipper" is not part of dynamic_cols, so df_race ends up with
# no column identifying which boat a report belongs to. If downstream code
# needs to join df_race back to df_infos, add "skipper" to df_race here.
df_race = df[dynamic_cols].copy()
df_infos = df[static_cols].copy()

# French -> English column names. Keys ending in "_x" refer to the left-hand
# copy of a column that was duplicated by the merges; the suffix is ignored
# when the mapping is applied to df_infos (see below).
rename_dict = {
    "rang": "rank",
    "heure": "time",
    "latitude": "latitude",
    "longitude": "longitude",
    "cap_30min": "heading_30min",
    "vitesse_30min": "speed_30min",
    "VMG_30min": "vmg_30min",
    "distance_30min": "distance_30min",
    "cap_last": "heading_last",
    "vitesse_last": "speed_last",
    "VMG_last": "vmg_last",
    "distance_last": "distance_last",
    "cap_24h": "heading_24h",
    "vitesse_24h": "speed_24h",
    "VMG_24h": "vmg_24h",
    "distance_24h": "distance_24h",
    "DTF": "distance_to_finish",
    "DTL": "distance_to_leader",
    "date": "date",
    "skipper": "skipper",
    "voilier": "boat_name",
    "nat_voile": "boat_nationality",
    "color": "boat_color",
    "first_name_x": "skipper_first_name",
    "last_name_x": "skipper_last_name",
    "team_x": "team",
    "Anciens noms du bateau_x": "previous_boat_names",
    "Architecte_x": "boat_architect",
    "Chantier_x": "boat_shipyard",
    "Date de lancement_x": "launch_date",
    "Longueur_x": "boat_length",
    "Largeur_x": "boat_width",
    "Tirant d'eau_x": "boat_draft",
    "Hauteur mât_x": "mast_height",
    "Surface de voiles au près_x": "sail_area_upwind",
    "Surface de voiles au portant_x": "sail_area_downwind",
    "Poids_x": "boat_weight",
    "Ancien nom du bateau_x": "previous_boat_name",
    "Déplacement (poids)_x": "displacement_weight",
    "genre_x": "skipper_gender",
    "nationalité_x": "skipper_nationality",
    "age_x": "skipper_age",
    "participations_x": "race_participations",
    "appendices_x": "boat_appendices",
    "architecte_x": "boat_architect",
    "chantier_x": "boat_shipyard",
    "annee_x": "boat_year",
    "foil": "has_foil",
}


def _coalesce_columns(frame: pd.DataFrame, sources: list, target: str) -> pd.DataFrame:
    """Collapse ``sources`` into one column named ``target``.

    For each row the value comes from the first source (in the given order)
    that is non-null (``Series.combine_first``). The result keeps the position
    of ``sources[0]``; the remaining sources are dropped. ``frame`` itself is
    not modified, a new DataFrame is returned.
    """
    keep, *extras = sources
    merged = frame[keep]
    for name in extras:
        merged = merged.combine_first(frame[name])
    out = frame.drop(columns=extras)
    out[keep] = merged
    return out.rename(columns={keep: target})


# Step 1 - collapse the "_x" / "_y" copies created by the merges.
# This must happen BEFORE the renaming: once "first_name_x" has been renamed
# to "skipper_first_name" it can no longer be paired with "first_name_y".
# Only a trailing suffix is stripped (slicing, not str.replace), so names that
# merely contain "_x" / "_y" are left alone.
suffixed_bases = list(
    dict.fromkeys(
        col[:-2] for col in df_infos.columns if col.endswith(("_x", "_y"))
    )
)
for base in suffixed_bases:
    # Priority: left copy, then right copy, then an already-unsuffixed column.
    sources = [
        name
        for name in (f"{base}_x", f"{base}_y", base)
        if name in df_infos.columns
    ]
    if sources:
        df_infos = _coalesce_columns(df_infos, sources, base)

# Step 2 - translate column names to English.
df_race = df_race.rename(columns=rename_dict)

# The suffixes are gone after step 1, so match on the un-suffixed key.
info_rename = {
    (src[:-2] if src.endswith("_x") else src): dst
    for src, dst in rename_dict.items()
}
# Several source columns can share one English name (e.g. "Architecte" and
# "architecte" -> "boat_architect"). Group them so they are merged into a
# single column instead of producing duplicate column labels; the source
# listed first in rename_dict takes precedence.
sources_by_target = {}
for src, dst in info_rename.items():
    if src in df_infos.columns:
        sources_by_target.setdefault(dst, []).append(src)
for dst, sources in sources_by_target.items():
    df_infos = _coalesce_columns(df_infos, sources, dst)

# The static information is repeated on every race row; keep one copy.
df_infos = df_infos.drop_duplicates()

# Sort by date so that consecutive rows with the same date are contiguous,
# then number each distinct date block 0, 1, 2, ... as "batch".
df_race = df_race.sort_values(by="date")
df_race["batch"] = df_race["date"].ne(df_race["date"].shift()).cumsum() - 1

In [2]:
# Reload the race and info tables from the parquet paths in the constants module.
# NOTE(review): this replaces the df_race / df_infos built in the previous cell,
# and no visible cell writes these files; the execution counts are also out of
# order, so confirm the notebook still works with Restart Kernel -> Run All.
df_race, df_infos = (
    pd.read_parquet(path) for path in (c.df_race_path, c.df_infos_path)
)

In [15]:
# Replace every missing value (NaN / NaT / None) with Python ``None``.
# An element-wise ``map`` returning None is not enough: pandas re-infers the
# dtype of each mapped column, so the None values produced for numeric columns
# are converted straight back to NaN (and ``pd.isna`` inside an ``if`` raises
# on array-like cells). Casting to ``object`` first lets ``where`` store real
# ``None`` objects in every column.
df_cleaned = df_infos.astype(object).where(df_infos.notna(), None)

In [None]:
# Boolean mask of the cleaned frame: True wherever a value is missing.
pd.isna(df_cleaned)

Unnamed: 0,nat_voile,skipper_voilier,skipper,voilier,color,first_name,last_name,team,Anciens noms du bateau,Architecte,...,genre,nationalité,age,participations,bateau,appendices,architecte,chantier,annee,foil
0,False,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
5,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
6,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7,False,False,False,False,False,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
8,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
9,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
