In [2]:
import requests, pandas as pd, math
from src.utils.file_utils import convert_time_to_seconds
from Levenshtein import ratio
import nest_asyncio, asyncio, pandas as pd
nest_asyncio.apply() 

from src.utils.ffa_fast import get_all_results_fast as get_all_athlete_results
from src.utils.http_utils import search_athletes            # FFA autocomplete

In [3]:
def find_most_similar_club(club_name, clubs_list):
    """Trouve le club le plus similaire dans une liste de clubs.
    
    Args:
        club_name (str): Le nom du club à chercher
        clubs_list (list): Liste des noms de clubs possibles
        
    Returns:
        str: Le club le plus similaire trouvé ou None si aucun match satisfaisant
    """
    # Nettoyage des noms de clubs
    club_name = club_name.strip().lower()
    
    best_match = None
    best_ratio = 0
    
    for club in clubs_list:
        if not club or len(club.strip()) == 0:
            continue
            
        # Nettoyage du club de la liste
        clean_club = club.strip().lower()
        # Suppression du tiret en début de ligne s'il existe
        if clean_club.startswith('- '):
            clean_club = clean_club[2:]
            
        # Calcul de la similarité
        similarity = ratio(club_name, clean_club)
        
        # Mise à jour du meilleur match si la similarité est meilleure
        if similarity > best_ratio:
            best_ratio = similarity
            best_match = club
            
    # On ne renvoie le match que si la similarité est suffisante (> 0.6)
    return best_match if best_ratio > 0.6 else None

In [5]:
# 1) paramètres
chOID    = "6924b473a76024287d28cdb4"
groupOID = "6924b73145477b54b874d922"
url      = "https://athle.live/api/results"

params  = {"chOID": chOID, "groupOID": groupOID}
headers = {"Accept": "application/json",
           "User-Agent": "Mozilla/5.0 (python-requests)"}

# 2) requête – surtout **ne PAS** envoyer If-None-Match si tu veux la réponse complète
r = requests.get(url, params=params, headers=headers, timeout=10)
r.raise_for_status()
j = r.json()

# 3) extraction des engagés
rows = []
for c in j.get("commitments", []):
    perf_ms = None

    # ordre de priorité : perf engagée (pe), PB, SB, temps inscrit (tReg)
    if isinstance(c.get("pe"), dict) and c["pe"].get("u") == "ms":
        perf_ms = c["pe"]["v"]
    elif isinstance(c.get("pb"), dict) and c["pb"].get("u") == "ms":
        perf_ms = c["pb"]["v"]
    elif isinstance(c.get("sb"), dict) and c["sb"].get("u") == "ms":
        perf_ms = c["sb"]["v"]
    elif isinstance(c.get("tReg"), dict) and c["tReg"].get("u") == "ms":
        perf_ms = c["tReg"]["v"]

        # --- CORRECTION ---
    # On convertit en float si c'est une chaîne
    if perf_ms is not None:
        try:
            perf_ms = float(perf_ms)
        except (ValueError, TypeError):
            perf_ms = None
    # ------------------

    # mise en forme mm:ss.cc (ex. 1:59.84) si on a bien une valeur
    if perf_ms is not None and math.isfinite(perf_ms):
        minutes, ms = divmod(int(perf_ms), 60000)
        seconds     = ms / 1000
        perf_str    = f"{minutes}:{seconds:05.2f}"
    else:
        perf_str    = ""

    rows.append({
        "Prénom" : c.get("first"),
        "Nom"    : c.get("last"),
        "Club"   : c.get("clubName") or c.get("club"),
        "Temps d'engagement" : perf_str
    })

df_athle_live = pd.DataFrame(rows)
df_athle_live.to_csv("engages_1500m_tcm.csv", index=False, encoding="utf-8")

print(f"{len(df_athle_live)} athlètes exportés dans engages_1500m_tcm.csv")
print(df_athle_live.head())


78 athlètes exportés dans engages_1500m_tcm.csv
     Prénom            Nom                          Club Temps d'engagement
0    Nathan        BRUYERE                 AL ECHIROLLES            3:55.00
1   Anthony  COEUR D ACIER    SARAN LOIRET ATHLETIC CLUB            3:49.80
2    Franck          DIRAT                     ATHLE 92*            3:50.06
3   Mickael           YEYE           DYNAMIC AULNAY CLUB            3:45.00
4  Francois       MEILLEUR  ENTENTE ANGEVINE ATHLETISME*            4:15.00


In [6]:
df_athle_live[df_athle_live.Prénom =="Paul"]

Unnamed: 0,Prénom,Nom,Club,Temps d'engagement
10,Paul,COEUDEVEZ,AVON ATHLETISME CLUB,3:55.00
54,Paul,LASCABETTES,CA MONTREUIL 93,3:43.00


# TEST

In [13]:
for i in range(len(df_athle_live)):
    
    ath = df_athle_live.iloc[i]['Nom']
    
    ath = ' '.join(ath.split())
    print(ath)

BRUYERE
COEUR D ACIER
DIRAT
YEYE
MEILLEUR
DELACROIX
LANGLAIS
MOULINIE
VAUDOIS
LARUE
COEUDEVEZ
PERTHUE
GARDES
AUBANTON
CATTEAU
CLAREBOUT
LASNIER
DALIL
BENDJADOUR
LE RAI
BONNET
MARREC
IMADE
CORNU
ZAAF
JACQUIN
ARNAULT
MADKOUR
REMONDI
COUGNAUD
AUBIN
HAMON
GRENECHE
KERMARREC
ABDISALAM ADAN MOHAMED
HAMID ALI
SOMBAERT
HARDOUIN
BARY
LIOT
NANCEL
EZOUHRI
ALLARD
COGET
GALTIER-VILLA
LE FUR
LIMIER
ISLAME-GADRAT
OBLET
PIQUART
LECOEUR
ROCCA
SANCHEZ
ZAMOUM
LASCABETTES
KERVELLA
ENJOLRAS
GUILLEMIN
LE ROUX
DUTERTRE
GRIVEAU-MATTEI
LEMOINE
BOUNABI
DAO
BALCOU
DEPRET-SEMET
BOUAITA
CADIOU
CHEVALIER VADE
CHEVALIER VADE
COUET
DEYME
ENGUEHARD
FALHY
FOUBERT
JARNET
LEHMAN
MOUTIMA BACKENGA


In [16]:
search_athletes("vaudois")

[{'name': 'VAUDOIS Aurelien',
  'club': ' - Athle 92*',
  'sex': 'M',
  'seq': '1106614'},
 {'name': 'VAUDOIS Frederic', 'club': '', 'sex': 'M', 'seq': '880207'},
 {'name': 'VAUDOIS Louane',
  'club': ' - Efs Reims A. *',
  'sex': 'F',
  'seq': '2707199'}]

In [19]:
test = get_all_athlete_results("1106614")

In [23]:
# TEST
for i in range(len(df_athle_live)):
    
    ath = df_athle_live.iloc[i]['Nom']
    
    ath = ' '.join(ath.split())
    print(ath)
    
    results = search_athletes(ath)
    print(f"Results found: {len(results)}")
    
    if len(results) > 1:
        
        club = df_athle_live.iloc[i]['Club']
        
        clubs_candidat = []
        for i in range(len(results)):
            clubs_candidat.append(results[i]['club'])
        
        most_similar_club = find_most_similar_club(club, clubs_candidat)
        
        if most_similar_club is None:
            print(f"No similar club found for {club}. Skipping athlete.")
            continue
        
        index = clubs_candidat.index(most_similar_club)
        
        seq = results[index]['seq']
        
        df = get_all_athlete_results(results[index]['seq'])
        
        
        df = df[(df.Epreuve == '1 500m') | (df.Epreuve == '1 500m Piste Courte')]
        
        if df.empty:
            print(f"No results found for {ath}.")
            continue

        df = df[~df["Performance"].str.contains("|".join(["DNS", "DNF", "AB", "DQ"]), na=False)]
        df["time"] = df["Performance"].apply(convert_time_to_seconds)
        
        best_1500_2025 = df[df.Annee == "2025"]['time'].min()
        
        best_1500_all_time = df['time'].min()
        
        year_of_best_1500 = df.loc[df['time'].idxmin()]['Annee']
        
        print(f"Best 1500m time in 2025: {best_1500_2025}")
        print(f"Best 1500m time all time: {best_1500_all_time}")
        print(f"Year of best 1500m time: {year_of_best_1500}")
        
    elif len(results) == 1:
        seq = results[0]['seq']
        
        df = get_all_athlete_results(seq)
        
        df = df[(df.Epreuve == '1 500m') | (df.Epreuve == '1 500m Piste Courte')]
        
        if df.empty:
            print(f"No results found for {ath}.")
            continue

        df = df[~df["Performance"].str.contains("|".join(["DNS", "DNF", "AB", "DQ"]), na=False)]
        df["time"] = df["Performance"].apply(convert_time_to_seconds)
        
        best_1500_2025 = df[df.Annee == "2025"]['time'].min()
        
        best_1500_all_time = df['time'].min()
        
        year_of_best_1500 = df.loc[df['time'].idxmin()]['Annee']
        
        print(f"Best 1500m time in 2025: {best_1500_2025}")
        print(f"Best 1500m time all time: {best_1500_all_time}")
        print(f"Year of best 1500m time: {year_of_best_1500}")
        
        
    else:
        print(f"No results found for {ath}.")
        continue
    
                                
            
            
            
        
    

BRUYERE
Results found: 29
No results found for BRUYERE.
COEUR D ACIER
Results found: 1
Best 1500m time in 2025: 232.04
Best 1500m time all time: 229.47
Year of best 1500m time: 2023
DIRAT
Results found: 3
Best 1500m time in 2025: 225.8
Best 1500m time all time: 225.8
Year of best 1500m time: 2025
YEYE
Results found: 3
Best 1500m time in 2025: nan
Best 1500m time all time: 227.95
Year of best 1500m time: 2021
MEILLEUR
Results found: 2
Best 1500m time in 2025: 264.33
Best 1500m time all time: 264.33
Year of best 1500m time: 2025
DELACROIX
Results found: 38
No results found for DELACROIX.
LANGLAIS
Results found: 34


KeyboardInterrupt: 

# version lente

In [8]:
import pandas as pd
from tqdm import tqdm

# ── fonctions utilitaires ────────────────────────────────────────────────────
IGNORED_MARKS = ("DNS", "DNF", "AB", "DQ")

def clean_name(row: pd.Series) -> str:
    """Prénom + Nom normalisés, sans double-espaces."""
    return ' '.join(f"{row['Prénom']} {row['Nom']}".split())

def choose_match(results, target_club):
    """Heuristique club + premier résultat sinon."""
    if not results:
        return None
    if len(results) == 1:
        return results[0]

    clubs = [r["club"] for r in results]
    best = find_most_similar_club(target_club, clubs)
    return results[clubs.index(best)] if best else None

def extract_bests(df800):
    """Renvoie (best_all_time, best_2025, year_of_best_all_time) ou (None, None, None)."""
    if df800.empty:
        return None, None, None

    df800 = df800[~df800["Perf."].str.contains("|".join(IGNORED_MARKS), na=False)]
    df800["time"] = df800["Perf."].apply(convert_time_to_seconds)

    if df800.empty:
        return None, None, None

    best_all = df800["time"].min()
    best_2025 = df800.loc[df800.Annee == "2025", "time"].min()  # NaN si pas 2025
    year_best = df800.loc[df800["time"].idxmin(), "Annee"]
    return best_all, best_2025, year_best

# ── pipeline principal ───────────────────────────────────────────────────────
memo_seq = {}            #   nom complet  → seq  (évite les doubles hits API)
memo_results = {}        #   seq          → (best_all, best_2025, year_best)

best_all_col   = []
best_2025_col  = []
year_best_col  = []

for _, row in tqdm(df_athle_live.iterrows(), total=len(df_athle_live)):
    full_name = clean_name(row)

    # 1) Trouver ou récupérer le seq -----------------------------------------
    if full_name not in memo_seq:
        search = search_athletes(full_name)
        match  = choose_match(search, row["Club"])
        if not match:
            memo_seq[full_name] = None          # on retient l’échec pour ne pas recommencer
        else:
            memo_seq[full_name] = match["seq"]

    seq = memo_seq[full_name]
    if not seq:
        best_all_col.append(None); best_2025_col.append(None); year_best_col.append(None)
        continue

    # 2) Scraper / récupérer les perfs ---------------------------------------
    if seq not in memo_results:
        df_res   = get_all_athlete_results(seq)
        df_800   = df_res[df_res.Epreuve.isin(["800m", "800m Piste Courte"])]
        memo_results[seq] = extract_bests(df_800)

    best_all, best_2025, year_best = memo_results[seq]
    best_all_col.append(best_all)
    best_2025_col.append(best_2025)
    year_best_col.append(year_best)

# ── enrichissement du DataFrame ---------------------------------------------
df_athle_live = df_athle_live.assign(best_800_all_time=best_all_col,
                                     best_800_2025=best_2025_col,
                                     year_of_best_800=year_best_col)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df800["time"] = df800["Perf."].apply(convert_time_to_seconds)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df800["time"] = df800["Perf."].apply(convert_time_to_seconds)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df800["time"] = df800["Perf."].apply(convert_time_to_seconds)
A value is trying t

KeyboardInterrupt: 

#  version rapide

In [12]:
import pandas as pd
from tqdm import tqdm
import pickle
import os
import time
import asyncio
import aiohttp
from concurrent.futures import ThreadPoolExecutor
import threading
from queue import Queue

# ── Configuration ────────────────────────────────────────────────────────
IGNORED_MARKS = ("DNS", "DNF", "AB", "DQ")
CACHE_FILE = "athlete_cache.pkl"
MAX_CONCURRENT = 10  # Nombre de requêtes simultanées

# ── Fonctions utilitaires ────────────────────────────────────────────────
def clean_name(row: pd.Series) -> str:
    return ' '.join(f"{row['Prénom']} {row['Nom']}".split())

def choose_match(results, target_club):
    if not results:
        return None
    if len(results) == 1:
        return results[0]
    clubs = [r["club"] for r in results]
    best = find_most_similar_club(target_club, clubs)
    return results[clubs.index(best)] if best else None

def extract_bests(df800):
    if df800.empty:
        return None, None, None
    df800 = df800[~df800["Perf."].str.contains("|".join(IGNORED_MARKS), na=False)]
    df800["time"] = df800["Perf."].apply(convert_time_to_seconds)
    if df800.empty:
        return None, None, None
    best_all = df800["time"].min()
    best_2025 = df800.loc[df800.Annee == "2025", "time"].min()
    year_best = df800.loc[df800["time"].idxmin(), "Annee"]
    return best_all, best_2025, year_best

# ── Cache management ─────────────────────────────────────────────────────
def load_cache():
    if os.path.exists(CACHE_FILE):
        try:
            with open(CACHE_FILE, 'rb') as f:
                return pickle.load(f)
        except:
            pass
    return {"seq": {}, "results": {}}

def save_cache(cache):
    try:
        with open(CACHE_FILE, 'wb') as f:
            pickle.dump(cache, f)
    except Exception as e:
        print(f"Erreur sauvegarde: {e}")

# ── Approche par batch avec threading ────────────────────────────────────
def worker_search_athletes(work_queue, results_queue, cache, lock):
    """Worker thread pour la recherche d'athlètes."""
    while True:
        item = work_queue.get()
        if item is None:
            break
        
        idx, full_name, club = item
        try:
            search = search_athletes(full_name)
            match = choose_match(search, club)
            seq = match["seq"] if match else None
            
            with lock:
                cache["seq"][full_name] = seq
            
            results_queue.put((idx, seq))
            time.sleep(0.1)  # Petite pause
            
        except Exception as e:
            print(f"Erreur recherche {full_name}: {e}")
            with lock:
                cache["seq"][full_name] = None
            results_queue.put((idx, None))
            time.sleep(0.5)
        
        work_queue.task_done()

def worker_get_results(work_queue, results_queue, cache, lock):
    """Worker thread pour récupérer les résultats."""
    while True:
        item = work_queue.get()
        if item is None:
            break
        
        idx, seq = item
        try:
            df_res = get_all_athlete_results(seq)
            df_800 = df_res[df_res.Epreuve.isin(["800m", "800m Piste Courte"])]
            result = extract_bests(df_800)
            
            with lock:
                cache["results"][seq] = result
            
            results_queue.put((idx, result))
            time.sleep(0.1)
            
        except Exception as e:
            print(f"Erreur résultats seq {seq}: {e}")
            result = (None, None, None)
            with lock:
                cache["results"][seq] = result
            results_queue.put((idx, result))
            time.sleep(0.5)
        
        work_queue.task_done()

def process_batch_threaded(items, worker_func, cache, num_workers=5):
    """Traite un batch d'items avec des threads."""
    work_queue = Queue()
    results_queue = Queue()
    lock = threading.Lock()
    
    # Créer et démarrer les workers
    workers = []
    for _ in range(num_workers):
        worker = threading.Thread(target=worker_func, args=(work_queue, results_queue, cache, lock))
        worker.daemon = True
        worker.start()
        workers.append(worker)
    
    # Ajouter les tâches
    for item in items:
        work_queue.put(item)
    
    # Collecter les résultats
    results = {}
    for _ in tqdm(range(len(items)), desc="Traitement batch"):
        idx, result = results_queue.get()
        results[idx] = result
    
    # Arrêter les workers
    for _ in workers:
        work_queue.put(None)
    for worker in workers:
        worker.join()
    
    return results

# ── Version ultra-optimisée ──────────────────────────────────────────────
def process_athletes_ultra_fast(df_athle_live):
    """Version ultra-rapide avec traitement par batch et threading."""
    
    cache = load_cache()
    print(f"Cache initial: {len(cache['seq'])} noms, {len(cache['results'])} résultats")
    
    # Identifier les tâches à faire
    search_tasks = []
    result_tasks = []
    
    for idx, row in df_athle_live.iterrows():
        full_name = clean_name(row)
        
        if full_name not in cache["seq"]:
            search_tasks.append((idx, full_name, row["Club"]))
        elif cache["seq"][full_name] and cache["seq"][full_name] not in cache["results"]:
            result_tasks.append((idx, cache["seq"][full_name]))
    
    print(f"À traiter: {len(search_tasks)} recherches, {len(result_tasks)} résultats")
    
    # Phase 1: Recherche des athlètes par batch
    if search_tasks:
        print("Phase 1: Recherche des athlètes...")
        batch_size = 50
        for i in range(0, len(search_tasks), batch_size):
            batch = search_tasks[i:i+batch_size]
            process_batch_threaded(batch, worker_search_athletes, cache, num_workers=3)
            save_cache(cache)  # Sauvegarder après chaque batch
            time.sleep(1)  # Pause entre les batchs
    
    # Recalculer les tâches de résultats après la phase 1
    result_tasks = []
    for idx, row in df_athle_live.iterrows():
        full_name = clean_name(row)
        seq = cache["seq"].get(full_name)
        if seq and seq not in cache["results"]:
            result_tasks.append((idx, seq))
    
    # Phase 2: Récupération des résultats par batch
    if result_tasks:
        print("Phase 2: Récupération des résultats...")
        batch_size = 30
        for i in range(0, len(result_tasks), batch_size):
            batch = result_tasks[i:i+batch_size]
            process_batch_threaded(batch, worker_get_results, cache, num_workers=3)
            save_cache(cache)
            time.sleep(1)
    
    # Phase 3: Construction des résultats finaux
    print("Phase 3: Construction des colonnes...")
    best_all_col = []
    best_2025_col = []
    year_best_col = []
    
    for _, row in df_athle_live.iterrows():
        full_name = clean_name(row)
        seq = cache["seq"].get(full_name)
        
        if seq and seq in cache["results"]:
            best_all, best_2025, year_best = cache["results"][seq]
        else:
            best_all, best_2025, year_best = None, None, None
        
        best_all_col.append(best_all)
        best_2025_col.append(best_2025)
        year_best_col.append(year_best)
    
    save_cache(cache)
    return best_all_col, best_2025_col, year_best_col

# ── Version simple mais plus rapide ──────────────────────────────────────
def process_athletes_simple_fast(df_athle_live):
    """Version simple mais avec optimisations de base."""
    
    cache = load_cache()
    
    # Pré-calculer tous les noms pour éviter les recalculs
    names_map = {}
    for idx, row in df_athle_live.iterrows():
        names_map[idx] = clean_name(row)
    
    print("Phase 1: Recherche des athlètes manquants...")
    search_count = 0
    for idx, row in tqdm(df_athle_live.iterrows(), desc="Recherche"):
        full_name = names_map[idx]
        
        if full_name not in cache["seq"]:
            try:
                search = search_athletes(full_name)
                match = choose_match(search, row["Club"])
                cache["seq"][full_name] = match["seq"] if match else None
                search_count += 1
                
                # Sauvegarder tous les 25 nouveaux
                if search_count % 25 == 0:
                    save_cache(cache)
                
                time.sleep(0.2)  # Pause réduite
                
            except Exception as e:
                cache["seq"][full_name] = None
                time.sleep(1)
    
    save_cache(cache)
    
    print("Phase 2: Récupération des résultats manquants...")
    result_count = 0
    for idx, row in tqdm(df_athle_live.iterrows(), desc="Résultats"):
        full_name = names_map[idx]
        seq = cache["seq"].get(full_name)
        
        if seq and seq not in cache["results"]:
            try:
                df_res = get_all_athlete_results(seq)
                df_800 = df_res[df_res.Epreuve.isin(["800m", "800m Piste Courte"])]
                cache["results"][seq] = extract_bests(df_800)
                result_count += 1
                
                if result_count % 25 == 0:
                    save_cache(cache)
                
                time.sleep(0.2)
                
            except Exception as e:
                cache["results"][seq] = (None, None, None)
                time.sleep(1)
    
    save_cache(cache)
    
    # Construction finale
    print("Construction des colonnes finales...")
    best_all_col = []
    best_2025_col = []
    year_best_col = []
    
    for idx in df_athle_live.index:
        full_name = names_map[idx]
        seq = cache["seq"].get(full_name)
        
        if seq and seq in cache["results"]:
            best_all, best_2025, year_best = cache["results"][seq]
        else:
            best_all, best_2025, year_best = None, None, None
        
        best_all_col.append(best_all)
        best_2025_col.append(best_2025)
        year_best_col.append(year_best)
    
    return best_all_col, best_2025_col, year_best_col

# ── Utilisation ──────────────────────────────────────────────────────────
# Choisissez la version selon votre situation

# Version ultra-rapide (recommandée si le serveur peut gérer la charge)
# best_all_col, best_2025_col, year_best_col = process_athletes_ultra_fast(df_athle_live)

# Version simple mais optimisée (plus sûre)
best_all_col, best_2025_col, year_best_col = process_athletes_simple_fast(df_athle_live)

# Enrichissement du DataFrame
df_athle_live = df_athle_live.assign(
    best_800_all_time=best_all_col,
    best_800_2025=best_2025_col,
    year_of_best_800=year_best_col
)

print(f"Traitement terminé pour {len(df_athle_live)} athlètes")

Phase 1: Recherche des athlètes manquants...


Recherche: 83it [01:10,  1.18it/s]


Phase 2: Récupération des résultats manquants...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df800["time"] = df800["Perf."].apply(convert_time_to_seconds)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df800["time"] = df800["Perf."].apply(convert_time_to_seconds)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df800["time"] = df800["Perf."].apply(convert_time_to_seconds)
A value is trying t

Construction des colonnes finales...
Traitement terminé pour 83 athlètes





In [15]:
df_athle_live.head()

Unnamed: 0,Prénom,Nom,Club,Temps d'engagement,best_800_all_time,best_800_2025,year_of_best_800
0,Jeremy,DACHICOURT,ATHLETIC CLUBS 92*,1:57.00,1:58.06,1:58.06,2025.0
1,Lucas,CANTALOUBE,ATHLETIC CLUBS 92*,1:57.63,1:57.63,1:58.55,2024.0
2,Hugo,DEL DIN,AC DU PAYS DE MEAUX,1:55.00,1:59.58,,2024.0
3,Valere,GRAFFIN,SAM PARIS 12,1:59.90,1:58.42,,2015.0
4,Max,FRADILLON,SAM PARIS 12,,,,


In [14]:
def format_time(x):

    minutes = int(x) // 60
    seconds = int(x) % 60
    centiseconds = int((x * 100) % 100)
    return f"{minutes}:{seconds:02d}.{centiseconds:02d}"

df_athle_live.best_800_all_time = df_athle_live.best_800_all_time.apply(lambda x: format_time(x) if pd.notnull(x) else x)
df_athle_live.best_800_2025 = df_athle_live.best_800_2025.apply(lambda x: format_time(x) if pd.notnull(x) else x)

# df_athle_live.to_excel('800m_TCM_st_maur_4juin.xlsx', index=False)

In [26]:
import pandas as pd
from tqdm import tqdm
import pickle
import os
import time
import asyncio
import nest_asyncio

# Important pour que l'asyncio fonctionne dans un notebook
nest_asyncio.apply()

# ── Configuration ────────────────────────────────────────────────────────
IGNORED_MARKS = ("DNS", "DNF", "AB", "DQ")
CACHE_FILE = "athlete_cache_1500.pkl"  # Fichier cache spécifique au 1500m
EPREUVES_CIBLES = ["1 500m", "1 500m Piste Courte"] # <--- Adaptation 1500m

# ── Fonctions utilitaires ────────────────────────────────────────────────
def clean_name(row: pd.Series) -> str:
    # Utilise 'Prénom' et 'Nom' depuis vos colonnes
    return ' '.join(f"{row['Prénom']} {row['Nom']}".split())

def choose_match(results, target_club):
    if not results:
        return None
    if len(results) == 1:
        return results[0]
    
    # Si le club cible est vide ou manquant, on prend le premier résultat par défaut
    if not target_club or pd.isna(target_club):
         return results[0]

    clubs = [r["club"] for r in results]
    best = find_most_similar_club(target_club, clubs)
    
    # Si un club similaire est trouvé, on renvoie ce résultat, sinon None
    return results[clubs.index(best)] if best else None

def extract_bests(df_epreuve):
    """Extrait les meilleures perfs (All Time, 2025, Année du RP)."""
    if df_epreuve.empty:
        return None, None, None

    # Nettoyage des DNF/DNS/etc.
    df_epreuve = df_epreuve[~df_epreuve["Performance"].str.contains("|".join(IGNORED_MARKS), na=False)]
    
    # Conversion du temps (utilise votre fonction convert_time_to_seconds importée ailleurs)
    df_epreuve["time"] = df_epreuve["Performance"].apply(convert_time_to_seconds) # Attention au nom de colonne 'Perf.' vs 'Performance' selon votre API

    if df_epreuve.empty:
        return None, None, None

    # Calcul des stats
    best_all = df_epreuve["time"].min()
    best_2025 = df_epreuve.loc[df_epreuve.Annee == "2025", "time"].min()
    
    # Récupération de l'année du record personnel
    # idxmin renvoie l'index de la perf minimale
    idx_min = df_epreuve["time"].idxmin()
    year_best = df_epreuve.loc[idx_min, "Annee"]
    
    return best_all, best_2025, year_best

# ── Cache management ─────────────────────────────────────────────────────
def load_cache():
    if os.path.exists(CACHE_FILE):
        try:
            with open(CACHE_FILE, 'rb') as f:
                return pickle.load(f)
        except:
            pass
    return {"seq": {}, "results": {}}

def save_cache(cache):
    try:
        with open(CACHE_FILE, 'wb') as f:
            pickle.dump(cache, f)
    except Exception as e:
        print(f"Erreur sauvegarde: {e}")

# ── Version optimisée pour 1500m ─────────────────────────────────────────
def process_athletes_1500m(df_athle_live):
    
    cache = load_cache()
    loop = asyncio.get_event_loop() # Nécessaire pour appeler la fonction async
    
    # Map des noms nettoyés
    names_map = {}
    for idx, row in df_athle_live.iterrows():
        names_map[idx] = clean_name(row)
    
    # ── Phase 1: Recherche des athlètes (SEQ) ──
    print("Phase 1: Recherche des athlètes manquants...")
    search_count = 0
    for idx, row in tqdm(df_athle_live.iterrows(), total=len(df_athle_live), desc="Recherche"):
        full_name = names_map[idx]
        
        if full_name not in cache["seq"]:
            try:
                # Recherche Synchrone (car search_athletes est synchrone dans votre code actuel)
                search = search_athletes(full_name)
                match = choose_match(search, row["Club"])
                
                cache["seq"][full_name] = match["seq"] if match else None
                search_count += 1
                
                # Sauvegarde périodique
                if search_count % 25 == 0:
                    save_cache(cache)
                
            except Exception as e:
                # print(f"Erreur recherche pour {full_name}: {e}")
                cache["seq"][full_name] = None
    
    save_cache(cache)
    
    # ── Phase 2: Récupération des résultats (PERFS) ──
    print("Phase 2: Récupération des résultats manquants...")
    result_count = 0
    
    for idx, row in tqdm(df_athle_live.iterrows(), total=len(df_athle_live), desc="Résultats"):
        full_name = names_map[idx]
        seq = cache["seq"].get(full_name)
        
        # Si on a un SEQ et qu'on n'a pas encore les résultats en cache
        if seq and seq not in cache["results"]:
            try:
                # Appel ASYNCHRONE converti en SYNCHRONE via loop
                # get_all_athlete_results est votre alias pour "get_all_results_fast"
                df_res = loop.run_until_complete(get_all_athlete_results(seq))
                
                # Filtrage sur le 1500m
                df_cible = df_res[df_res.Epreuve.isin(EPREUVES_CIBLES)].copy()
                
                # Extraction des meilleures perfs
                cache["results"][seq] = extract_bests(df_cible)
                result_count += 1
                
                if result_count % 25 == 0:
                    save_cache(cache)
                
            except Exception as e:
                # print(f"Erreur résultats seq {seq}: {e}")
                cache["results"][seq] = (None, None, None)
    
    save_cache(cache)
    
    # ── Phase 3: Construction du tableau final ──
    print("Construction des colonnes finales...")
    best_all_col = []
    best_2025_col = []
    year_best_col = []
    
    for idx in df_athle_live.index:
        full_name = names_map[idx]
        seq = cache["seq"].get(full_name)
        
        if seq and seq in cache["results"]:
            best_all, best_2025, year_best = cache["results"][seq]
        else:
            best_all, best_2025, year_best = None, None, None
        
        best_all_col.append(best_all)
        best_2025_col.append(best_2025)
        year_best_col.append(year_best)
    
    return best_all_col, best_2025_col, year_best_col

# ── Exécution ────────────────────────────────────────────────────────────

# Lancement du processing
best_all, best_2025, year_best = process_athletes_1500m(df_athle_live)

# Enrichissement du DataFrame
df_athle_live = df_athle_live.assign(
    best_1500_all_time=best_all,
    best_1500_2025=best_2025,
    year_of_best_1500=year_best
)

# Formatage pour l'affichage (optionnel)
def format_time(x):
    if pd.isna(x): return x
    minutes = int(x) // 60
    seconds = int(x) % 60
    centiseconds = int((x * 100) % 100)
    return f"{minutes}:{seconds:02d}.{centiseconds:02d}"

# Appliquer le formatage si désiré sur des copies de colonnes pour l'export Excel
best_1500_fmt = [format_time(x) for x in best_all]
best_2025_fmt = [format_time(x) for x in best_2025]

df_athle_live["best_1500_fmt"] = best_1500_fmt
df_athle_live["best_1500_2025_fmt"] = best_2025_fmt

print(f"Traitement terminé pour {len(df_athle_live)} athlètes")
df_athle_live.head()

Phase 1: Recherche des athlètes manquants...


Recherche: 100%|██████████| 78/78 [01:56<00:00,  1.49s/it]


Phase 2: Récupération des résultats manquants...


Résultats: 100%|██████████| 78/78 [00:00<?, ?it/s]

Construction des colonnes finales...
Traitement terminé pour 78 athlètes





Unnamed: 0,Prénom,Nom,Club,Temps d'engagement,best_1500_all_time,best_1500_2025,year_of_best_1500,best_1500_fmt,best_1500_2025_fmt
0,Nathan,BRUYERE,AL ECHIROLLES,3:55.00,,,,,
1,Anthony,COEUR D ACIER,SARAN LOIRET ATHLETIC CLUB,3:49.80,,,,,
2,Franck,DIRAT,ATHLE 92*,3:50.06,,,,,
3,Mickael,YEYE,DYNAMIC AULNAY CLUB,3:45.00,,,,,
4,Francois,MEILLEUR,ENTENTE ANGEVINE ATHLETISME*,4:15.00,,,,,


In [28]:
import pandas as pd
from tqdm import tqdm
import asyncio
import nest_asyncio

# Important : permet d'exécuter des coroutines (appels async) dans une boucle simple
nest_asyncio.apply()

def process_athletes_1500m_simple(df_input):
    # loop = asyncio.get_event_loop()  <-- Pas nécessaire si tout est synchrone
    
    bests_all_time = []
    bests_2025 = []
    years_best = []
    
    print(f"Traitement de {len(df_input)} athlètes...")
    
    for i, row in tqdm(df_input.iterrows(), total=len(df_input)):
        
        ath_name = row['Nom']
        if not isinstance(ath_name, str):
            ath_name = str(ath_name)
        ath_name = ' '.join(ath_name.split()) 
        
        club_target = row['Club']
        
        res_all, res_25, res_yr = None, None, None
        
        try:
            results = search_athletes(ath_name)
            seq = None
            
            if len(results) > 1:
                clubs_candidat = [r['club'] for r in results]
                most_similar_club = find_most_similar_club(club_target, clubs_candidat)
                
                if most_similar_club is not None:
                    index = clubs_candidat.index(most_similar_club)
                    seq = results[index]['seq']
                    
            elif len(results) == 1:
                seq = results[0]['seq']
            
            if seq:
                # --- CORRECTION ICI ---
                # La fonction est synchrone, on l'appelle directement
                df_res = get_all_athlete_results(seq)
                # ----------------------
                
                # Le reste du code est identique
                df_1500 = df_res[(df_res.Epreuve == '1 500m') | (df_res.Epreuve == '1 500m Piste Courte')].copy()
                
                if not df_1500.empty:
                    df_1500 = df_1500[~df_1500["Performance"].str.contains("|".join(["DNS", "DNF", "AB", "DQ"]), na=False)]
                    
                    if not df_1500.empty:
                        df_1500["time"] = df_1500["Performance"].apply(convert_time_to_seconds)
                        
                        res_all = df_1500['time'].min()
                        res_yr = df_1500.loc[df_1500['time'].idxmin()]['Annee']
                        
                        df_2025 = df_1500[df_1500.Annee == "2025"]
                        if not df_2025.empty:
                            res_25 = df_2025['time'].min()

        except Exception as e:
            print(f"Erreur pour {ath_name}: {e}")
        
        bests_all_time.append(res_all)
        bests_2025.append(res_25)
        years_best.append(res_yr)
        
    return bests_all_time, bests_2025, years_best

# --- Exécution ---

cols_all, cols_25, cols_yr = process_athletes_1500m_simple(df_athle_live)

# Assignation
df_athle_live = df_athle_live.assign(
    best_1500_all_time=cols_all,
    best_1500_2025=cols_25,
    year_of_best_1500=cols_yr
)

# Formatage pour affichage "mm:ss.cc"
def format_time_display(x):
    if pd.isna(x): return ""
    minutes = int(x) // 60
    seconds = int(x) % 60
    centiseconds = int((x * 100) % 100)
    return f"{minutes}:{seconds:02d}.{centiseconds:02d}"

df_athle_live["best_1500_fmt"] = df_athle_live['best_1500_all_time'].apply(format_time_display)
df_athle_live["best_1500_2025_fmt"] = df_athle_live['best_1500_2025'].apply(format_time_display)

Traitement de 78 athlètes...


100%|██████████| 78/78 [12:28<00:00,  9.60s/it]


In [29]:
df_athle_live.head()

Unnamed: 0,Prénom,Nom,Club,Temps d'engagement,best_1500_all_time,best_1500_2025,year_of_best_1500,best_1500_fmt,best_1500_2025_fmt
0,Nathan,BRUYERE,AL ECHIROLLES,3:55.00,,,,,
1,Anthony,COEUR D ACIER,SARAN LOIRET ATHLETIC CLUB,3:49.80,229.47,232.04,2023.0,3:49.47,3:52.04
2,Franck,DIRAT,ATHLE 92*,3:50.06,225.8,225.8,2025.0,3:45.80,3:45.80
3,Mickael,YEYE,DYNAMIC AULNAY CLUB,3:45.00,227.95,,2021.0,3:47.95,
4,Francois,MEILLEUR,ENTENTE ANGEVINE ATHLETISME*,4:15.00,264.33,264.33,2025.0,4:24.33,4:24.33


In [34]:
df_athle_live.to_excel('1500m_TCM_TRL_17janvier.xlsx', index=False)

In [32]:
import pandas as pd
from tqdm import tqdm
import asyncio
import nest_asyncio

# Important : permet d'exécuter des coroutines (appels async) dans une boucle simple
nest_asyncio.apply()

def process_athletes_1500m_simple(df_input):
    # loop = asyncio.get_event_loop()  <-- Pas nécessaire si tout est synchrone
    
    bests_all_time = []
    bests_2025 = []
    years_best = []
    
    # Nouveaux tableaux pour la Piste Courte (Indoor)
    bests_pc = []
    years_pc = []
    
    print(f"Traitement de {len(df_input)} athlètes...")
    
    for i, row in tqdm(df_input.iterrows(), total=len(df_input)):
        
        ath_name = row['Nom']
        if not isinstance(ath_name, str):
            ath_name = str(ath_name)
        ath_name = ' '.join(ath_name.split()) 
        
        club_target = row['Club']
        
        res_all, res_25, res_yr = None, None, None
        res_pc, res_pc_yr = None, None
        
        try:
            results = search_athletes(ath_name)
            seq = None
            
            if len(results) > 1:
                clubs_candidat = [r['club'] for r in results]
                most_similar_club = find_most_similar_club(club_target, clubs_candidat)
                
                if most_similar_club is not None:
                    index = clubs_candidat.index(most_similar_club)
                    seq = results[index]['seq']
                    
            elif len(results) == 1:
                seq = results[0]['seq']
            
            if seq:
                # La fonction est synchrone, on l'appelle directement
                df_res = get_all_athlete_results(seq)
                
                # --- 1. GENERAL (Plein air + Piste Courte) pour le record absolu ---
                df_1500 = df_res[(df_res.Epreuve == '1 500m') | (df_res.Epreuve == '1 500m Piste Courte')].copy()
                
                if not df_1500.empty:
                    df_1500 = df_1500[~df_1500["Performance"].str.contains("|".join(["DNS", "DNF", "AB", "DQ"]), na=False)]
                    
                    if not df_1500.empty:
                        df_1500["time"] = df_1500["Performance"].apply(convert_time_to_seconds)
                        
                        # Global Best
                        res_all = df_1500['time'].min()
                        res_yr = df_1500.loc[df_1500['time'].idxmin()]['Annee']
                        
                        # Best 2025
                        df_2025 = df_1500[df_1500.Annee == "2025"]
                        if not df_2025.empty:
                            res_25 = df_2025['time'].min()
                            
                        # --- 2. SPECIFIQUE PISTE COURTE ---
                        df_pc = df_1500[df_1500.Epreuve == '1 500m Piste Courte']
                        if not df_pc.empty:
                            res_pc = df_pc['time'].min()
                            res_pc_yr = df_pc.loc[df_pc['time'].idxmin()]['Annee']

        except Exception as e:
            print(f"Erreur pour {ath_name}: {e}")
        
        bests_all_time.append(res_all)
        bests_2025.append(res_25)
        years_best.append(res_yr)
        
        bests_pc.append(res_pc)
        years_pc.append(res_pc_yr)
        
    return bests_all_time, bests_2025, years_best, bests_pc, years_pc

# --- Exécution ---

cols_all, cols_25, cols_yr, cols_pc, cols_pc_yr = process_athletes_1500m_simple(df_athle_live)

# Assignation
df_athle_live = df_athle_live.assign(
    best_1500_all_time=cols_all,
    best_1500_2025=cols_25,
    year_of_best_1500=cols_yr,
    best_1500_PC=cols_pc,           # Piste Courte
    year_of_best_1500_PC=cols_pc_yr # Année Piste Courte
)

# Formatage pour affichage "mm:ss.cc"
def format_time_display(x):
    if pd.isna(x): return ""
    minutes = int(x) // 60
    seconds = int(x) % 60
    centiseconds = int((x * 100) % 100)
    return f"{minutes}:{seconds:02d}.{centiseconds:02d}"


df_athle_live["best_1500_fmt"] = df_athle_live['best_1500_all_time'].apply(format_time_display)
df_athle_live["best_1500_2025_fmt"] = df_athle_live['best_1500_2025'].apply(format_time_display)
df_athle_live["best_1500_PC_fmt"] = df_athle_live['best_1500_PC'].apply(format_time_display)

Traitement de 78 athlètes...


100%|██████████| 78/78 [08:58<00:00,  6.90s/it]


In [33]:
df_athle_live.head()

Unnamed: 0,Prénom,Nom,Club,Temps d'engagement,best_1500_all_time,best_1500_2025,year_of_best_1500,best_1500_fmt,best_1500_2025_fmt,best_1500_PC,year_of_best_1500_PC,best_1500_PC_fmt
0,Nathan,BRUYERE,AL ECHIROLLES,3:55.00,,,,,,,,
1,Anthony,COEUR D ACIER,SARAN LOIRET ATHLETIC CLUB,3:49.80,229.47,232.04,2023.0,3:49.47,3:52.04,233.69,2023.0,3:53.69
2,Franck,DIRAT,ATHLE 92*,3:50.06,225.8,225.8,2025.0,3:45.80,3:45.80,230.06,2025.0,3:50.06
3,Mickael,YEYE,DYNAMIC AULNAY CLUB,3:45.00,227.95,,2021.0,3:47.95,,235.5,2021.0,3:55.50
4,Francois,MEILLEUR,ENTENTE ANGEVINE ATHLETISME*,4:15.00,264.33,264.33,2025.0,4:24.33,4:24.33,,,


In [35]:
df_athle_live.columns

Index(['Prénom', 'Nom', 'Club', 'Temps d'engagement', 'best_1500_all_time',
       'best_1500_2025', 'year_of_best_1500', 'best_1500_fmt',
       'best_1500_2025_fmt', 'best_1500_PC', 'year_of_best_1500_PC',
       'best_1500_PC_fmt'],
      dtype='object')

In [38]:
final_df = df_athle_live[['Prénom', 'Nom', 'Club', "Temps d'engagement", 
        'year_of_best_1500', 'best_1500_fmt',
       'best_1500_2025_fmt',  'year_of_best_1500_PC',
       'best_1500_PC_fmt']]

In [40]:
final_df = final_df.rename(columns={
    'year_of_best_1500': "annee du record personnel tout temps",
    'best_1500_fmt': "record personnel tout temps",
    'best_1500_2025_fmt': "meilleur performance 2025",
    'year_of_best_1500_PC': "annee du record personnel INDOOR",
    'best_1500_PC_fmt': "record personnel INDOOR"})

In [41]:
final_df.to_excel('1500m_TCM_TRL_17janvier_v2.xlsx', index=False)

In [2]:
import pandas as pd

In [3]:
df = pd.read_excel('1500m_TCM_TRL_17janvier_v2.xlsx')

In [4]:
df.shape

(78, 9)

In [6]:
df.head(1)

Unnamed: 0,Prénom,Nom,Club,Temps d'engagement,annee du record personnel tout temps,record personnel tout temps,meilleur performance 2025,annee du record personnel INDOOR,record personnel INDOOR
0,Nathan,BRUYERE,AL ECHIROLLES,3:55.00,,,,,


In [8]:
target_names = [
    "yeye", "imade", "galtier-villa", "sombaert", "le fur", 
    "lascabettes", "dirat", "bruyere", "liot", "piquart", "sanchez"
]

# On nettoie la colonne Nom (minuscule + suppression espaces) pour comparer avec la liste
mask = df['Nom'].astype(str).str.strip().str.lower().isin(target_names)

df_filtered = df[mask]
df_filtered

Unnamed: 0,Prénom,Nom,Club,Temps d'engagement,annee du record personnel tout temps,record personnel tout temps,meilleur performance 2025,annee du record personnel INDOOR,record personnel INDOOR
0,Nathan,BRUYERE,AL ECHIROLLES,3:55.00,,,,,
2,Franck,DIRAT,ATHLE 92*,3:50.06,2025.0,3:45.80,3:45.80,2025.0,3:50.06
3,Mickael,YEYE,DYNAMIC AULNAY CLUB,3:45.00,2021.0,3:47.95,,2021.0,3:55.50
22,Abdelmounaim,IMADE,SAINT DENIS EMOTION,3:47.09,2025.0,3:46.14,3:46.14,2025.0,3:47.09
36,Loic,SOMBAERT,ATHLETIC CLUBS 92*,3:45.00,2025.0,3:50.16,3:50.16,2018.0,4:26.00
39,Theodore,LIOT,ATHLE DU PAYS DE VANNES *,3:51.20,2025.0,3:47.04,3:47.04,2026.0,3:51.20
44,Cyann,GALTIER-VILLA,SATUC TOULOUSE ATHLE,3:44.97,2025.0,3:40.37,3:40.37,2025.0,3:44.97
45,Yvan,LE FUR,NICE COTE D'AZUR ATHLETISME *,3:45.01,2025.0,3:43.09,3:43.09,2025.0,3:45.01
49,Alex,PIQUART,ATHLE BOCAGE VENDEE*,3:47.20,2023.0,3:47.20,,,
52,Yanis,SANCHEZ,A. CARBON BLANC LORMONT,3:51.03,,,,,


In [9]:
df_filtered.to_excel('serie_1500m_TCM_TRL_17janvier.xlsx', index=False)