## Creating the complete RG 2025 draw from the per-round JSON files and assigning IDs to players

In [1]:
import json
import pandas as pd

In [5]:
# Step 1: Load the per-round JSON files (rounds 1-4 plus the final phases)
def _load_json(path):
    """Read the UTF-8 encoded JSON file at ``path`` and return the parsed object."""
    with open(path, 'r', encoding='utf-8') as fh:
        return json.load(fh)


data_r1 = _load_json('./roland_garros_2025_round1.json')
data_r2 = _load_json('./roland_garros_2025_round2.json')
data_r3 = _load_json('./roland_garros_2025_round3.json')
data_r4 = _load_json('./roland_garros_2025_round4.json')
data_r5 = _load_json('./roland_garros_2025_final_phases.json')

# Step 2: Load the player ID CSV
df_players = pd.read_csv('./PLAYERS_ID.csv')

In [6]:
# Step 3: Build normalized name → ID dictionary
def make_full_name(row):
    """Build the lookup key for one player row.

    The key is the row's ``last_name`` (stripped and upper-cased), a single
    space, then the row's ``first_name`` (stripped, original casing kept).
    """
    surname = row['last_name'].strip().upper()
    given = row['first_name'].strip()
    return ' '.join((surname, given))

# Add the lookup key as a 'full_name' column, one value per player row.
full_name_column = df_players.apply(make_full_name, axis=1)
df_players['full_name'] = full_name_column

# Map each key to the player's id; if a key repeats, the last row wins.
name_to_id = {
    full_name: player_id
    for full_name, player_id in zip(df_players['full_name'], df_players['id'])
}

# Step 4: Normalize a player name the same way the dict keys were built
def normalize_player_name(name):
    """Normalize a raw "<last> <first>" player name into the lookup-key form.

    Surrounding whitespace is dropped, the first whitespace-delimited token
    is upper-cased and the remainder is kept with its original casing,
    joined by a single space.

    Args:
        name: Raw player name string.

    Returns:
        ``"<LAST> <rest>"`` when the name has at least two tokens; otherwise
        the stripped, upper-cased input (fallback for single-token or empty
        names).
    """
    # split(None, 1) splits on any run of whitespace (tabs, non-breaking
    # spaces, repeated blanks), not only on a literal ' '.
    parts = name.strip().split(None, 1)
    if len(parts) != 2:
        return name.strip().upper()  # fallback
    last, rest = parts
    # NOTE(review): only the first token is upper-cased, so a mixed-case
    # multi-word last name is not fully upper-cased — confirm input format.
    return f"{last.upper()} {rest.strip()}"

# Step 5: Assign IDs using normalized name
def assign_ids(match):
    """Attach a player id to both players of ``match`` and return the match.

    For each of ``match['player1']`` and ``match['player2']`` the player's
    ``'name'`` is normalized and looked up in ``name_to_id``; the result
    (``None`` when there is no entry) is stored under ``'id'``. The match
    is modified in place, and a warning is printed for every name that has
    no entry.
    """
    for slot in ('player1', 'player2'):
        player = match[slot]
        raw_name = player['name']
        norm_name = normalize_player_name(raw_name)
        player_id = name_to_id.get(norm_name)
        player['id'] = player_id
        if player_id is None:
            print(f"[Warning] Player not found: '{raw_name}' → '{norm_name}'")
    return match

# Step 6: Attach player ids to the matches of each round
matches_r1 = list(map(assign_ids, data_r1['matches']))
matches_r2 = list(map(assign_ids, data_r2['matches']))
matches_r3 = list(map(assign_ids, data_r3['matches']))
matches_r4 = list(map(assign_ids, data_r4['matches']))
matches_r5 = list(map(assign_ids, data_r5['matches']))

# Concatenate the rounds in order: rounds 1-4 first, final phases last.
all_matches = [
    *matches_r1,
    *matches_r2,
    *matches_r3,
    *matches_r4,
    *matches_r5,
]

# Step 7: Wrap the matches with tournament metadata and write them to disk
combined_data = dict(
    tournament="Roland-Garros 2025",
    surface="CLAY",
    matches=all_matches,
)

with open('./roland_garros_2025_complete_final.json', 'w', encoding='utf-8') as f:
    # ensure_ascii=False writes non-ASCII characters as-is instead of \u escapes.
    json.dump(combined_data, f, ensure_ascii=False, indent=2)

print("\nFinal JSON saved to: roland_garros_2025_complete_final.json")


Final JSON saved to: roland_garros_2025_complete_final.json
