In [360]:
import pandas as pd
import os
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.preprocessing import StandardScaler
from statsmodels.discrete.conditional_models import ConditionalLogit
from getpass import getuser

In [361]:
# =============================
# CONFIG
# =============================
USER = getuser()

# Data directory. Defaults to the per-user Windows checkout path; set the
# TENNIS_HOMOPHILY_DATA_DIR environment variable to point the notebook at a
# different location (another OS, drive or checkout) without editing this cell.
INPUT_DIR = os.environ.get(
    "TENNIS_HOMOPHILY_DATA_DIR",
    f"C:/Users/{USER}/Documents/GitHub/tennis-homophily/data/atp/",
)
# Match-level input workbook read below.
INPUT_FILE = os.path.join(INPUT_DIR, "men_matches_with_ranks.xlsx")
# Team-level table (one row per team per match) written at the end of the notebook.
OUTPUT_FILE = os.path.join(INPUT_DIR, "win_lose_df.xlsx")
# Match-level table with the derived columns, written at the end of the notebook.
CLEANED_FILE = os.path.join(INPUT_DIR, "men_matches_with_ranks_cleaned.xlsx")

# =============================
# Load Dataset
# =============================
print("Loading Grand Slam matches…")
# Fail early with an explicit message when the input workbook is missing.
if not os.path.exists(INPUT_FILE):
    raise FileNotFoundError(f"Grand Slam file not found: {INPUT_FILE}")

df = pd.read_excel(INPUT_FILE)
print(df.shape)
display(df.head())
df.columns.tolist()


Loading Grand Slam matches…
(1405, 160)


Unnamed: 0,match_id,tournament,location,date,year,tournament_code,stage,match_duration,winners_set1,winners_set2,...,losers_p2_weight-lbs,losers_p2_weight-kg,losers_p2_height-ft,losers_p2_height-in,losers_p2_height-cm,losers_p2_experience,losers_p2_birthplace-city,losers_p2_birthplace-country,losers_p2_hand,losers_p2_backhand
0,0,Australian Open,"Melbourne,Australia",2018-01-15,2018,580,Finals,01:33:00,6,6.0,...,196.0,89.0,6.0,4.0,193.0,8.0,Montreal,Canada,Right-Handed,Two-Handed Backhand
1,1,Australian Open,"Melbourne,Australia",2018-01-15,2018,580,Semi-Finals,02:26:00,4,7.0,...,202.0,92.0,6.0,4.0,193.0,9.0,Warstein,Germany,Right-Handed,Two-Handed Backhand
2,2,Australian Open,"Melbourne,Australia",2018-01-15,2018,580,Semi-Finals,01:29:00,7,7.0,...,185.0,84.0,6.0,3.0,191.0,20.0,"Camarillo, CA, USA",,Right-Handed,One-Handed Backhand
3,3,Australian Open,"Melbourne,Australia",2018-01-15,2018,580,Quarter-Finals,01:06:00,6,6.0,...,182.0,83.0,6.0,0.0,183.0,20.0,Lahore,Pakistan,Right-Handed,One-Handed Backhand
4,4,Australian Open,"Melbourne,Australia",2018-01-15,2018,580,Quarter-Finals,02:40:00,6,6.0,...,207.0,94.0,6.0,6.0,198.0,,London,England,Right-Handed,One-Handed Backhand


['match_id',
 'tournament',
 'location',
 'date',
 'year',
 'tournament_code',
 'stage',
 'match_duration',
 'winners_set1',
 'winners_set2',
 'winners_set3',
 'winners_set1_tiebreak',
 'winners_set2_tiebreak',
 'winners_set3_tiebreak',
 'losers_set1',
 'losers_set2',
 'losers_set3',
 'losers_set1_tiebreak',
 'losers_set2_tiebreak',
 'losers_set3_tiebreak',
 'winners_set4',
 'winners_set5',
 'losers_set4',
 'losers_set5',
 'winners_set4_tiebreak',
 'losers_set4_tiebreak',
 'losers_set5_tiebreak',
 'winners_p1_name',
 'winners_p1_surname',
 'winners_p1_ranking',
 'winners_p1_status',
 'winners_p2_name',
 'winners_p2_surname',
 'winners_p2_ranking',
 'winners_p2_status',
 'losers_p1_name',
 'losers_p1_surname',
 'losers_p1_ranking',
 'losers_p1_status',
 'losers_p2_name',
 'losers_p2_surname',
 'losers_p2_ranking',
 'losers_p2_status',
 'tourn_key',
 'winners_p1_initial',
 'winners_p2_initial',
 'losers_p1_initial',
 'losers_p2_initial',
 'winners_p1_rank',
 'winners_p1_tourns',
 'winner

  # Filter dataset

In [362]:
# Drop the 2020 season from the dataset (COVID year: Wimbledon was not played).
is_2020 = df['year'].eq(2020)
df = df.loc[~is_2020].copy()

# Quick check: number of matches left per season.
matches_per_year = df['year'].value_counts()
print(matches_per_year)

2018    262
2019    250
2021    249
2022    249
2023    238
Name: year, dtype: int64


# Create new variables

In [363]:
# Grand Slam tournaments grouped by playing surface.
tournaments_by_surface = {
    "Hard": ("Australian Open", "US Open"),
    "Clay": ("Roland Garros",),
    "Grass": ("Wimbledon",),
}

# Flatten into a tournament -> surface lookup.
surface_map = {
    tournament: surface
    for surface, tournaments in tournaments_by_surface.items()
    for tournament in tournaments
}

# Tournaments that are not in the lookup get NaN.
df['surface'] = df['tournament'].map(surface_map)


In [364]:
# Normalise stage labels: lower-case, then trim surrounding whitespace.
stage_lowercased = df['stage'].str.lower()
df['stage_clean'] = stage_lowercased.str.strip()

# Stage labels listed from the final (code 7) down to first-round qualifying (code 0).
stages_latest_first = [
    "finals",
    "semi-finals",
    "quarter-finals",
    "round of 16",
    "round of 32",
    "round of 64",
    "2nd round qualifying",
    "1st round qualifying",
]
highest_code = len(stages_latest_first) - 1
stage_order = {
    label: highest_code - position
    for position, label in enumerate(stages_latest_first)
}

# Ordinal stage code; labels missing from stage_order become NaN.
df['stage_code'] = df['stage_clean'].map(stage_order)

In [365]:
# Sanity check: distinct stage codes produced by the mapping
# (a NaN here would mean a stage label is missing from stage_order).
df['stage_code'].unique()

array([7, 6, 5, 4, 3, 2, 1, 0], dtype=int64)

In [366]:
# --- Physical Homophily --- #
# Within-team similarity measures, built for the winning and the losing pair alike.
sides = ('winners', 'losers')

# Categorical traits (handedness, backhand type): 1 when both partners have the
# same value, 0 otherwise. Missing values compare as unequal, so they yield 0.
shared_traits = {'same_hand': 'hand', 'same_backhand': 'backhand'}
for flag, trait in shared_traits.items():
    for side in sides:
        partner_1 = df[f'{side}_p1_{trait}']
        partner_2 = df[f'{side}_p2_{trait}']
        df[f'{flag}_{side}'] = partner_1.eq(partner_2).astype(int)

# Numeric traits: absolute gap between the two partners
# (height in cm, weight in kg, experience, rank).
absolute_gaps = {
    'height_diff': 'height-cm',
    'weight_diff': 'weight-kg',
    'experience_diff': 'experience',
    'rank_diff': 'rank',
}
for gap, trait in absolute_gaps.items():
    for side in sides:
        partner_1 = df[f'{side}_p1_{trait}']
        partner_2 = df[f'{side}_p2_{trait}']
        df[f'{gap}_{side}'] = partner_1.sub(partner_2).abs()


In [367]:

# --- Social Homophily --- #
sides = ('winners', 'losers')

# Same nationality: 1 when both partners list the same country, 0 otherwise.
for side in sides:
    country_1 = df[f'{side}_p1_country']
    country_2 = df[f'{side}_p2_country']
    df[f'same_country_{side}'] = country_1.eq(country_2).astype(int)

# Same coach: 1 only when both partners have a recorded coach and the entries match.
for side in sides:
    coach_1 = df[f'{side}_p1_coach']
    coach_2 = df[f'{side}_p2_coach']
    both_recorded = coach_1.notna() & coach_2.notna()
    df[f'same_coach_{side}'] = (both_recorded & coach_1.eq(coach_2)).astype(int)

# (Optionally, same_birthplace_* indicators could be added here if they make sense.)


In [368]:

# Absolute within-team gaps in win ratio and in titles (year-to-date and career).
# NOTE(review): the source columns are passed through pd.to_numeric only in a later
# cell — confirm they are already numeric when this subtraction runs.
performance_gaps = {
    'wl_ytd_diff': 'win-ratio-ytd',
    'wl_career_diff': 'win-ratio-career',
    'titles_ytd_diff': 'titles-ytd',
    'titles_career_diff': 'titles-career',
}

for gap_name, stat in performance_gaps.items():
    for side in ('winners', 'losers'):
        first_partner = df[f'{side}_p1_{stat}']
        second_partner = df[f'{side}_p2_{stat}']
        df[f'{gap_name}_{side}'] = first_partner.sub(second_partner).abs()

## Standardize variables and build the homophily index

In [369]:
# 1. Continuous within-team gap variables for which a z-score is computed.
z_score_stems = [
    'height_diff',
    'weight_diff',
    'experience_diff',
    'rank_diff',
    'wl_ytd_diff',
    'wl_career_diff',
    'titles_career_diff',
]

# Same variables, suffixed for the winning and the losing team.
cont_vars_winners = [f'{stem}_winners' for stem in z_score_stems]
cont_vars_losers = [f'{stem}_losers' for stem in z_score_stems]

# 2. Helper that adds z-scores ONLY for the columns that actually exist

def add_z_scores(df, cols, suffix='_z'):
    """Append a standardized version of each listed column to ``df``.

    For every name in ``cols`` that is a column of ``df``, a new column
    ``name + suffix`` is written holding ``(value - mean) / std``, where ``std``
    is the population standard deviation (``ddof=0``). When that standard
    deviation is 0 or NaN (constant or all-NaN column) the new column is the
    constant 0. Names that are not columns of ``df`` are reported with a printed
    message and no column is created for them.

    Args:
        df: DataFrame to extend; it is modified in place.
        cols: Iterable of column names to standardize.
        suffix: Text appended to a source name to form the z-score column name.

    Returns:
        The same ``df`` object, with the z-score columns added.
    """
    for source in cols:
        if source not in df.columns:
            # Missing column: report it and do NOT create a z-score column.
            print(f"[add_z_scores] Column {source} not found, skipping z-score.")
            continue

        center = df[source].mean()
        spread = df[source].std(ddof=0)
        degenerate = spread == 0 or np.isnan(spread)
        # Constant or all-NaN column: z-score is set to 0.
        df[source + suffix] = 0 if degenerate else (df[source] - center) / spread
    return df

# Standardize the winners' gap variables, then the losers'.
for gap_columns in (cont_vars_winners, cont_vars_losers):
    df = add_z_scores(df, gap_columns, suffix='_z')

# 3. Collect the *_z columns that are actually present.
z_cols_winners = []
z_cols_losers = []
for column in df.columns:
    if column.endswith('_winners_z'):
        z_cols_winners.append(column)
    if column.endswith('_losers_z'):
        z_cols_losers.append(column)

# 4. Replace NaN with 0 in these columns ONLY.
# NOTE(review): a z-score of 0 corresponds to the column mean, so teams with a
# missing gap are treated as average — confirm this imputation is intended.
for z_columns in (z_cols_winners, z_cols_losers):
    df[z_columns] = df[z_columns].fillna(0)

# 5. Compute the homophily indices
# (when a z column does not exist, 0 is used as a fallback)

def get_col(df, name):
    """Return column ``name`` of ``df``, or the scalar 0 when ``df`` has no such column."""
    if name not in df.columns:
        return 0
    return df[name]

# Homophily index per team: minus every standardized within-team gap (a smaller gap
# means more similar partners) plus every shared-attribute indicator.
z_gap_stems = [
    'height_diff',
    'weight_diff',
    'experience_diff',
    'rank_diff',
    'wl_ytd_diff',
    'wl_career_diff',
    'titles_career_diff',
]
shared_flags = ['same_hand', 'same_backhand', 'same_country', 'same_coach']

for side in ('winners', 'losers'):
    # Accumulate term by term, gaps first and indicators after.
    index_value = -get_col(df, f'{z_gap_stems[0]}_{side}_z')
    for stem in z_gap_stems[1:]:
        index_value = index_value - get_col(df, f'{stem}_{side}_z')
    for flag in shared_flags:
        index_value = index_value + df[f'{flag}_{side}'].fillna(0)
    df[f'homophily_index_{side}'] = index_value

# 6. Quick look: a few components next to the resulting indices, first 10 matches.
preview_measures = ('same_hand', 'same_backhand', 'height_diff', 'homophily_index')
preview_columns = [
    f'{measure}_{side}'
    for measure in preview_measures
    for side in ('winners', 'losers')
]

display(df[preview_columns].head(10))


Unnamed: 0,same_hand_winners,same_hand_losers,same_backhand_winners,same_backhand_losers,height_diff_winners,height_diff_losers,homophily_index_winners,homophily_index_losers
0,0,1,0,1,6.0,8.0,-2.031063,5.69164
1,0,1,0,0,6.0,8.0,-2.031063,0.705088
2,1,0,1,1,8.0,2.0,5.242303,7.313747
3,0,1,1,0,2.0,2.0,7.169658,3.558321
4,0,1,0,0,6.0,10.0,-2.031063,0.16519
5,1,0,1,0,8.0,,5.242303,0.0
6,1,1,0,1,8.0,10.0,0.144544,1.989806
7,1,0,1,0,10.0,10.0,1.874334,-2.369769
8,0,1,1,0,2.0,10.0,7.169658,-0.112516
9,0,1,0,0,6.0,8.0,-2.031063,2.598207


In [370]:
# Per-player statistics that must be numeric: rank, career titles, year-to-date
# titles, weight (kg), height (cm), career win ratio and year-to-date win ratio.
numeric_stats = [
    'rank',
    'titles-career',
    'titles-ytd',
    'weight-kg',
    'height-cm',
    'win-ratio-career',
    'win-ratio-ytd',
]
player_slots = ['winners_p1', 'winners_p2', 'losers_p1', 'losers_p2']

# One column per (statistic, player slot) pair, grouped by statistic.
cols_to_numeric = [
    f'{slot}_{stat}'
    for stat in numeric_stats
    for slot in player_slots
]

# Coerce every listed column that exists; values that cannot be parsed become NaN.
# NOTE(review): the *_diff columns were computed from these columns in earlier
# cells, so this coercion does not reach them — confirm the cell order is intended.
columns_present = [name for name in cols_to_numeric if name in df.columns]
for col in columns_present:
    df[col] = pd.to_numeric(df[col], errors='coerce')


## Winner-minus-loser difference variables (currently disabled)

In [371]:
# #--- Homophily difference ---
# df['homophily_diff'] = df['homophily_index_winners'] - df['homophily_index_losers']


# # --- W-L Career win ratio difference ---
# df['wl_career_diff'] = (
#     df['winners_p1_win-ratio-career'] + df['winners_p2_win-ratio-career']
#     - (df['losers_p1_win-ratio-career'] + df['losers_p2_win-ratio-career'])
# )


# # --- Titles career difference ---
# df['titles_career_diff'] = (
#     (df['winners_p1_titles-career'] + df['winners_p2_titles-career'])
#     - (df['losers_p1_titles-career'] + df['losers_p2_titles-career'])
# )


# # --- Rank difference ---
# df['rank_diff'] = (
#     (df['winners_p1_rank'] + df['winners_p2_rank'])
#     - (df['losers_p1_rank'] + df['losers_p2_rank'])
# )


# Reshape to team level: two rows per match (winners with won == 1, losers with won == 0)

In [372]:
def _team_record(row, side, won):
    """Build the team-level record for one side of a match.

    Args:
        row: One match row of ``df``.
        side: 'winners' or 'losers'; selects which team's columns are read.
        won: Outcome flag stored in the record (1 for winners, 0 for losers).

    Returns:
        A dict with the team's homophily measures, both partners' raw statistics
        and the match metadata.
    """
    p1 = f'{side}_p1'
    p2 = f'{side}_p2'
    return {
        'match_id': row['match_id'],
        'team': side,
        'won': won,

        'homophily': row[f'homophily_index_{side}'],

        # Standardized within-team gaps
        'height_diff': row[f'height_diff_{side}_z'],
        'weight_diff': row[f'weight_diff_{side}_z'],
        'experience_diff': row[f'experience_diff_{side}_z'],

        # Player experience
        'experience_p1': row[f'{p1}_experience'],
        'experience_p2': row[f'{p2}_experience'],

        # Rank (rank_diff is the raw gap, not the z-scored one)
        'rank_p1': row[f'{p1}_rank'],
        'rank_p2': row[f'{p2}_rank'],
        'rank_diff': row[f'rank_diff_{side}'],

        # W/L career ratio
        'wl_career_diff': row[f'wl_career_diff_{side}_z'],
        'wl_career_p1': row[f'{p1}_win-ratio-career'],
        'wl_career_p2': row[f'{p2}_win-ratio-career'],

        # W/L YTD ratio
        'wl_ytd_diff': row[f'wl_ytd_diff_{side}_z'],
        'wl_ytd_p1': row[f'{p1}_win-ratio-ytd'],
        'wl_ytd_p2': row[f'{p2}_win-ratio-ytd'],

        # Titles (career)
        'titles_p1': row[f'{p1}_titles-career'],
        'titles_p2': row[f'{p2}_titles-career'],
        'titles_career_diff': row[f'titles_career_diff_{side}_z'],

        # Homophily components
        'same_country': row[f'same_country_{side}'],
        'same_hand': row[f'same_hand_{side}'],
        'same_backhand': row[f'same_backhand_{side}'],
        'same_coach': row[f'same_coach_{side}'],

        # Match meta
        'year': row['year'],
        'tournament': row['tournament'],
        'stage': row['stage_code'],
        'surface': row['surface'],
    }


# Every match contributes a winners record followed by a losers record.
team_rows = []
for _, match in df.iterrows():
    team_rows.append(_team_record(match, 'winners', 1))
    team_rows.append(_team_record(match, 'losers', 0))

# Build the final team-level dataframe
team_df = pd.DataFrame(team_rows)


In [373]:
# Sanity check: two rows (winners, losers) are appended per match row of df.
team_df.shape

(2496, 29)

In [374]:
# Team-level ability measures: sum of the two partners' values (mean for experience).
def _partner_sum(stem):
    """Return the element-wise sum of the `<stem>_p1` and `<stem>_p2` columns of team_df."""
    return team_df[f'{stem}_p1'] + team_df[f'{stem}_p2']


team_df['rank_team'] = _partner_sum('rank')
team_df['experience_team'] = _partner_sum('experience') / 2
team_df['wl_career_team'] = _partner_sum('wl_career')
team_df['wl_ytd_team'] = _partner_sum('wl_ytd')
team_df['titles_team'] = _partner_sum('titles')
# Same win-ratio sums again, stored under the wl_ratio_team_* names.
team_df['wl_ratio_team_career'] = _partner_sum('wl_career')
team_df['wl_ratio_team_ytd'] = _partner_sum('wl_ytd')


In [375]:
# Columns that must be non-missing for a row to enter the model dataset.
# NOTE(review): some columns listed here (e.g. titles_team, experience_team,
# same_coach) do not appear in the logit formulas, while same_backhand, which
# does, is not listed — confirm the list matches the intended model.
outcome_col = ['won']
team_level_cols = ['rank_team', 'wl_career_team', 'wl_ytd_team', 'titles_team', 'experience_team']
gap_cols = ['height_diff', 'weight_diff', 'experience_diff', 'wl_career_diff', 'wl_ytd_diff']
indicator_cols = ['same_country', 'same_hand', 'same_coach']
cols_needed = outcome_col + team_level_cols + gap_cols + indicator_cols

# Work on a copy of the team-level data, dropping only the rows with NaN in those columns.
dfc = team_df.copy().dropna(subset=cols_needed)


In [376]:
# Rows and columns left after dropping rows with missing values in cols_needed.
dfc.shape

(888, 36)

# Logistic regressions

In [377]:

# Continuous regressors to put on a common scale before fitting the logit.
continuous_vars = [
    'rank_team', 'wl_career_team', 'wl_ytd_team', 'titles_team',
    'height_diff', 'weight_diff', 'experience_diff',
    'wl_career_diff', 'wl_ytd_diff'
]

# Standardize these columns in place, using statistics of the rows kept in dfc.
# NOTE(review): the *_diff columns were already z-scored on the full match table
# before rows were dropped, so they are standardized a second time here — confirm intended.
scaler = StandardScaler()
dfc[continuous_vars] = scaler.fit_transform(dfc[continuous_vars])


In [378]:
# Formula for the team-level logit: outcome `won` on team rank, within-team
# height/weight gaps, team career win ratio, the shared-attribute indicators
# and surface entered as a categorical term.
formula = """
won ~
    rank_team + 
    height_diff + weight_diff + wl_career_team + 
    same_country + same_hand  + same_backhand +
      C(surface) 
"""


In [379]:

# L1-penalised logit of `won` on the regressors named in `formula`.
l1_penalty = 1.0   # values such as 0.1, 0.5, 2.0 can be tried as a robustness check
pooled_logit = smf.logit(formula, data=dfc)
model_reg = pooled_logit.fit_regularized(method='l1', alpha=l1_penalty)

print(model_reg.summary())


Optimization terminated successfully    (Exit mode 0)
            Current function value: 0.6386209107916776
            Iterations: 55
            Function evaluations: 55
            Gradient evaluations: 55
                           Logit Regression Results                           
Dep. Variable:                    won   No. Observations:                  888
Model:                          Logit   Df Residuals:                      879
Method:                           MLE   Df Model:                            8
Date:                ven, 12 dic 2025   Pseudo R-squ.:                 0.06969
Time:                        13:19:50   Log-Likelihood:                -565.80
converged:                       True   LL-Null:                       -608.18
Covariance Type:            nonrobust   LLR p-value:                 5.353e-15
                          coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------

## Logit with standard errors clustered by match

In [380]:
# 1. Build a clean dataset for the model
vars_for_model = [
    'won', 'match_id',
    'rank_diff', 'height_diff', 'weight_diff', 'wl_ytd_diff', 'wl_career_diff',
    'same_country', 'same_hand', 'same_backhand',
    'surface', 'stage'
]

# Keep only the model columns, turn +/-inf into NaN, then drop incomplete rows.
model_columns_only = dfc[vars_for_model]
without_infinities = model_columns_only.replace([np.inf, -np.inf], np.nan)
dfc_clean = without_infinities.dropna().copy()

print(dfc_clean.shape)        # quick check
missing_per_column = dfc_clean.isna().sum()
print(missing_per_column)     # should all be 0


(888, 12)
won               0
match_id          0
rank_diff         0
height_diff       0
weight_diff       0
wl_ytd_diff       0
wl_career_diff    0
same_country      0
same_hand         0
same_backhand     0
surface           0
stage             0
dtype: int64


In [381]:
def map_stage(s):
    """Collapse a numeric stage code into a coarse tournament phase.

    Codes 0-3 give 'early', 4 gives 'middle', 5-6 give 'late' and 7 gives
    'final'; any other value returns None.
    """
    phases = (
        ('early', (0, 1, 2, 3)),
        ('middle', (4,)),
        ('late', (5, 6)),
        ('final', (7,)),
    )
    for phase, codes in phases:
        if s in codes:
            return phase
    return None

# Coarse tournament phase for each row, derived from its numeric stage code.
dfc_clean['stage_simple'] = dfc_clean['stage'].apply(map_stage)


In [382]:
# Number of team rows in each coarse phase.
dfc_clean['stage_simple'].value_counts()


early     649
middle    120
late      100
final      19
Name: stage_simple, dtype: int64

In [383]:
# Formula for the clustered-SE logit: within-team gaps and shared-attribute
# indicators, with surface and coarse stage entered as categorical terms.
# This overwrites the `formula` defined for the earlier model.
formula = """
won ~ rank_diff + height_diff + weight_diff +
       wl_career_diff + same_country + same_hand + same_backhand +
       C(surface) + C(stage_simple)
"""


In [384]:
# 3. Specify the logit on the cleaned team-level data
logit_mod = smf.logit(formula, data=dfc_clean)

# 4. Fit with standard errors CLUSTERED by match_id
#    (the two team rows of a match share the same match_id)
cluster_fit_options = {
    'maxiter': 200,             # to be safe
    'cov_type': 'cluster',
    'cov_kwds': {'groups': dfc_clean['match_id']},
}
res_cl = logit_mod.fit(**cluster_fit_options)

print(res_cl.summary())

Optimization terminated successfully.
         Current function value: 0.659377
         Iterations 5
                           Logit Regression Results                           
Dep. Variable:                    won   No. Observations:                  888
Model:                          Logit   Df Residuals:                      875
Method:                           MLE   Df Model:                           12
Date:                ven, 12 dic 2025   Pseudo R-squ.:                 0.03724
Time:                        13:19:50   Log-Likelihood:                -585.53
converged:                       True   LL-Null:                       -608.18
Covariance Type:              cluster   LLR p-value:                 9.150e-06
                                coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------------
Intercept                     0.8247      0.212      3.889      0.000       0.

In [385]:
# =============================
# Save the team-level table
# =============================
print(f"Saving to {OUTPUT_FILE} …")
team_df.to_excel(
    OUTPUT_FILE,
    sheet_name="players_list",
    index=False,
    engine="xlsxwriter",
)

print("Done.")


Saving to C:/Users/aldi/Documents/GitHub/tennis-homophily/data/atp/win_lose_df.xlsx …
Done.


In [387]:
# =============================
# Save the match-level table with the derived columns
# =============================
print(f"Saving to {CLEANED_FILE} …")
df.to_excel(
    CLEANED_FILE,
    sheet_name="players_list",
    index=False,
    engine="xlsxwriter",
)

print("Done.")


Saving to C:/Users/aldi/Documents/GitHub/tennis-homophily/data/atp/men_matches_with_ranks_cleaned.xlsx …
Done.
