In [3]:
import pandas as pd

# 1. Load Data
# Point these two paths at your own files if they live elsewhere.
injury_df = pd.read_csv('../data/injury_risk_v1.csv')
master_df = pd.read_csv('../data/Master2.csv')

# 2. Normalise the Master headers: strip leading/trailing whitespace so the
#    column lookups below match the names exactly.
master_df.columns = [header.strip() for header in master_df.columns]

# 3. Derive 'nameFull' only when the Master file does not already provide it.
if 'nameFull' not in master_df:
    print("Generating 'nameFull' from First and Last names...")
    # Blank out missing name parts first; NaN would otherwise propagate
    # through the string concatenation and wipe out the whole name.
    for part in ('nameFirst', 'nameLast'):
        master_df[part] = master_df[part].fillna('')
    first_name = master_df['nameFirst']
    last_name = master_df['nameLast']
    # The trailing strip removes the stray space left when one part is empty.
    master_df['nameFull'] = (first_name + " " + last_name).str.strip()

# 4. Prepare the Master subset
# We need playerID + Name + 3 Birth Columns
cols_to_merge = ['playerID', 'nameFull', 'birthMonth', 'birthDay', 'birthYear']

# Check which of the wanted columns the Master file actually has.
missing_cols = [c for c in cols_to_merge if c not in master_df.columns]
if 'playerID' in missing_cols:
    # The join key is not optional: dropping it from the list (as is done for
    # the other columns below) would only defer the failure to the merge call
    # with a far less helpful message.
    raise KeyError("Master CSV has no 'playerID' column; cannot merge.")
if missing_cols:
    print(f"❌ Warning: Missing columns in Master CSV: {missing_cols}")
    # Enrich with whatever optional columns are available instead of crashing.
    cols_to_merge = [c for c in cols_to_merge if c not in missing_cols]

subset = master_df[cols_to_merge]

# A left merge emits one output row per matching Master row, so a playerID
# that appears more than once in the lookup would silently multiply injury
# rows. Keep only the first occurrence of each playerID.
duplicate_ids = subset['playerID'].duplicated()
if duplicate_ids.any():
    print(f"❌ Warning: Dropping {duplicate_ids.sum()} duplicate playerID rows from Master CSV")
    subset = subset[~duplicate_ids]

# 5. Merge
# 'how="left"' keeps all injury rows, adding info where matches are found
merged_df = injury_df.merge(subset, on='playerID', how='left')

# 6. Clean Up NaNs (Optional but recommended)
date_cols = ['birthMonth', 'birthDay', 'birthYear']

# Build one fill value per column that is actually present after the merge.
# Birth-date parts get 0 so the columns can be cast to int and are written
# as e.g. 1990 rather than 1990.0; note that 0 is a placeholder for
# "no match / no data", not a real date component.
fill_values = {col: 0 for col in date_cols if col in merged_df.columns}
int_dtypes = {col: int for col in fill_values}

# Rows without a name (missing or unmatched) are labelled "Unknown".
if 'nameFull' in merged_df.columns:
    fill_values['nameFull'] = 'Unknown'

if fill_values:
    merged_df = merged_df.fillna(fill_values)
if int_dtypes:
    merged_df = merged_df.astype(int_dtypes)

# 7. Save
merged_df.to_csv('../data/injury_risk.csv', index=False)

print(f"✅ Success! Enriched {len(merged_df)} rows.")
# Preview only the columns that exist: the enrichment columns are optional
# (they are skipped when absent from the Master CSV), and indexing a missing
# one here would raise KeyError after the file has already been written.
preview_cols = [c for c in ['playerID', 'nameFull', 'birthYear'] if c in merged_df.columns]
print(merged_df[preview_cols].head())

✅ Success! Enriched 22468 rows.
    playerID       nameFull  birthYear
0  aardsda01  David Aardsma       1981
1  aardsda01  David Aardsma       1981
2  aardsda01  David Aardsma       1981
3  aardsda01  David Aardsma       1981
4  aardsda01  David Aardsma       1981
