ALL IMPORTS FOR THIS PROJECT

In [12]:
import numpy as np
import pandas as pd

Extracting defenders and their important information

In [13]:
import pandas as pd
df = pd.read_csv("Players-DataSet.csv")
defenders = df[df["Pos"].str.contains("DF", na=False)]
info_cols = [
    "Player", "Nation", "Pos", "Squad", "Comp", "Age",
    "MP", "Starts"
]
keywords = ["tkl", "int", "block", "clr", "aerial", "press", "challenge", "duel"]
skill_cols = [
    col for col in df.columns
    if any(k in col.lower() for k in keywords)
]


selected_cols = info_cols + skill_cols
defenders_filtered = defenders[selected_cols]
defenders_filtered.to_csv("defenders_filtered.csv", index=False)

print("Done! File saved as defenders_filtered.csv")


Done! File saved as defenders_filtered.csv


Cleaning Midfielders

In [14]:

def clean_midfielder_data(csv_path="Players-DataSet.csv"):
    df = pd.read_csv(csv_path)
    mf_df = df[df['Pos'].str.contains('MF', na=False)].copy()
    mf_df = mf_df[mf_df['90s'] >= 5.0].copy()
    identifier_cols = [
        'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'MP', 'Starts', 'Min', '90s'
    ]
    passing_cols = [
        'Ast',      # Assists
        'xAG',      # Expected Assisted Goals
        'KP',       # Key Passes
        'PrgP',     # Progressive Passes
        'Cmp',      # Passes Completed
        'Att',      # Passes Attempted
        'Cmp%',     # Pass Completion %
        'TB',       # Through Balls
        'Crs',      # Crossesx
        'CK',       # Corner Kicks
        '1/3',      # passes into the final third
        'PPA',      # passes into pen area
    ]
    possession_cols = [
        'Touches',  # Total Touches
        'PrgC',     # Progressive Carries
        'Succ',     # Successful Dribbles
        'Att_stats_possession', # Dribbles Attempted
        'PrgR',     # Progressive Passes Received
        'SCA90',    # Shot Creating actions 
        'GCA90'     # Goal Creating actions
    ]
    shooting_cols = [
        'Gls',      # Goals
        'xG',       # Expected Goals
        'Sh',       # Shots 
        'SoT',      # Shots on Target 
        'G/Sh',     # Goals per Shot 
        'G/SoT',    # Goals per Shot on Target 
    ]
    defensive_cols = [
        'Tkl',      # Tackles
        'TklW',     # Tackles Won
        'Int',      # Interceptions
        'Blocks_stats_defense', # Blocks (defensive)
        'Clr',      # Clearances
        'Err'       # Errors
    ]
    all_relevant_cols = identifier_cols + passing_cols + possession_cols + shooting_cols + defensive_cols   
    mf_df = mf_df[all_relevant_cols].copy()
    numeric_stat_cols = [
        'Ast', 'xAG', 'KP', 'PrgP', 'Cmp', 'Att', 'TB', 'Crs', 'CK', '1/3', 'PPA',
        'Touches', 'PrgC', 'Succ', 'Att_stats_possession', 'PrgR', 'SCA90', 'GCA90',
        'Gls', 'xG', 'Sh', 'SoT', 'G/Sh', 'G/SoT',
        'Tkl', 'TklW', 'Int', 'Blocks_stats_defense', 'Clr', 'Err'
    ]
    
    for col in numeric_stat_cols:
        if col in mf_df.columns:
            mf_df[col] = mf_df[col].fillna(0)
    cols_to_normalize = [
        'Ast', 'xAG', 'KP', 'PrgP', 'Cmp', 'Att', 'TB', 'Crs', 'CK', '1/3', 'PPA',
        'Touches', 'PrgC', 'Succ', 'Att_stats_possession', 'PrgR',
        'Gls', 'xG', 'Sh', 'SoT',
        'Tkl', 'TklW', 'Int', 'Blocks_stats_defense', 'Clr', 'Err'
    ]
    for col in cols_to_normalize:
        if col in mf_df.columns:
            mf_df[f'{col}_p90'] = (mf_df[col] / mf_df['90s']).round(3)
    mf_df.replace([np.inf, -np.inf], 0, inplace=True)

    print("Midfielder data processing complete.")
    print(f"Original DataFrame size: {df.shape}")
    print(f"New Midfielder DataFrame size: {mf_df.shape}")
    
    return mf_df

midfielders_df = clean_midfielder_data()
print("\n--- Midfielder DataFrame Info ---")
midfielders_df.describe()
midfielders_df.to_csv("Processed-Midfielders.csv", index=False)
print("\nSaved processed data to 'Processed-Midfielders.csv'")

Midfielder data processing complete.
Original DataFrame size: (2854, 267)
New Midfielder DataFrame size: (908, 67)

--- Midfielder DataFrame Info ---

Saved processed data to 'Processed-Midfielders.csv'
