#### Imports & Constants

In [277]:
import pathlib
import pandas as pd

from sklearn.preprocessing import MinMaxScaler

# Single scaler instance shared by the process_* functions below; every one of
# them calls fit_transform on it, so it is refit for each column it handles.
scaler = MinMaxScaler()

# Display settings: show all 110 dataset columns and up to 1000 rows.
pd.set_option("display.max_columns", 110)
pd.set_option("display.max_rows", 1000)

# Relative location of the raw players CSV.
DATASET_PATH = pathlib.Path("..", "data", "raw", "players.csv")

# Integer positions of the CSV columns that get loaded.
MAIN_COLUMNS = [7, 14, 15, 31, 32, 33, 35, *range(40, 47)]
DETAILED_COLUMNS = [*range(47, 76)]
USED_COLUMNS = [*MAIN_COLUMNS, *DETAILED_COLUMNS]

print(USED_COLUMNS)

[7, 14, 15, 31, 32, 33, 35, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75]


#### Load Data 

In [278]:
# Load only the columns whose integer positions are listed in USED_COLUMNS.
df = pd.read_csv(DATASET_PATH, usecols=USED_COLUMNS)

### Process positions

In [279]:
# Collapses each detailed position code onto the coarser position used as the
# final "position" value. Codes that are not listed here map to NaN.
_POSITIONS_MAPPER = {
    "ST": "ST", "CF": "ST",
    "LW": "LW", "LF": "LW", "LM": "LW",
    "RW": "RW", "RM": "RW", "RF": "RW",
    "CM": "CM", "CAM": "CM", "CDM": "CM",
    "RB": "RB", "RWB": "RB",
    "LB": "LB", "LWB": "LB",
    "CB": "CB"
}


def _drop_GKs(df: pd.DataFrame) -> None:
    """
    Drops all goalkeepers from the dataset inplace

    A row counts as a goalkeeper when its player_positions string contains
    "GK". Rows with a missing player_positions value are kept.
    """
    # na=False keeps the mask strictly boolean: a missing position would
    # otherwise produce NaN in the mask, which boolean indexing rejects.
    is_goalkeeper = df["player_positions"].str.contains("GK", na=False)
    df.drop(df[is_goalkeeper].index, inplace=True)


def process_positions(df: pd.DataFrame) -> None:
    """
    Processes the player_positions column inplace
    Effect:
        - Drops all goalkeepers
        - Maps the positions to the main positions (first position in the string)
        - Moves the column to the end of the dataframe
        - Renames the column to "position"
    Missing positions and positions absent from _POSITIONS_MAPPER become NaN.
    """
    # drop goalkeepers
    _drop_GKs(df)
    # keep the first listed position; strip stray whitespace so that values
    # such as " LWB" still match a mapper key
    primary = df["player_positions"].str.split(",").str[0].str.strip()
    # map positions
    df["player_positions"] = primary.map(_POSITIONS_MAPPER)
    # pop + reassign moves the column to the end under its new name
    df["position"] = df.pop("player_positions")

##### Process height & weight

In [280]:
def process_height(df: pd.DataFrame) -> None:
    """
    Rescales the height_cm column inplace
    Effect:
        - height_cm is replaced by its min-max normalized values [0, 1]
    """
    column = "height_cm"
    # The shared scaler is refit on this column alone.
    scaled = scaler.fit_transform(df[[column]])
    df[column] = scaled
    
def process_weight(df: pd.DataFrame) -> None:
    """
    Rescales the weight_kg column inplace
    Effect:
        - weight_kg is replaced by its min-max normalized values [0, 1]
    """
    column = "weight_kg"
    # The shared scaler is refit on this column alone.
    scaled = scaler.fit_transform(df[[column]])
    df[column] = scaled

##### Process preferred foot, weak foot & skill moves

In [281]:
# TODO: one hot encode
def process_preferred_foot(df: pd.DataFrame) -> None:
    """
    Encodes the preferred_foot column inplace
    Effect:
        - "Left" becomes 0
        - "Right" becomes 1
        - any other value becomes NaN
    """
    foot_codes = {"Left": 0, "Right": 1}
    encoded = df["preferred_foot"].map(foot_codes)
    df["preferred_foot"] = encoded

def process_weak_foot(df: pd.DataFrame) -> None:
    """
    Rescales the weak_foot column inplace
    Effect:
        - weak_foot is replaced by its min-max normalized values [0, 1]
    """
    column = "weak_foot"
    # The shared scaler is refit on this column alone.
    scaled = scaler.fit_transform(df[[column]])
    df[column] = scaled
    
def process_skill_moves(df: pd.DataFrame) -> None:
    """
    Rescales the skill_moves column inplace
    Effect:
        - skill_moves is replaced by its min-max normalized values [0, 1]
    """
    column = "skill_moves"
    # The shared scaler is refit on this column alone.
    scaled = scaler.fit_transform(df[[column]])
    df[column] = scaled

##### Process pace, shooting, passing, dribbling, defending & physic

In [282]:
def process_pace(df: pd.DataFrame) -> None:
    """
    Rescales the pace column inplace
    Effect:
        - pace is replaced by its min-max normalized values [0, 1]
    """
    column = "pace"
    # The shared scaler is refit on this column alone.
    scaled = scaler.fit_transform(df[[column]])
    df[column] = scaled

def process_shooting(df: pd.DataFrame) -> None:
    """
    Rescales the shooting column inplace
    Effect:
        - shooting is replaced by its min-max normalized values [0, 1]
    """
    column = "shooting"
    # The shared scaler is refit on this column alone.
    scaled = scaler.fit_transform(df[[column]])
    df[column] = scaled
    
def process_passing(df: pd.DataFrame) -> None:
    """
    Rescales the passing column inplace
    Effect:
        - passing is replaced by its min-max normalized values [0, 1]
    """
    column = "passing"
    # The shared scaler is refit on this column alone.
    scaled = scaler.fit_transform(df[[column]])
    df[column] = scaled
    
def process_dribbling(df: pd.DataFrame) -> None:
    """
    Rescales the dribbling column inplace
    Effect:
        - dribbling is replaced by its min-max normalized values [0, 1]
    """
    column = "dribbling"
    # The shared scaler is refit on this column alone.
    scaled = scaler.fit_transform(df[[column]])
    df[column] = scaled
    
def process_defending(df: pd.DataFrame) -> None:
    """
    Rescales the defending column inplace
    Effect:
        - defending is replaced by its min-max normalized values [0, 1]
    """
    column = "defending"
    # The shared scaler is refit on this column alone.
    scaled = scaler.fit_transform(df[[column]])
    df[column] = scaled
    
def process_physic(df: pd.DataFrame) -> None:
    """
    Rescales the physic column inplace
    Effect:
        - physic is replaced by its min-max normalized values [0, 1]
    """
    column = "physic"
    # The shared scaler is refit on this column alone.
    scaled = scaler.fit_transform(df[[column]])
    df[column] = scaled

#### Process Data

In [284]:
""" 
encode:
    nation_position with custom positions (target)
    work rate with custom work rate
    player traits with custom traits
remove:
    player_positions
    body type
    goalkeepers
    goalkeeping attributes
"""

# Apply every preprocessing step to df in place, in a fixed order.
# process_positions goes first so goalkeepers are dropped before any
# column is rescaled.
for step in (
    process_positions,
    process_height,
    process_weight,
    process_preferred_foot,
    process_weak_foot,
    process_skill_moves,
    process_pace,
    process_shooting,
    process_passing,
    process_dribbling,
    process_defending,
    process_physic,
):
    step(df)

# Preview the first rows of the processed frame.
df.head()

Unnamed: 0,height_cm,weight_kg,preferred_foot,weak_foot,skill_moves,work_rate,player_traits,pace,shooting,passing,dribbling,defending,physic,attacking_crossing,attacking_finishing,attacking_heading_accuracy,attacking_short_passing,attacking_volleys,skill_dribbling,skill_curve,skill_fk_accuracy,skill_long_passing,skill_ball_control,movement_acceleration,movement_sprint_speed,movement_agility,movement_reactions,movement_balance,power_shot_power,power_jumping,power_stamina,power_strength,power_long_shots,mentality_aggression,mentality_interceptions,mentality_positioning,mentality_vision,mentality_penalties,mentality_composure,defending_marking_awareness,defending_standing_tackle,defending_sliding_tackle,position
0,0.288462,0.295082,0,0.5,0.75,Medium/Low,"Finesse Shot, Speed Dribbler (AI), One Club Pl...",0.947368,0.9375,0.90411,1.0,0.168831,0.553846,84,94,71,89,85,96,89,90,76,96,96,90,94,94,95,80,73,77,60,88,48,22,92,90,76,,25,21,20,ST
1,0.596154,0.508197,1,0.75,1.0,High/Low,"Power Free-Kick, Flair, Long Shot Taker (AI), ...",0.947368,0.9875,0.835616,0.932432,0.233766,0.8,83,95,86,82,87,93,88,79,72,92,91,94,93,90,63,94,94,89,79,93,63,24,91,81,85,,22,31,23,LW
2,0.5,0.508197,0,0.25,0.75,High/Low,"Diver, Injury Prone, Avoids Using Weaker Foot,...",0.947368,0.9,0.863014,0.945946,0.233766,0.569231,80,85,50,86,86,93,85,83,76,90,93,93,93,89,91,86,61,78,65,90,47,39,89,84,80,,29,26,26,RW
3,0.788462,0.754098,1,0.75,0.75,Medium/Low,"Power Free-Kick, Leadership, Flair, Long Shot ...",0.723684,0.9625,0.835616,0.864865,0.25974,0.907692,76,91,76,84,92,88,80,80,76,90,74,77,86,85,41,93,72,78,93,88,84,20,86,83,91,,25,41,27,ST
5,0.307692,0.262295,1,0.75,0.75,High/Medium,"Finesse Shot, Playmaker (AI), Technical Dribbl...",0.710526,0.725,0.945205,0.932432,0.584416,0.553846,85,73,54,93,74,92,80,70,89,94,76,75,83,90,86,65,54,78,59,75,58,68,87,93,71,,57,57,56,CM


#### Save Data