#### Imports & Constants

In [None]:
import pathlib
import pandas as pd

from sklearn.preprocessing import MinMaxScaler

# Shared scaler instance (default settings); the normalization helpers in this
# notebook call fit_transform on it, so it is re-fitted for every column.
scaler = MinMaxScaler()

# Widen the notebook display limits
# (110 is the number of columns in the dataset).
pd.options.display.max_columns = 110
pd.options.display.max_rows = 1000

# Location of the raw players CSV, relative to the notebook's working directory.
DATASET_PATH = pathlib.Path("..", "data", "raw", "players.csv")

# Positional indices of the CSV columns that get loaded (passed as `usecols`).
MAIN_COLUMNS = [7, 14, 15, 31, 32, 33, 35, 40, 41, 42, 43, 44, 45, 46]
DETAILED_COLUMNS = [*range(47, 76)]  # contiguous run 47..75 inclusive
USED_COLUMNS = [*MAIN_COLUMNS, *DETAILED_COLUMNS]

print(USED_COLUMNS)

#### Load Data 

In [None]:
# Read only the selected column indices from the raw CSV.
df = pd.read_csv(filepath_or_buffer=DATASET_PATH, usecols=USED_COLUMNS)

### Process positions

In [None]:
# Each main position (key) with the raw position codes that are folded into it.
_POSITION_GROUPS = {
    "ST": ("ST", "CF"),
    "LW": ("LW", "LF", "LM"),
    "RW": ("RW", "RM", "RF"),
    "CM": ("CM", "CAM", "CDM"),
    "RB": ("RB", "RWB"),
    "LB": ("LB", "LWB"),
    "CB": ("CB",),
}

# Flat lookup table: raw position code -> main position.
_POSITIONS_MAPPER = {
    raw_code: main_position
    for main_position, raw_codes in _POSITION_GROUPS.items()
    for raw_code in raw_codes
}


def _drop_GKs(df: pd.DataFrame) -> None:
    """
    Removes every goalkeeper row from the dataframe inplace
    Effect:
        - Rows whose player_positions string contains "GK" are dropped
        - The remaining rows keep their original index labels
    """
    is_goalkeeper = df["player_positions"].str.contains("GK")
    goalkeeper_labels = df.loc[is_goalkeeper].index
    df.drop(index=goalkeeper_labels, inplace=True)


def process_positions(df: pd.DataFrame) -> None:
    """
    Turns the player_positions column into a "position" column inplace
    Effect:
        - Drops all goalkeepers
        - Keeps only the first position listed in the comma-separated string
        - Maps that position to its main position via _POSITIONS_MAPPER
          (codes missing from the mapper become NaN)
        - Stores the result in a "position" column and removes player_positions
    """
    # goalkeepers go first so they never reach the mapping step
    _drop_GKs(df)
    # take the raw column out of the frame and keep its first listed position
    first_listed = df.pop("player_positions").str.split(",").str[0]
    # assigning a new column name appends it at the end of the dataframe
    df["position"] = first_listed.map(_POSITIONS_MAPPER)

##### Process height & weight

In [None]:
def process_height(df: pd.DataFrame) -> None:
    """
    Min-max scales the height_cm column inplace
    Effect:
        - Re-fits the shared scaler on height_cm
        - Overwrites height_cm with the scaled values [0, 1]
    """
    column = "height_cm"
    scaled = scaler.fit_transform(df[[column]])
    df[column] = scaled
    
def process_weight(df: pd.DataFrame) -> None:
    """
    Min-max scales the weight_kg column inplace
    Effect:
        - Re-fits the shared scaler on weight_kg
        - Overwrites weight_kg with the scaled values [0, 1]
    """
    column = "weight_kg"
    scaled = scaler.fit_transform(df[[column]])
    df[column] = scaled

##### Process preferred foot, weak foot & skill moves

In [None]:
# TODO: one hot encode
def process_preferred_foot(df: pd.DataFrame) -> None:
    """
    Encodes the preferred_foot column as a binary flag inplace
    Effect:
        - "Left" becomes 0
        - "Right" becomes 1
        - Any other value becomes NaN (it has no entry in the mapping)
    """
    foot_codes = {"Left": 0, "Right": 1}
    encoded = df["preferred_foot"].map(foot_codes)
    df["preferred_foot"] = encoded

def process_weak_foot(df: pd.DataFrame) -> None:
    """
    Min-max scales the weak_foot column inplace
    Effect:
        - Re-fits the shared scaler on weak_foot
        - Overwrites weak_foot with the scaled values [0, 1]
    """
    column = "weak_foot"
    scaled = scaler.fit_transform(df[[column]])
    df[column] = scaled
    
def process_skill_moves(df: pd.DataFrame) -> None:
    """
    Min-max scales the skill_moves column inplace
    Effect:
        - Re-fits the shared scaler on skill_moves
        - Overwrites skill_moves with the scaled values [0, 1]
    """
    column = "skill_moves"
    scaled = scaler.fit_transform(df[[column]])
    df[column] = scaled

##### Process pace, shooting, passing, dribbling, defending & physic

In [None]:
def process_pace(df: pd.DataFrame) -> None:
    """
    Min-max scales the pace column inplace
    Effect:
        - Re-fits the shared scaler on pace
        - Overwrites pace with the scaled values [0, 1]
    """
    column = "pace"
    scaled = scaler.fit_transform(df[[column]])
    df[column] = scaled

def process_shooting(df: pd.DataFrame) -> None:
    """
    Min-max scales the shooting column inplace
    Effect:
        - Re-fits the shared scaler on shooting
        - Overwrites shooting with the scaled values [0, 1]
    """
    column = "shooting"
    scaled = scaler.fit_transform(df[[column]])
    df[column] = scaled
    
def process_passing(df: pd.DataFrame) -> None:
    """
    Min-max scales the passing column inplace
    Effect:
        - Re-fits the shared scaler on passing
        - Overwrites passing with the scaled values [0, 1]
    """
    column = "passing"
    scaled = scaler.fit_transform(df[[column]])
    df[column] = scaled
    
def process_dribbling(df: pd.DataFrame) -> None:
    """
    Min-max scales the dribbling column inplace
    Effect:
        - Re-fits the shared scaler on dribbling
        - Overwrites dribbling with the scaled values [0, 1]
    """
    column = "dribbling"
    scaled = scaler.fit_transform(df[[column]])
    df[column] = scaled
    
def process_defending(df: pd.DataFrame) -> None:
    """
    Min-max scales the defending column inplace
    Effect:
        - Re-fits the shared scaler on defending
        - Overwrites defending with the scaled values [0, 1]
    """
    column = "defending"
    scaled = scaler.fit_transform(df[[column]])
    df[column] = scaled
    
def process_physic(df: pd.DataFrame) -> None:
    """
    Min-max scales the physic column inplace
    Effect:
        - Re-fits the shared scaler on physic
        - Overwrites physic with the scaled values [0, 1]
    """
    column = "physic"
    scaled = scaler.fit_transform(df[[column]])
    df[column] = scaled

#### Process Data

In [None]:
""" 
encode:
    nation_position with custom positions (target)
    work rate with custom work rate
    player traits with custom traits
remove:
    player_positions
    body type
    goalkeepers
    goalkeeping attributes
"""

# Run every preprocessing step in order; each one mutates df in place.
# process_positions comes first because it is the step that drops goalkeepers.
for _step in (
    process_positions,
    process_height,
    process_weight,
    process_preferred_foot,
    process_weak_foot,
    process_skill_moves,
    process_pace,
    process_shooting,
    process_passing,
    process_dribbling,
    process_defending,
    process_physic,
):
    _step(df)

# Preview the first rows of the processed dataframe.
df.head()

#### Save Data