
# Inference

In [452]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder, RobustScaler
import lightgbm as lgb
import numpy as np
import joblib

In [453]:
# Locations of the persisted artefacts, relative to the notebook's working directory.
model_path = './models/rating_model.pkl'
scaler_path = "./models/data_scaler.pkl"


def load_model(model_path, scaler_path):
    """Load the persisted model and scaler from disk.

    Parameters
    ----------
    model_path : path of the serialized model file.
    scaler_path : path of the serialized scaler file.

    Returns
    -------
    tuple
        ``(model, scaler)`` exactly as returned by ``joblib.load``.
    """
    # NOTE: joblib.load unpickles its input, which can execute arbitrary code;
    # only point these paths at artefacts from a trusted source.
    return joblib.load(model_path), joblib.load(scaler_path)

def predict(model, data):
    """Run ``model.predict`` on ``data`` and return its result unchanged.

    Parameters
    ----------
    model : any object exposing a ``predict(data)`` method.
    data : the input forwarded as-is to ``model.predict``.
    """
    predictions = model.predict(data)
    return predictions

In [454]:
# Read the fixture/player statistics JSON export into the raw frame.
file_path = './data/fixture_player_stats.json'
df = pd.read_json(path_or_buf=file_path)

df.head(n=5)

Unnamed: 0,away_team_id,captain,cards_red,cards_yellow,dribbles_attempts,dribbles_past,dribbles_success,duels_total,duels_won,fixture_id,fouls_committed,fouls_drawn,games_minutes,games_substitute,goals_assists,goals_away,goals_conceded,goals_home,goals_saves,goals_total,home_team_id,league_round,offsides,passes_accuracy,passes_key,passes_total,penalty_committed,penalty_missed,penalty_saved,penalty_scored,penalty_won,player_id,position,rating,result_status,shots_on,shots_total,tackles_blocks,tackles_interceptions,tackles_total,team_id
0,3,False,0,0,,,,,,492,,,,True,,3,0,3,,,4,12,,,,,,0,,0,,8261,G,,D,,,,,,4
1,3,False,0,0,1.0,,,8.0,5.0,492,,,90.0,False,1.0,3,0,3,,,4,12,,49.0,1.0,56.0,,0,,0,,8273,D,6.9,D,1.0,2.0,,1.0,1.0,4
2,3,False,0,0,,,,,,492,,,,True,,3,0,3,,,4,12,,,,,,0,,0,,8528,D,,D,,,,,,4
3,3,False,0,0,2.0,,2.0,7.0,5.0,492,,,79.0,False,0.0,3,0,3,,1.0,4,12,,17.0,1.0,19.0,,0,,0,,8662,M,7.3,D,2.0,2.0,,1.0,2.0,4
4,3,False,0,0,3.0,,3.0,6.0,5.0,492,,1.0,65.0,False,0.0,3,0,3,,,4,12,1.0,19.0,2.0,20.0,,0,,0,,8693,M,7.2,D,,1.0,,,1.0,3


In [455]:
# Print the column labels of the raw frame.
print(df.columns)

Index(['away_team_id', 'captain', 'cards_red', 'cards_yellow',
       'dribbles_attempts', 'dribbles_past', 'dribbles_success', 'duels_total',
       'duels_won', 'fixture_id', 'fouls_committed', 'fouls_drawn',
       'games_minutes', 'games_substitute', 'goals_assists', 'goals_away',
       'goals_conceded', 'goals_home', 'goals_saves', 'goals_total',
       'home_team_id', 'league_round', 'offsides', 'passes_accuracy',
       'passes_key', 'passes_total', 'penalty_committed', 'penalty_missed',
       'penalty_saved', 'penalty_scored', 'penalty_won', 'player_id',
       'position', 'rating', 'result_status', 'shots_on', 'shots_total',
       'tackles_blocks', 'tackles_interceptions', 'tackles_total', 'team_id'],
      dtype='object')


In [456]:
# Number of rows in the raw frame.
df.shape[0]

459

## Data Preparation

In [457]:
# Done up front because the post-processing step relies on zero-filled values.
# Every missing value in every column becomes 0.
# NOTE(review): this also zero-fills `rating` — confirm that is intended.
df_prep = df.fillna(value=0)
df_prep.head(n=5)

Unnamed: 0,away_team_id,captain,cards_red,cards_yellow,dribbles_attempts,dribbles_past,dribbles_success,duels_total,duels_won,fixture_id,fouls_committed,fouls_drawn,games_minutes,games_substitute,goals_assists,goals_away,goals_conceded,goals_home,goals_saves,goals_total,home_team_id,league_round,offsides,passes_accuracy,passes_key,passes_total,penalty_committed,penalty_missed,penalty_saved,penalty_scored,penalty_won,player_id,position,rating,result_status,shots_on,shots_total,tackles_blocks,tackles_interceptions,tackles_total,team_id
0,3,False,0,0,0.0,0.0,0.0,0.0,0.0,492,0.0,0.0,0.0,True,0.0,3,0,3,0.0,0.0,4,12,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,8261,G,0.0,D,0.0,0.0,0.0,0.0,0.0,4
1,3,False,0,0,1.0,0.0,0.0,8.0,5.0,492,0.0,0.0,90.0,False,1.0,3,0,3,0.0,0.0,4,12,0.0,49.0,1.0,56.0,0.0,0,0.0,0,0.0,8273,D,6.9,D,1.0,2.0,0.0,1.0,1.0,4
2,3,False,0,0,0.0,0.0,0.0,0.0,0.0,492,0.0,0.0,0.0,True,0.0,3,0,3,0.0,0.0,4,12,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,8528,D,0.0,D,0.0,0.0,0.0,0.0,0.0,4
3,3,False,0,0,2.0,0.0,2.0,7.0,5.0,492,0.0,0.0,79.0,False,0.0,3,0,3,0.0,1.0,4,12,0.0,17.0,1.0,19.0,0.0,0,0.0,0,0.0,8662,M,7.3,D,2.0,2.0,0.0,1.0,2.0,4
4,3,False,0,0,3.0,0.0,3.0,6.0,5.0,492,0.0,1.0,65.0,False,0.0,3,0,3,0.0,0.0,4,12,1.0,19.0,2.0,20.0,0.0,0,0.0,0,0.0,8693,M,7.2,D,0.0,1.0,0.0,0.0,1.0,3


In [458]:
# Keep only the rows with games_minutes > 0.
df_filtered_rows = df_prep.loc[df_prep['games_minutes'].gt(0)]
df_filtered_rows.head(n=5)

Unnamed: 0,away_team_id,captain,cards_red,cards_yellow,dribbles_attempts,dribbles_past,dribbles_success,duels_total,duels_won,fixture_id,fouls_committed,fouls_drawn,games_minutes,games_substitute,goals_assists,goals_away,goals_conceded,goals_home,goals_saves,goals_total,home_team_id,league_round,offsides,passes_accuracy,passes_key,passes_total,penalty_committed,penalty_missed,penalty_saved,penalty_scored,penalty_won,player_id,position,rating,result_status,shots_on,shots_total,tackles_blocks,tackles_interceptions,tackles_total,team_id
1,3,False,0,0,1.0,0.0,0.0,8.0,5.0,492,0.0,0.0,90.0,False,1.0,3,0,3,0.0,0.0,4,12,0.0,49.0,1.0,56.0,0.0,0,0.0,0,0.0,8273,D,6.9,D,1.0,2.0,0.0,1.0,1.0,4
3,3,False,0,0,2.0,0.0,2.0,7.0,5.0,492,0.0,0.0,79.0,False,0.0,3,0,3,0.0,1.0,4,12,0.0,17.0,1.0,19.0,0.0,0,0.0,0,0.0,8662,M,7.3,D,2.0,2.0,0.0,1.0,2.0,4
4,3,False,0,0,3.0,0.0,3.0,6.0,5.0,492,0.0,1.0,65.0,False,0.0,3,0,3,0.0,0.0,4,12,1.0,19.0,2.0,20.0,0.0,0,0.0,0,0.0,8693,M,7.2,D,0.0,1.0,0.0,0.0,1.0,3
5,3,False,0,0,2.0,1.0,2.0,5.0,3.0,492,0.0,0.0,82.0,False,0.0,3,0,3,0.0,2.0,4,12,0.0,24.0,1.0,31.0,0.0,0,0.0,0,0.0,8696,M,8.3,D,2.0,3.0,0.0,0.0,1.0,3
6,3,False,0,0,0.0,0.0,0.0,0.0,0.0,492,0.0,0.0,8.0,True,0.0,3,0,3,0.0,0.0,4,12,0.0,4.0,0.0,5.0,0.0,0,0.0,0,0.0,8699,F,6.9,D,1.0,1.0,0.0,0.0,0.0,3


In [459]:
# Print how many rows survive the games_minutes filter.
print(df_filtered_rows.shape[0])

312


In [460]:
# Column filter: the rating, the categorical descriptors and the per-match
# statistics are retained; every other column of df_filtered_rows is left out.
columns_to_keep = [
    'rating', 'position', 'result_status', 'games_minutes',
    'games_substitute', 'offsides', 'shots_total', 'shots_on',
    'goals_total', 'goals_conceded', 'goals_assists', 'goals_saves',
    'passes_total', 'passes_key', 'passes_accuracy', 'tackles_total',
    'tackles_blocks', 'tackles_interceptions', 'duels_total', 'duels_won',
    'dribbles_attempts', 'dribbles_success', 'dribbles_past', 'fouls_drawn',
    'fouls_committed', 'cards_yellow', 'cards_red', 'penalty_won',
    'penalty_committed', 'penalty_scored', 'penalty_missed',
    'penalty_saved'
]

# Restrict the frame to the listed columns. The explicit .copy() makes
# df_filtered an independent frame, so later column assignments do not write
# into a slice of df_filtered_rows (which raises SettingWithCopyWarning).
df_filtered = df_filtered_rows[columns_to_keep].copy()

df_filtered.head()

Unnamed: 0,rating,position,result_status,games_minutes,games_substitute,offsides,shots_total,shots_on,goals_total,goals_conceded,goals_assists,goals_saves,passes_total,passes_key,passes_accuracy,tackles_total,tackles_blocks,tackles_interceptions,duels_total,duels_won,dribbles_attempts,dribbles_success,dribbles_past,fouls_drawn,fouls_committed,cards_yellow,cards_red,penalty_won,penalty_committed,penalty_scored,penalty_missed,penalty_saved
1,6.9,D,D,90.0,False,0.0,2.0,1.0,0.0,0,1.0,0.0,56.0,1.0,49.0,1.0,0.0,1.0,8.0,5.0,1.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0
3,7.3,M,D,79.0,False,0.0,2.0,2.0,1.0,0,0.0,0.0,19.0,1.0,17.0,2.0,0.0,1.0,7.0,5.0,2.0,2.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0
4,7.2,M,D,65.0,False,1.0,1.0,0.0,0.0,0,0.0,0.0,20.0,2.0,19.0,1.0,0.0,0.0,6.0,5.0,3.0,3.0,0.0,1.0,0.0,0,0,0.0,0.0,0,0,0.0
5,8.3,M,D,82.0,False,0.0,3.0,2.0,2.0,0,0.0,0.0,31.0,1.0,24.0,1.0,0.0,0.0,5.0,3.0,2.0,2.0,1.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0
6,6.9,F,D,8.0,True,0.0,1.0,1.0,0.0,0,0.0,0.0,5.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0


In [461]:
# Mapping from the single-letter position codes to their full names.
position_mapping = {
    "G": "Goalkeeper",
    "D": "Defender",
    "M": "Midfielder",
    "F": "Attacker"
}

# Apply the mapping to the 'position' column.
# The codes are always read from df_filtered_rows (never from the already
# mapped column), so re-running this cell gives the same result instead of
# turning 'Defender', 'Midfielder', ... into NaN. assign() builds a new frame,
# which also avoids assigning into a slice of another DataFrame.
df_filtered = df_filtered.assign(
    position=df_filtered_rows['position'].map(position_mapping)
)
df_filtered.head()

Unnamed: 0,rating,position,result_status,games_minutes,games_substitute,offsides,shots_total,shots_on,goals_total,goals_conceded,goals_assists,goals_saves,passes_total,passes_key,passes_accuracy,tackles_total,tackles_blocks,tackles_interceptions,duels_total,duels_won,dribbles_attempts,dribbles_success,dribbles_past,fouls_drawn,fouls_committed,cards_yellow,cards_red,penalty_won,penalty_committed,penalty_scored,penalty_missed,penalty_saved
1,6.9,Defender,D,90.0,False,0.0,2.0,1.0,0.0,0,1.0,0.0,56.0,1.0,49.0,1.0,0.0,1.0,8.0,5.0,1.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0
3,7.3,Midfielder,D,79.0,False,0.0,2.0,2.0,1.0,0,0.0,0.0,19.0,1.0,17.0,2.0,0.0,1.0,7.0,5.0,2.0,2.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0
4,7.2,Midfielder,D,65.0,False,1.0,1.0,0.0,0.0,0,0.0,0.0,20.0,2.0,19.0,1.0,0.0,0.0,6.0,5.0,3.0,3.0,0.0,1.0,0.0,0,0,0.0,0.0,0,0,0.0
5,8.3,Midfielder,D,82.0,False,0.0,3.0,2.0,2.0,0,0.0,0.0,31.0,1.0,24.0,1.0,0.0,0.0,5.0,3.0,2.0,2.0,1.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0
6,6.9,Attacker,D,8.0,True,0.0,1.0,1.0,0.0,0,0.0,0.0,5.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0


In [462]:
# List the distinct values found in the 'position' column.
df_filtered['position'].unique()

array(['Defender', 'Midfielder', 'Attacker', 'Goalkeeper'], dtype=object)

### Feature engineering

In [463]:
# Work on an independent copy before adding the engineered columns.
df_filtered = df_filtered.copy()

# Weight applied to each "positive" event column.
positive_weights = {
    'goals_total': 3,        # goal: +3
    'goals_assists': 1,      # assist: +1
    'penalty_scored': 3,     # penalty scored: +3
    'penalty_saved': 3,      # penalty saved: +3
    'penalty_won': 1         # penalty won: +1
}

# Weight applied to each "negative" event column.
negative_weights = {
    'penalty_missed': -3,    # penalty missed: -3
    'goals_conceded': -1,    # goal conceded: -1
    'penalty_committed': -1, # penalty conceded: -1
    'cards_red': -1,         # red card: -1
    'cards_yellow': -0.5     # yellow card: -0.5
}

# Column lists, in the same order as the weight tables above.
positive_cols = list(positive_weights)
negative_cols = list(negative_weights)


def _weighted_total(frame, columns, weights):
    """Return the row-wise sum of ``frame[col] * weights[col]`` over ``columns``."""
    total = 0
    for column in columns:
        total = total + frame[column] * weights[column]
    return total


# Positive contributions: weighted sum of the positive event columns.
df_filtered['positive_contributions'] = _weighted_total(df_filtered, positive_cols, positive_weights)

# Negative contributions: weighted sum of the negative event columns.
df_filtered['negative_contributions'] = _weighted_total(df_filtered, negative_cols, negative_weights)

df_filtered.head(n=5)


Unnamed: 0,rating,position,result_status,games_minutes,games_substitute,offsides,shots_total,shots_on,goals_total,goals_conceded,goals_assists,goals_saves,passes_total,passes_key,passes_accuracy,tackles_total,tackles_blocks,tackles_interceptions,duels_total,duels_won,dribbles_attempts,dribbles_success,dribbles_past,fouls_drawn,fouls_committed,cards_yellow,cards_red,penalty_won,penalty_committed,penalty_scored,penalty_missed,penalty_saved,positive_contributions,negative_contributions
1,6.9,Defender,D,90.0,False,0.0,2.0,1.0,0.0,0,1.0,0.0,56.0,1.0,49.0,1.0,0.0,1.0,8.0,5.0,1.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,1.0,0.0
3,7.3,Midfielder,D,79.0,False,0.0,2.0,2.0,1.0,0,0.0,0.0,19.0,1.0,17.0,2.0,0.0,1.0,7.0,5.0,2.0,2.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,3.0,0.0
4,7.2,Midfielder,D,65.0,False,1.0,1.0,0.0,0.0,0,0.0,0.0,20.0,2.0,19.0,1.0,0.0,0.0,6.0,5.0,3.0,3.0,0.0,1.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0
5,8.3,Midfielder,D,82.0,False,0.0,3.0,2.0,2.0,0,0.0,0.0,31.0,1.0,24.0,1.0,0.0,0.0,5.0,3.0,2.0,2.0,1.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,6.0,0.0
6,6.9,Attacker,D,8.0,True,0.0,1.0,1.0,0.0,0,0.0,0.0,5.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0


In [464]:
# Bucket 'games_minutes' into coarse steps.
# Intervals are left-closed (right=False):
#   [0, 16) -> 0, [16, 45) -> 1, [45, 58) -> 2, [58, inf) -> 4
# NOTE(review): label 3 is skipped — presumably to match the encoding used
# when the model was trained; confirm.
bins = [0, 16, 45, 58, float("inf")]
labels = [0, 1, 2, 4]

# Add the bucketed column next to the raw minutes.
minutes_step = pd.cut(
    x=df_filtered['games_minutes'],
    bins=bins,
    right=False,
    labels=labels,
)
df_filtered['games_minutes_step'] = minutes_step

# Show raw minutes beside their assigned step for a quick visual check.
print(df_filtered.loc[:, ['games_minutes', 'games_minutes_step']].head(10))
df_filtered.head(n=5)

    games_minutes games_minutes_step
1            90.0                  4
3            79.0                  4
4            65.0                  4
5            82.0                  4
6             8.0                  0
7             9.0                  0
9            66.0                  4
10           81.0                  4
12           25.0                  1
13            9.0                  0


Unnamed: 0,rating,position,result_status,games_minutes,games_substitute,offsides,shots_total,shots_on,goals_total,goals_conceded,goals_assists,goals_saves,passes_total,passes_key,passes_accuracy,tackles_total,tackles_blocks,tackles_interceptions,duels_total,duels_won,dribbles_attempts,dribbles_success,dribbles_past,fouls_drawn,fouls_committed,cards_yellow,cards_red,penalty_won,penalty_committed,penalty_scored,penalty_missed,penalty_saved,positive_contributions,negative_contributions,games_minutes_step
1,6.9,Defender,D,90.0,False,0.0,2.0,1.0,0.0,0,1.0,0.0,56.0,1.0,49.0,1.0,0.0,1.0,8.0,5.0,1.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,1.0,0.0,4
3,7.3,Midfielder,D,79.0,False,0.0,2.0,2.0,1.0,0,0.0,0.0,19.0,1.0,17.0,2.0,0.0,1.0,7.0,5.0,2.0,2.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,3.0,0.0,4
4,7.2,Midfielder,D,65.0,False,1.0,1.0,0.0,0.0,0,0.0,0.0,20.0,2.0,19.0,1.0,0.0,0.0,6.0,5.0,3.0,3.0,0.0,1.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,4
5,8.3,Midfielder,D,82.0,False,0.0,3.0,2.0,2.0,0,0.0,0.0,31.0,1.0,24.0,1.0,0.0,0.0,5.0,3.0,2.0,2.0,1.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,6.0,0.0,4
6,6.9,Attacker,D,8.0,True,0.0,1.0,1.0,0.0,0,0.0,0.0,5.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,0


### Encoding

In [465]:
# Load the model and the scaler from the configured paths.
model, scaler = load_model(model_path=model_path, scaler_path=scaler_path)

In [466]:
# Encoding
# Every encoding below is fixed in code rather than learned from the batch
# being scored, so the numeric layout does not depend on which values happen
# to be present in this particular input.

# Full set of position labels, in the (alphabetical) order get_dummies
# produces when all four are present.
position_levels = ['Attacker', 'Defender', 'Goalkeeper', 'Midfielder']
# Ordinal order of the match result: L -> 0, D -> 1, W -> 2.
result_levels = ['L', 'D', 'W']

# Drop a prediction column left over from a previous run of the inference
# cell, so re-running this cell never feeds 'fbrating' to the scaler.
encoding_input = df_filtered.drop(columns=['fbrating'], errors='ignore')

# Fail loudly on an unexpected position label instead of silently encoding
# it as an all-zero dummy row.
unknown_positions = set(encoding_input['position'].dropna()) - set(position_levels)
if unknown_positions:
    raise ValueError(f"Unexpected position labels: {sorted(unknown_positions)}")

result_codes = (
    OrdinalEncoder(categories=[result_levels])
    .fit_transform(encoding_input[['result_status']])
    .ravel()
    .astype(int)
)

df_encoded = encoding_input.assign(
    # Fixed categories make get_dummies emit all four position_* columns even
    # when a position is absent from this batch.
    position=pd.Categorical(encoding_input['position'], categories=position_levels),
    result_status=result_codes,
    # False -> 0, True -> 1. A LabelEncoder fitted on the batch would encode a
    # batch containing only True as 0.
    games_substitute=encoding_input['games_substitute'].astype(int),
    # The model input uses the bucketed minutes in place of the raw minutes.
    games_minutes=encoding_input['games_minutes_step'].astype(int),
).drop(columns='games_minutes_step')

df_encoded = pd.get_dummies(df_encoded, columns=['position'], drop_first=False)


# Scaling with the pre-fitted scaler; the original row index is preserved so
# the scaled rows stay aligned with df_filtered.
df_scaled = scaler.transform(df_encoded)
df_preprocessed = pd.DataFrame(df_scaled, columns=df_encoded.columns, index=df_encoded.index)

df_preprocessed.head(100)

Unnamed: 0,rating,result_status,games_minutes,games_substitute,offsides,shots_total,shots_on,goals_total,goals_conceded,goals_assists,goals_saves,passes_total,passes_key,passes_accuracy,tackles_total,tackles_blocks,tackles_interceptions,duels_total,duels_won,dribbles_attempts,dribbles_success,dribbles_past,fouls_drawn,fouls_committed,cards_yellow,cards_red,penalty_won,penalty_committed,penalty_scored,penalty_missed,penalty_saved,positive_contributions,negative_contributions,position_Attacker,position_Defender,position_Goalkeeper,position_Midfielder
0,0.000000,0.0,0.000000,0.0,0.0,2.0,1.0,0.0,0.0,1.0,0.0,1.178571,1.0,1.278351,0.0,0.0,1.0,0.333333,0.50,1.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
1,0.666667,0.0,0.000000,0.0,0.0,2.0,2.0,1.0,0.0,0.0,0.0,-0.142857,1.0,-0.041237,0.5,0.0,1.0,0.166667,0.50,2.0,2.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,1.0
2,0.500000,0.0,0.000000,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,-0.107143,2.0,0.041237,0.0,0.0,0.0,0.000000,0.50,3.0,3.0,0.0,1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,2.333333,0.0,0.000000,0.0,0.0,3.0,2.0,2.0,0.0,0.0,0.0,0.285714,1.0,0.247423,0.0,0.0,0.0,-0.166667,0.00,2.0,2.0,1.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,1.0
4,0.000000,0.0,-1.333333,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,-0.642857,0.0,-0.577320,-0.5,0.0,0.0,-1.000000,-0.75,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,-1.166667,0.5,-1.000000,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.750000,0.0,-0.659794,-0.5,0.0,0.0,-0.666667,-0.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
96,0.000000,-0.5,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.928571,3.0,0.783505,0.0,3.0,0.0,0.666667,0.25,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
97,-1.666667,-0.5,0.000000,0.0,0.0,0.0,0.0,0.0,3.0,0.0,1.0,0.607143,0.0,0.371134,-0.5,0.0,0.0,-1.000000,-0.75,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-3.0,0.0,0.0,1.0,0.0
98,0.500000,-0.5,0.000000,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.071429,0.0,1.278351,0.0,3.0,5.0,0.666667,0.75,0.0,0.0,0.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [467]:
# Show the column labels of the frame that is passed to the model.
df_preprocessed.columns

Index(['rating', 'result_status', 'games_minutes', 'games_substitute',
       'offsides', 'shots_total', 'shots_on', 'goals_total', 'goals_conceded',
       'goals_assists', 'goals_saves', 'passes_total', 'passes_key',
       'passes_accuracy', 'tackles_total', 'tackles_blocks',
       'tackles_interceptions', 'duels_total', 'duels_won',
       'dribbles_attempts', 'dribbles_success', 'dribbles_past', 'fouls_drawn',
       'fouls_committed', 'cards_yellow', 'cards_red', 'penalty_won',
       'penalty_committed', 'penalty_scored', 'penalty_missed',
       'penalty_saved', 'positive_contributions', 'negative_contributions',
       'position_Attacker', 'position_Defender', 'position_Goalkeeper',
       'position_Midfielder'],
      dtype='object')

## Inference


In [468]:
# Inference: score the preprocessed rows and store the result as 'fbrating'.
predictions = predict(model=model, data=df_preprocessed)

df_filtered.loc[:, 'fbrating'] = predictions
df_filtered



Unnamed: 0,rating,position,result_status,games_minutes,games_substitute,offsides,shots_total,shots_on,goals_total,goals_conceded,goals_assists,goals_saves,passes_total,passes_key,passes_accuracy,tackles_total,tackles_blocks,tackles_interceptions,duels_total,duels_won,dribbles_attempts,dribbles_success,dribbles_past,fouls_drawn,fouls_committed,cards_yellow,cards_red,penalty_won,penalty_committed,penalty_scored,penalty_missed,penalty_saved,positive_contributions,negative_contributions,games_minutes_step,fbrating
1,6.9,Defender,D,90.0,False,0.0,2.0,1.0,0.0,0,1.0,0.0,56.0,1.0,49.0,1.0,0.0,1.0,8.0,5.0,1.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,1.0,0.0,4,6.387718
3,7.3,Midfielder,D,79.0,False,0.0,2.0,2.0,1.0,0,0.0,0.0,19.0,1.0,17.0,2.0,0.0,1.0,7.0,5.0,2.0,2.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,3.0,0.0,4,6.988884
4,7.2,Midfielder,D,65.0,False,1.0,1.0,0.0,0.0,0,0.0,0.0,20.0,2.0,19.0,1.0,0.0,0.0,6.0,5.0,3.0,3.0,0.0,1.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,4,6.097414
5,8.3,Midfielder,D,82.0,False,0.0,3.0,2.0,2.0,0,0.0,0.0,31.0,1.0,24.0,1.0,0.0,0.0,5.0,3.0,2.0,2.0,1.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,6.0,0.0,4,7.444759
6,6.9,Attacker,D,8.0,True,0.0,1.0,1.0,0.0,0,0.0,0.0,5.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,0,5.805921
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
449,6.9,Midfielder,L,45.0,False,1.0,0.0,0.0,0.0,0,0.0,0.0,14.0,0.0,9.0,2.0,0.0,1.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,2,5.593724
454,6.9,Defender,L,90.0,False,0.0,0.0,0.0,0.0,0,0.0,0.0,99.0,0.0,93.0,1.0,1.0,1.0,6.0,3.0,1.0,0.0,1.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,4,5.460370
455,6.7,Defender,W,65.0,False,0.0,0.0,0.0,0.0,0,0.0,0.0,22.0,0.0,18.0,0.0,0.0,2.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,4,5.907806
457,6.9,Midfielder,W,65.0,False,0.0,0.0,0.0,0.0,0,0.0,0.0,27.0,0.0,24.0,1.0,1.0,1.0,8.0,4.0,2.0,1.0,0.0,2.0,1.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,4,6.390205


## Post processing

In [None]:
# NOTE: this rebinds `df` (previously the raw frame) to a copy of df_filtered;
# later cells rely on that name, so it is kept.
df = df_filtered.copy()


# Event columns that count as "significant" on their own.
significant_columns = ['goals_total', 'goals_assists', 'penalty_scored', 'penalty_saved', 
                       'penalty_missed', 'penalty_committed', 'penalty_won', 'cards_red', 'cards_yellow']

# Every remaining column except descriptors, the rating and the prediction.
# NOTE(review): this list also contains the engineered columns
# (positive_contributions, negative_contributions, games_minutes_step) —
# confirm they are meant to be part of the activity sum below.
non_significant_columns = [col for col in df.columns if col not in significant_columns + ['position', 'rating', 'result_status', 'games_substitute', 'games_minutes', 'fbrating']]

# Amount subtracted from `rating`, by position.
# At this point the 'position' column holds the full names ('Attacker', ...),
# so the table is keyed by full name; the single-letter codes are kept as
# aliases. With letter keys only, the lookup returned NaN for every row.
position_penalties = {
    'F': 1.05,
    'D': 0.83,
    'G': 0.28,
    'M': 0.96,
    'Attacker': 1.05,
    'Defender': 0.83,
    'Goalkeeper': 0.28,
    'Midfielder': 0.96
}

# Thresholds of the outlier rule.
short_appearance_minutes = 22   # rule applies when games_minutes is below this
low_prediction_threshold = 6    # ... and the predicted fbrating is below this
min_activity_sum = 20           # minimum sum of the non-significant columns

# Outlier handling for short appearances with a low prediction.
is_short_and_low = (df['games_minutes'] < short_appearance_minutes) & (df['fbrating'] < low_prediction_threshold)
has_activity = (
    (df[significant_columns] > 0).any(axis=1)
    | (df[non_significant_columns].sum(axis=1) >= min_activity_sum)
)
# Fallback value: the rating minus the position-specific penalty.
rating_minus_penalty = df['rating'] - df['position'].map(position_penalties)

df['fbrating'] = np.where(
    is_short_and_low,
    # Enough activity -> fallback value; otherwise 0 (no rating).
    np.where(has_activity, rating_minus_penalty, 0),
    # Rule does not apply -> keep the model prediction.
    df['fbrating']
)

# Round to the nearest half point. A value of exactly 0 means "no rating" and
# becomes NaN (rather than None) so the column stays numeric instead of
# turning into an object column.
rounded_fbrating = np.round(df['fbrating'] * 2) / 2
df['fbrating'] = rounded_fbrating.where(df['fbrating'] != 0)

pd.set_option('display.max_columns', None)  # show every column

# Preview of the post-processed frame.
df.head(10)

Unnamed: 0,rating,position,result_status,games_minutes,games_substitute,offsides,shots_total,shots_on,goals_total,goals_conceded,goals_assists,goals_saves,passes_total,passes_key,passes_accuracy,tackles_total,tackles_blocks,tackles_interceptions,duels_total,duels_won,dribbles_attempts,dribbles_success,dribbles_past,fouls_drawn,fouls_committed,cards_yellow,cards_red,penalty_won,penalty_committed,penalty_scored,penalty_missed,penalty_saved,positive_contributions,negative_contributions,games_minutes_step,fbrating
1,6.9,Defender,D,90.0,False,0.0,2.0,1.0,0.0,0,1.0,0.0,56.0,1.0,49.0,1.0,0.0,1.0,8.0,5.0,1.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,1.0,0.0,4,6.5
3,7.3,Midfielder,D,79.0,False,0.0,2.0,2.0,1.0,0,0.0,0.0,19.0,1.0,17.0,2.0,0.0,1.0,7.0,5.0,2.0,2.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,3.0,0.0,4,7.0
4,7.2,Midfielder,D,65.0,False,1.0,1.0,0.0,0.0,0,0.0,0.0,20.0,2.0,19.0,1.0,0.0,0.0,6.0,5.0,3.0,3.0,0.0,1.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,4,6.0
5,8.3,Midfielder,D,82.0,False,0.0,3.0,2.0,2.0,0,0.0,0.0,31.0,1.0,24.0,1.0,0.0,0.0,5.0,3.0,2.0,2.0,1.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,6.0,0.0,4,7.5
6,6.9,Attacker,D,8.0,True,0.0,1.0,1.0,0.0,0,0.0,0.0,5.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,0,
7,6.6,Midfielder,D,9.0,True,0.0,0.0,0.0,0.0,0,0.0,0.0,3.0,1.0,3.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,0,
9,6.7,Attacker,D,66.0,False,1.0,1.0,1.0,0.0,0,0.0,0.0,19.0,1.0,16.0,2.0,0.0,0.0,9.0,3.0,1.0,0.0,1.0,0.0,1.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,4,5.5
10,6.5,Midfielder,D,81.0,False,0.0,2.0,1.0,0.0,0,0.0,0.0,31.0,1.0,26.0,0.0,1.0,0.0,9.0,3.0,1.0,0.0,1.0,3.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,4,6.0
12,6.7,Midfielder,D,25.0,True,0.0,0.0,0.0,0.0,0,0.0,0.0,9.0,1.0,9.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,1,6.0
13,6.2,Defender,D,9.0,True,0.0,0.0,0.0,0.0,0,0.0,0.0,5.0,0.0,5.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,0,


In [475]:
# Re-attach the columns (ids etc.) that exist in df_filtered_rows but not in df.
already_present = set(df.columns)
missing_columns = [name for name in df_filtered_rows.columns if name not in already_present]

# Column-by-column assignment; rows are aligned on the shared index.
for name in missing_columns:
    df[name] = df_filtered_rows[name]

In [476]:
# Lowest fbrating first (ascending is the sort_values default).
sorted_df = df.sort_values('fbrating')
sorted_df.head(n=50)

Unnamed: 0,rating,position,result_status,games_minutes,games_substitute,offsides,shots_total,shots_on,goals_total,goals_conceded,goals_assists,goals_saves,passes_total,passes_key,passes_accuracy,tackles_total,tackles_blocks,tackles_interceptions,duels_total,duels_won,dribbles_attempts,dribbles_success,dribbles_past,fouls_drawn,fouls_committed,cards_yellow,cards_red,penalty_won,penalty_committed,penalty_scored,penalty_missed,penalty_saved,positive_contributions,negative_contributions,games_minutes_step,fbrating,away_team_id,captain,fixture_id,goals_away,goals_home,home_team_id,league_round,player_id,team_id
205,6.5,Defender,L,77.0,False,0.0,0.0,0.0,0.0,0,0.0,0.0,25.0,0.0,17.0,5.0,0.0,0.0,11.0,8.0,0.0,0.0,1.0,0.0,1.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,4,5.0,6,False,491,1,2,10,12,9072,6
225,6.7,Defender,L,90.0,False,0.0,0.0,0.0,0.0,0,0.0,0.0,46.0,0.0,40.0,0.0,0.0,3.0,4.0,2.0,1.0,1.0,0.0,0.0,2.0,1,0,0.0,0.0,0,0,0.0,0.0,-0.5,4,5.0,6,False,491,1,2,10,12,22844,6
388,5.9,Attacker,L,75.0,False,0.0,3.0,1.0,0.0,0,0.0,0.0,20.0,1.0,13.0,0.0,0.0,0.0,11.0,2.0,1.0,0.0,0.0,1.0,2.0,1,0,0.0,0.0,0,0,0.0,0.0,-0.5,4,5.0,1,False,498,1,0,20,12,8877,20
165,6.6,Midfielder,L,90.0,False,0.0,1.0,0.0,0.0,0,0.0,0.0,51.0,0.0,39.0,1.0,0.0,1.0,9.0,5.0,0.0,0.0,0.0,2.0,3.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,4,5.0,11,False,499,3,2,9,12,8870,9
437,6.5,Defender,W,25.0,True,0.0,0.0,0.0,0.0,0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,1,5.0,117,False,500,2,1,116,12,22698,117
134,6.2,Midfielder,W,23.0,True,0.0,0.0,0.0,0.0,0,0.0,0.0,2.0,0.0,2.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,1,5.0,11,False,499,3,2,9,12,8410,11
16,6.2,Defender,D,90.0,False,0.0,0.0,0.0,0.0,0,0.0,0.0,67.0,0.0,65.0,0.0,0.0,0.0,7.0,1.0,0.0,0.0,0.0,0.0,1.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,4,5.0,3,False,492,3,3,4,12,8723,3
261,6.5,Defender,L,90.0,False,0.0,1.0,0.0,0.0,0,0.0,0.0,43.0,0.0,35.0,2.0,0.0,1.0,10.0,5.0,1.0,0.0,0.0,0.0,1.0,1,0,0.0,0.0,0,0,0.0,0.0,-0.5,4,5.0,14,False,493,1,3,12,12,8945,14
301,6.6,Midfielder,D,90.0,False,0.0,0.0,0.0,0.0,0,0.0,0.0,42.0,2.0,34.0,2.0,1.0,1.0,8.0,3.0,1.0,0.0,0.0,0.0,1.0,0,0,0.0,1.0,0,0,0.0,0.0,-1.0,4,5.0,5,False,495,1,1,15,12,8676,5
108,6.3,Defender,L,64.0,False,0.0,0.0,0.0,0.0,0,0.0,0.0,39.0,0.0,29.0,0.0,0.0,1.0,4.0,0.0,0.0,0.0,1.0,0.0,1.0,1,0,0.0,0.0,0,0,0.0,0.0,-0.5,4,5.0,13,False,496,0,2,8,12,8962,13
