In [None]:
from nba_api.stats.static import players
from nba_api.stats.endpoints import playergamelog, playercareerstats, boxscoretraditionalv2, commonplayerinfo
from nba_api.stats.library.parameters import SeasonAll
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from pprint import pprint
from tqdm.notebook import tqdm_notebook
import os

from pulp import LpMaximize, LpProblem, LpVariable, const, lpSum

In [None]:
def is_player_starter(game_id, player_id):
    """Tell whether a player was in the starting lineup of a given game.

    Fetches the traditional box score for ``game_id`` and looks for a row
    belonging to ``player_id`` whose ``START_POSITION`` is neither missing
    nor an empty string.

    Args:
        game_id: Game identifier passed to ``BoxScoreTraditionalV2``.
        player_id: Identifier compared against the ``PLAYER_ID`` column.

    Returns:
        bool: True when at least one matching row exists.
    """
    # The first data frame returned by the endpoint holds the player stats.
    box_lines = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id).get_data_frames()[0]

    same_player = box_lines['PLAYER_ID'] == player_id
    start_position = box_lines['START_POSITION']
    has_start_position = (start_position != '') & start_position.notna()

    return len(box_lines[same_player & has_start_position]) > 0

is_player_starter('0021300078', 203500) # Steven Adams' first game

In [None]:
# Fetch a player's career statistics
def get_player_career_stats(player_id:int) -> pd.DataFrame:
    """Return the first data frame of the ``PlayerCareerStats`` endpoint.

    Args:
        player_id: Identifier of the player to query.

    Returns:
        The first data frame exposed by the endpoint response.
    """
    return playercareerstats.PlayerCareerStats(player_id=player_id).get_data_frames()[0]

# Example call for player 203500 (Steven Adams)
get_player_career_stats(203500)

In [None]:
def triple_bonus(triples):
    """Bonus awarded for made three-pointers.

    Exactly 3 made threes are worth 3, exactly 4 are worth 4, and 5 or more
    are worth 5. Any other value earns no bonus.

    Args:
        triples: Number of three-pointers made.

    Returns:
        int: 0, 3, 4 or 5.
    """
    # Exact-match tiers first, then the capped top tier.
    for made, bonus in ((3, 3), (4, 4)):
        if triples == made:
            return bonus
    return 5 if triples >= 5 else 0

def double_bonus(pts, oreb, dreb, ast, blk, stl) -> int:
    """Bonus for reaching double figures in several statistical categories.

    The five categories are points, total rebounds (offensive + defensive),
    assists, blocks and steals. A category counts when its value is >= 10.

    Args:
        pts: Points scored.
        oreb: Offensive rebounds.
        dreb: Defensive rebounds.
        ast: Assists.
        blk: Blocks.
        stl: Steals.

    Returns:
        5 when exactly two categories count, 10 when exactly three count,
        50 when four or more count, otherwise 0.
    """
    reached = (
        pts >= 10,
        oreb + dreb >= 10,
        ast >= 10,
        blk >= 10,
        stl >= 10,
    )
    # Convert every flag to a plain int before adding. NumPy booleans (which
    # is what comparisons on NumPy scalars produce) add as a logical OR, so
    # `flag + flag` would stay True and the count would never reach 2.
    stat_sum = sum(int(flag) for flag in reached)

    if stat_sum >= 4:
        return 50
    if stat_sum == 3:
        return 10
    if stat_sum == 2:
        return 5
    return 0

# Example stat line in double_bonus argument order: pts, oreb, dreb, ast, blk, stl
stats = [12, 8, 2, 10, 5, 2]
double_bonus(*stats)

In [None]:
def calculate_PDK(game_stats: pd.Series) -> float:
    """Compute the PDK fantasy score for a single game line.

    Args:
        game_stats: One game's statistics, indexable by the column names
            'PTS', 'OREB', 'DREB', 'AST', 'STL', 'TOV', 'BLK', 'FG3M',
            'FGM', 'FGA', 'FTM', 'FTA', 'PF' and 'WL'. 'WL' is only used as
            a truth value (truthy means the 5% bonus is applied).

    Returns:
        The weighted score. When 'WL' is truthy, 5% of the score is added.
    """
    missed_field_goals = game_stats['FGA'] - game_stats['FGM']
    missed_free_throws = game_stats['FTA'] - game_stats['FTM']

    # Parenthesised sum instead of backslash continuations: a trailing
    # backslash followed by a comment line is easy to break when editing.
    PDK = (
        game_stats['PTS']
        + game_stats['OREB'] * 1.5
        + game_stats['DREB']
        + game_stats['AST'] * 1.25
        + game_stats['STL'] * 1.5
        + game_stats['TOV'] * -1.5
        + game_stats['BLK'] * 1.5
        + double_bonus(game_stats['PTS'], game_stats['OREB'], game_stats['DREB'],
                       game_stats['AST'], game_stats['BLK'], game_stats['STL'])
        + triple_bonus(game_stats['FG3M'])
        + missed_field_goals * -1
        + missed_free_throws * -1
        + (game_stats['PF'] >= 6) * -5  # -5 when the player has 6 or more fouls
    )
    # NOTE: a starting-lineup term (game_stats['STARTING_LINEUP']) was
    # sketched here but is not part of the score.

    if game_stats['WL']:
        PDK += PDK * 0.05
    return PDK

In [None]:
# Box-score columns handed to calculate_PDK for every game row
features_PDK = [
    'PTS', 'DREB', 'OREB', 'AST', 'STL', 'TOV', 'BLK',
    'FG3M', 'WL', 'FGM', 'FGA', 'FTM', 'FTA', 'PF',
]

# Model inputs: the raw box-score columns plus the two derived PDK columns
features = [*features_PDK, 'PDK', 'PDK_differenza']

# Name of the column the models are trained to predict
target = 'PDK_domani'

In [None]:
# Fetch a player's game-by-game statistics
def get_player_game_stats(player_id:int, date:str) -> pd.DataFrame:
    """Download a player's game log and add the PDK-derived columns.

    Args:
        player_id: Identifier of the player to query.
        date: Start date forwarded as ``date_from_nullable`` to
            ``PlayerGameLog``.

    Returns:
        The game log with 'WL' converted to booleans (True for 'W'),
        'VIDEO_AVAILABLE' removed, and three extra columns:
        'PDK' (score of the row), 'PDK_differenza' (the row's PDK minus the
        PDK of the following row) and the ``target`` column (PDK of the
        preceding row).

    NOTE(review): the column names suggest "difference from the previous
    game" and "next game's PDK", which holds only if rows are ordered newest
    game first -- confirm the ordering returned by the endpoint.
    """
    player_stats = playergamelog.PlayerGameLog(player_id=player_id, season = SeasonAll.all, date_from_nullable=date)
    player_stats_df = player_stats.get_data_frames()[0]

    # Not added on purpose: opponent/team columns parsed from 'MATCHUP', and a
    # 'STARTING_LINEUP' flag (is_player_starter needs one request per game,
    # which takes too long).

    # Turn the 'W'/'L' marker into a boolean
    player_stats_df['WL'] = player_stats_df['WL'].eq('W')

    # Score every game. On an empty log, apply(axis=1) hands back a frame
    # rather than a Series, which cannot be stored in a single column, so the
    # empty case gets an explicitly empty float column instead.
    if player_stats_df.empty:
        player_stats_df['PDK'] = pd.Series(dtype='float64')
    else:
        player_stats_df['PDK'] = player_stats_df.apply(lambda row: calculate_PDK(row[features_PDK]), axis=1)

    pdk_by_player = player_stats_df.groupby('Player_ID')['PDK']
    player_stats_df['PDK_differenza'] = pdk_by_player.diff(-1)
    player_stats_df[target] = pdk_by_player.shift(1)

    # errors='ignore' keeps this working if the endpoint ever omits the column
    player_stats_df = player_stats_df.drop(columns='VIDEO_AVAILABLE', errors='ignore')

    return player_stats_df

# Example: game log for player 203500 (Steven Adams) starting from '10/1/2013'
steven_adams_stats = get_player_game_stats(203500, '10/1/2013')
# Report the row count (presumably one row per game). DataFrame.size would be
# rows * columns, not the number of games.
print(f"Steven adams games: {len(steven_adams_stats)}")
print(list(steven_adams_stats.columns))
steven_adams_stats.tail() # Steven Adams

In [None]:
def get_player_position(player_id:int):
    """Return the 'POSITION' value for a player.

    Args:
        player_id: Identifier passed to ``CommonPlayerInfo``.

    Returns:
        The 'POSITION' entry of the first row of the endpoint's first data
        frame.
    """
    info_frames = commonplayerinfo.CommonPlayerInfo(player_id=player_id).get_data_frames()
    first_row = info_frames[0].iloc[0]
    return first_row['POSITION']

# Example call for player 203500 (Steven Adams)
get_player_position(203500)

In [None]:
# Build the table of active players
def get_active_players_info() -> pd.DataFrame:
    """Return the active players indexed by 'id', with a 'POSITION' column.

    The position of every player is looked up one by one through
    ``get_player_position`` while a progress bar is shown.

    Returns:
        Data frame built from ``players.get_active_players()``, indexed by
        'id', with 'POSITION' inserted at column location 4.
    """
    roster = pd.DataFrame(players.get_active_players()).set_index('id')

    # TODO: consider fetching player health status from
    # https://api.fantasynerds.com/getting-started
    progress = tqdm_notebook(roster.index, total=roster.shape[0], desc="Retrieve player info:")
    positions = [get_player_position(player_id) for player_id in progress]

    roster.insert(4, 'POSITION', positions, allow_duplicates=True)
    return roster

# Active players table, cached on disk so the per-player lookups run only once.
# NOTE: unpickling runs arbitrary code; only load a cache file created by this notebook.
players_file = 'players_list.pkl'
if not os.path.exists(players_file):
    # No cache yet: build the table, then store it as pickle and as CSV
    active_players_list = get_active_players_info()
    active_players_list.to_pickle(players_file)
    active_players_list.to_csv('player_list.csv')
else:
    with open(players_file, 'rb') as f:
        active_players_list = pd.read_pickle(f)

print(f'Active players: {len(active_players_list)}')
active_players_list.head()

In [None]:
# Per-player game logs collected for the modelling step
all_player_data = []

# Start date from which player statistics are fetched
date = '10/1/2013'

for index, player_info in tqdm_notebook(
    active_players_list.iterrows(),
    total=active_players_list.shape[0],
    desc="Retrieve player stats:",
):
    # The row label of the players table is the player id
    player_id = player_info.name
    player_stats = get_player_game_stats(player_id, date)

    # Add the 'PLAYER_NAME' (spaces replaced by underscores) and 'POSITION' columns
    player_stats.insert(2, 'PLAYER_NAME', player_info['full_name'].replace(' ', '_'))
    player_stats.insert(3, 'POSITION', player_info['POSITION'])

    # Players without any game are left out
    if player_stats.empty:
        continue
    all_player_data.append(player_stats)

player_stats.head()

In [None]:
players_ID = []
players_name = []
positions = []
MSEs = []
PDKs = []

# Minimum number of usable game rows required to train a model for a player
minPlayesGames = 10

for player_stats in tqdm_notebook(all_player_data, desc="Train player models:"):
    # Leave out the first and the last row: the first has no target
    # (shift(1) has nothing before it) and the last has no 'PDK_differenza'
    # (diff(-1) has nothing after it).
    datamodel = player_stats.iloc[1:-1]
    if datamodel.shape[0] < minPlayesGames:
        continue

    # Split into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(datamodel[features], datamodel[target], test_size=0.2, random_state=42)

    # Create and fit the XGBoost model
    model = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)
    model.fit(X_train, y_train)

    # Evaluate on the test set (mean squared error)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)

    # Predict from the first row, i.e. the game whose target is still unknown
    last_game = player_stats.head(1)
    last_game_PDK = model.predict(last_game[features])[0]

    # Positional access: does not depend on the row label being 0
    players_ID.append(last_game['Player_ID'].iloc[0])
    players_name.append(last_game['PLAYER_NAME'].iloc[0])
    positions.append(last_game['POSITION'].iloc[0])
    MSEs.append(mse)
    PDKs.append(last_game_PDK)

all_player_results = pd.DataFrame({'Player_ID':players_ID, 'PlayerName':players_name, 'Position':positions, 'MSE':MSEs, 'PDK':PDKs})
# set_index returns a new frame; the result has to be kept for the index to change
all_player_results = all_player_results.set_index('Player_ID').sort_values(by='PDK', ascending=False)
all_player_results.head(10)

In [None]:
# Set up the optimisation problem (a maximisation)
prob = LpProblem("FantasyBasketballOptimization", LpMaximize)

# One binary decision variable per player name (1 = selected, 0 = not selected)
selected_players = LpVariable.dicts(
    "SelectedPlayers",
    all_player_results['PlayerName'],
    lowBound=0,
    upBound=1,
    cat=const.LpBinary,
)

In [None]:
# Objective: maximise the summed predicted PDK of the selected players.
# Names and scores are paired column-wise: all_player_results is not indexed
# by player name, so a label lookup such as .loc[name, 'PDK'] raises KeyError.
# float() turns the NumPy prediction into a plain coefficient for PuLP.
prob += lpSum(
    float(predicted_pdk) * selected_players[player]
    for player, predicted_pdk in zip(all_player_results['PlayerName'], all_player_results['PDK'])
)

In [None]:
# TO BE DEFINED (work in progress)
# Constraints: fix the total number of players and the number of players per role
total_players = all_player_results.shape[0]
max_players = 5
max_centers = 1
max_guards = 2
max_forwards = 2

# Role groups built from the 'Position' column of all_player_results, using
# the text before the first '-' as the player's role.
# NOTE(review): assumes position strings look like 'Center', 'Guard',
# 'Forward' or '<primary>-<secondary>' -- confirm against the values returned
# by get_player_position. Players with any other value belong to no group.
player_names = all_player_results['PlayerName']
primary_role = all_player_results['Position'].fillna('').astype(str).str.split('-').str[0]
centers = set(player_names[primary_role == 'Center'])
guards = set(player_names[primary_role == 'Guard'])
forwards = set(player_names[primary_role == 'Forward'])

prob += lpSum(selected_players[player] for player in player_names) == max_players
prob += lpSum(selected_players[player] for player in player_names if player in centers) == max_centers
prob += lpSum(selected_players[player] for player in player_names if player in guards) == max_guards
prob += lpSum(selected_players[player] for player in player_names if player in forwards) == max_forwards

In [None]:
# Solve the problem
prob.solve()

# Collect the selected players straight from the decision variables.
# A variable counts as selected when its value is above 0.5: this tolerates
# solver round-off, and a missing value (None) is treated as not selected.
selected_players_list = [
    player
    for player, variable in selected_players.items()
    if (variable.value() or 0) > 0.5
]

# Show the selected players
print("Giocatori selezionati:")
print(selected_players_list)