# In [2]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import tkinter as tk
from tkinter import ttk

def run_simulation(team1, team2, B2B_team1, B2B_team2, model, scaler, df, nbsimu=5000):
    """Simulate the matchup and publish both win probabilities in the GUI.

    Delegates to ``calculate_win_probabilities`` with ``nbsimu`` simulations
    per pass (5000 by default) and writes one line per team into the
    module-level ``result_label`` / ``result_label2`` widgets.

    Returns:
        None. The only effect is the updated label text.
    """
    win_pcts = calculate_win_probabilities(
        team1, team2, B2B_team1, B2B_team2, model, scaler, df, nbsimu
    )
    home_pct, away_pct = win_pcts

    # First label reports team1, second label reports team2.
    result_label.config(text=f"{team1} has a probability of winning: {home_pct:.2%}")
    result_label2.config(text=f"{team2} has a probability of winning: {away_pct:.2%}")

# Columns fed to the scaler/model, in the order the simulated rows are built.
_FEATURE_COLUMNS = [
    'ORtg', 'DRtg', 'Pace', 'FTr', '3PAr', 'TS%', 'TRB%', 'AST%', 'STL%',
    'BLK%', 'eFG%', 'TOV%', 'ORB%', 'FT/FGA', '@', 'B2B',
]


def _baseline_features(df, team, opponent):
    """Return the noise-free feature vector for ``team`` facing ``opponent``.

    For every feature the result is the midpoint between
      * a recency-weighted average over the rows where ``T == team``
        (row ``i`` of ``n``, 0-based, has weight ``1 + (i + 1 - n / 2) / n``,
        so later rows count more), and
      * the plain mean over the rows where ``OppT == opponent``.

    NOTE(review): the weighted sum is divided by ``n`` rather than by the sum
    of the weights, exactly as the original implementation did, so the first
    term sits slightly above a normalised weighted mean.

    Raises:
        ValueError: if either selection contains no rows.
    """
    team_rows = df.loc[df['T'] == team, _FEATURE_COLUMNS]
    opponent_rows = df.loc[df['OppT'] == opponent, _FEATURE_COLUMNS]
    if team_rows.empty:
        raise ValueError(f"No games found for team {team!r}")
    if opponent_rows.empty:
        raise ValueError(f"No games found against opponent {opponent!r}")

    n_games = len(team_rows)
    recency_weights = 1 + (np.arange(1, n_games + 1) - n_games / 2) / n_games
    team_values = team_rows.to_numpy(dtype=float)
    team_avg = (team_values * recency_weights[:, None]).sum(axis=0) / n_games
    opponent_avg = opponent_rows.to_numpy(dtype=float).mean(axis=0)
    return (team_avg + opponent_avg) / 2


def _count_predicted_wins(baseline, at_flag, b2b_flag, model, scaler, nb):
    """Return how many of ``nb`` noisy copies of ``baseline`` are predicted as 1.

    Every feature of every simulated row is scaled by its own factor drawn
    uniformly from [0.8, 1.2); afterwards the '@' and 'B2B' columns are
    overwritten with the fixed flags so they carry no noise.
    """
    # One draw per (simulation, feature) in row-major order, which is the same
    # order in which the former nested loops consumed the global NumPy RNG.
    noise = np.random.uniform(0.8, 1.2, size=(nb, len(_FEATURE_COLUMNS)))
    simulated = pd.DataFrame(baseline * noise, columns=_FEATURE_COLUMNS)
    simulated['@'] = at_flag
    simulated['B2B'] = b2b_flag

    # A single batched call replaces nb one-row transform/predict calls.
    predictions = np.asarray(model.predict(scaler.transform(simulated)))
    return int(np.count_nonzero(predictions == 1))


def calculate_win_probabilities(team1, team2, B2B_team1, B2B_team2, model, scaler, df, nb):
    """Estimate win probabilities for ``team1`` vs ``team2`` by simulation.

    Two passes of ``nb`` simulations each are run:

    1. from ``team1``'s point of view with ``'@' = 1`` and ``B2B = B2B_team1``;
       a predicted 1 counts as a win for ``team1``, anything else for ``team2``;
    2. from ``team2``'s point of view with ``'@' = 0`` and ``B2B = B2B_team2``;
       a predicted 1 counts as a win for ``team2``, anything else for ``team1``.

    Args:
        team1, team2: team identifiers matched against ``df['T']`` / ``df['OppT']``.
        B2B_team1, B2B_team2: value written into the 'B2B' feature for each pass.
        model: object exposing ``predict(X)``.
        scaler: object exposing ``transform(X)``; receives a DataFrame whose
            columns are the feature names in ``_FEATURE_COLUMNS`` order.
        df: game log containing 'T', 'OppT' and all feature columns.
        nb: number of simulations per pass (positive integer).

    Returns:
        ``(pct_team1, pct_team2)`` - each team's share of the ``2 * nb``
        simulated outcomes; the two values sum to 1.

    Raises:
        ValueError: if ``nb`` is not positive, or if ``df`` has no rows for one
            of the teams (previously this surfaced as a ZeroDivisionError).
    """
    if nb <= 0:
        raise ValueError("nb must be a positive number of simulations")

    # The baselines do not depend on the random noise, so compute them once
    # instead of once per simulation.
    team1_baseline = _baseline_features(df, team1, team2)
    team2_baseline = _baseline_features(df, team2, team1)

    team1_pass_wins = _count_predicted_wins(team1_baseline, 1, B2B_team1, model, scaler, nb)
    team2_pass_wins = _count_predicted_wins(team2_baseline, 0, B2B_team2, model, scaler, nb)

    # A non-1 prediction in one team's pass is credited to the other team.
    victories_team1 = team1_pass_wins + (nb - team2_pass_wins)
    victories_team2 = team2_pass_wins + (nb - team1_pass_wins)

    total_simulations = nb * 2
    return victories_team1 / total_simulations, victories_team2 / total_simulations

def test_accuracy(model, scaler, df):
    """Score the simulator on the last 100 rows of ``df`` and display the result.

    For each of those rows the matchup ``T`` vs ``OppT`` is simulated with 51
    simulations per pass. The prediction is a win for ``T`` when its simulated
    share is strictly above 0.5, otherwise a loss, and it is compared with the
    row's 'W/L' value. The hit rate is written to the module-level
    ``accuracy_label``.
    """
    recent_games = df.iloc[-100:]
    hits = 0

    for _, game in recent_games.iterrows():
        # The row only carries one 'B2B' value; it is used for both sides.
        b2b_flag = game['B2B']
        share_team1, _unused = calculate_win_probabilities(
            game['T'], game['OppT'], b2b_flag, b2b_flag, model, scaler, df, 51
        )

        # Rows whose 'W/L' is neither 'W' nor 'L' can never count as a hit.
        predicted_outcome = 'W' if share_team1 > 0.5 else 'L'
        if game['W/L'] == predicted_outcome:
            hits += 1

    hit_rate = hits / len(recent_games)
    accuracy_label.config(text=f"Algorithm Accuracy: {hit_rate:.2%}")

def on_go_button_click():
    """Handle a click on the GO button.

    Reads the two team selections and the two back-to-back checkboxes from the
    module-level widgets/variables and runs the simulation with the
    module-level ``model``, ``scaler`` and ``df``. When a team has not been
    chosen yet, a hint is shown in ``result_label`` instead of simulating.
    """
    selected_team1 = team1_combobox.get()
    selected_team2 = team2_combobox.get()

    # An untouched combobox yields an empty string. That matches no rows in the
    # data, so the simulation would raise inside the Tk callback and the user
    # would see nothing happen.
    if not selected_team1 or not selected_team2:
        result_label.config(text="Please select both a home and an away team.")
        result_label2.config(text="")
        return

    B2B_team1 = team1_b2b_var.get()
    B2B_team2 = team2_b2b_var.get()

    run_simulation(selected_team1, selected_team2, B2B_team1, B2B_team2, model, scaler, df)

def main():
    """Load the game log, fit the model, build the window and run the GUI.

    Everything the callbacks need (data frame, model, scaler, widgets and Tk
    variables) is published through module-level globals.
    """
    global df, model, scaler, teams
    global team1_var, team2_var, team1_b2b_var, team2_b2b_var
    global team1_combobox, team2_combobox
    global result_label, result_label2, accuracy_label

    main_window = tk.Tk()
    main_window.title("NBA Match Simulator")

    # ---- data loading and feature engineering ----
    df = pd.read_csv("nba_games.csv", sep=';')
    # A raw '@' marker becomes 0; every other value becomes 1.
    df['@'] = df['@'].apply(lambda marker: 1 if marker != '@' else 0)
    df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
    df.sort_values(by=['T', 'Date'], inplace=True)

    # 1 when the same team's previous row is dated exactly one day earlier.
    days_since_previous = df.groupby('T')['Date'].diff().dt.days
    df['BackToBack'] = (days_since_previous == 1).astype(int)

    # B2B: -1 when the following row is flagged BackToBack, else 1 when this
    # row itself is flagged, else 0. The shift is not grouped by team, but a
    # team's first row always has BackToBack == 0 (its diff is NaN), so a
    # team's last row never picks up a flag from the next team.
    is_flagged = df['BackToBack'] == 1
    next_is_flagged = df['BackToBack'].shift(-1) == 1
    df['B2B'] = np.where(next_is_flagged, -1, np.where(is_flagged, 1, 0))

    # ---- model training (fitted on every row of df) ----
    feature_names = ['ORtg', 'DRtg', 'Pace', 'FTr', '3PAr', 'TS%', 'TRB%', 'AST%', 'STL%', 'BLK%', 'eFG%', 'TOV%', 'ORB%', 'FT/FGA', '@', 'B2B']
    X = df[feature_names]
    y = df['W/L'].apply(lambda outcome: 1 if outcome == 'W' else 0)

    scaler = StandardScaler()
    model = LogisticRegression()
    model.fit(scaler.fit_transform(X), y)

    teams = df['T'].str.strip('"').unique().tolist()

    # ---- Tk variables ----
    team1_var = tk.StringVar()
    team2_var = tk.StringVar()
    team1_b2b_var = tk.IntVar()
    team2_b2b_var = tk.IntVar()

    # ---- widgets (creation order kept, it determines Tk's stacking order) ----
    team1_label = tk.Label(main_window, text="Home Team:")
    team1_combobox = ttk.Combobox(main_window, textvariable=team1_var, values=teams, state="readonly")
    team2_label = tk.Label(main_window, text="Away Team:")
    team2_combobox = ttk.Combobox(main_window, textvariable=team2_var, values=teams, state="readonly")
    team1_b2b_checkbox = tk.Checkbutton(main_window, text="B2B for Home Team", variable=team1_b2b_var)
    team2_b2b_checkbox = tk.Checkbutton(main_window, text="B2B for Away Team", variable=team2_b2b_var)
    go_button = tk.Button(main_window, text="GO", command=on_go_button_click)
    result_label = tk.Label(main_window, text="")
    result_label2 = tk.Label(main_window, text="")
    accuracy_label = tk.Label(main_window, text="")

    # ---- layout: two selector rows, the button, then three full-width labels ----
    selector_rows = (
        (team1_label, team1_combobox, team1_b2b_checkbox),
        (team2_label, team2_combobox, team2_b2b_checkbox),
    )
    for row_index, row_widgets in enumerate(selector_rows):
        for column_index, widget in enumerate(row_widgets):
            widget.grid(row=row_index, column=column_index, padx=10, pady=5)

    go_button.grid(row=2, column=0, columnspan=3, pady=10)

    for row_index, label in enumerate((result_label, result_label2, accuracy_label), start=3):
        label.grid(row=row_index, column=0, columnspan=3)

    # Runs before the event loop starts, so the accuracy is already shown
    # when the window becomes interactive.
    test_accuracy(model, scaler, df)

    main_window.mainloop()

# Start the GUI only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()

