In [1]:
# import pandas as pd
# from sklearn.model_selection import train_test_split
# from sklearn.linear_model import LogisticRegression, LinearRegression
# from sklearn.metrics import accuracy_score, mean_squared_error
# from colorama import Fore, Style
# from tabulate import tabulate
# from prettytable import PrettyTable

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from colorama import Fore, Style
from prettytable import PrettyTable

In [2]:
df_existing = pd.read_excel('NBA2324.xlsx')

In [3]:
df_existing

Unnamed: 0,Away team,Away Pts,Home team,Home Pts,Overtime,Attend.,Arena,Win,Loss
0,Los Angeles Lakers,107,Denver Nuggets,119,No,19842,Ball Arena,Denver Nuggets,Los Angeles Lakers
1,Phoenix Suns,108,Golden State Warriors,104,No,18064,Chase Center,Phoenix Suns,Golden State Warriors
2,Houston Rockets,86,Orlando Magic,116,No,18846,Amway Center,Orlando Magic,Houston Rockets
3,Boston Celtics,108,New York Knicks,104,No,19812,Madison Square Garden (IV),Boston Celtics,New York Knicks
4,Washington Wizards,120,Indiana Pacers,143,No,16004,Gainbridge Fieldhouse,Indiana Pacers,Washington Wizards
...,...,...,...,...,...,...,...,...,...
629,Miami Heat,87,Orlando Magic,105,No,17832,Amway Center,Orlando Magic,Miami Heat
630,Denver Nuggets,113,Washington Wizards,104,No,17832,Capital One Arena,Denver Nuggets,Washington Wizards
631,Boston Celtics,116,Houston Rockets,107,No,17832,Toyota Center,Boston Celtics,Houston Rockets
632,Indiana Pacers,110,Phoenix Suns,117,No,17832,Footprint Center,Phoenix Suns,Indiana Pacers


In [4]:
#checking if the 'Win' column exists before proceeding
winning_team = 'Win'  # Update with the actual column name
if winning_team in df_existing.columns:
    #creating a binary target variable indicating whether the home team wins
    df_existing['HomeWin'] = (df_existing['Home team'] == df_existing[winning_team]).astype(int)

    #specifying the categorical columns for one-hot encoding
    categorical_columns = ['Away team', 'Home team', 'Overtime', 'Arena']

    #checking if categorical columns exist in the DataFrame
    missing_columns = [col for col in categorical_columns if col not in df_existing.columns]

    if not missing_columns:
        #extracting the necessary columns for the model
        X_completed = df_existing[categorical_columns]
        y_completed = df_existing['HomeWin']

        #one-hot encode categorical columns
        X_completed_encoded = pd.get_dummies(X_completed, drop_first=True)

        #splitting the completed data into training and testing sets
        X_train_completed, _, y_train_completed, _ = train_test_split(
            X_completed_encoded, y_completed, test_size=0.2, random_state=42
        )

        #choosing a model for completed games (Logistic Regression in this example)
        model_completed = LogisticRegression()

        #training the model on the completed data
        model_completed.fit(X_train_completed, y_train_completed)
    else:
        print(f"Error: Columns {missing_columns} not found in DataFrame.")
else:
    print(f"Error: Column '{winning_team}' not found in DataFrame.")

In [5]:
#upcoming game data
upcoming_game_data = {
    'Away team': ['Milwaukee Bucks', 'Cleveland Cavaliers', 'San Antonio Spurs', 'Memphis Grizzlies', 'Charlotte Hornets', 'Boston Celtics', 'Chicago Bulls', 'Atlanta Hawks'],
    'Home team': ['Detroit Pistons', 'Orlando Magic', 'Philadelphia 76ers', 'Toronto Raptors', 'Minnesota Timberwolves', 'Dallas Mavericks', 'Phoenix Suns', 'Sacramento Kings'],
    'Overtime': [0, 0, 0, 0, 0, 0, 0, 0],  #placeholder for upcoming games
    'Arena': ['Arena1', 'Arena2', 'Arena3', 'Arena4', 'Arena5', 'Arena6', 'Arena7', 'Arena8'],
}

In [6]:
#creating a DataFrame for the upcoming game
df_upcoming_game = pd.DataFrame(upcoming_game_data)

In [7]:
#ensuring the columns in df_upcoming_game_encoded match the columns used during training
df_upcoming_game_encoded = pd.get_dummies(df_upcoming_game)
missing_columns = set(X_train_completed.columns) - set(df_upcoming_game_encoded.columns)
for column in missing_columns:
    df_upcoming_game_encoded[column] = 0

In [8]:
#reordering columns to match the order during training
df_upcoming_game_encoded = df_upcoming_game_encoded[X_train_completed.columns]

#making predictions using the trained model for binary outcome
predicted_probabilities = model_completed.predict_proba(df_upcoming_game_encoded)

In [13]:
prediction_date = 'Monday 22 Jan 2024'

In [14]:
#ANSI escape codes for blue color
blue_color = '\033[94m'
reset_color = '\033[0m'

#data for PrettyTable
table_new = PrettyTable()
table_new.field_names = [f"{blue_color}NBA, {prediction_date}{reset_color}", f"{blue_color}Projected Winners{reset_color}"]
table_new.align["Projected Winners"] = "l"
table_new.horizontal_char = '-'  # Use a horizontal line as a separator

#defining color codes for text
green_color = '\033[92m'
reset_color = '\033[0m'

In [15]:
for i, team in enumerate(df_upcoming_game.itertuples(), start=1):  #starting from index 1 to skip the header row
    team1_probability = predicted_probabilities[i-1][1] * 100
    team2_probability = 100 - team1_probability

    #determining the team with the higher win probability as the projected winner
    if team1_probability > 50:
        predicted_winner = team[1]
        projected_win_percentage = team1_probability
    else:
        predicted_winner = team[2]
        projected_win_percentage = team2_probability

    team1_info = f"{green_color}{team[1]} ({df_existing['Win'].eq(team[1]).sum()}-{df_existing['Loss'].eq(team[1]).sum()}){reset_color}"
    team2_info = f"{green_color}{team[2]} ({df_existing['Win'].eq(team[2]).sum()}-{df_existing['Loss'].eq(team[2]).sum()}){reset_color}"

    team_info = f"{team1_info} vs {team2_info}"

    table_new.add_row([team_info, green_color + f"{predicted_winner} ({projected_win_percentage:.2f}%) {reset_color}"])

In [16]:
print(table_new)

+------------------------------------------------------------+-------------------------------+
|                  [94mNBA, Monday 22 Jan 2024[0m                   |       [94mProjected Winners[0m       |
+------------------------------------------------------------+-------------------------------+
|     [92mMilwaukee Bucks (29-13)[0m vs [92mDetroit Pistons (4-38)[0m      |   [92mDetroit Pistons (75.20%) [0m   |
|    [92mCleveland Cavaliers (25-15)[0m vs [92mOrlando Magic (23-20)[0m    | [92mCleveland Cavaliers (56.85%) [0m |
|   [92mSan Antonio Spurs (8-34)[0m vs [92mPhiladelphia 76ers (28-13)[0m   |  [92mSan Antonio Spurs (81.42%) [0m  |
|    [92mMemphis Grizzlies (15-27)[0m vs [92mToronto Raptors (16-27)[0m    |   [92mToronto Raptors (51.71%) [0m   |
| [92mCharlotte Hornets (9-31)[0m vs [92mMinnesota Timberwolves (30-12)[0m |  [92mCharlotte Hornets (92.57%) [0m  |
|     [92mBoston Celtics (33-10)[0m vs [92mDallas Mavericks (24-18)[0m     |    [9