In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import accuracy_score, mean_squared_error
from colorama import Fore, Style
from tabulate import tabulate
from prettytable import PrettyTable

In [2]:
df_existing = pd.read_excel('NBA2324.xlsx')

In [3]:
df_existing

Unnamed: 0,Away team,Away Pts,Home team,Home Pts,Overtime,Attend.,Arena,Win,Loss
0,Los Angeles Lakers,107,Denver Nuggets,119,No,19842,Ball Arena,Denver Nuggets,Los Angeles Lakers
1,Phoenix Suns,108,Golden State Warriors,104,No,18064,Chase Center,Phoenix Suns,Golden State Warriors
2,Houston Rockets,86,Orlando Magic,116,No,18846,Amway Center,Orlando Magic,Houston Rockets
3,Boston Celtics,108,New York Knicks,104,No,19812,Madison Square Garden (IV),Boston Celtics,New York Knicks
4,Washington Wizards,120,Indiana Pacers,143,No,16004,Gainbridge Fieldhouse,Indiana Pacers,Washington Wizards
...,...,...,...,...,...,...,...,...,...
623,Cleveland Cavaliers,116,Atlanta Hawks,95,No,17832,State Farm Arena,Cleveland Cavaliers,Atlanta Hawks
624,Toronto Raptors,100,New York Knicks,126,No,17832,Madison Square Garden (IV),New York Knicks,Toronto Raptors
625,Memphis Grizzlies,96,Chicago Bulls,125,No,17832,United Center,Chicago Bulls,Memphis Grizzlies
626,Utah Jazz,126,Houston Rockets,127,No,17832,Toyota Center,Houston Rockets,Utah Jazz


In [4]:
# Check if the 'Win' column exists before proceeding
winning_team = 'Win'  # Update with the actual column name
if winning_team in df_existing.columns:
    # Create a binary target variable indicating whether the home team wins
    df_existing['HomeWin'] = (df_existing['Home team'] == df_existing[winning_team]).astype(int)

    # Specify the categorical columns for one-hot encoding
    categorical_columns = ['Away team', 'Home team', 'Overtime', 'Arena']

    # Check if categorical columns exist in the DataFrame
    missing_columns = [col for col in categorical_columns if col not in df_existing.columns]
    
    if not missing_columns:
        # Extract the necessary columns for the model
        X_completed = df_existing[categorical_columns]
        y_completed = df_existing['HomeWin']

        # One-hot encode categorical columns
        X_completed_encoded = pd.get_dummies(X_completed, drop_first=True)

        # Split the completed data into training and testing sets
        X_train_completed, X_test_completed, y_train_completed, y_test_completed = train_test_split(
            X_completed_encoded, y_completed, test_size=0.2, random_state=42
        )

        # Choose a model for completed games (Logistic Regression in this example)
        model_completed = LogisticRegression()

        # Train the model on the completed data
        model_completed.fit(X_train_completed, y_train_completed)

        # Make predictions on the test set for completed games
        y_pred_completed = model_completed.predict(X_test_completed)

        # Evaluate the model on the completed data
        accuracy_completed = accuracy_score(y_test_completed, y_pred_completed)
        print(f'Accuracy on Completed Games Data: {accuracy_completed}')

        # Save the completed games data with predictions to a new Excel file
        df_completed = pd.concat([X_test_completed, pd.Series(y_pred_completed, name='Predicted_HomeWin')], axis=1)
        df_completed.to_excel('completed_games_with_predictions.xlsx', index=False)
    else:
        print(f"Error: Columns {missing_columns} not found in DataFrame.")
else:
    print(f"Error: Column '{winning_team}' not found in DataFrame.")

Accuracy on Completed Games Data: 0.6984126984126984


In [5]:
# Upcoming game data
upcoming_game_data = {
    'Away team': ['Denver Nuggets'],
    'Home team': ['Boston Celtics'],
    'Overtime': [0],  # Placeholder for upcoming game
    'Arena': ['TD Garden'],  # Placeholder for upcoming game
}

# Create a DataFrame for the upcoming game
df_upcoming_game = pd.DataFrame(upcoming_game_data)

# Ensure the columns in df_upcoming_game_encoded match the columns used during training
df_upcoming_game_encoded = pd.get_dummies(df_upcoming_game)
missing_columns = set(X_train_completed.columns) - set(df_upcoming_game_encoded.columns)
for column in missing_columns:
    df_upcoming_game_encoded[column] = 0

# Reorder columns to match the order during training
df_upcoming_game_encoded = df_upcoming_game_encoded[X_train_completed.columns]

# Make predictions using the trained model
predicted_winner = model_completed.predict(df_upcoming_game_encoded)

# Display the predicted winner
print("Predicted Winner:", "Home Team (Boston Celtics)" if predicted_winner[0] == 1 else "Away Team (Denver Nuggets)")


Predicted Winner: Home Team (Boston Celtics)


In [6]:
# Check if the 'Win' and 'Home Pts' columns exist before proceeding
winning_team = 'Win'  # Update with the actual column name
points_column = 'Home Pts'  # Update with the actual column name

if winning_team in df_existing.columns and points_column in df_existing.columns:
    # Create a binary target variable indicating whether the home team wins
    df_existing['HomeWin'] = (df_existing['Home team'] == df_existing[winning_team]).astype(int)

    # Specify the features and target variables
    features = ['Away team', 'Home team', 'Overtime', 'Arena']
    target_binary = 'HomeWin'
    target_points = points_column

    # Extract the necessary columns for the model
    X_completed = df_existing[features]
    y_completed_binary = df_existing[target_binary]
    y_completed_points = df_existing[target_points]

    # One-hot encode categorical columns
    X_completed_encoded = pd.get_dummies(X_completed, drop_first=True)

    # Split the completed data into training and testing sets
    X_train_completed, X_test_completed, y_train_completed_binary, y_test_completed_binary, y_train_completed_points, y_test_completed_points = train_test_split(
        X_completed_encoded, y_completed_binary, y_completed_points, test_size=0.2, random_state=42
    )

    # Choose a model for completed games (Logistic Regression in this example)
    model_completed_binary = LogisticRegression()

    # Train the model on the completed data for binary outcome
    model_completed_binary.fit(X_train_completed, y_train_completed_binary)

    # Make predictions on the test set for binary outcome
    y_pred_completed_binary = model_completed_binary.predict(X_test_completed)

    # Evaluate the model on the completed data for binary outcome
    accuracy_completed_binary = accuracy_score(y_test_completed_binary, y_pred_completed_binary)
    print(f'Accuracy on Completed Games Data (Binary Outcome): {accuracy_completed_binary}')

    # Choose a model for completed games (Linear Regression in this example) for points
    model_completed_points = LinearRegression()

    # Train the model on the completed data for points
    model_completed_points.fit(X_train_completed, y_train_completed_points)

    # Make predictions on the test set for points
    y_pred_completed_points = model_completed_points.predict(X_test_completed)

    # Evaluate the model on the completed data for points
    mse_completed_points = mean_squared_error(y_test_completed_points, y_pred_completed_points)
    print(f'Mean Squared Error on Completed Games Data (Points): {mse_completed_points}')

    # Save the completed games data with predictions to a new Excel file
    df_completed = pd.concat([
        X_test_completed,
        pd.Series(y_pred_completed_binary, name='Predicted_HomeWin'),
        pd.Series(y_pred_completed_points, name='Predicted_HomePts')
    ], axis=1)
    df_completed.to_excel('completed_games_with_predictions.xlsx', index=False)
else:
    print(f"Error: Columns '{winning_team}' or '{points_column}' not found in DataFrame.")

Accuracy on Completed Games Data (Binary Outcome): 0.6984126984126984
Mean Squared Error on Completed Games Data (Points): 2.390051297000779e+28


In [8]:
# Check if the 'Win' column exists before proceeding
winning_team = 'Win'  # Update with the actual column name

if winning_team in df_existing.columns:
    # Create a binary target variable indicating whether the home team wins
    df_existing['HomeWin'] = (df_existing['Home team']
                              == df_existing[winning_team]).astype(int)

    # Specify the features and target variables
    features = ['Away team', 'Home team', 'Overtime', 'Arena']
    target_binary = 'HomeWin'

    # Extract the necessary columns for the model
    X_completed = df_existing[features]
    y_completed_binary = df_existing[target_binary]

    # One-hot encode categorical columns
    X_completed_encoded = pd.get_dummies(X_completed, drop_first=True)

    # Split the completed data into training and testing sets
    X_train_completed, _, y_train_completed_binary, _, = train_test_split(
        X_completed_encoded, y_completed_binary, test_size=0.2, random_state=42
    )

    # Choose a model for completed games (Logistic Regression in this example) for binary outcome
    model_completed_binary = LogisticRegression()

    # Train the model on the completed data for binary outcome
    model_completed_binary.fit(X_train_completed, y_train_completed_binary)

    # Upcoming games data
    upcoming_games_data = {
        'Away team': ['Brooklyn Nets', 'Miami Heat', 'Denver Nuggets', 'Boston Celtics', 'Indiana Pacers', 'Portland Trail Blazers'],
        'Home team': ['Los Angeles Clippers', 'Orlando Magic', 'Washington Wizards', 'Houston Rockets', 'Phoenix Suns', 'Los Angeles Lakers'],
        'Overtime': [0, 0, 0, 0, 0, 0],  # Placeholder for upcoming games
        # Placeholder for upcoming games
        'Arena': ['Arena1', 'Arena2', 'Arena3', 'Arena4', 'Arena5', 'Arena6'],
    }

    # Create a DataFrame for the upcoming games
    df_upcoming_games = pd.DataFrame(upcoming_games_data)

    # Ensure the columns in df_upcoming_games_encoded match the columns used during training
    df_upcoming_games_encoded = pd.get_dummies(df_upcoming_games)
    missing_columns = set(X_train_completed.columns) - \
        set(df_upcoming_games_encoded.columns)
    for column in missing_columns:
        df_upcoming_games_encoded[column] = 0

    # Reorder columns to match the order during training
    df_upcoming_games_encoded = df_upcoming_games_encoded[X_train_completed.columns]

    # Make predictions using the trained model for binary outcome
    predicted_winners = model_completed_binary.predict(
        df_upcoming_games_encoded)

prediction_date = 'Sunday 21 Jan 2024'

# ANSI escape codes for blue color
blue_color = '\033[94m'
reset_color = '\033[0m'

# Prepare data for PrettyTable
table = PrettyTable()
table.field_names = [f"{blue_color}NBA, {prediction_date}{reset_color}", f"{blue_color}Projected Winners{reset_color}"]
table.align["Projected Winners"] = "l"
table.horizontal_char = '-'  # Use a horizontal line as a separator

# Define color codes for text
green_color = '\033[92m'
reset_color = '\033[0m'

for i, team in enumerate(df_upcoming_games.itertuples(), start=1):  # Start from index 1 to skip the header row
    predicted_winner = team[2] if predicted_winners[i-1] == 1 else team[1]
    
    # Count occurrences of each team in 'Win' and 'Loss' columns
    team1 = team[1]
    team1_wins_count = df_existing['Win'].eq(team1).sum()
    team1_losses_count = df_existing['Loss'].eq(team1).sum()
    
    team2 = team[2]
    team2_wins_count = df_existing['Win'].eq(team2).sum()
    team2_losses_count = df_existing['Loss'].eq(team2).sum()
    
    team1_info = f"{green_color}{team1} ({team1_wins_count}-{team1_losses_count}){reset_color}"
    team2_info = f"{green_color}{team2} ({team2_wins_count}-{team2_losses_count}){reset_color}"
    
    team_info = f"{team1_info} vs {team2_info}"
    
    table.add_row([team_info, green_color + predicted_winner + reset_color])

# Print the PrettyTable
print(table)

+--------------------------------------------------------------+----------------------+
|                   [94mNBA, Sunday 21 Jan 2024[0m                    |  [94mProjected Winners[0m   |
+--------------------------------------------------------------+----------------------+
|    [92mBrooklyn Nets (17-24)[0m vs [92mLos Angeles Clippers (26-14)[0m     | [92mLos Angeles Clippers[0m |
|         [92mMiami Heat (24-18)[0m vs [92mOrlando Magic (22-20)[0m          |    [92mOrlando Magic[0m     |
|     [92mDenver Nuggets (29-14)[0m vs [92mWashington Wizards (7-34)[0m      |    [92mDenver Nuggets[0m    |
|      [92mBoston Celtics (32-10)[0m vs [92mHouston Rockets (20-21)[0m       |   [92mHouston Rockets[0m    |
|        [92mIndiana Pacers (24-19)[0m vs [92mPhoenix Suns (23-18)[0m        |    [92mIndiana Pacers[0m    |
| [92mPortland Trail Blazers (12-29)[0m vs [92mLos Angeles Lakers (22-22)[0m |  [92mLos Angeles Lakers[0m  |
+---------------------------

In [10]:
# # Check if the 'Win' column exists before proceeding
# winning_team = 'Win'  # Update with the actual column name

# if winning_team in df_existing.columns:
#     # Create a binary target variable indicating whether the home team wins
#     df_existing['HomeWin'] = (df_existing['Home team']
#                               == df_existing[winning_team]).astype(int)

#     # Specify the features and target variables
#     features = ['Away team', 'Home team', 'Overtime', 'Arena']
#     target_binary = 'HomeWin'

#     # Extract the necessary columns for the model
#     X_completed = df_existing[features]
#     y_completed_binary = df_existing[target_binary]

#     # One-hot encode categorical columns
#     X_completed_encoded = pd.get_dummies(X_completed, drop_first=True)

#     # Split the completed data into training and testing sets
#     X_train_completed, _, y_train_completed_binary, _, = train_test_split(
#         X_completed_encoded, y_completed_binary, test_size=0.2, random_state=42
#     )

#     # Choose a model for completed games (Logistic Regression in this example) for binary outcome
#     model_completed_binary = LogisticRegression()

#     # Train the model on the completed data for binary outcome
#     model_completed_binary.fit(X_train_completed, y_train_completed_binary)

#     # Upcoming games data
#     upcoming_games_data = {
#         'Away team': ['Boston Celtics'],
#         'Home team': ['Chicago Bulls'],
#         'Overtime': [0],  # Placeholder for upcoming games
#         # Placeholder for upcoming games
#         'Arena': ['Arena1'],
#     }

#     # Create a DataFrame for the upcoming games
#     df_upcoming_games = pd.DataFrame(upcoming_games_data)

#     # Ensure the columns in df_upcoming_games_encoded match the columns used during training
#     df_upcoming_games_encoded = pd.get_dummies(df_upcoming_games)
#     missing_columns = set(X_train_completed.columns) - \
#         set(df_upcoming_games_encoded.columns)
#     for column in missing_columns:
#         df_upcoming_games_encoded[column] = 0

#     # Reorder columns to match the order during training
#     df_upcoming_games_encoded = df_upcoming_games_encoded[X_train_completed.columns]

#     # Make predictions using the trained model for binary outcome
#     predicted_winners = model_completed_binary.predict(
#         df_upcoming_games_encoded)

# # Define the date for the predictions
# prediction_date = 'Saturday 23 Feb 2024'

# # Display the predicted winners for each upcoming game
# print(f"\033[92mNBA Results Projections, {prediction_date}\033[0m")

# for i, team in enumerate(df_upcoming_games.itertuples(), start=0):
#     predicted_winner = team[2] if predicted_winners[i] == 1 else team[1]
#     print(
#         f"\033[92m{team[1]} vs {team[2]} - Projected Winner: {predicted_winner}\033[0m")

# else:
#     print(f"Error: Column '{winning_team}' not found in DataFrame.")

In [1]:
!pip install tabulate



In [2]:
from tabulate import tabulate

In [11]:
# Check if the 'Win' column exists before proceeding
winning_team = 'Win'  # Update with the actual column name

if winning_team in df_existing.columns:
    # Create a binary target variable indicating whether the home team wins
    df_existing['HomeWin'] = (df_existing['Home team']
                              == df_existing[winning_team]).astype(int)

    # Specify the features and target variables
    features = ['Away team', 'Home team', 'Overtime', 'Arena']
    target_binary = 'HomeWin'

    # Extract the necessary columns for the model
    X_completed = df_existing[features]
    y_completed_binary = df_existing[target_binary]

    # One-hot encode categorical columns
    X_completed_encoded = pd.get_dummies(X_completed, drop_first=True)

    # Split the completed data into training and testing sets
    X_train_completed, _, y_train_completed_binary, _, = train_test_split(
        X_completed_encoded, y_completed_binary, test_size=0.2, random_state=42
    )

    # Choose a model for completed games (Logistic Regression in this example) for binary outcome
    model_completed_binary = LogisticRegression()

    # Train the model on the completed data for binary outcome
    model_completed_binary.fit(X_train_completed, y_train_completed_binary)

    # Upcoming games data
    upcoming_games_data = {
        'Away team': ['Boston Celtics'],
        'Home team': ['Chicago Bulls'],
        'Overtime': [0],  # Placeholder for upcoming games
        # Placeholder for upcoming games
        'Arena': ['Arena1'],
    }

    # Create a DataFrame for the upcoming games
    df_upcoming_games = pd.DataFrame(upcoming_games_data)

    # Ensure the columns in df_upcoming_games_encoded match the columns used during training
    df_upcoming_games_encoded = pd.get_dummies(df_upcoming_games)
    missing_columns = set(X_train_completed.columns) - \
        set(df_upcoming_games_encoded.columns)
    for column in missing_columns:
        df_upcoming_games_encoded[column] = 0

    # Reorder columns to match the order during training
    df_upcoming_games_encoded = df_upcoming_games_encoded[X_train_completed.columns]

    # Make predictions using the trained model for binary outcome
    predicted_winners = model_completed_binary.predict(
        df_upcoming_games_encoded)

# Define the date for the predictions
prediction_date = 'Saturday 23 Feb 2024'

# Prepare data for tabulate
table_data = [["NBA Results Projections", prediction_date]]

for i, team in enumerate(df_upcoming_games.itertuples(), start=0):
    predicted_winner = team[2] if predicted_winners[i] == 1 else team[1]
    table_data.append([f"{team[1]} vs {team[2]}", f"Projected Winner: {predicted_winner}"])

# Display the tabulated data
print(tabulate(table_data, headers=["Game", "Result"]))

Game                             Result
-------------------------------  -------------------------------
NBA Results Projections          Saturday 23 Feb 2024
Boston Celtics vs Chicago Bulls  Projected Winner: Chicago Bulls
