In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [2]:
csv_file_path = 'C:/Users/Home/NBA_Playoffs/playoff_teams.csv'
playoff_teams_df = pd.read_csv(csv_file_path)
playoff_teams_df.head()

Unnamed: 0,Rk,Team,Age,W,L,PW,PL,MOV,SOS,SRS,...,TOV%,ORB%,FT/FGA,eFG%.1,TOV%.1,DRB%,FT/FGA.1,Arena,Attend.,Attend./G
0,1.0,Boston Celtics*,27.4,57.0,25.0,57,25,6.52,-0.15,6.38,...,12.0,22.1,0.197,0.528,11.3,78.5,0.18,TD Garden,766240,18689
1,2.0,Cleveland Cavaliers*,25.4,51.0,31.0,55,27,5.38,-0.15,5.23,...,12.3,23.6,0.206,0.535,14.4,76.3,0.21,Rocket Mortgage Fieldhouse,777280,18958
2,3.0,Philadelphia 76ers*,28.2,54.0,28.0,52,30,4.32,0.06,4.37,...,12.6,21.6,0.25,0.541,13.0,77.2,0.217,Wells Fargo Center,839261,20470
3,4.0,Memphis Grizzlies*,24.4,51.0,31.0,51,31,3.94,-0.34,3.6,...,11.7,26.5,0.19,0.526,13.1,75.9,0.206,FedEx Forum,707836,17264
4,5.0,Milwaukee Bucks*,29.8,58.0,24.0,50,32,3.63,-0.02,3.61,...,12.7,25.0,0.184,0.52,10.4,77.8,0.175,Fiserv Forum,718786,17531


In [3]:
playoff_teams_df.isnull()

Unnamed: 0,Rk,Team,Age,W,L,PW,PL,MOV,SOS,SRS,...,TOV%,ORB%,FT/FGA,eFG%.1,TOV%.1,DRB%,FT/FGA.1,Arena,Attend.,Attend./G
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
5,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
6,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
8,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
9,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [4]:
print(playoff_teams_df.dtypes)

Rk           float64
Team          object
Age          float64
W            float64
L            float64
PW             int64
PL             int64
MOV          float64
SOS          float64
SRS          float64
ORtg         float64
DRtg         float64
NRtg         float64
Pace         float64
FTr          float64
3PAr         float64
TS%          float64
eFG%         float64
TOV%         float64
ORB%         float64
FT/FGA       float64
eFG%.1       float64
TOV%.1       float64
DRB%         float64
FT/FGA.1     float64
Arena         object
Attend.        int64
Attend./G      int64
dtype: object


In [5]:
# Create a DataFrame containing the 'Team' column
team_names_df = playoff_teams_df[['Team']]

In [6]:
# Drop the 'Arena' column from the DataFrame
playoff_teams_df = playoff_teams_df.drop('Arena', axis=1)

In [7]:
# One-hot encode the 'Team' column
encoded_teams_df = pd.get_dummies(playoff_teams_df['Team'], prefix='Team')

In [8]:
# Concatenate the one-hot encoded columns to the original DataFrame
playoff_teams_df = pd.concat([playoff_teams_df, encoded_teams_df], axis=1)

In [9]:
# Drop the original 'Team' column from the DataFrame
playoff_teams_df = playoff_teams_df.drop('Team', axis=1)


In [10]:
# Split the data into training and testing sets
X = playoff_teams_df.drop('W', axis=1)  # Replace 'number_of_wins' with the name of the column containing the number of wins for each team
y = playoff_teams_df['W']  # Replace 'number_of_wins' with the name of the column containing the number of wins for each team
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
# Instantiate the model
model = LinearRegression()

In [12]:
# Train the model
model.fit(X_train, y_train)

LinearRegression()

In [16]:
def get_encoded_team_stats(team_name, team_names_df, original_df, encoded_df):
    # Get the index for the specified team
    team_index = team_names_df.loc[team_names_df['Team'] == team_name].index
    print(f"Team index: {team_index}")

    # Check if the index is not empty
    if not team_index.empty:
        team_index = team_index[0]

        # Get the statistics for the specified team
        team_stats = original_df.loc[team_index]

        # Drop the original 'Team' column, the 'Arena' column, and the 'W' column
        team_stats = team_stats.drop(['Team', 'Arena', 'W'], errors='ignore')

        # Set the one-hot encoded team columns to 0
        team_stats.loc[encoded_df.columns] = 0

        # Set the one-hot encoded team column for the specified team to 1
        team_stats[f'Team_{team_name}'] = 1

        return team_stats
    else:
        print(f"Team '{team_name}' not found in the dataset.")
        return None


In [17]:
def simulate_matchup(team_a_stats, team_b_stats, model):
    # Combine the team statistics into a single DataFrame
    matchup_stats = pd.concat([team_a_stats, team_b_stats], axis=1).transpose()
    
    print("Matchup stats columns:", matchup_stats.columns)  # Add this print statement
    print("Model expected features:", model.coef_.shape)  # Add this print statement

    # Make predictions using the trained model
    predicted_wins = model.predict(matchup_stats)

    # Determine the winner based on the predicted number of wins
    if predicted_wins[0] > predicted_wins[1]:
        winner = "Team A"
    else:
        winner = "Team B"

    return winner, predicted_wins[0], predicted_wins[1]


In [22]:
# Example: Simulating a single matchup between Team A and Team B
team_a_name = 'Golden State Warriors*'  # Replace this with the name of Team A
team_b_name = 'Sacramento Kings*'  # Replace this with the name of Team B

team_a_stats = get_encoded_team_stats(team_a_name, team_names_df, playoff_teams_df, encoded_teams_df)
team_b_stats = get_encoded_team_stats(team_b_name, team_names_df, playoff_teams_df, encoded_teams_df)

if team_a_stats is not None and team_b_stats is not None:
    winner, team_a_wins, team_b_wins = simulate_matchup(team_a_stats, team_b_stats, model)
    print(f"Winner: {winner}\nTeam A predicted wins: {team_a_wins}\nTeam B predicted wins: {team_b_wins}")
else:
    print("Simulation cannot be run due to missing team data.")


Team index: Int64Index([9], dtype='int64')
Team index: Int64Index([7], dtype='int64')
Matchup stats columns: Index(['Rk', 'Age', 'L', 'PW', 'PL', 'MOV', 'SOS', 'SRS', 'ORtg', 'DRtg',
       'NRtg', 'Pace', 'FTr', '3PAr', 'TS%', 'eFG%', 'TOV%', 'ORB%', 'FT/FGA',
       'eFG%.1', 'TOV%.1', 'DRB%', 'FT/FGA.1', 'Attend.', 'Attend./G',
       'Team_Atlanta Hawks*', 'Team_Boston Celtics*', 'Team_Brooklyn Nets*',
       'Team_Cleveland Cavaliers*', 'Team_Denver Nuggets*',
       'Team_Golden State Warriors*', 'Team_Los Angeles Clippers*',
       'Team_Los Angeles Lakers*', 'Team_Memphis Grizzlies*',
       'Team_Miami Heat*', 'Team_Milwaukee Bucks*',
       'Team_Minnesota Timberwolves*', 'Team_New York Knicks*',
       'Team_Philadelphia 76ers*', 'Team_Phoenix Suns*',
       'Team_Sacramento Kings*'],
      dtype='object')
Model expected features: (41,)
Winner: Team B
Team A predicted wins: 44.000000000000284
Team B predicted wins: 48.000000000000455
