# Goal: Build a linear regression model that predicts the team that will win the regular season

In [1]:
# Load necessary libraries
import pandas as pd

In [2]:
# Load the data and combine the CSVs together
file_1 = "/kaggle/input/nba-teams-standing-dataset-for-the-2023-2024/NBA_2024_Eastern_Conference_Standing.csv"
file_2 = "/kaggle/input/nba-teams-standing-dataset-for-the-2023-2024/NBA_2024_Western_Conference_Standing.csv"
files = [file_1, file_2]

df = pd.concat(map(pd.read_csv, files))

In [3]:
# Clean the data 
df['Eastern Conference'].fillna('', inplace=True)
df['Western Conference'].fillna('', inplace=True)
df['Team Name'] = df['Eastern Conference'] + df['Western Conference']

In [4]:
# Select features that will be used in the linear regression model
features = ['Team Name','W', 'L', 'SOS']
df = df[features]
print(f"Shape of the dataframe: {df.shape}")

Shape of the dataframe: (30, 4)


In [5]:
# Set input and output for model
x = df[['W', 'L', 'SOS']]

# Convert to one hot encoding since categorical
y = df['Team Name']
y_encoded = pd.get_dummies(y, prefix='Team')

In [6]:
# Set up linear regression model 
from sklearn import linear_model

model = linear_model.LinearRegression()
model.fit(x, y_encoded)

In [7]:
# Make a prediction using the model
import numpy as np

new_x = np.array([60, 20, 2.0])
new_x = new_x[np.newaxis, :]
weights = model.coef_
predictions = model.predict(new_x)



In [8]:
y = y.to_numpy()

In [9]:
predicted_value = y[np.argmax(predictions)]
predicted_value

'Philadelphia 76ers'