In [319]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [320]:
initial_data = pd.read_json('./playerStats.json')
initial_data.shape[0]

106

In [321]:
initial_data = initial_data.drop(['player', 'team', 'game', 'pos', 'min', 'pFouls', 'steals', 'turnovers', 'blocks', 'plusMinus', 'comment'], axis = 1)
initial_data.dropna(inplace=True)
initial_data.reset_index(drop=True, inplace=True)

In [322]:
last_5_games_avg = initial_data.rolling(window=5).agg('mean')
last_5_games_avg = last_5_games_avg.add_prefix("last_5_avg_")
last_5_games_avg.dropna(inplace=True)
last_5_games_avg.reset_index(drop=True, inplace=True)
last_5_games_avg

Unnamed: 0,last_5_avg_points,last_5_avg_fgm,last_5_avg_fga,last_5_avg_fgp,last_5_avg_ftm,last_5_avg_fta,last_5_avg_ftp,last_5_avg_tpm,last_5_avg_tpa,last_5_avg_tpp,last_5_avg_offReb,last_5_avg_defReb,last_5_avg_totReb,last_5_avg_assists
0,14.2,4.2,9.6,43.52,3.8,4.6,82.48,2.0,6.0,34.44,0.2,3.6,3.8,4.0
1,19.0,6.0,12.2,48.44,4.0,4.4,90.48,3.0,7.4,40.44,0.4,4.0,4.4,4.4
2,23.2,7.6,14.8,49.86,4.0,4.4,90.48,4.0,8.8,42.74,0.8,3.8,4.6,3.8
3,25.8,8.8,16.8,52.36,4.0,4.6,87.14,4.2,9.8,40.74,0.8,4.0,4.8,3.6
4,26.0,9.0,17.2,52.54,3.4,4.0,85.00,4.6,10.0,44.30,0.8,2.4,3.2,4.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,30.8,11.0,20.0,55.38,3.4,3.8,74.28,5.4,11.2,49.28,1.0,5.0,6.0,5.8
96,29.2,9.6,18.2,52.86,5.2,5.2,80.00,4.8,10.2,47.80,1.0,5.6,6.6,7.4
97,28.6,9.6,19.4,48.98,4.8,4.8,80.00,4.6,10.2,45.58,1.8,6.0,7.8,7.2
98,27.2,8.8,19.0,44.48,5.2,5.4,76.00,4.4,10.6,41.44,1.8,6.0,7.8,8.6


In [323]:
new_df = initial_data.iloc[5:]
new_df.reset_index(drop=True, inplace=True)
new_df

Unnamed: 0,points,fgm,fga,fgp,ftm,fta,ftp,tpm,tpa,tpp,offReb,defReb,totReb,assists
0,32.0,11.0,19.0,57.9,4.0,4.0,100.0,6.0,12.0,50.0,1.0,4.0,5.0,5.0
1,35.0,12.0,21.0,57.1,3.0,3.0,100.0,8.0,13.0,61.5,2.0,2.0,4.0,2.0
2,26.0,9.0,18.0,50.0,5.0,6.0,83.3,3.0,10.0,30.0,0.0,3.0,3.0,4.0
3,23.0,8.0,19.0,42.1,3.0,4.0,75.0,4.0,10.0,40.0,1.0,0.0,1.0,8.0
4,28.0,9.0,17.0,52.9,5.0,5.0,100.0,5.0,8.0,62.5,0.0,1.0,1.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,28.0,11.0,22.0,50.0,0.0,0.0,0.0,6.0,11.0,54.5,1.0,5.0,6.0,10.0
95,32.0,7.0,17.0,41.2,14.0,14.0,100.0,4.0,11.0,36.4,2.0,8.0,10.0,11.0
96,26.0,8.0,19.0,42.1,5.0,5.0,100.0,5.0,9.0,55.6,5.0,8.0,13.0,6.0
97,14.0,4.0,13.0,30.8,4.0,5.0,80.0,2.0,9.0,22.2,1.0,4.0,5.0,10.0


In [324]:
if new_df.shape[0] > last_5_games_avg.shape[0]:
    new_df = new_df[:-1]
else:
    last_5_games_avg = last_5_games_avg[:-1]

In [325]:
new_df = new_df[['points', 'totReb', 'assists']]
df = pd.concat([last_5_games_avg, new_df], axis = 1)

In [326]:
X = df.iloc[:, :-3]
y = df.iloc[:, -3:]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [327]:
model = LinearRegression(fit_intercept=True)
model.fit(X_train, y_train)

In [330]:
y_pred = model.predict(X_test)

In [329]:
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
print("R-squared:", r2_score(y_test, y_pred))

Mean Squared Error: 24.64426866445063
R-squared: -0.41705914305062763


In [339]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Feature scaling
    ('model', Ridge(alpha=2))    # Ridge Regression with regularization
])

pipeline.fit(X_train, y_train)

# Make predictions
prediction = pipeline.predict(X_test)
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
print("R-squared:", r2_score(y_test, y_pred))

print("Mean Squared Error:", mean_squared_error(y_test, prediction))
print("R-squared:", r2_score(y_test, prediction))

Mean Squared Error: 24.64426866445063
R-squared: -0.41705914305062763
Mean Squared Error: 24.30083027810925
R-squared: -0.3070362438911792
