In [1]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd


# Suppress warnings
import warnings 
warnings.filterwarnings('ignore')

# Display settings
pd.set_option('display.max_columns', None)
from IPython.display import display

In [2]:
df_x=pd.read_csv('parsed_data/podaci_za_predvidjanje.csv')
df_y=pd.read_csv('parsed_data/predvidjanja.csv')

df_x.sort_values(by=['game_id'], inplace=True)
df_y.sort_values(by=['game_id'], inplace=True)

df_x.drop(['game_id'], axis=1, inplace=True)
df_x.drop(['team_id'], axis=1, inplace=True)
df_x.drop(['opponent'], axis=1, inplace=True)
df_y.drop(['game_id'], axis=1, inplace=True)

display(df_x.head(), df_y.head())

Unnamed: 0,points_scored,fast_break_points_for,fast_break_points_against,turnover_points_for,turnover_points_against,second_chance_points_for,second_chance_points_against,defensive_rebounds_for,defensive_rebounds_against,offensive_rebounds_for,offensive_rebounds_against,turnovers_for,turnovers_against,steals_for,steals_against,assists_for,assists_against,3P%_for,3P%_against,2P%_for,2P%_against,FT%_for,PFR_for,PFD_against,3PR_for,3PR_against,FTR_for,FTR_against,PACE,ORtg,DRtg,eFG%,TS%
0,89,1,0,10,13,12,20,19,20,7,17,9,7,2,4,21,14,37.745569,37.745569,51.864171,51.864171,75.662177,29.706255,29.436574,36.961091,36.961091,32.024197,32.024197,71.726654,110.499731,110.499731,53.592021,57.54507
1,86,0,1,13,10,20,12,20,19,17,7,7,9,4,2,14,21,37.745569,37.745569,51.864171,51.864171,75.662177,29.706255,29.436574,36.961091,36.961091,32.024197,32.024197,71.726654,110.499731,110.499731,53.592021,57.54507
2,80,2,4,30,16,7,14,19,29,7,13,14,19,11,8,16,18,34.285714,47.368421,56.666667,51.219512,76.923077,26.455026,23.809524,53.846154,31.666667,20.0,28.333333,75.6,105.820106,107.142857,53.846154,56.561086
3,81,4,2,16,30,14,7,29,19,13,7,19,14,8,11,18,16,47.368421,34.285714,51.219512,56.666667,70.588235,25.132275,26.455026,31.666667,53.846154,28.333333,20.0,75.6,107.142857,105.820106,57.5,60.017783
4,82,7,11,13,14,9,15,25,24,10,13,12,9,5,8,17,20,37.745569,37.745569,51.864171,51.864171,75.662177,29.706255,29.436574,36.961091,36.961091,32.024197,32.024197,71.726654,110.499731,110.499731,53.592021,57.54507


Unnamed: 0,score
0,89
1,86
2,80
3,81
4,82


In [3]:
X_train, X_temp, y_train, y_temp = train_test_split(df_x, df_y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [4]:
rf_model = RandomForestRegressor(n_estimators=200, random_state=42)

In [5]:
rf_model.fit(X_train, y_train)

In [6]:
y_pred = rf_model.predict(X_test)

In [7]:
mse = mean_squared_error(y_test, y_pred)

In [8]:
r2=r2_score(y_test, y_pred)

In [9]:
y_val_pred = rf_model.predict(X_val)

In [10]:
mse_val = mean_squared_error(y_val, y_val_pred)

In [11]:
r2_val=r2_score(y_val, y_val_pred)

In [12]:
print(f'Mean Squared Error test: {mse}')
print(f'R2 test: {r2}')
print(f'Mean Squared Error validation: {mse_val}')
print(f'R2 validation: {r2_val}')

Mean Squared Error test: 0.10861696428571438
R2 test: 0.9991891033063707
Mean Squared Error validation: 0.1412893884892085
R2 validation: 0.9985183156543809


In [13]:
feature_importances = pd.DataFrame(rf_model.feature_importances_,
                                   index=df_x.columns,
                                   columns=['importance']).sort_values('importance', ascending=False)

print("\nFeature Importances:")
print(feature_importances)


Feature Importances:
                              importance
points_scored                   0.999229
3P%_for                         0.000060
second_chance_points_against    0.000053
FTR_against                     0.000044
FTR_for                         0.000040
assists_against                 0.000038
ORtg                            0.000036
PACE                            0.000036
offensive_rebounds_for          0.000036
2P%_against                     0.000032
defensive_rebounds_for          0.000032
fast_break_points_for           0.000031
second_chance_points_for        0.000029
DRtg                            0.000028
offensive_rebounds_against      0.000028
fast_break_points_against       0.000027
assists_for                     0.000023
steals_for                      0.000022
PFD_against                     0.000021
2P%_for                         0.000019
turnovers_against               0.000018
3PR_for                         0.000018
defensive_rebounds_against      0.0