In [1]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd


# Notebook-wide configuration: helper imports first, then global options.
import warnings
from IPython.display import display

# Show every column when a wide DataFrame is rendered.
pd.set_option('display.max_columns', None)

# Silence all warnings for the rest of the session.
# NOTE: this is a blanket filter, so library warnings are hidden as well.
warnings.filterwarnings(action='ignore')

In [2]:
# Load the feature table and the target table; both carry a `game_id` column.
features_raw = pd.read_csv('parsed_data/podaci_za_predvidjanje.csv')
targets_raw = pd.read_csv('parsed_data/predvidjanja.csv')

# The two tables are paired purely by row position further down, so their
# order must match. A stable sort keeps the file order of rows that share a
# `game_id`, instead of leaving tie order to an unstable algorithm.
# NOTE(review): assumes both files list rows of the same game in the same
# relative order -- confirm against the script that writes them.
features_sorted = features_raw.sort_values(by=['game_id'], kind='stable')
targets_sorted = targets_raw.sort_values(by=['game_id'], kind='stable')

# Fail loudly rather than train on silently misaligned rows.
if features_sorted['game_id'].tolist() != targets_sorted['game_id'].tolist():
    raise ValueError(
        "Feature and target tables do not contain the same sequence of "
        "game_id values; rows cannot be paired by position."
    )

# Drop identifier columns so only model inputs / the target remain.
df_x = features_sorted.drop(
    columns=['game_id', 'team_id', 'opponent', 'opponent_previous_game']
)
df_y = targets_sorted.drop(columns=['game_id'])

display(df_x.head(), df_y.head())

Unnamed: 0,points_scored,fast_break_points_for,fast_break_points_against,turnover_points_for,turnover_points_against,second_chance_points_for,second_chance_points_against,defensive_rebounds_for,defensive_rebounds_against,offensive_rebounds_for,offensive_rebounds_against,turnovers_for,turnovers_against,steals_for,steals_against,assists_for,assists_against,3P%_for,3P%_against,2P%_for,2P%_against,FT%_for,PFR_for,PFD_against,3PR_for,3PR_against,FTR_for,FTR_against,PACE,ORtg,DRtg,eFG%,TS%,opponent_steals_for,opponent_steals_against,opponent_defensive_rebounds_for,opponent_defensive_rebounds_against,opponent_offensive_rebounds_for,opponent_offensive_rebounds_against,opponent_turnovers_for,opponent_turnovers_against,opponent_PFD_for,opponent_PFD_against,opponent_PACE,opponent_ORtg,opponent_DRtg
0,66,2,5,7,4,2,10,28,26,7,7,11,9,5,5,17,16,37.745569,37.745569,51.864171,51.864171,75.662177,29.706255,29.436574,36.961091,36.961091,32.024197,32.024197,71.726654,110.499731,110.499731,53.592021,57.54507,3.0,10.0,27.0,22.0,12.0,12.0,15.0,11.0,29.197324,29.710963,71.904855,111.23213,110.718735
1,63,2,4,11,13,14,15,27,22,12,12,15,11,3,10,17,20,37.745569,37.745569,51.864171,51.864171,75.662177,29.706255,29.436574,36.961091,36.961091,32.024197,32.024197,71.726654,110.499731,110.499731,53.592021,57.54507,5.0,5.0,28.0,26.0,7.0,7.0,11.0,9.0,24.195844,29.888984,70.26,105.323086,111.016225
2,73,13,4,18,11,10,15,23,24,6,9,11,16,9,4,12,17,34.285714,47.368421,56.666667,51.219512,76.923077,26.455026,23.809524,53.846154,31.666667,20.0,28.333333,75.6,105.820106,107.142857,53.846154,56.561086,6.0,3.0,21.0,22.0,13.0,11.0,9.0,12.0,23.535565,20.920502,76.48,91.527197,108.525105
3,99,6,4,14,10,22,16,21,22,13,11,9,12,6,3,29,15,47.368421,34.285714,51.219512,56.666667,70.588235,25.132275,26.455026,31.666667,53.846154,28.333333,20.0,75.6,107.142857,105.820106,57.5,60.017783,9.0,4.0,23.0,24.0,6.0,9.0,11.0,16.0,29.197324,29.710963,71.904855,111.23213,110.718735
4,97,4,6,10,14,16,22,22,21,11,13,12,9,3,6,15,29,37.745569,37.745569,51.864171,51.864171,75.662177,29.706255,29.436574,36.961091,36.961091,32.024197,32.024197,71.726654,110.499731,110.499731,53.592021,57.54507,6.0,4.0,29.0,25.0,10.0,10.0,10.0,12.0,29.197324,29.710963,71.904855,111.23213,110.718735


Unnamed: 0,score
0,89
1,86
2,80
3,81
4,82


In [3]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df_x,
    df_y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Random forest with 2000 trees and a fixed seed for reproducibility.
# n_jobs=-1 builds (and later predicts with) the trees on all available cores;
# the forest itself is still determined by random_state.
rf_model = RandomForestRegressor(
    n_estimators=2000,
    random_state=42,
    n_jobs=-1,
)

In [5]:
# y_train comes from a DataFrame; when it has a single column, squeeze it to a
# 1-D Series so scikit-learn receives the (n_samples,) target it expects
# instead of a column vector. A multi-column target is passed through unchanged.
rf_model.fit(X_train, y_train.squeeze(axis=1))

In [6]:
# Predict the target for the held-out rows.
y_pred = rf_model.predict(
    X_test,
)

In [7]:
# Mean squared error between the held-out targets and the predictions.
mse = mean_squared_error(
    y_test,
    y_pred,
)

In [8]:
# Coefficient of determination (R^2) on the held-out rows.
r2 = r2_score(
    y_test,
    y_pred,
)

In [9]:
# Report both evaluation metrics, one per line.
print(
    f'Mean Squared Error: {mse}',
    f'R-squared: {r2}',
    sep='\n',
)

Mean Squared Error: 108.82874940591398
R-squared: 0.08787457269321297


In [10]:
# One row per feature column, holding the fitted forest's importance score.
importance_table = pd.DataFrame(
    rf_model.feature_importances_,
    index=df_x.columns,
    columns=['importance'],
)

# Highest-importance features first.
feature_importances = importance_table.sort_values(by='importance', ascending=False)

print("\nFeature Importances:")
print(feature_importances)


Feature Importances:
                                     importance
ORtg                                   0.080212
opponent_defensive_rebounds_against    0.040329
turnover_points_against                0.034770
points_scored                          0.034033
opponent_turnovers_against             0.033681
assists_for                            0.033205
opponent_turnovers_for                 0.032286
second_chance_points_for               0.031790
second_chance_points_against           0.031251
turnovers_against                      0.031167
defensive_rebounds_against             0.030905
opponent_defensive_rebounds_for        0.030847
opponent_offensive_rebounds_against    0.030766
opponent_offensive_rebounds_for        0.030441
assists_against                        0.030074
fast_break_points_against              0.029941
turnover_points_for                    0.029309
offensive_rebounds_against             0.028948
defensive_rebounds_for                 0.028408
fast_break_points_