# Évaluer sur l'ensemble de test (10 %) 

In [61]:
from comet_ml import API
import joblib

In [62]:
from dotenv import load_dotenv
import os
import pandas as pd
import numpy as np
from plots import * 

### Loading models

In [63]:
# Load the local ".env" file (python-dotenv), then read the Comet API key
# from the process environment. The key is None when the variable is unset.
load_dotenv(".env")
comet_api_key = os.getenv('COMET_API_KEY')

In [115]:
# Open a Comet API session with the key read from the environment.
api = API(api_key=comet_api_key)

# Every registry model used below lives in the same workspace.
_workspace = "ift6758-a02"

# Fetch one registry Model handle per trained model.
model_reg_logist_1 = api.get_model(workspace=_workspace, model_name='reg_logistique_angle')
model_reg_logist_2 = api.get_model(workspace=_workspace, model_name='reg_logistique-distance')
model_reg_logist_3 = api.get_model(workspace=_workspace, model_name='reg_logistique_dist_angle')
model_xgb = api.get_model(workspace=_workspace, model_name='best-xgboost-model')
model_forest = api.get_model(workspace=_workspace, model_name='mod-le-for-t-al-atoire-original')

# Download version 1.0.0 of each registry model, in the order declared above.
# NOTE(review): the loading cell reads the artifacts from a directory literally
# named "None"; presumably that is where download() writes when no output
# folder is given -- confirm against the Comet documentation.
for _registry_model in (
    model_reg_logist_1,
    model_reg_logist_2,
    model_reg_logist_3,
    model_xgb,
    model_forest,
):
    _registry_model.download("1.0.0", expand=True)

In [117]:
# Paths of the downloaded artifacts, in the order the models are unpacked below.
# NOTE(review): joblib.load relies on pickle, so only artifacts coming from a
# trusted registry should be loaded this way.
# NOTE(review): the XGBoost artifact has a .json extension but is read with
# joblib.load; presumably it was written with joblib.dump -- confirm.
_artifact_paths = [
    'None/logistic_angle.pkl',
    'None/logistic_distance.pkl',
    'None/logistic_distance_angle.pkl',
    'None/best_XGBoost_all_features.json',
    'None/random_forest_origin.pkl',
]

# Deserialize every artifact and bind each one to its own name.
(
    loaded_model_reg_logist_1,
    loaded_model_reg_logist_2,
    loaded_model_reg_logist_3,
    loaded_model_xgb,
    loaded_model_forest,
) = [joblib.load(_path) for _path in _artifact_paths]

### Loading data

In [118]:
# Directory holding the data files, taken from the DATA_DIR environment
# variable (None when the variable is not set).
data_path = os.getenv('DATA_DIR')

In [119]:
# Build the path of the test CSV inside DATA_DIR and load it.
# os.path.join uses the separator of the host platform, so the path is valid
# on Windows as well as on Linux/macOS (the previous hard-coded "\\" only
# worked on Windows).
test_data_path = os.path.join(data_path, "nhl_data_2020_previous_v2_0.csv")
test_data = pd.read_csv(test_data_path)

In [120]:
# Display three randomly drawn rows as a quick sanity check of the loaded data.
test_data.sample(n=3)

Unnamed: 0,period,periodTimeInSeconds,isGoal,typeDeTir,x,y,distanceToNet,relativeAngleToNet,previousEventTypeId,previousX,previousY,distanceFromPrevious,timeDiff,rebond,angleChange,vitesse,season
25869,1,250,0,Backhand,40.0,31.0,58.830264,13.412358,FACEOFF,0.0,0.0,50.61,29,False,0.0,1.745172,2020
39811,3,934,0,Wrist Shot,-88.0,-5.0,5.385165,-68.198591,SHOT,-83.0,-4.0,5.1,1,True,38.453709,5.1,2020
42110,4,79,1,Wrist Shot,59.0,10.0,32.572995,3.839596,PERIOD_END,,,,0,False,0.0,,2020


In [121]:
# Treat +/-inf as missing values, then discard every row that has at least
# one missing field.
test_data = (
    test_data
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)

In [122]:
# One-hot encode the two categorical columns, attach the dummy columns to the
# cleaned frame by row index, then remove the original categorical columns.
_categorical_cols = ['typeDeTir', 'previousEventTypeId']
dummy_object = pd.get_dummies(test_data[_categorical_cols])
data = pd.merge(test_data, dummy_object, left_index=True, right_index=True)
test_data_fin = data.drop(columns=_categorical_cols)

In [130]:
# Target column.
y_test = test_data_fin['isGoal']

# Feature subsets, one per model family:
#   angle only, distance only, distance + angle, and every non-target column.
X_test_angle = test_data_fin.loc[:, ['relativeAngleToNet']]
X_test_dist = test_data_fin.loc[:, ['distanceToNet']]
X_test_dist_angle = test_data_fin.loc[:, ['distanceToNet', 'relativeAngleToNet']]
X_test_total = test_data_fin.drop(columns='isGoal')

In [132]:
# Class predictions of each model on its own feature subset.
# The logistic regressions receive plain 2-D arrays (n_rows x n_features);
# the XGBoost model receives the full DataFrame.
_angle_array = X_test_angle.to_numpy().reshape(-1, 1)
_dist_array = X_test_dist.to_numpy().reshape(-1, 1)
_dist_angle_array = X_test_dist_angle.to_numpy().reshape(-1, 2)

y_pred_reg_log_1 = loaded_model_reg_logist_1.predict(_angle_array)
y_pred_reg_log_2 = loaded_model_reg_logist_2.predict(_dist_array)
y_pred_reg_log_3 = loaded_model_reg_logist_3.predict(_dist_angle_array)
y_pred_xgb = loaded_model_xgb.predict(X_test_total)



In [133]:
# Print the prediction vector of each model, one per line.
for _predictions in (y_pred_reg_log_1, y_pred_reg_log_2, y_pred_reg_log_3, y_pred_xgb):
    print(_predictions)

[0 0 0 ... 0 0 0]
[0 0 0 ... 0 0 0]
[0 0 0 ... 0 0 0]
[0 0 0 ... 0 0 0]


In [None]:
# Parallel lists handed to the plotting helpers: position i of each list
# describes one curve. The trailing None entries go with the 'Random Model'
# label (presumably a random baseline handled inside the helpers -- confirm).
nom_models = [
    'reg_logistique_angle',
    'reg_logistique-distance',
    'reg_logistique_dist_angle',
    'best-xgboost-model',
    'Random Model',
]
models = [
    loaded_model_reg_logist_1,
    loaded_model_reg_logist_2,
    loaded_model_reg_logist_3,
    loaded_model_xgb,
    None,
]
X_list = [
    X_test_angle,
    X_test_dist,
    X_test_dist_angle,
    X_test_total,
    None,
]
# Every real model is scored against the same target column.
y_list = [y_test] * 4 + [None]

plot_roc_auc(models, X_list, y_list, nom_models)

In [None]:
# Second diagnostic plot for the same models/data lists; presumably the goal
# rate per percentile (inferred from the helper's name, defined outside this file).
plot_taux_buts_par_centile(models, X_list, y_list, nom_models)

In [None]:
# Third diagnostic plot for the same models/data lists; presumably the
# cumulative goal rate (inferred from the helper's name, defined outside this file).
cumulative_goal_rate(models, X_list, y_list, nom_models)

In [None]:
# Fourth diagnostic plot for the same models/data lists; presumably a
# calibration curve (inferred from the helper's name, defined outside this file).
plot_calibration_curve(models, X_list, y_list, nom_models)

# Évaluation sur la saison éliminatoire

In [None]:
# Attempt 2: evaluate the downloaded models on the 2020 playoff data.
# NOTE(review): both CSV paths are absolute Colab/Drive locations, unlike the
# DATA_DIR-based path used for the regular-season data -- confirm they are intended.

# Categorical columns that are replaced by one-hot (dummy) columns.
_CATEGORICAL_COLUMNS = ['typeDeTir', 'previousEventTypeId']


def _drop_invalid_rows(frame):
    """Return a copy of *frame* without the rows holding NaN or +/-inf values."""
    return frame.replace([np.inf, -np.inf], np.nan).dropna()


def _one_hot_encode(frame):
    """Return *frame* with the categorical columns replaced by dummy columns."""
    return pd.get_dummies(frame, columns=_CATEGORICAL_COLUMNS)


# Load and clean both data sets with the same pipeline.
test_data = _drop_invalid_rows(pd.read_csv('/content/data/nhl_2020_prevELIMINATOIRE.csv'))
adv_data = _drop_invalid_rows(pd.read_csv('/content/drive/MyDrive/NHL2/nhl_2020_adv.csv'))

# NOTE(review): the dummy columns are derived from the categories present in
# each file; presumably they match the columns the XGBoost and forest models
# were trained on -- confirm.
test_data_fin = _one_hot_encode(test_data)
adv_data_fin = _one_hot_encode(adv_data)

# Build the test sets: one feature subset per model, plus the targets.
X_test_angle = test_data_fin[['relativeAngleToNet']]
X_test_dist = test_data_fin[['distanceToNet']]
X_test_dist_angle = test_data_fin[['distanceToNet', 'relativeAngleToNet']]
X_test_total = test_data_fin.drop('isGoal', axis=1)
y_test = test_data_fin['isGoal']
X_test_adv = adv_data_fin.drop('isGoal', axis=1)
y_test_adv = adv_data_fin['isGoal']

# Class predictions of every model (the models were loaded in an earlier cell).
y_pred_reg_log_1 = loaded_model_reg_logist_1.predict(X_test_angle.values.reshape(-1, 1))
y_pred_reg_log_2 = loaded_model_reg_logist_2.predict(X_test_dist.values.reshape(-1, 1))
y_pred_reg_log_3 = loaded_model_reg_logist_3.predict(X_test_dist_angle.values.reshape(-1, 2))
y_pred_xgb = loaded_model_xgb.predict(X_test_total)
y_pred_forest = loaded_model_forest.predict(X_test_adv)

# Parallel lists handed to the plotting helpers: position i of each list
# describes one curve. The forest model uses the "adv" data set; the trailing
# None entries go with the 'Random Model' label.
models = [loaded_model_reg_logist_1, loaded_model_reg_logist_2, loaded_model_reg_logist_3, loaded_model_xgb, loaded_model_forest, None]
X_list = [X_test_angle, X_test_dist, X_test_dist_angle, X_test_total, X_test_adv, None]
y_list = [y_test, y_test, y_test, y_test, y_test_adv, None]
nom_models = ['reg_logistique_angle', 'reg_logistique-distance', 'reg_logistique_dist_angle', 'best-xgboost-model', 'Forest Model' , 'Random Model' ]

plot_roc_auc(models, X_list, y_list, nom_models)

In [None]:
# Same plotting helper, now fed with the playoff lists built in the previous
# cell; presumably the goal rate per percentile (inferred from the helper's name).
plot_taux_buts_par_centile(models, X_list, y_list, nom_models)

In [None]:
# Same plotting helper, now fed with the playoff lists built in the previous
# cell; presumably the cumulative goal rate (inferred from the helper's name).
cumulative_goal_rate(models, X_list, y_list, nom_models)

In [None]:
# Same plotting helper, now fed with the playoff lists built in the previous
# cell; presumably a calibration curve (inferred from the helper's name).
plot_calibration_curve(models, X_list, y_list, nom_models)