In [2]:
%load_ext autoreload
%autoreload 2
import sys
import os
dir_ = os.path.dirname(os.getcwd())
if dir_ not in sys.path:
    sys.path.append(dir_)
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from comet_ml import Experiment
from comet_ml import API

from sklearn.model_selection import train_test_split
sns.set_theme()

from ift6758.models.utils import preprocess, predict_model,save_metrics_and_models_on_comet,compute_metrics
from ift6758.models.plotter import *
from dotenv import load_dotenv

from sklearn.neural_network import MLPClassifier
from sklearn.utils import resample , shuffle

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
url = 'https://drive.google.com/file/d/1kM__riNHRPx5GsyuOH3yhiql3OZvwmuP/view?usp=sharing'
path = 'https://drive.google.com/uc?export=download&id='+url.split('/')[-2]
df = pd.read_csv(path)

In [4]:
df_train = df[df['season']!=20192020]

In [5]:
list_features = ['empty_net', 'periodTime','period', 'x_coord', 'y_coord','distance','angle','shot_type',\
    'last_event_type', 'last_x_coord', 'last_y_coord','distance_from_last', 'seconds_since_last', \
        'rebound', 'angle_change','speed']
# preprocess
X, Y ,df_train_preprocessed,_ =  preprocess(df_train,features = list_features, standarize=True)

### Oversampling

In [93]:
X_train, X_val, y_train, y_val = train_test_split(X,Y,random_state = 42, shuffle=True)
X_oversampled, y_oversampled = resample(X_train[y_train == 1],
                                        y_train[y_train == 1],
                                        replace=True,
                                        n_samples=X_train[y_train == 0].shape[0],
                                        random_state=42)

X_train = np.vstack((X_train[y_train == 0], X_oversampled))
y_train = np.hstack((y_train[y_train == 0], y_oversampled))

### Undersampling

In [77]:
X_train, X_val, y_train, y_val = train_test_split(X,Y,random_state = 42, shuffle=True)
X_undersampled, y_undersampled = resample(X_train[y_train == 0],
                                          y_train[y_train == 0],
                                          replace=True,
                                          n_samples=X_train[y_train == 1].shape[0],
                                           random_state=42)
X_train = np.vstack((X_train[y_train == 1], X_undersampled))
y_train = np.hstack((y_train[y_train == 1], y_undersampled))
X_train, y_train = shuffle(X_train,y_train,random_state=42)

### SMOTE

In [6]:
from imblearn.over_sampling import SMOTE

X_train, X_val, y_train, y_val = train_test_split(X,Y,random_state = 42, shuffle=True)

print("Before OverSampling, counts of label '1': {}".format(sum(y_train==1)))
print("Before OverSampling, counts of label '0': {} \n".format(sum(y_train==0)))

sm = SMOTE(random_state=42)
X_train, y_train = sm.fit_resample(X_train, y_train.ravel())

print('After OverSampling, the shape of train_X: {}'.format(X_train.shape))
print('After OverSampling, the shape of train_y: {} \n'.format(y_train.shape))

print("After OverSampling, counts of label '1': {}".format(sum(y_train==1)))
print("After OverSampling, counts of label '0': {}".format(sum(y_train==0)))

Before OverSampling, counts of label '1': 22642
Before OverSampling, counts of label '0': 218464 

After OverSampling, the shape of train_X: (436928, 30)
After OverSampling, the shape of train_y: (436928,) 

After OverSampling, counts of label '1': 218464
After OverSampling, counts of label '0': 218464


### MLP

In [7]:
model = MLPClassifier(hidden_layer_sizes=(100,50,50,20),learning_rate_init=0.01,warm_start=True,random_state=1, max_iter=200,verbose=True)
model.fit(X_train, y_train)

Iteration 1, loss = 0.55581045
Iteration 2, loss = 0.53803718
Iteration 3, loss = 0.52972295
Iteration 4, loss = 0.52076837
Iteration 5, loss = 0.51124708
Iteration 6, loss = 0.50306970
Iteration 7, loss = 0.49796981
Iteration 8, loss = 0.49007892
Iteration 9, loss = 0.48294281
Iteration 10, loss = 0.47685355
Iteration 11, loss = 0.47117544
Iteration 12, loss = 0.46493839
Iteration 13, loss = 0.46118383
Iteration 14, loss = 0.45893813
Iteration 15, loss = 0.45455045
Iteration 16, loss = 0.45195840
Iteration 17, loss = 0.45099783
Iteration 18, loss = 0.44819766
Iteration 19, loss = 0.44694525
Iteration 20, loss = 0.44445247
Iteration 21, loss = 0.44327139
Iteration 22, loss = 0.44128827
Iteration 23, loss = 0.43960387
Iteration 24, loss = 0.43833623
Iteration 25, loss = 0.43773168
Iteration 26, loss = 0.43694846
Iteration 27, loss = 0.43583890
Iteration 28, loss = 0.43493959
Iteration 29, loss = 0.43344150
Iteration 30, loss = 0.43188635
Iteration 31, loss = 0.43073554
Iteration 32, los



In [10]:
y_val_prob = model.predict_proba(X_val)
y_val_pred = model.predict(X_val)

df_metrics_results = compute_metrics(y_val,[y_val_pred],model_names=['NN'])
df_metrics_results

Unnamed: 0,model_name,Accuracy,Recall,Precision,f_score
0,NN,0.757966,0.642756,0.565883,0.566772


In [9]:
name_experiment = "question6_SMOTE_NeuralNet"
model_dir = "NN"

save_metrics_and_models_on_comet(model,y_val,y_val_pred,y_val_prob[:,1],model_names=name_experiment,model_dir=model_dir,name_experiment=name_experiment,register_model = True ,sklearn_model=False)

COMET INFO: Experiment is live on comet.com https://www.comet.com/princesslove/itf-6758-team-4/770a47028599496998454370f581e427



AttributeError: 'MLPClassifier' object has no attribute 'save_model'