In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

import os
import io
import sys
import ast
# Append the directory two levels above the current working directory to the
# module search path. The result depends on where the kernel was started.
# NOTE(review): presumably this is what lets the `final_project` imports below
# resolve -- confirm the notebook is always launched from its own folder.
sys.path.append(os.path.join(os.getcwd(), '..','..'))
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from final_project.cnn.preprocess import generate_cnn_data, split_preprocess_cnn_data, preprocess_cnn_data
from final_project.rnn.model import build_train_rnn, full_rnn_pipeline
from final_project.cnn.evaluate import gridsearch_analysis
import random
import pickle
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
import torch


from config import STANDARD_CAT_FEATURES, STANDARD_NUM_FEATURES, NUM_FEATURES_DICT


# Run Full Training

In [48]:
# One seed shared by NumPy, Python's `random` and PyTorch.
SEED = 444
np.random.seed(SEED)
random.seed(SEED)
torch.manual_seed(SEED)

# Data directory handed to the pipeline, resolved relative to the kernel's
# current working directory.
DATA_DIR = os.path.join(os.getcwd(), os.pardir, 'data', 'clean_data')

# Numerical feature columns passed to the pipeline for this run.
gk_num_features = [
    'total_points',
    'ict_index',
    'clean_sheets',
    'goals_conceded',
    'bps',
    'matchup_difficulty',
    'goals_scored',
    'assists',
    'yellow_cards',
    'red_cards',
]

# Every keyword argument of the training run, collected in one place.
rnn_run_config = {
    'season': ['2020-21', '2021-22'],
    'position': 'GK',
    'window_size': 6,
    'num_filters': 64,
    'num_dense': 64,
    'bidirectional': True,
    'temporal_attention': True,
    'batch_size': 32,
    'epochs': 2000,
    'drop_low_playtime': True,
    'low_playtime_cutoff': 1e-6,
    'num_features': gk_num_features,
    'cat_features': STANDARD_CAT_FEATURES,
    'stratify_by': 'stdev',
    'conv_activation': 'relu',
    'dense_activation': 'relu',
    'optimizer': 'adam',
    'learning_rate': 1e-5,
    'loss': 'mse',
    'metrics': ['mae'],
    'verbose': True,
    'regularization': 0.01,
    'early_stopping': True,
    # Only consulted when early stopping is on: the threshold below which a
    # decrease in validation loss counts as "low".
    'tolerance': 1e-5,
    # Number of iterations with a low validation-loss decrease before
    # training is stopped early.
    'patience': 20,
    'plot': True,
    'draw_model': False,
    'standardize': True,
}

full_rnn_pipeline(DATA_DIR, **rnn_run_config)

Dropping Players with Avg. Playtime < 1e-06...

Total players of type GK = 163.
82 players dropped due to low average playtime.
Generated windowed dataframe for CNN of shape: (2502, 7).
Generated combined features dataframe for preprocessing of shape: (2988, 11).



=== Stratifying Split by : Stdev ===
Shape of windowed_df: (2502, 7)
Shape of a given window (prior to preprocessing): (6, 11)
stdev Distribution of Players:


Mean of Standard Scaler:
[ 1.91043017e+00  1.20188568e+00  1.49675899e-01  7.44843842e-01
  9.65822039e+00 -5.24454920e-02  0.00000000e+00  1.76782557e-03
  2.06246317e-02  1.17855038e-03]

Standard Deviation of Standard Scaler:
[ 2.71102698  1.51262882  0.35675345  1.18885945 10.44252919  1.38395817
  1.          0.04200834  0.14212409  0.03430979]
Transforming features using StandardScaler + OHE Pipeline.


Epoch 1/2000, Train Loss: 11.019338003100852, Val Loss: 12.111951635381498, Val MAE: 2.0854873657226562
Epoch 2/2000, Train Loss: 10.89229346194123, Val Loss: 1

KeyboardInterrupt: 

# GridSearch for Best Model

In [47]:
# NOTE: the grid search is now performed via the command-line script for
# efficiency.
from final_project.rnn.experiment import gridsearch_rnn

gridsearch_rnn(
    experiment_name="rnn_eval_big",
    verbose=True,
)

===== Total Number of Iterations:  60


  0%|          | 0/60 [00:00<?, ?it/s]

===== Running Experiment for Parameters: =====
 {'seed': 444, 'position': 'GK', 'window_size': 3, 'num_dense': 64, 'tolerance': 0.0001, 'amt_num_features': 'large', 'stratify_by': 'stdev'}

seed 444
position GK
window_size 3
num_dense 64
tolerance 0.0001
amt_num_features large
stratify_by stdev
Running Iteration:  0
Dropping Players with Avg. Playtime < 1e-06...

Total players of type GK = 163.
82 players dropped due to low average playtime.
Generated windowed dataframe for CNN of shape: (2745, 7).
Generated combined features dataframe for preprocessing of shape: (2988, 9).



=== Stratifying Split by : Stdev ===
Shape of windowed_df: (2745, 7)
Shape of a given window (prior to preprocessing): (3, 9)
stdev Distribution of Players:


Mean of Standard Scaler:
[1.91043017e+00 4.76146140e+01 0.00000000e+00 1.76782557e-03
 1.49675899e-01 9.65822039e+00 2.06246317e-02 1.17855038e-03]

Standard Deviation of Standard Scaler:
[2.71102698e+00 4.48347714e+01 1.00000000e+00 4.20083369e-02
 3.56753

  0%|          | 0/60 [00:08<?, ?it/s]

Epoch 16/2000, Train Loss: 9.22098416652031, Val Loss: 10.457892734744433, Val MAE: 1.8766132593154907





KeyboardInterrupt: 