In [1]:
import sys
import os
# Put the parent directory on the import path so the `libraries` package
# imported below can be resolved. The membership check keeps the cell
# idempotent: re-running it no longer appends a duplicate '..' entry.
if '..' not in sys.path:
    sys.path.append('..')

In [2]:
import pickle
import numpy as np
import time
from libraries.data_separation import separate_data
from libraries.gridsearch import perform_GridSearchCV

In [3]:
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import RidgeClassifierCV

# File Paths

In [8]:
# Directory prefixes shared by the paths below (all relative to this notebook).
_FEATURE_DATASET_DIR = '../dataset/feature_datasets'
_INFORMATION_DATASET_DIR = '../dataset/information_datasets'
_GS_RESULTS_DIR = './results/Catch22'

# Inputs: pickled feature dataset and train/validation/test split information.
# NOTE(review): the variable says "Catch22" but the file name says "Catch24" --
# confirm this is the intended feature file.
Catch22_windowed_feature_dataset_path = (
    _FEATURE_DATASET_DIR + '/Catch24_feature_data_6_behaviours.pkl'
)
train_validation_test_split_information_data_path = (
    _INFORMATION_DATASET_DIR + '/train_validation_test_set_info_6_labels.pkl'
)

# Outputs: one pickle of grid-search outcomes per classifier.
xgb_gs_outcomes_save_path = _GS_RESULTS_DIR + '/Catch22_xgb_gs_outcomes.pkl'
rf_gs_outcomes_save_path = _GS_RESULTS_DIR + '/Catch22_rf_gs_outcomes.pkl'
rcv_gs_outcomes_save_path = _GS_RESULTS_DIR + '/Catch22_rcv_gs_outcomes.pkl'

# Importing feature data

In [5]:
# Read the pickled feature dataset into `feature_data`.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open(Catch22_windowed_feature_dataset_path, mode='rb') as feature_file:
    feature_data = pickle.load(feature_file)

# Import Train, Validation, Test set information

In [6]:
# Read the pickled train/validation/test split information into `calf_split_info`.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open(train_validation_test_split_information_data_path, mode='rb') as split_info_file:
    calf_split_info = pickle.load(split_info_file)

# Train / Test data generation & Train / Validation set separation

In [7]:
# Split the feature data according to the split information. separate_data also
# returns the train / validation index sets that are handed to every grid
# search below. X_test and y_test are unpacked but not used in this notebook.
(
    X_train,
    y_train,
    X_test,
    y_test,
    train_index_sets,
    vaildation_index_sets,
) = separate_data(calf_split_info, feature_data)

# XGB

## GridSearch

In [None]:
%%time

# Grid search for XGBClassifier; the outcome object returned by
# perform_GridSearchCV is pickled to xgb_gs_outcomes_save_path.

# label encoding for XGB (y_train is integer-encoded with LabelEncoder)
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)

# classifier
xgb_classifier = XGBClassifier()

# experimented params
# This must be a plain dict: the stray trailing comma that used to follow the
# closing brace wrapped it in a 1-tuple, unlike rcv_params further down.
# NOTE(review): 'class_weight' does not appear to be an XGBClassifier
# parameter, so it may only double the number of fits -- confirm.
# NOTE(review): 'eta' is the learning rate; eta=0 presumably disables
# learning -- confirm that value is intended.
xgb_params = {
    'n_estimators': [100, 200],
    'eta': [0, 0.5, 1],
    'gamma': [0, 5, 10],
    'max_depth': [0, None, 10],
    'class_weight': [None, 'balanced'],
}

# GridSearch
xgb_gridsearch_results = perform_GridSearchCV(xgb_classifier, xgb_params, X_train, y_train_encoded,
                                              train_index_sets, vaildation_index_sets)

# Make sure the save directory exists; exist_ok avoids the separate
# exists()-then-create check and does not fail if it is already there
xgb_directory = os.path.dirname(xgb_gs_outcomes_save_path)
os.makedirs(xgb_directory, exist_ok=True)

# save data
with open(xgb_gs_outcomes_save_path, 'wb') as f:
    pickle.dump(xgb_gridsearch_results, f)

print('XGB GridSearch Complete!')

# RandomForest

## GridSearch

In [None]:
%%time

# Grid search for RandomForestClassifier; the outcome object returned by
# perform_GridSearchCV is pickled to rf_gs_outcomes_save_path.

# classifier
# NOTE(review): no random_state is set, so repeated runs may give different
# forests -- consider fixing a seed if reproducibility matters.
rf_classifier = RandomForestClassifier()

# experimented params
# This must be a plain dict: the stray trailing comma that used to follow the
# closing brace wrapped it in a 1-tuple, unlike rcv_params further down.
rf_params = {
    'n_estimators': [100, 200],
    'max_depth': [None, 10],
    'min_samples_split': [2, 5],
    'max_features': [None, 'log2', 'sqrt'],
    'criterion': ['gini', 'entropy'],
    'class_weight': [None, 'balanced'],
}

# GridSearch
rf_gridsearch_results = perform_GridSearchCV(rf_classifier, rf_params, X_train, y_train,
                                             train_index_sets, vaildation_index_sets)

# Make sure the save directory exists; exist_ok avoids the separate
# exists()-then-create check and does not fail if it is already there
rf_directory = os.path.dirname(rf_gs_outcomes_save_path)
os.makedirs(rf_directory, exist_ok=True)

# save data
with open(rf_gs_outcomes_save_path, 'wb') as f:
    pickle.dump(rf_gridsearch_results, f)

print('Random Forest GridSearch Complete!')

# RidgeClassifierCV

## GridSearch

In [10]:
%%time

# Grid search for RidgeClassifierCV; the outcome object returned by
# perform_GridSearchCV is pickled to rcv_gs_outcomes_save_path.

# classifier
rcv_classifier = RidgeClassifierCV()

# experimented params
# Each 'alphas' candidate is a whole array of 100 log-spaced values that is
# passed to RidgeClassifierCV as one setting (2 x 2 x 2 = 8 candidates).
rcv_params = {
    'fit_intercept': [True, False],
    'class_weight': [None, 'balanced'],
    'alphas': [np.logspace(-3, 3, 100), np.logspace(-1, 10, 100)],
}

# GridSearch
rcv_gridsearch_results = perform_GridSearchCV(rcv_classifier, rcv_params, X_train, y_train,
                                              train_index_sets, vaildation_index_sets)

# Make sure the save directory exists; exist_ok avoids the separate
# exists()-then-create check and does not fail if it is already there
rcv_directory = os.path.dirname(rcv_gs_outcomes_save_path)
os.makedirs(rcv_directory, exist_ok=True)

# save data
with open(rcv_gs_outcomes_save_path, 'wb') as f:
    pickle.dump(rcv_gridsearch_results, f)

print('RidgeClassifier GridSearch Complete!')

Fitting 10 folds for each of 8 candidates, totalling 80 fits
RidgeClassifier GridSearch Complete!
CPU times: user 21min 43s, sys: 1min 57s, total: 23min 41s
Wall time: 5min 12s
