In [None]:
import sys
import os
sys.path.append('..')

In [None]:
import pickle
import numpy as np
import time
from libraries.functions import combine_feature_data
from libraries.data_separation import separate_data
from libraries.evaluation import evaluate_model

In [None]:
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import RidgeClassifierCV

# File Paths

In [None]:
# Input datasets (pickle files). All paths in this cell are relative, so they
# are resolved against the current working directory of the kernel.
ROCKET_windowed_feature_dataset_path = '../dataset/feature_datasets/ROCKET_feature_data_6_behaviours.pkl'
train_validation_test_split_information_data_path = '../dataset/information_datasets/train_validation_test_set_info_6_labels.pkl'

# GridSearch Outcomes load paths (one pickle per classifier: XGB, RandomForest, RidgeClassifierCV)
xgb_gs_outcomes_load_path = '../hyperparameter_optimization/results/ROCKET/rocket_xgb_gs_outcomes.pkl'
rf_gs_outcomes_load_path = '../hyperparameter_optimization/results/ROCKET/rocket_rf_gs_outcomes.pkl'
rcv_gs_outcomes_load_path = '../hyperparameter_optimization/results/ROCKET/rocket_rcv_gs_outcomes.pkl'

# Evaluation results save paths (one pickle per classifier; the parent
# directory is created by the model-testing cells if it does not exist)
xgb_test_results_save_path = './results/ROCKET/rocket_xgb_test_results.pkl'
rf_test_results_save_path = './results/ROCKET/rocket_rf_test_results.pkl'
rcv_test_results_save_path = './results/ROCKET/rocket_rcv_test_results.pkl'

# Importing Feature Data

In [None]:
# Unpickle the ROCKET feature dataset from disk.
with open(ROCKET_windowed_feature_dataset_path, mode='rb') as feature_file:
    feature_data = pickle.load(feature_file)

# Getting Train / Test Data

In [None]:
# Unpickle the stored train / validation / test split information.
with open(train_validation_test_split_information_data_path, mode='rb') as split_info_file:
    calf_split_info = pickle.load(split_info_file)

In [None]:
# Build the train and test arrays from the split information and the feature
# data. separate_data also returns two index-set values; the validation index
# sets won't be used in this notebook.
(
    X_train,
    y_train,
    X_test,
    y_test,
    train_index_sets,
    vaildation_index_sets,
) = separate_data(calf_split_info, feature_data)

# Best XGB Model Testing

## Importing GridSearch Results

In [None]:
# Unpickle the XGB grid-search outcomes.
with open(xgb_gs_outcomes_load_path, mode='rb') as gs_outcomes_file:
    xgb_gs_outcomes = pickle.load(gs_outcomes_file)

## Model Testing

In [None]:
# Train an XGBClassifier with the best grid-search parameters on the training
# data, predict on the test data, evaluate the predictions, and pickle the
# evaluation results together with the measured train / test durations.

# best performing model
best_xgb_params = xgb_gs_outcomes['best_params']

# model training
# time.perf_counter() is used for all durations: it is a monotonic,
# high-resolution clock, whereas time.time() follows the wall clock and can
# jump if the system time is adjusted.
train_start_time = time.perf_counter()

# label encoding for XGB
# NOTE(review): the encoding step is inside the timed training region, while
# decoding the predictions below is outside the timed test region - confirm
# this asymmetry is intended.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)

best_xgbClassifier = XGBClassifier(**best_xgb_params)
best_xgbClassifier.fit(X_train, y_train_encoded)

train_stop_time = time.perf_counter()

# model testing
test_start_time = time.perf_counter()

xgb_y_pred = best_xgbClassifier.predict(X_test)

test_stop_time = time.perf_counter()

# decode the y_pred to evaluate (back to the original label values)
xgb_y_pred_decoded = label_encoder.inverse_transform(xgb_y_pred)

# evaluate the test data
xgb_eval_results = evaluate_model(y_test, xgb_y_pred_decoded)

# time durations (seconds)
train_time_seconds = train_stop_time - train_start_time
test_time_seconds = test_stop_time - test_start_time

time_results = {
    'train_time' : train_time_seconds,
    'test_time' : test_time_seconds
}

# save data
xgb_results = {
    'test_results': xgb_eval_results,
    'time_results': time_results
}

xgb_directory = os.path.dirname(xgb_test_results_save_path)

# Create the output directory if needed. exist_ok=True avoids the
# check-then-create race of a separate os.path.exists() test; the guard skips
# the call when the save path has no directory component.
if xgb_directory:
    os.makedirs(xgb_directory, exist_ok=True)

with open(xgb_test_results_save_path, 'wb') as f:
    pickle.dump(xgb_results, f)

# Best RandomForest Model Testing

## Importing GridSearch Results

In [None]:
# Unpickle the RandomForest grid-search outcomes.
with open(rf_gs_outcomes_load_path, mode='rb') as gs_outcomes_file:
    rf_gs_outcomes = pickle.load(gs_outcomes_file)

## Model Testing

In [None]:
# Train a RandomForestClassifier with the best grid-search parameters on the
# training data, predict on the test data, evaluate the predictions, and
# pickle the evaluation results together with the measured durations.

# best performing model
# NOTE(review): the run is only repeatable if best_rf_params carries a
# random_state - confirm against the grid-search output.
best_rf_params = rf_gs_outcomes['best_params']

# model training
# time.perf_counter() is used for all durations: it is a monotonic,
# high-resolution clock, whereas time.time() follows the wall clock and can
# jump if the system time is adjusted.
train_start_time = time.perf_counter()

best_rfClassifier = RandomForestClassifier(**best_rf_params)
best_rfClassifier.fit(X_train, y_train)

train_stop_time = time.perf_counter()

# model testing
test_start_time = time.perf_counter()

rf_y_pred = best_rfClassifier.predict(X_test)

test_stop_time = time.perf_counter()

# evaluate the test data
rf_eval_results = evaluate_model(y_test, rf_y_pred)

# time durations (seconds)
train_time_seconds = train_stop_time - train_start_time
test_time_seconds = test_stop_time - test_start_time

time_results = {
    'train_time' : train_time_seconds,
    'test_time' : test_time_seconds
}

# save data
rf_results = {
    'test_results': rf_eval_results,
    'time_results': time_results
}

rf_directory = os.path.dirname(rf_test_results_save_path)

# Create the output directory if needed. exist_ok=True avoids the
# check-then-create race of a separate os.path.exists() test; the guard skips
# the call when the save path has no directory component.
if rf_directory:
    os.makedirs(rf_directory, exist_ok=True)

with open(rf_test_results_save_path, 'wb') as f:
    pickle.dump(rf_results, f)

# Best RidgeClassifierCV Model Testing

## Importing GridSearch Results

In [None]:
# Unpickle the RidgeClassifierCV grid-search outcomes.
with open(rcv_gs_outcomes_load_path, mode='rb') as gs_outcomes_file:
    rcv_gs_outcomes = pickle.load(gs_outcomes_file)

## Model Testing

In [None]:
# Train a RidgeClassifierCV with the best grid-search parameters on the
# training data, predict on the test data, evaluate the predictions, and
# pickle the evaluation results together with the measured durations.

# best performing model
best_rcv_params = rcv_gs_outcomes['best_params']

# model training
# time.perf_counter() is used for all durations: it is a monotonic,
# high-resolution clock, whereas time.time() follows the wall clock and can
# jump if the system time is adjusted.
train_start_time = time.perf_counter()

best_ridgeclassifiercv = RidgeClassifierCV(**best_rcv_params)
best_ridgeclassifiercv.fit(X_train, y_train)

train_stop_time = time.perf_counter()

# model testing
test_start_time = time.perf_counter()

rcv_y_pred = best_ridgeclassifiercv.predict(X_test)

test_stop_time = time.perf_counter()

# evaluate the test data
rcv_eval_results = evaluate_model(y_test, rcv_y_pred)

# time durations (seconds)
train_time_seconds = train_stop_time - train_start_time
test_time_seconds = test_stop_time - test_start_time

time_results = {
    'train_time' : train_time_seconds,
    'test_time' : test_time_seconds
}

# save data
rcv_results = {
    'test_results': rcv_eval_results,
    'time_results': time_results
}

rcv_directory = os.path.dirname(rcv_test_results_save_path)

# Create the output directory if needed. exist_ok=True avoids the
# check-then-create race of a separate os.path.exists() test; the guard skips
# the call when the save path has no directory component.
if rcv_directory:
    os.makedirs(rcv_directory, exist_ok=True)

with open(rcv_test_results_save_path, 'wb') as f:
    pickle.dump(rcv_results, f)