# Tuning Script

In [4]:
# Run-specific settings. These differ between tuning scripts and are interpolated
# into the file names this notebook reads from and writes to.

TUNER = "jiaochengb"                   # prefix of the tuning-result and best-model file names
MODEL = "svc(rbf)"                     # model tag; also selects the optional Colab pip installs
FEATURE_ENGINEERING = "tfidf"          # selects which curated feature matrices are loaded
FEATURE_IMPORTANCE_ORDERING = "xgb"    # selects which feature-importance ordering pickle is loaded
PREPROCESSING = "Baseline"             # preprocessing tag used in model/ordering file names

---

In [5]:
# Detect whether this notebook runs on Google Colab and, if it does, mount Drive.
# COLAB_ENVIRONMENT ends up True only when the import AND the mount both succeed.

COLAB_ENVIRONMENT = False

try:
    from google.colab import drive
except ImportError:
    # google.colab is not importable here: treat this as a local run.
    pass
else:
    try:
        drive.mount('/content/drive')
        COLAB_ENVIRONMENT = True
    except Exception as mount_error:
        # Still best-effort (fall back to the local paths), but report the cause
        # instead of hiding it. `Exception` lets KeyboardInterrupt/SystemExit propagate.
        print(f'Google Drive mount failed, falling back to local paths: {mount_error}')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
import sys
import os
import pickle
from scipy.sparse import load_npz

if COLAB_ENVIRONMENT:
    py_file_location = "./drive/MyDrive/LAB/COMP90051-A1__Groupwork__Py/PrivatePackages" # my private packages are stored here
    home_directory = './drive/MyDrive/LAB/COMP90051-A1__Groupwork__Py/' # my home directory is stored in ./LAB of google drive
    if MODEL == 'ebc':
        !pip install interpret==0.5.0
    elif MODEL == 'cbc':
        !pip install catboost
else:
    py_file_location = '../../PrivatePackages'
    home_directory = '../../'

sys.path.append(os.path.abspath(py_file_location))

from Tuners import JiaoChengB

import pandas as pd

In [7]:
## Settings shared by every tuning script

# Features are tuned like a hyperparameter (enables the tuner.set_features call below)
TUNE_FEATURES_AS_HYPERPARAMETERS = True

# This is a classification task; the value is passed to tuner.read_in_model
TASK_TYPE = "Classification"

SEED = 2608

---

In [8]:
# model to use to tune
from sklearn.svm import SVC as clf

# Candidate values for each tuneable hyperparameter
parameter_choices = dict(
    C=(0.0001, 0.001, 0.01, 0.1, 1, 10),
    tol=(0.0001, 0.001, 0.01, 0.1, 1, 10),
    max_iter=(50, 100, 200, 400, 800, 1600),
    gamma=('scale', 'auto'),
)

# Hyperparameters that are fixed to a non-default value and never tuned
# NOTE(review): random_state is 42 here although SEED = 2608 is defined in an
# earlier cell and is not referenced by this one - confirm which is intended.
non_tunable_hyperparameters_dict = dict(
    verbose=False,
    decision_function_shape='ovr',
    random_state=42,
    kernel='rbf',
)

# Order in which the tuner visits the hyperparameters ('features' first)
tuning_order = ['features', 'C', 'max_iter', 'gamma', 'tol']

# Starting value of every hyperparameter for the first tuning iteration
default_hyperparameter_values = dict(C=1, tol=0.001, max_iter=800, gamma='auto', features=0)

---

Read in and Prepare Data

In [9]:
# Every curated baseline artefact sits in the same directory under the project home
curated_dir = home_directory + 'data/curated/baseline/'


def _read_pickle(path):
    """Return the object unpickled from the file at `path`."""
    # pickle can execute arbitrary code on load: only point this at files you produced.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Sparse feature matrices, one .npz file per split
train_x, val_x, test_x = [
    load_npz(f'{curated_dir}{split}_{FEATURE_ENGINEERING}_x.npz')
    for split in ('train', 'val', 'test')
]

# Matching labels, one pickle per split
train_y, val_y, test_y = [
    _read_pickle(f'{curated_dir}{split}_y.pkl')
    for split in ('train', 'val', 'test')
]

In [10]:
# Load the pickled feature-importance ordering that matches this run's
# ordering source, feature engineering and preprocessing tags.

ordering_path = (
    f'{home_directory}models/'
    f'{FEATURE_IMPORTANCE_ORDERING}_{FEATURE_ENGINEERING}'
    f'_feature_importance_ordering_{PREPROCESSING}.pickle'
)
with open(ordering_path, 'rb') as ordering_file:
    feature_importance_ordering = pickle.load(ordering_file)

Set parameters

In [11]:
# Configure the JiaoChengB tuner: model, data, search space, tuning order, defaults
# and output locations. The print('---') calls only separate the tuner's own
# status messages in the cell output.

# Build each output path once so the "resume from" and "save to" locations cannot drift apart.
run_name = f'{TUNER}_{MODEL}_{FEATURE_IMPORTANCE_ORDERING}_{FEATURE_ENGINEERING}_{PREPROCESSING}'
tuning_result_path = f'{home_directory}models/tuning/{run_name}.csv'
best_model_path = f'{home_directory}models/tmp_models/{run_name}'

# initialisation
tuner = JiaoChengB.JiaoChengB()

print('---')

# define what model we are tuning
tuner.read_in_model(clf, TASK_TYPE)

print('---')

# read in the data for training, validation and testing
tuner.read_in_data(train_x, train_y, val_x, val_y, test_x, test_y)

print('---')

# set what hp values to tune
# WARNING: this may take a while if the number of tuneable hyperparameters is large
tuner.set_hyperparameters(parameter_choices)

print('---')

# set up hp values that need to be changed from default but NOT to be tuned
tuner.set_non_tuneable_hyperparameters(non_tunable_hyperparameters_dict)

print('---')

# set up feature importance ordering
if TUNE_FEATURES_AS_HYPERPARAMETERS:
    # WARNING: this may take a while if the number of tuneable hyperparameters is large
    tuner.set_features(feature_importance_ordering)

print('---')

# set up the order of hyperparameters when iteratively tuning using JiaoCheng
tuner.set_tuning_order(tuning_order)

print('---')

# set up the default hp values for first iteration of tuning JiaoCheng
tuner.set_hyperparameter_default_values(default_hyperparameter_values)

print('---')

# Try to read in previous results to continue tuning. This stays best-effort
# (a fresh run has no file yet), but the cause is reported so that an unreadable
# existing file is not mistaken for "no previous run". `Exception` rather than a
# bare except lets KeyboardInterrupt/SystemExit propagate.
try:
    tuner.read_in_tuning_result_df(tuning_result_path)
except Exception as read_error:
    print(f'No previous tuning data read in ({type(read_error).__name__}: {read_error})')

print('---')

# set up where to save the tuning result csv
tuner.set_tuning_result_saving_address(tuning_result_path)

print('---')

# set up where to save the current best model
tuner.set_best_model_saving_address(best_model_path)

JiaoCheng Initialised
---
Successfully read in model <class 'sklearn.svm._classes.SVC'>, which is a Classification model
---
Read in Train X data
Read in Train y data
Read in Val X data
Read in Val y data
Read in Test X data
Read in Test y data
---
Successfully recorded hyperparameter choices
---
Successfully recorded non_tuneable_hyperparameter choices
---
Successfully recorded tuneable feature combination choices and updated relevant internal structures
---
---
---
No previous tuning data read in
---
Successfully set tuning output address
---
Successfully set best model output address


In [12]:
# Suppress every warning (category Warning and all its subclasses) from here on,
# for the rest of the kernel session.

import warnings
warnings.filterwarnings(action="ignore", category=Warning)

In [13]:
# Begin tuning with the tuner configured above (or continue, if previous tuning
# results were read in). Per the captured output, each tried combination is logged
# with its time taken, validation score and the current best combination.

tuner.tune()


FEATURE_INDEX: 0 (C) ROUND 1

Round 1 
Hyperparameter: features (index: 4) 

Trained and Tested combination 1 of 40176: (4, 1, 4, 0, 0), taking 0.62 seconds to get val score of 0.2759
        Current best combo: (4, 1, 4, 0, 0) with val score 0.2759
Trained and Tested combination 2 of 40176: (4, 1, 4, 0, 1), taking 0.58 seconds to get val score of 0.23
        Current best combo: (4, 1, 4, 0, 0) with val score 0.2759
Trained and Tested combination 3 of 40176: (4, 1, 4, 0, 2), taking 0.92 seconds to get val score of 0.2226
        Current best combo: (4, 1, 4, 0, 0) with val score 0.2759
Trained and Tested combination 4 of 40176: (4, 1, 4, 0, 3), taking 0.99 seconds to get val score of 0.2252
        Current best combo: (4, 1, 4, 0, 0) with val score 0.2759
Trained and Tested combination 5 of 40176: (4, 1, 4, 0, 4), taking 0.67 seconds to get val score of 0.2248
        Current best combo: (4, 1, 4, 0, 0) with val score 0.2759
Trained and Tested combination 6 of 40176: (4, 1, 4, 0, 5),