In [1]:
import pickle

import numpy as np
import plotly.express as px
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from xgboost import XGBClassifier

# Loading the data

In [2]:
# Read the pickled training split. NOTE(review): pickle.load executes
# arbitrary code embedded in the file, so only use files from a trusted source.
with open("data/randomized/train_data", "rb") as f:
    train_data = pickle.load(f)

# Index 0 is used as the features and index 1 as the targets.
train_features, train_targets = train_data[0], train_data[1]

In [3]:
# Read the pickled test split. NOTE(review): pickle.load executes arbitrary
# code embedded in the file, so only use files from a trusted source.
with open("data/randomized/test_data", "rb") as f:
    test_data = pickle.load(f)

# Index 0 is used as the features and index 1 as the targets.
test_features, test_targets = test_data[0], test_data[1]

# Training a baseline model

In [4]:
def reshape_to_train(unshaped: list) -> np.ndarray:
    """Flatten every sample and stack the results into a 2-D matrix.

    Args:
        unshaped: Sequence of samples. Each sample may be anything NumPy can
            convert to an array (scalar, list, nested list, ndarray); it is
            flattened to one dimension. All samples must contain the same
            number of values.

    Returns:
        Array of shape ``(n_samples, n_values_per_sample)``. An empty
        ``unshaped`` yields an empty array of shape ``(0, 0)``.

    Raises:
        ValueError: If the samples do not all contain the same number of
            values, so no rectangular matrix can be built.
    """
    # reshape(-1) flattens any sample (including 0-d scalars) to 1-D.
    flattened = [np.asarray(sample).reshape(-1) for sample in unshaped]

    # Without samples there is no second dimension to infer; return an empty
    # 2-D array instead of failing on a missing axis.
    if not flattened:
        return np.empty((0, 0))

    sizes = {row.size for row in flattened}
    if len(sizes) > 1:
        raise ValueError(
            f"All samples must have the same number of values, got sizes {sorted(sizes)}"
        )

    return np.array(flattened)

In [5]:
# Turn the raw training samples into the 2-D matrix passed to fit().
shaped_train_features = reshape_to_train(train_features)

# Baseline classifier: XGBoost's multiclass "multi:softprob" objective with
# every other hyper-parameter left at its default.
xgb_model = XGBClassifier(objective="multi:softprob")

# Kept as the final expression so the cell displays the fitted estimator.
xgb_model.fit(shaped_train_features, train_targets)

XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
              early_stopping_rounds=None, enable_categorical=False,
              eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
              importance_type=None, interaction_constraints='',
              learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
              max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
              missing=nan, monotone_constraints='()', n_estimators=100,
              n_jobs=0, num_parallel_tree=1, objective='multi:softprob',
              predictor='auto', random_state=0, reg_alpha=0, ...)

# Testing the baseline model

In [6]:
# The test samples go through the same flattening helper as the training
# samples, so both matrices are built the same way.
shaped_test_features = reshape_to_train(test_features)

# Predictions from the fitted baseline model for the test split.
predictions = xgb_model.predict(shaped_test_features)

# Metrics

In [9]:
# Overall fraction of correctly classified test samples, rounded to four
# decimals for display.
acc = accuracy_score(y_true=test_targets, y_pred=predictions)
accuracy = round(acc, 4)

# For per-class precision / recall / F1, print
# classification_report(y_true=test_targets, y_pred=predictions, digits=4).

conf_matrix = confusion_matrix(y_true=test_targets, y_pred=predictions)

# confusion_matrix puts the true classes on the rows and the predicted classes
# on the columns, so in the heat map y is "true" and x is "predicted".
# Axis ticks are positions in the matrix, not necessarily the label values.
# The accuracy goes into the title so the cell reports it next to the figure.
px.imshow(
    conf_matrix,
    color_continuous_scale="turbo",
    labels={"x": "Predicted class", "y": "True class", "color": "Samples"},
    title=f"Confusion matrix (accuracy: {accuracy:.2%})",
)

# Saving the model

In [8]:
# Persist the fitted classifier as a pickle; the "models" directory must
# already exist. NOTE(review): a pickled estimator may not load under a
# different xgboost version — confirm before relying on it long-term.
with open("models/xgboost_model", "wb") as f:
    pickle.dump(obj=xgb_model, file=f)