In [1]:
from sklearn.metrics import accuracy_score, f1_score
import pickle
import pathlib
import os
from xgboost import XGBClassifier
import numpy as np


In [2]:
# Project root: one level above the directory the notebook kernel was started in.
# Kept as a plain string (not resolved), exactly as produced by os.path.join.
PROJECT_PATH = os.path.join(os.getcwd(), "..")

# <root>/store/models
MODELS_DIR = pathlib.Path(PROJECT_PATH, "store", "models")

# <root>/data -- load_data() looks up dataset pickles underneath this directory.
DATASET_DIR = pathlib.Path(PROJECT_PATH, "data")

In [3]:
def load_data(dataset_name: str, split_ratio: float):
    """Read a pickled dataset from ``DATASET_DIR``.

    The file is looked up at
    ``DATASET_DIR / <dataset_name> / dataset_<split_ratio>.pkl``.

    Args:
        dataset_name: Name of the sub-directory holding the dataset.
        split_ratio: Value interpolated into the file name (e.g. ``0.2``
            selects ``dataset_0.2.pkl``).

    Returns:
        Whatever object was pickled into the file.

    Note:
        Unpickling can execute arbitrary code -- only load files you trust.
    """
    file_name = f"dataset_{split_ratio}.pkl"
    pickle_path = DATASET_DIR / dataset_name / file_name

    with open(pickle_path, "rb") as handle:
        payload = pickle.load(handle)
    return payload

In [4]:
def one_hot_labels(num_classes: int, labels: np.ndarray) -> np.ndarray:
    """Encode integer class labels as a one-hot matrix.

    Args:
        num_classes: Number of columns of the result; every label must lie
            in ``[0, num_classes - 1]``.
        labels: Array-like of class indices. Any shape is accepted (for
            example an ``(n, 1)`` column vector); it is flattened to 1-D
            before encoding. Integer-valued float/bool labels are cast to
            integers.

    Returns:
        A float array of shape ``(labels.size, num_classes)`` whose row ``i``
        has a single ``1`` in column ``labels[i]`` and zeros elsewhere.

    Raises:
        ValueError: If a label is not integer-valued or falls outside
            ``[0, num_classes - 1]``.
    """
    # Flatten up front: with a 2-D label array the fancy index below would
    # broadcast against np.arange(...) and set several columns in every row.
    flat_labels = np.asarray(labels).reshape(-1)

    # Fancy indexing requires an integer dtype; accept labels stored as floats
    # (e.g. 0.0 / 1.0) as long as no information is lost by the cast.
    if not np.issubdtype(flat_labels.dtype, np.integer):
        as_int = flat_labels.astype(np.int64)
        if not np.array_equal(as_int, flat_labels):
            raise ValueError("Labels must be integer-valued class indices")
        flat_labels = as_int

    if np.any(flat_labels >= num_classes) or np.any(flat_labels < 0):
        raise ValueError(f"Labels must be in the range [0, {num_classes - 1}]")

    # Start from all zeros, then switch on one column per row.
    one_hot_matrix = np.zeros((flat_labels.size, num_classes))
    one_hot_matrix[np.arange(flat_labels.size), flat_labels] = 1

    return one_hot_matrix

# HELOC

In [4]:
# Load the HELOC dataset stored under split ratio 0.2.
X_train, y_train, X_test, y_test = load_data(dataset_name="heloc", split_ratio=0.2)

# Display the shape of each split as a sanity check.
tuple(split.shape for split in (X_train, y_train, X_test, y_test))

((1883, 23), (1883,), (1046, 23), (1046,))

In [9]:
# XGBoost classifier with library-default hyperparameters.
clf = XGBClassifier()

# fit() is the last expression so the fitted estimator is displayed by the cell.
clf.fit(X=X_train, y=y_train)

In [10]:
# Predict class labels for the test split with the fitted XGBoost model.
preds = clf.predict(X_test)

# (accuracy, weighted F1) on the test split.
(
    accuracy_score(y_true=y_test, y_pred=preds),
    f1_score(y_true=y_test, y_pred=preds, average='weighted'),
)

(0.7026768642447419, 0.7021980988422838)

In [14]:
# Count classes over BOTH splits. Deriving the count from y_test alone breaks
# as soon as a class is missing from the test split: one_hot_labels() would
# reject the training labels and the softmax layer would be too narrow.
# NOTE(review): this still assumes labels are the contiguous integers
# 0..num_classes-1, which is what one_hot_labels() requires -- confirm for new datasets.
num_classes = len(np.unique(np.concatenate([y_train, y_test])))

# One-hot targets for the categorical cross-entropy loss used by the Keras model.
y_train_one_hot = one_hot_labels(num_classes, y_train)
y_test_one_hot = one_hot_labels(num_classes, y_test)

In [15]:
# NOTE(review): these imports belong in the notebook's first (imports) cell;
# they are kept here so this cell stays runnable on its own.
import tensorflow as tf
from tensorflow.keras import backend as K
from keras import Input, Sequential
from tensorflow.keras.layers import Dense, Dropout
from keras import Metric
# Public import path; `keras.src.*` is Keras' private implementation package.
from keras.metrics import F1Score


# Build the model
model = Sequential()

# Explicit input layer: one feature per column of X_train. Declaring it builds
# the model immediately, so model.summary() below can report layer output
# shapes and parameter counts before any training happens.
model.add(Input(shape=(X_train.shape[1],)))

# First hidden layer
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))

# Additional hidden layer
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))

# Output layer: one softmax unit per class (targets are one-hot encoded)
model.add(Dense(num_classes, activation='softmax'))

# Compile the model, tracking accuracy and F1 score during training
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy', F1Score()])

# Summary of the model
model.summary()



In [17]:
# Train on the one-hot targets for 15 epochs with mini-batches of 8 samples.
# The returned History object is the cell's displayed value.
model.fit(x=X_train, y=y_train_one_hot, epochs=15, batch_size=8)

Epoch 1/15
236/236 ━━━━━━━━━━━━━━━━━━━━ 1s 1ms/step - accuracy: 0.5708 - f1_score: 0.5701 - loss: 0.6970
Epoch 2/15
236/236 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step - accuracy: 0.6966 - f1_score: 0.6959 - loss: 0.6060
Epoch 3/15
236/236 ━━━━━━━━━━━━━━━━━━━━ 0s 800us/step - accuracy: 0.7175 - f1_score: 0.7169 - loss: 0.5929
Epoch 4/15
236/236 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step - accuracy: 0.7029 - f1_score: 0.7025 - loss: 0.5809
Epoch 5/15
236/236 ━━━━━━━━━━━━━━━━━━━━ 0s 950us/step - accuracy: 0.7109 - f1_score: 0.7100 - loss: 0.5724
Epoch 6/15
236/236 ━━━━━━━━━━━━━━━━━━━━ 0s 855us/step - accuracy: 0.7100 - f1_score: 0.7096 - loss: 0.5787
Epoch 7/15
236/236 ━━━━━━━━━━━━━━━━━━━━ 0s 850us/step - accuracy: 0.7032 - f1_score: 0.7015 - loss: 0.5841
Epoch 8/15
236/... (remaining training log truncated)

<keras.src.callbacks.history.History at 0x191d35e6d50>

In [21]:
# model.predict yields one score per class for every test row; taking the
# argmax over the last axis turns those scores into predicted class indices.
preds = np.argmax(model.predict(X_test), axis=-1)

# (accuracy, weighted F1) of the neural network on the test split.
(
    accuracy_score(y_true=y_test, y_pred=preds),
    f1_score(y_true=y_test, y_pred=preds, average='weighted'),
)

33/33 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step


(0.6998087954110899, 0.6994454331219924)

# Gesture Phase

In [8]:
# Load the Gesture Phase dataset stored under split ratio 0.2.
# This rebinds X_train / y_train / X_test / y_test from the HELOC section.
X_train, y_train, X_test, y_test = load_data(dataset_name="gesture_phase", split_ratio=0.2)

# Display the shape of each split as a sanity check.
tuple(split.shape for split in (X_train, y_train, X_test, y_test))

((1777, 32), (1777,), (988, 32), (988,))

In [9]:
# Fresh XGBoost classifier (library-default hyperparameters) for Gesture Phase.
clf = XGBClassifier()

# fit() is the last expression so the fitted estimator is displayed by the cell.
clf.fit(X=X_train, y=y_train)


In [12]:
# Predict class labels for the Gesture Phase test split.
preds = clf.predict(X_test)

# (accuracy, weighted F1) on the test split.
(
    accuracy_score(y_true=y_test, y_pred=preds),
    f1_score(y_true=y_test, y_pred=preds, average='weighted'),
)

(0.6153846153846154, 0.5984407447266358)