In [1]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict

# Load Datasets

In [2]:
def load_data(file_path, n_features=9):
    """Load a numeric text file and split its columns into inputs and labels.

    The file is parsed with ``np.loadtxt`` using its default delimiter and
    dtype settings.

    Parameters
    ----------
    file_path : str or path-like
        Path of the text file to read.
    n_features : int, optional
        Number of leading columns returned as input features. Defaults to 9,
        which is the split the rest of this notebook relies on.

    Returns
    -------
    X : np.ndarray
        The first ``n_features`` columns, shape ``(n_rows, n_features)``.
    y : np.ndarray
        All remaining columns. This is always 2-D, even when only one label
        column exists, so callers that want a flat vector must index it
        (e.g. ``y[:, 0]``).
    """
    # ndmin=2 keeps a one-row file as shape (1, n_columns). Without it
    # np.loadtxt returns a 1-D array and the column slicing below raises
    # IndexError.
    data = np.loadtxt(file_path, ndmin=2)
    X = data[:, :n_features]    # Input features
    y = data[:, n_features:]    # Output labels
    return X, y

In [3]:
# Read all three datasets from disk
X_final, y_final = load_data('data/tictac_final.txt')
X_single, y_single = load_data('data/tictac_single.txt')
X_multi, y_multi = load_data('data/tictac_multi.txt')

# load_data returns the labels as a 2-D column slice. For the final and single
# datasets only the first label column is kept, giving a flat label vector.
# y_multi is intentionally left 2-D.
y_final, y_single = y_final[:, 0], y_single[:, 0]

# SVM Classifier

In [4]:
def svm_train(X, y, test_size=0.2, kernel='linear', cv=10, random_state=42):
    """Train an SVM classifier and evaluate it.

    The data is split into a training and a held-out test portion. Two
    different measurements are returned, and they are computed on different
    data:

    * the accuracy is the mean k-fold cross-validation accuracy measured on
      the *training* portion only;
    * the confusion matrix comes from a model fitted on the whole training
      portion and applied to the *held-out test* portion.

    Parameters
    ----------
    X : array-like
        Input features, one row per sample.
    y : array-like
        Class label for each sample (1-D).
    test_size : float, optional
        Fraction of samples held out for the confusion matrix (default 0.2).
    kernel : str, optional
        Kernel passed to ``SVC`` (default ``'linear'``).
    cv : int, optional
        Number of cross-validation folds (default 10).
    random_state : int, optional
        Seed for the shuffled train/test split (default 42).

    Returns
    -------
    mean_accuracy : float
        Mean cross-validation accuracy on the training portion.
    confusion_mtrx : np.ndarray
        Confusion matrix on the held-out portion. Rows and columns follow the
        sorted unique labels of ``y``.
    """
    # Split into training and testing data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, shuffle=True, random_state=random_state
    )

    model = SVC(kernel=kernel)

    # Get accuracy. cross_val_score fits its own clones of the estimator on
    # each fold, so it neither needs nor affects the fit performed below.
    fold_scores = cross_val_score(model, X_train, y_train, cv=cv, scoring="accuracy")
    mean_accuracy = np.mean(fold_scores)

    # Fit on the full training portion for the held-out evaluation
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Pin the label set to every class present in y. Left to its default,
    # confusion_matrix only uses labels seen in y_test/y_pred, so a class that
    # is absent from both would silently shrink and re-index the matrix.
    confusion_mtrx = confusion_matrix(y_test, y_pred, labels=np.unique(y))

    return mean_accuracy, confusion_mtrx


In [5]:
def print_results(accuracy, confusion_mtrx, dataset_name):
    """Print a short text report for one evaluated dataset.

    Parameters
    ----------
    accuracy : object
        Accuracy value to show; rendered with ``str``.
    confusion_mtrx : object
        Confusion matrix to show; rendered with ``str``.
    dataset_name : str
        Name inserted into the report heading.

    The report is written to standard output and ends with two blank lines.
    """
    heading = f"Performance of Linear SVM on {dataset_name}:"
    # Joining with a single space reproduces print()'s default separator.
    accuracy_line = " ".join(["Accuracy = ", str(accuracy)])
    # One print call with a newline separator; the trailing "\n" argument plus
    # print's own line ending produce the blank lines after the matrix.
    print(heading, accuracy_line, "Confusion Matrix:", confusion_mtrx, "\n", sep="\n")

## Final Dataset

In [6]:
# Train and evaluate on the final dataset, then print the report
accuracy_final, confusion_mtrx_final = svm_train(X=X_final, y=y_final)
print_results(
    accuracy=accuracy_final,
    confusion_mtrx=confusion_mtrx_final,
    dataset_name="Final Dataset",
)

Performance of Linear SVM on Final Dataset:
Accuracy =  0.9869617224880383
Confusion Matrix:
[[ 61   6]
 [  0 125]]




## Single Dataset

In [7]:
# Train and evaluate on the single dataset, then print the report
accuracy_single, confusion_mtrx_single = svm_train(X=X_single, y=y_single)
print_results(
    accuracy=accuracy_single,
    confusion_mtrx=confusion_mtrx_single,
    dataset_name="Single Dataset",
)

Performance of Linear SVM on Single Dataset:
Accuracy =  0.3662213740458015
Confusion Matrix:
[[323   0   0   0   0   0   0   0   0]
 [ 90  16  29   0  33   0   0   0   0]
 [112   0  43   0  32   0   0   0   0]
 [ 73   6  21   1  16   0   0   0   0]
 [122   0   0   0  80   0   0   0   0]
 [ 41   7  20   0   8   0   0   0   0]
 [ 55   8  20   2  14   0   0   0   0]
 [ 30   9   9   0   2   0   0   0   0]
 [ 56   5  19   0   9   0   0   0   0]]




## Multi Dataset

In [8]:
# TODO: Unverified draft - the right way to train models on the multi dataset is still undecided.
# Idea: train 9 separate classifiers, one per output column of y_multi.
# for i in range(9):
#     y_data = y_multi[:, i]

#     accuracy_multi, confusion_mtrx_multi = svm_train(X_multi, y_data)
#     print_results(accuracy_multi, confusion_mtrx_multi, f"Multi Dataset {i}")
#     print("\n")