In [56]:
import numpy as np
import pandas as pd
from sklearn.svm import LinearSVC
from sklearn.metrics import confusion_matrix, multilabel_confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.multioutput import MultiOutputClassifier

# Load Datasets

In [10]:
def load_data(file_path, n_features=9):
    """Load a whitespace-delimited numeric text file and split it by column.

    Parameters
    ----------
    file_path : str or path-like
        File readable by ``np.loadtxt``.
    n_features : int, default 9
        Number of leading columns returned as input features; every
        remaining column is returned as an output label.

    Returns
    -------
    X : np.ndarray
        The first ``n_features`` columns, shape ``(n_rows, n_features)``.
    y : np.ndarray
        The remaining columns. Always 2-D, even when only one label column
        is present, so callers that want a vector must index it themselves.
    """
    # ndmin=2 keeps a one-row (or one-column) file two-dimensional; without
    # it loadtxt returns a 1-D array and the column slicing below raises
    # IndexError.
    data = np.loadtxt(file_path, ndmin=2)
    X = data[:, :n_features]    # Input features
    y = data[:, n_features:]    # Output labels
    return X, y

In [11]:
# Read the three tic-tac-toe files; each call yields (features, 2-D labels)
X_final, y_final = load_data('data/tictac_final.txt')
X_single, y_single = load_data('data/tictac_single.txt')
X_multi, y_multi = load_data('data/tictac_multi.txt')

# The final and single datasets are used with one target column, so keep
# only column 0 as a 1-D vector; the multi labels stay 2-D.
y_final, y_single = y_final[:, 0], y_single[:, 0]

# SVM Classifier

In [12]:
def svm_train(X, y):
    """Train a linear SVM and summarise how well it performs.

    The data is split 80/20 with a fixed seed. A ``LinearSVC`` is fitted on
    the training portion; 10-fold cross-validated accuracy is then computed
    on that same training portion, and a confusion matrix is computed on
    the held-out test portion.

    Parameters
    ----------
    X : array-like
        Feature matrix.
    y : array-like
        Single target column.

    Returns
    -------
    mean_accuracy : float
        Mean of the 10 cross-validation accuracy scores (training data only).
    confusion_mtrx : np.ndarray
        Test-set confusion matrix, normalised over all samples
        (``normalize="all"``).
    """
    # Fixed random_state keeps the 80/20 partition identical between runs
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        shuffle=True,
        random_state=42,
    )

    # fit() returns the estimator itself, so construct and train in one go
    model = LinearSVC(dual="auto").fit(X_train, y_train)

    # 10-fold CV over the training portion
    fold_scores = cross_val_score(
        model, X_train, y_train, cv=10, scoring="accuracy"
    )
    mean_accuracy = np.mean(fold_scores)

    # Evaluate the fitted model on the held-out split
    test_predictions = model.predict(X_test)
    confusion_mtrx = confusion_matrix(y_test, test_predictions, normalize="all")

    return mean_accuracy, confusion_mtrx


In [13]:
def print_results(accuracy, confusion_mtrx, dataset_name):
    """Print a short text report for one single-target SVM run.

    Parameters
    ----------
    accuracy : float
        Accuracy value to report.
    confusion_mtrx : np.ndarray
        Confusion matrix; shown rounded to 3 decimal places.
    dataset_name : str
        Label inserted into the report's header line.
    """
    heading = f"Performance of Linear SVM Classification on {dataset_name}:"
    print(heading)
    print("Accuracy = ", accuracy)
    print("Confusion Matrix:")
    # Three decimals keep each matrix row on a single line
    rounded_mtrx = confusion_mtrx.round(decimals=3)
    print(rounded_mtrx)
    print("\n")

## Final Dataset

In [14]:
# Train and evaluate on the final dataset, then report
accuracy_final, confusion_mtrx_final = svm_train(X=X_final, y=y_final)
print_results(
    accuracy=accuracy_final,
    confusion_mtrx=confusion_mtrx_final,
    dataset_name="Final Dataset",
)

Performance of Linear SVM Classification on Final Dataset:
Accuracy =  0.9869617224880383
Confusion Matrix:
[[0.318 0.031]
 [0.    0.651]]




## Single Dataset

In [15]:
# Train and evaluate on the single dataset, then report
accuracy_single, confusion_mtrx_single = svm_train(X=X_single, y=y_single)
print_results(
    accuracy=accuracy_single,
    confusion_mtrx=confusion_mtrx_single,
    dataset_name="Single Dataset",
)

Performance of Linear SVM Classification on Single Dataset:
Accuracy =  0.23549618320610688
Confusion Matrix:
[[0.231 0.    0.001 0.    0.014 0.    0.    0.    0.   ]
 [0.121 0.001 0.004 0.    0.003 0.    0.    0.    0.   ]
 [0.14  0.    0.001 0.    0.002 0.    0.    0.    0.   ]
 [0.082 0.    0.004 0.    0.003 0.    0.    0.    0.   ]
 [0.143 0.001 0.003 0.    0.008 0.    0.    0.    0.   ]
 [0.054 0.    0.002 0.    0.002 0.    0.    0.    0.   ]
 [0.069 0.001 0.002 0.    0.005 0.    0.    0.    0.   ]
 [0.032 0.    0.001 0.    0.005 0.    0.    0.    0.   ]
 [0.067 0.001 0.    0.    0.    0.    0.    0.    0.   ]]




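## Multi Dataset

Note: with multi-label targets, `scoring="accuracy"` is exact-match (subset) accuracy — a sample counts as correct only when all 9 labels are predicted correctly — so it is far stricter than per-label accuracy and can be 0.0 even when many individual labels are right.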

In [61]:
def svm_train_multi(X, y):
    """Train one linear SVM per output column and summarise performance.

    The data is split 80/20 with a fixed seed. A ``MultiOutputClassifier``
    wrapping ``LinearSVC`` is fitted on the training portion; 10-fold
    cross-validated accuracy is computed on that same training portion, and
    per-label confusion matrices are computed on the held-out test portion.

    Parameters
    ----------
    X : array-like
        Feature matrix.
    y : array-like
        2-D target matrix with one column per label.

    Returns
    -------
    mean_accuracy : float
        Mean of the 10 cross-validation scores. For multi-label targets
        scikit-learn's "accuracy" is subset accuracy: a sample only counts
        as correct when every label matches.
    confusion_matrices : np.ndarray
        Output of ``multilabel_confusion_matrix`` on the test split
        (unnormalised counts, one matrix per label).
    """
    # Fixed random_state keeps the 80/20 partition identical between runs
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        shuffle=True,
        random_state=42,
    )

    # One independent LinearSVC per target column; fit() returns the wrapper
    model = MultiOutputClassifier(LinearSVC(dual="auto")).fit(X_train, y_train)

    # 10-fold CV over the training portion
    fold_scores = cross_val_score(
        model, X_train, y_train, cv=10, scoring="accuracy"
    )
    mean_accuracy = np.mean(fold_scores)

    # Evaluate the fitted model on the held-out split
    test_predictions = model.predict(X_test)
    confusion_matrices = multilabel_confusion_matrix(y_test, test_predictions)

    return mean_accuracy, confusion_matrices

In [60]:
def print_results_multi(accuracy, confusion_matrices, dataset_name):
    """Print a short text report for one multi-label SVM run.

    Parameters
    ----------
    accuracy : float
        Accuracy value to report.
    confusion_matrices : iterable of np.ndarray
        One confusion matrix per label, printed in iteration order and
        numbered from 0.
    dataset_name : str
        Label inserted into the report's header line.
    """
    heading = f"Performance of Linear SVM Classification on {dataset_name}:"
    print(heading)
    print("Accuracy = ", accuracy)

    # The loop variable is deliberately not called `confusion_matrix`, which
    # would shadow the sklearn function imported at the top of the notebook.
    for label_idx, label_mtrx in enumerate(confusion_matrices):
        print(f"Confusion Matrix for Label {label_idx}:")
        rounded_mtrx = label_mtrx.round(decimals=3)
        print(rounded_mtrx)

    print("\n")

In [64]:
# Train and evaluate on the multi dataset, then report
accuracy_multi, confusion_matrices = svm_train_multi(X=X_multi, y=y_multi)
print_results_multi(
    accuracy=accuracy_multi,
    confusion_matrices=confusion_matrices,
    dataset_name="Multi Dataset",
)

Performance of Linear SVM Classification on Multi Dataset:
Accuracy =  0.0
Confusion Matrix for Label 0:
[[988   0]
 [323   0]]
Confusion Matrix for Label 1:
[[1086    0]
 [ 225    0]]
Confusion Matrix for Label 2:
[[998   0]
 [313   0]]
Confusion Matrix for Label 3:
[[1061    0]
 [ 250    0]]
Confusion Matrix for Label 4:
[[926   0]
 [385   0]]
Confusion Matrix for Label 5:
[[1077    0]
 [ 234    0]]
Confusion Matrix for Label 6:
[[1021    0]
 [ 290    0]]
Confusion Matrix for Label 7:
[[1069    0]
 [ 242    0]]
Confusion Matrix for Label 8:
[[1006    0]
 [ 305    0]]


