In [2]:
from typing import Any, Dict, List, NamedTuple, Tuple

import sklearn.svm as svm
from sklearn.datasets import load_svmlight_file
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold

In [3]:
# Load the training split first; its column count defines the feature space.
train_X, train_y = load_svmlight_file("./train.txt")

# The svmlight loader infers the number of columns from the file unless told
# otherwise, so a split that never uses the highest-index feature would come
# out narrower. Pin the test matrix to the training width (instead of a
# hard-coded 123) so the two always agree.
test_X, test_y = load_svmlight_file("./test.txt", n_features=train_X.shape[1])

In [4]:
k_fold = 3


class Fold(NamedTuple):
    """One cross-validation split: a training part and a held-out validation part."""

    # Annotated with typing.Any (the builtin function `any` is not a type);
    # the values are row-slices of whatever load_svmlight_file returned.
    train_X: Any
    train_y: Any
    val_X: Any
    val_y: Any


# Shuffle with a fixed seed so the same splits are produced on every run.
kf = KFold(n_splits=k_fold, shuffle=True, random_state=42)

# Materialise each split once so later cells can reuse identical folds.
folds: List[Fold] = []
for train_idx, valid_idx in kf.split(train_X, train_y):
    folds.append(
        Fold(
            train_X=train_X[train_idx],
            train_y=train_y[train_idx],
            val_X=train_X[valid_idx],
            val_y=train_y[valid_idx],
        )
    )

In [4]:
C_LIST = [0.01, 0.05, 0.1, 0.5, 1.0]

# For every C: one accuracy per fold, appended in fold order.
validation_acc = {C: [] for C in C_LIST}
train_acc = {C: [] for C in C_LIST}

for C in C_LIST:
    print(f"Running Model for C = {C}")

    # Iterate the folds directly and read the data through `fold.`:
    # rebinding the notebook-level train_X / train_y here would leave them
    # pointing at the last fold's subset for every cell that runs afterwards.
    for fold in folds:
        # new model each fold
        model = svm.SVC(C=C, kernel="linear")
        model.fit(fold.train_X, fold.train_y)

        # Train accuracy
        train_preds = model.predict(fold.train_X)
        train_acc[C].append(accuracy_score(fold.train_y, train_preds))

        # Validation accuracy
        val_preds = model.predict(fold.val_X)
        validation_acc[C].append(accuracy_score(fold.val_y, val_preds))

# Print average results
for C in C_LIST:
    print(f"C={C:.2f} | "
          f"Train Acc: {sum(train_acc[C]) / len(train_acc[C]):.4f} | "
          f"Val Acc: {sum(validation_acc[C]) / len(validation_acc[C]):.4f}")


Running Model for C = 0.01
Running Model for C = 0.05
Running Model for C = 0.1
Running Model for C = 0.5
Running Model for C = 1.0
C=0.01 | Train Acc: 0.8454 | Val Acc: 0.8448
C=0.05 | Train Acc: 0.8481 | Val Acc: 0.8472
C=0.10 | Train Acc: 0.8491 | Val Acc: 0.8479
C=0.50 | Train Acc: 0.8498 | Val Acc: 0.8479
C=1.00 | Train Acc: 0.8499 | Val Acc: 0.8479


In [5]:
C_LIST = [0.01, 0.05, 0.1, 0.5, 1.0]
g_LIST = [0.01, 0.05, 0.1, 0.5, 1.0]

# Store results for each (C,gamma) pair: one accuracy per fold, in fold order
validation_acc_rbf = {(C, g): [] for C in C_LIST for g in g_LIST}
train_acc_rbf = {(C, g): [] for C in C_LIST for g in g_LIST}

for C in C_LIST:
    for g in g_LIST:
        print(f"Running RBF Model for C={C}, gamma={g}")

        # Walk the prepared folds themselves (no separate fold counter that
        # could disagree with len(folds)) and keep the fold data behind
        # `fold.` so the notebook-level train_X / train_y are not overwritten.
        for fold in folds:
            # New model per fold
            model = svm.SVC(C=C, kernel="rbf", gamma=g)
            model.fit(fold.train_X, fold.train_y)

            # Train accuracy
            train_preds = model.predict(fold.train_X)
            train_acc_rbf[(C, g)].append(accuracy_score(fold.train_y, train_preds))

            # Validation accuracy
            val_preds = model.predict(fold.val_X)
            validation_acc_rbf[(C, g)].append(accuracy_score(fold.val_y, val_preds))

# Print average results
for C in C_LIST:
    for g in g_LIST:
        # Average over the scores actually recorded for this pair.
        train_scores = train_acc_rbf[(C, g)]
        val_scores = validation_acc_rbf[(C, g)]
        avg_train = sum(train_scores) / len(train_scores)
        avg_val   = sum(val_scores) / len(val_scores)
        print(f"C={C:.2f}, gamma={g:.2f} | Train Acc: {avg_train:.4f} | Val Acc: {avg_val:.4f}")


Running RBF Model for C=0.01, gamma=0.01
Running RBF Model for C=0.01, gamma=0.05
Running RBF Model for C=0.01, gamma=0.1
Running RBF Model for C=0.01, gamma=0.5
Running RBF Model for C=0.01, gamma=1.0
Running RBF Model for C=0.05, gamma=0.01
Running RBF Model for C=0.05, gamma=0.05
Running RBF Model for C=0.05, gamma=0.1
Running RBF Model for C=0.05, gamma=0.5
Running RBF Model for C=0.05, gamma=1.0
Running RBF Model for C=0.1, gamma=0.01
Running RBF Model for C=0.1, gamma=0.05
Running RBF Model for C=0.1, gamma=0.1
Running RBF Model for C=0.1, gamma=0.5
Running RBF Model for C=0.1, gamma=1.0
Running RBF Model for C=0.5, gamma=0.01
Running RBF Model for C=0.5, gamma=0.05
Running RBF Model for C=0.5, gamma=0.1
Running RBF Model for C=0.5, gamma=0.5
Running RBF Model for C=0.5, gamma=1.0
Running RBF Model for C=1.0, gamma=0.01
Running RBF Model for C=1.0, gamma=0.05
Running RBF Model for C=1.0, gamma=0.1
Running RBF Model for C=1.0, gamma=0.5
Running RBF Model for C=1.0, gamma=1.0
C=0.0

In [None]:
""" Best SVC linear """

# Refit on the complete training file (re-read from disk), then score the
# fitted model on both the test split and the data it was trained on.
train_X, train_y = load_svmlight_file("./train.txt")
model = svm.SVC(kernel="linear", C=1.0).fit(train_X, train_y)

# Predictions for each split
test_preds = model.predict(test_X)
train_preds = model.predict(train_X)

# Accuracy for each split
test_accuracy = accuracy_score(test_y, test_preds)
train_accuracy = accuracy_score(train_y, train_preds)

# Report test first, then train
print(f"Test Accuracy : {test_accuracy:.4f}")
print(f"Train Accuracy : {train_accuracy:.4f}")

Test Accuracy : 0.8496
Train Accuracy : 0.8500
