In [1]:
import numpy as np
import pandas as pd
import sklearn.datasets as skdata
import sklearn.metrics as skmetrics
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import seaborn as sns

In [8]:
# NOTE: point TRAIN_PATH at the local copy of the data before running.
# Alternative dataset: "../augmented_train/reformatted_train_10000_samples.csv"
TRAIN_PATH = "train_1998_samples_fft_0_to_10_hz_consensus_1.0_balanced.csv"

# Parse the CSV into a DataFrame and keep a plain NumPy array alongside it
# for the positional slicing done by the following cells.
data = pd.read_csv(filepath_or_buffer=TRAIN_PATH)
data_np = data.to_numpy()

In [9]:
# Split the loaded array into features/labels and then into
# train (80%) / validation (10%) / test (10%) partitions.

# Every column except the last is treated as an input feature;
# the last column is the target passed to the classifier.
num_input = data_np.shape[1] - 1

x = data_np[:, 0:num_input]
y = data_np[:, num_input]

# Shuffle sample indices with a fixed seed so that the partitions (and every
# accuracy reported below) are identical after Restart Kernel -> Run All.
SPLIT_SEED = 0
shuffled_indices = np.random.default_rng(SPLIT_SEED).permutation(x.shape[0])

# Boundaries of the 80% / 10% / 10% partitions (truncated to whole samples)
train_split_idx = int(0.80 * x.shape[0])
val_split_idx = int(0.90 * x.shape[0])

train_indices = shuffled_indices[0:train_split_idx]
val_indices = shuffled_indices[train_split_idx:val_split_idx]
test_indices = shuffled_indices[val_split_idx:]

# Select the examples from x and y to construct our training, validation, testing sets
x_train, y_train = x[train_indices, :], y[train_indices]
x_val, y_val = x[val_indices, :], y[val_indices]
x_test, y_test = x[test_indices, :], y[test_indices]

In [17]:
# Grid search over SVM kernel type and convergence tolerance.
#
# Each configuration is fitted on the training split and scored on all three
# splits. The winner is chosen on VALIDATION accuracy; the test accuracy is
# only reported, so it remains an unbiased estimate of generalisation.
kernels = ['linear', 'poly', 'rbf']
tolerances = [5e-1, 1e-1, 1e-2, 1e-3, 1e-4]

models = []         # fitted estimators, in grid order
model_configs = []  # (kernel, tolerance) of the estimator at the same index
val_scores = []     # validation accuracy per estimator -> used for selection
model_scores = []   # test accuracy per estimator -> reported, never selected on

for kernel in kernels:
    for tolerance in tolerances:
        model = SVC(tol=tolerance, kernel=kernel)
        model.fit(x_train, y_train)
        models.append(model)
        model_configs.append((kernel, tolerance))

        # accuracy_score expects (y_true, y_pred)
        predictions_train = model.predict(x_train)
        score_train = skmetrics.accuracy_score(y_train, predictions_train)

        predictions_val = model.predict(x_val)
        score_val = skmetrics.accuracy_score(y_val, predictions_val)

        predictions_test = model.predict(x_test)
        score_test = skmetrics.accuracy_score(y_test, predictions_test)

        val_scores.append(score_val)
        model_scores.append(score_test)
        print('kernel: {}, tolerance: {}'.format(kernel, tolerance))
        print('Training accuracy: {:0.5f}%  Validation accuracy: {:0.2f}% Testing accuracy: {:0.2f}%'.format(score_train*100, score_val*100, score_test*100))

# Model selection must not look at the test split, so rank by validation accuracy.
best_model_idx = np.argmax(val_scores)
best_model = models[best_model_idx]
best_model_prediction = best_model.predict(x_test)
best_kernel, best_tolerance = model_configs[best_model_idx]

print('Best model with kernel {} and tolerance {}'.format(best_kernel, best_tolerance))
# The stored values are accuracies (fractions in [0, 1]), not error rates.
print('\n with validation accuracy: {} and test accuracy: {}'.format(val_scores[best_model_idx], model_scores[best_model_idx]))

kernels: linear, tolerances: 0.5
Training accuracy: 100.00000%  Validation accuracy: 47.74% Testing accuracy: 52.26%
kernels: linear, tolerances: 0.1
Training accuracy: 100.00000%  Validation accuracy: 48.24% Testing accuracy: 51.26%
kernels: linear, tolerances: 0.01
Training accuracy: 100.00000%  Validation accuracy: 48.24% Testing accuracy: 51.26%
kernels: linear, tolerances: 0.001
Training accuracy: 100.00000%  Validation accuracy: 48.24% Testing accuracy: 51.26%
kernels: linear, tolerances: 0.0001
Training accuracy: 100.00000%  Validation accuracy: 48.24% Testing accuracy: 51.26%
kernels: poly, tolerances: 0.5
Training accuracy: 21.49591%  Validation accuracy: 15.58% Testing accuracy: 20.10%
kernels: poly, tolerances: 0.1
Training accuracy: 21.49591%  Validation accuracy: 15.58% Testing accuracy: 20.10%
kernels: poly, tolerances: 0.01
Training accuracy: 21.49591%  Validation accuracy: 15.58% Testing accuracy: 20.10%
kernels: poly, tolerances: 0.001
Training accuracy: 21.49591%  Val

In [None]:
# Switch to the second dataset; this rebinds TRAIN_PATH, data and data_np.
TRAIN_PATH = "reformatted_train_10000_samples.csv"
data = pd.read_csv(filepath_or_buffer=TRAIN_PATH)
data_np = data.to_numpy()

# Show the array followed by its shape, one per line.
print(data_np, data_np.shape, sep='\n')

In [None]:
# Split the second dataset into features/labels and then into
# train (80%) / validation (10%) / test (10%) partitions.

# Columns [0, 820) are used as input features and column 820 as the target.
# NOTE(review): the earlier split derives this boundary from
# data_np.shape[1] - 1; confirm this file has exactly 821 columns before
# replacing the constant with the derived value.
LABEL_COLUMN = 820

x = data_np[:, 0:LABEL_COLUMN]
y = data_np[:, LABEL_COLUMN]

# Shuffle sample indices with a fixed seed so that the partitions (and the
# accuracies reported below) are identical after Restart Kernel -> Run All.
SPLIT_SEED = 0
shuffled_indices = np.random.default_rng(SPLIT_SEED).permutation(x.shape[0])

# Boundaries of the 80% / 10% / 10% partitions (truncated to whole samples)
train_split_idx = int(0.80 * x.shape[0])
val_split_idx = int(0.90 * x.shape[0])

train_indices = shuffled_indices[0:train_split_idx]
val_indices = shuffled_indices[train_split_idx:val_split_idx]
test_indices = shuffled_indices[val_split_idx:]

# Select the examples from x and y to construct our training, validation, testing sets
x_train, y_train = x[train_indices, :], y[train_indices]
x_val, y_val = x[val_indices, :], y[val_indices]
x_test, y_test = x[test_indices, :], y[test_indices]

In [None]:
# Fit an SVC on the current training split and report its accuracy on the
# training, validation and test splits.
# NOTE(review): SVC() is constructed with library defaults here rather than
# with the hyper-parameters picked by the grid search above -- confirm that
# this is the intended "best model".
model = SVC()
model.fit(x_train, y_train)

# Predict on all three splits first ...
predictions_train = model.predict(x_train)
predictions_val = model.predict(x_val)
predictions_test = model.predict(x_test)

# ... then score each set of predictions against its labels.
score_train = skmetrics.accuracy_score(predictions_train, y_train)
score_val = skmetrics.accuracy_score(predictions_val, y_val)
score_test = skmetrics.accuracy_score(predictions_test, y_test)

report_template = 'Training accuracy: {:0.2f}%\nValidation accuracy: {:0.2f}%\nTest accuracy: {:0.2f}%'
print(report_template.format(score_train*100, score_val*100, score_test*100))