In [93]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
%matplotlib inline

In [94]:
def load_data():
    
    ### import training data from csv files
    main_file = "labels.csv"
    main_label = np.loadtxt(main_file, delimiter = ",")
    main_label = main_label.reshape((main_label.shape[0],1))
    m = main_label.shape[0]
    
    # changing the labels to digits starting from 0
    labels = np.zeros((m,1), dtype = "int")
    num_class = int(0)
    current_bus = main_label[0]
    bus_reference = {str(num_class): current_bus} # dictionary containing original labels

    for i in range(m):
        if main_label[i] != current_bus:
            num_class += int(1)
            current_bus = main_label[i]
            bus_reference[str(num_class)] = current_bus
    
        labels[i] = num_class
    labels = labels.astype(int)
    
    main_file = "features.csv"
    main_features = np.loadtxt(main_file, delimiter=",")
    
    DATA = {'labels': labels,
            'main_features': main_features}
    num_class+=1
    
    return DATA, bus_reference, num_class

In [95]:
def select_features(selected_buses, DATA):
    
    # U can select what features to use for training
    main_features = DATA['main_features']
    m = len(selected_buses)
    n = DATA['main_features'].shape[0]
    features = main_features[:,selected_buses]
    
    return features

In [96]:
def make_onehot(DATA, num_class):
    
    #Convert labels to onehot
    one_hot = np.zeros((DATA['labels'].shape[0], num_class))
    for i,j in enumerate(DATA['labels']):
        one_hot[i,j]=1
        
    return one_hot

In [97]:
def split_dataset(features, labels, test_percentage):
    
    #Split data to training/crossval/test sets
    indices = np.random.permutation(features.shape[0])
    num_test = int(features.shape[0]*test_percentage)
    num_test2 = num_test // 2
    crossval_index, test_index, train_index = indices[:num_test2], indices[num_test2:num_test], indices[num_test:]
    
    crossval_features, test_features, train_features = features[crossval_index,:], features[test_index,:], features[train_index,:]
    crossval_labels, test_labels, train_labels = labels[crossval_index,:], labels[test_index,:], labels[train_index,:]
    
    dataset={'train_labels':       train_labels.reshape(train_labels.shape[0],),
             'train_features':     train_features,
             'test_labels':        test_labels.reshape(test_labels.shape[0],),
             'test_features':      test_features,
             'crossval_labels':    crossval_labels.reshape(crossval_labels.shape[0],),
             'crossval_features':  crossval_features}
    
    return dataset

In [98]:
DATA, bus_reference, num_class = load_data()

selected_buses = [1, 2, 3, 11, 27, 29, 50, 57, 74, 89]
features = select_features(selected_buses, DATA)

labels = DATA['labels'] #make_onehot(DATA, num_class)

dataset = split_dataset(features, labels, 0.2)

In [99]:
train_labels      = dataset['train_labels']
train_features    = dataset['train_features']
test_labels       = dataset['test_labels']
test_features     = dataset['test_features']
crossval_labels   = dataset['crossval_labels']
crossval_features = dataset['crossval_features']
print(train_features.shape)
print(train_labels.shape)
print(test_features.shape)
print(test_labels.shape)
print(crossval_features.shape)
print(crossval_labels.shape)

(10400, 10)
(10400,)
(1300, 10)
(1300,)
(1300, 10)
(1300,)


In [100]:
arr1 = np.arange(0,9,0.33); arr2 = np.arange(-1,6,0.25)
C = np.exp(arr1); gamma = np.exp(arr2);
best_acc=0; 

for i in C:
    for j in gamma:

        svc_model = SVC(kernel='rbf', gamma=j, C=i).fit(crossval_features, crossval_labels)
        acc = svc_model.score(crossval_features, crossval_labels)
        if acc>best_acc:
            best_acc = acc; C_best = i; gamma_best = j
            
print("best accuracy: %f" %best_acc)
print("C_best: %f" %C_best)
print("gamma_best: %f" %gamma_best)

best accuracy: 1.000000
C_best: 1022.493980
gamma_best: 314.190660


In [101]:
svc_model = SVC(kernel='rbf', gamma=gamma_best, C=C_best).fit(train_features, train_labels)
acc = svc_model.score(train_features, train_labels)
print("train accuracy: %f" %acc)

svc_model = SVC(kernel='rbf', gamma=gamma_best, C=C_best).fit(test_features, test_labels)
acc = svc_model.score(test_features, test_labels)
print("test accuracy: %f" %acc)

train accuracy: 1.000000
test accuracy: 1.000000
