# Data loading and splitting

In [1]:
import pandas as pd
import numpy as np

# Read the whitespace-separated feature files (no header row).
# sep=r'\s+' is the documented replacement for delim_whitespace=True,
# which pandas deprecated in 2.2.
train_dataset = pd.read_csv('/content/features.train.txt', header=None, sep=r'\s+')
test_dataset = pd.read_csv('/content/features.test.txt', header=None, sep=r'\s+')
print(train_dataset.shape, test_dataset.shape)

# Only considering "1" and "5" digit for binary classification.
# Column 0 holds the digit label; keep a single definition of the two classes
# so the train and test filters cannot drift apart.
BINARY_DIGITS = [1.0, 5.0]
train_dataset = train_dataset[train_dataset[0].isin(BINARY_DIGITS)]
test_dataset = test_dataset[test_dataset[0].isin(BINARY_DIGITS)]
print(train_dataset.shape, test_dataset.shape)

# Split each frame into features (every column after the first) and labels (column 0)
X_train = train_dataset.iloc[:, 1:].values
y_train = train_dataset.iloc[:, 0].values
X_test = test_dataset.iloc[:, 1:].values
y_test = test_dataset.iloc[:, 0].values
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(7291, 3) (2007, 3)
(1561, 3) (424, 3)
(1561, 2) (1561,) (424, 2) (424,)


# Function to build, train and test SVM

In [2]:
from sklearn import svm
from sklearn.metrics import accuracy_score

def buildSVM(X_train, y_train, X_test, y_test, kernel = "rbf", C = 1.0, gamma = "scale", degree = 3, coefficient = 0.0):
  """Fit an svm.SVC on the training data and print its train/test metrics.

  Parameters
  ----------
  X_train, y_train : training features and labels passed to ``SVC.fit``.
  X_test, y_test : held-out features and labels used for the test metrics.
  kernel, C, gamma, degree : forwarded unchanged to ``svm.SVC``.
  coefficient : forwarded to ``svm.SVC`` as ``coef0``.

  Returns
  -------
  None. Training accuracy/error, testing accuracy/error and the fitted
  model's ``n_support_`` are printed.
  """
  classifier = svm.SVC(kernel = kernel, C = C, gamma = gamma, degree = degree, coef0 = coefficient, random_state = 0)
  classifier.fit(X_train, y_train)

  # Score the training set once and reuse it for both accuracy and error
  # (previously the model was evaluated twice just to format one line).
  train_accuracy = classifier.score(X_train, y_train)
  print("Training Accuracy: {:.4f} and Error: {:.4f}".format(train_accuracy, 1 - train_accuracy))

  # Same for the test set: predict once, compute accuracy once.
  y_pred = classifier.predict(X_test)
  test_accuracy = accuracy_score(y_test, y_pred)
  print("Testing Accuracy: {:.4f} and Error: {:.4f}".format(test_accuracy, 1 - test_accuracy))
  print("Number of support vectors: ", classifier.n_support_)

# Training and testing using linear kernel

In [3]:
# Linear-kernel run on the full filtered training set; every other
# hyper-parameter is left at buildSVM's default.
buildSVM(X_train, y_train, X_test, y_test, kernel="linear")

Training Accuracy: 0.9962 and Error: 0.0038
Testing Accuracy: 0.9788 and Error: 0.0212
Number of support vectors:  [14 14]


# Training with different sizes of training data 

In [4]:
# Train a linear-kernel SVM on the first n training samples for each size
# below; every run is evaluated against the same full test set.
for n_samples in (50, 100, 200, 800):
  print("\nTraining with {} samples".format(n_samples))
  buildSVM(X_train[:n_samples], y_train[:n_samples], X_test, y_test, kernel = "linear")


Training with 50 samples
Training Accuracy: 1.0000 and Error: 0.0000
Testing Accuracy: 0.9811 and Error: 0.0189
Number of support vectors:  [1 1]

Training with 100 samples
Training Accuracy: 1.0000 and Error: 0.0000
Testing Accuracy: 0.9811 and Error: 0.0189
Number of support vectors:  [2 2]

Training with 200 samples
Training Accuracy: 0.9950 and Error: 0.0050
Testing Accuracy: 0.9811 and Error: 0.0189
Number of support vectors:  [4 4]

Training with 800 samples
Training Accuracy: 0.9975 and Error: 0.0025
Testing Accuracy: 0.9811 and Error: 0.0189
Number of support vectors:  [7 7]


# Training and testing using polynomial kernel

In [5]:

# Polynomial-kernel grid: every C is run with degree 2 and then degree 5,
# with gamma and coef0 both fixed at 1.0. Order matches the original cell
# (C in the outer loop, degree in the inner loop).
for c_value in (0.0001, 0.001, 0.01, 1):
  for poly_degree in (2, 5):
    print("\nUsing C = {} and degree = {}".format(c_value, poly_degree))
    buildSVM(X_train, y_train, X_test, y_test, kernel = "poly", C = c_value, gamma = 1.0, degree = poly_degree, coefficient = 1.0)


Using C = 0.0001 and degree = 2
Training Accuracy: 0.9910 and Error: 0.0090
Testing Accuracy: 0.9835 and Error: 0.0165
Number of support vectors:  [118 118]

Using C = 0.0001 and degree = 5
Training Accuracy: 0.9955 and Error: 0.0045
Testing Accuracy: 0.9811 and Error: 0.0189
Number of support vectors:  [13 13]

Using C = 0.001 and degree = 2
Training Accuracy: 0.9955 and Error: 0.0045
Testing Accuracy: 0.9835 and Error: 0.0165
Number of support vectors:  [38 38]

Using C = 0.001 and degree = 5
Training Accuracy: 0.9955 and Error: 0.0045
Testing Accuracy: 0.9788 and Error: 0.0212
Number of support vectors:  [12 13]

Using C = 0.01 and degree = 2
Training Accuracy: 0.9955 and Error: 0.0045
Testing Accuracy: 0.9811 and Error: 0.0189
Number of support vectors:  [17 17]

Using C = 0.01 and degree = 5
Training Accuracy: 0.9962 and Error: 0.0038
Testing Accuracy: 0.9788 and Error: 0.0212
Number of support vectors:  [11 12]

Using C = 1 and degree = 2
Training Accuracy: 0.9968 and Error: 0.0

# Training and testing using Radial Basis Function (RBF) kernel

In [6]:

# RBF-kernel sweep over C with gamma fixed at 1.0. Each entry pairs the label
# printed in the output with the numeric value passed to buildSVM, so the
# "10^4" / "10^6" headings stay exactly as before.
rbf_c_values = [
  ("0.01", 0.01),
  ("1", 1),
  ("100", 100),
  ("10^4", 10**4),
  ("10^6", 10**6),
]
for c_label, c_value in rbf_c_values:
  print("\nUsing C = {}".format(c_label))
  buildSVM(X_train, y_train, X_test, y_test, kernel = "rbf", C = c_value, gamma = 1.0)



Using C = 0.01
Training Accuracy: 0.9962 and Error: 0.0038
Testing Accuracy: 0.9764 and Error: 0.0236
Number of support vectors:  [200 203]

Using C = 1
Training Accuracy: 0.9955 and Error: 0.0045
Testing Accuracy: 0.9788 and Error: 0.0212
Number of support vectors:  [14 17]

Using C = 100
Training Accuracy: 0.9968 and Error: 0.0032
Testing Accuracy: 0.9811 and Error: 0.0189
Number of support vectors:  [ 8 14]

Using C = 10^4
Training Accuracy: 0.9974 and Error: 0.0026
Testing Accuracy: 0.9764 and Error: 0.0236
Number of support vectors:  [ 7 13]

Using C = 10^6
Training Accuracy: 0.9994 and Error: 0.0006
Testing Accuracy: 0.9764 and Error: 0.0236
Number of support vectors:  [8 9]
