In [1]:
# Import Important Libraries
import sklearn
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import preprocessing # preprossing is what we do with the data before we run the learning algorithm
from sklearn.model_selection import train_test_split 
import numpy as np
from scipy.io import arff
import numpy as np
# import math

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the .arff file
connect_4_dataset = arff.loadarff('../connect-4.arff')

# Convert to a numpy array
data = np.asarray(connect_4_dataset[0].tolist(), dtype=np.float32)

# Print the array
print(data)

[[0. 0. 0. ... 0. 0. 2.]
 [0. 0. 0. ... 0. 0. 2.]
 [0. 0. 0. ... 0. 0. 2.]
 ...
 [2. 2. 0. ... 0. 0. 1.]
 [2. 1. 0. ... 2. 0. 0.]
 [2. 1. 1. ... 0. 0. 0.]]


# Data Pre-Processing
Scale after splitting the data into train and test since we will be using gradient ascent. 
* Use `train_test_split` to split the data (`75% train` and `25% test`) to `X_train`, `X_test`, `y_train`, `y_test` with `random_state` of 42
* Reshape `y_train` into 2D array `y_2d_train` and `y_test` into 2D array `y_2d_test`
* Augment the dataset with a column of ones

In [3]:
X = data[:, :-1]
y = data[:, -1:]

print('X:' + str(X.shape))
print('y:' + str(y.shape))

X:(67557, 42)
y:(67557, 1)


In [21]:
X = data[:, :-1]
y = data[:, -1]

# extract the classes
X_zeros = X[y == 0]
y_zeros = y[y == 0]
X_ones = X[y == 1]
y_ones = y[y == 1]
X_twos = X[y == 2]
y_twos = y[y == 2]

# how many of each class the training set should have
# 90% of the smallest class
amount_per_class = (min(len(y_zeros), len(y_ones), len(y_twos))) * 0.9
print(amount_per_class)

X_zeros_train, X_zeros_test, y_zeros_train, y_zeros_test = train_test_split(
    X_zeros, y_zeros,
    test_size=1-(amount_per_class/len(y_zeros)),
    random_state=10, shuffle=True
)
X_ones_train, X_ones_test, y_ones_train, y_ones_test = train_test_split(
    X_ones, y_ones,
    test_size=1-(amount_per_class/len(y_ones)),
    random_state=10, shuffle=True
)
X_twos_train, X_twos_test, y_twos_train, y_twos_test = train_test_split(
    X_twos, y_twos,
    test_size=1-(amount_per_class/len(y_twos)),
    random_state=10, shuffle=True
)

X_train = np.concatenate((X_zeros_train, X_ones_train, X_twos_train))
y_train = np.concatenate((y_zeros_train, y_ones_train, y_twos_train))
X_val = np.concatenate((X_zeros_test, X_ones_test, X_twos_test))
y_val = np.concatenate((y_zeros_test, y_ones_test, y_twos_test))

'''X_train_val, X_test, y_train_val, y_test = train_test_split(
    X, y,   
    test_size = 0.07, random_state=10, shuffle=True
)

X_train, X_val, y_train, y_val = train_test_split(
    X, y, 
    test_size = 0.7, stratify=np.array([0.33, 0.33, 0.34]),
    random_state=10, shuffle=True
)'''



5804.1


'X_train_val, X_test, y_train_val, y_test = train_test_split(\n    X, y,   \n    test_size = 0.07, random_state=10, shuffle=True\n)\n\nX_train, X_val, y_train, y_val = train_test_split(\n    X, y, \n    test_size = 0.7, stratify=np.array([0.33, 0.33, 0.34]),\n    random_state=10, shuffle=True\n)'

In [22]:
# # Splitting the dataset 

# X_train_val, X_test, y_train_val, y_test = train_test_split(
#     X, y,   
#     test_size = 0.07, random_state=10, shuffle=True
# )
# X_train, X_val, y_train, y_val = train_test_split(
#     X_train_val, y_train_val, 
#     test_size = 0.07, random_state=10, shuffle=True
# )

# y_train = y_train.ravel()
# y_test = y_test.ravel()
# y_val = y_val.ravel()

# # ! No need to scale since the data is already the same scale

In [23]:
X_train = np.hstack((np.ones((X_train.shape[0], 1)), X_train))
X_test = np.hstack((np.ones((X_test.shape[0], 1)), X_test))
X_val = np.hstack((np.ones((X_val.shape[0], 1)), X_val))


In [24]:
# Check to make sure everything is as expected
print('X_train:' + str(X_train.shape))
print('y_train:' + str(y_train.shape))
print('X_val: \t'  + str(X_val.shape))
print('y_val: \t'  + str(y_val.shape))
print('X_test: '  + str(X_test.shape))
print('y_test: '  + str(y_test.shape))

print('X_train:' + str(X_train[0].shape))



X_train:(17412, 43)
y_train:(17412,)
X_val: 	(50145, 43)
y_val: 	(50145,)
X_test: (50145, 43)
y_test: (50145,)
X_train:(43,)


In [25]:
# The original labels are '0', '1', '2'. Our SVM algorithm Pegasos expects the labels to be encoded as +1 and -1
# Here we encode one digit as 1, and we encode the other 2 digits as -1
def one_vs_rest_encoding(y, digit = '0'):
    y_encoded = np.where(y == int(digit), 1, -1)
    return  y_encoded

In [26]:
# Perform the encoding for step 1's task
y_train_0_vs_rest = one_vs_rest_encoding(y_train, '0')
y_val_0_vs_rest =  one_vs_rest_encoding(y_val, '0')

In [27]:
print('y_train_0_vs_rest: ' + str(y_train_0_vs_rest))


y_train_0_vs_rest: [ 1  1  1 ... -1 -1 -1]


In [28]:
# # ! SVM with RBF kernel
# # Create the 3 classifiers
# labels = "012"
# w_vals = {}
# val_scores = {}
# for i in range(len(labels)):
#     # Note that each section may require more than one line of code.

#     # Perform one-vs-rest for labels[i]
#     # To do: Relabel the y labels in the train set to either 1 or -1 using one_vs_rest_encoding
#     y_encoded = one_vs_rest_encoding(y_train, labels[i])

#     svm_algo = svm.SVC(kernel='rbf', C=0.001)

#     svm_algo.fit(X_train, y_encoded)

#     # TODO: Get the weights (coefficients) of the SVM model and store them in w_vals[i]
#     # Note that the decision function oaf an SVM with RBF kernel is not a linear combination of the
#     # features, so there are no coefficients to directly compare to those of a linear SVM.
#     # However, you can still access the dual coefficients of the support vectors using the dual_coef_ attribute.
#     # For example, you can compute the weights as a weighted sum of the support vectors, where the dual coefficients
#     # are the weights. See the scikit-learn documentation for details.
#     support_vectors = svm_algo.support_vectors_
#     dual_coef = svm_algo.dual_coef_.reshape(1, -1, 1)
#     w = np.sum(dual_coef * support_vectors, axis=1)
#     w_vals[i] = w.reshape(1, -1)

#     y_pred_val = svm_algo.predict(X_val)

#     y_encoded_val = one_vs_rest_encoding(y_val, labels[i])

#     # compute the accuracy of the classifier
#     val_accuracy = accuracy_score(y_encoded_val, y_pred_val)

#     val_scores[i] = val_accuracy


In [29]:
# Create the 3 classifiers
labels = "012"
w_vals = {}
val_scores = {}
for i in range(len(labels)):
    # Note that each section may require more than one line of code.

    # Perform one-vs-rest for labels[i]
    # To do: Relabel the y labels in the train set to either 1 or -1 using one_vs_rest_encoding
    y_encoded = one_vs_rest_encoding(y_train, labels[i])

    svm_algo = svm.SVC(kernel='poly', degree=3, C=0.001)
    
    svm_algo.fit(X_train, y_encoded)

    # TODO: Get the weights (coefficients) of the SVM model and store them in w_vals[i]
    # Note that the decision function oaf an SVM with RBF kernel is not a linear combination of the
    # features, so there are no coefficients to directly compare to those of a linear SVM.
    # However, you can still access the dual coefficients of the support vectors using the dual_coef_ attribute.
    # For example, you can compute the weights as a weighted sum of the support vectors, where the dual coefficients
    # are the weights. See the scikit-learn documentation for details.
    support_vectors = svm_algo.support_vectors_
    dual_coef = svm_algo.dual_coef_.reshape(1, -1, 1)
    w = np.sum(dual_coef * support_vectors, axis=1)
    w_vals[i] = w.reshape(1, -1)

    y_pred_val = svm_algo.predict(X_val)

    y_encoded_val = one_vs_rest_encoding(y_val, labels[i])

    # compute the accuracy of the classifier
    val_accuracy = accuracy_score(y_encoded_val, y_pred_val)

    val_scores[i] = val_accuracy


In [30]:
# Check your work. With the proper amount of iterations, your values should range from 0.95 to 0.99  
for i in range(len(labels)):
     print(i,": score:", val_scores[i])

0 : score: 0.9871373018247084
1 : score: 0.7840063814936684
2 : score: 0.2288563166816233


In [31]:
# To do: Predict the label for each example in the validation set 
# We will let eval be a numpy array of length N, where N is the number of examples in the validation set. 
# eval1 will hold either a 1 or a 0, depending if the handwritten digit was predicted correctly or not.
eval1 = np.zeros(len(y_val))

# To do: Loop through each sample in the validation set and assign it a label based on the highest score. 
# Store either a 1 if the number was predicted correctly, or a 0 if the number was predicted incorrectly.
for i in range(len(X_val)):
    
    label_scores = np.zeros(len(labels))
    
    for j in range(len(labels)):
        X_val_i_2d = X_val[i].reshape(1, -1)  # reshape X_val to a 2D array with shape (1, 43)
        # print('X_val: \t'  + str(X_val_i_2d.shape))
        # print('w_vals: \t'  + str(w_vals[j].shape))
        label_scores[j] = X_val_i_2d @ w_vals[j].T
    
    # print(label_scores)
    index = np.argmax(label_scores) # get the index of the label with the highest score
    
    # print("index: ", int(labels[index]))
    # print("y_val: ", int(y_val[i]))
    # print("==============")
    if int(labels[index]) == int(y_val[i]):
        eval1[i] = 1
    else:
        eval1[i] = 0

In [32]:
# To do: Determine how many were predicted correctly (Find its accuracy score)
accuracy = np.sum(eval1) / len(y_val)
print("Accuracy Score:",accuracy)

Accuracy Score: 0.5201914448100509
