### Onur Can
### 20.12.2021
#### 1. Importing and Processing the Data --- 2. Distance and Kernel Functions
#### 3. Learning Algorithm for Binary Classification & One versus all --- 4. Confusion Matrix Results for C = 10
#### 5. Evaluation over C = [ 0.1, 1, 10, 100, 1000] --- 6. Plotting Accuracy Scores per C

In [None]:
# Onur Can 
# Project is done for Prof. Mehmet Gönen's DASC 521: Introduction to Machine Learning @ Koç University MSc Data Science Program
# Thanks Prof Mehmet for the dataset generation and instructions

import cvxopt as cvx    # for solver 
import numpy as np      # for matrix operations
import pandas as pd     # for confusion matrix
import matplotlib.pyplot as plt     # for Plotting
import scipy.spatial.distance as dt # for Euclidean distance calculation

### 1. Importing and Processing the Data

In [None]:
# Load the image feature matrix and the label vector from their CSV files
dataset = np.genfromtxt("../input/image-Classification/images.csv", delimiter=",")
labels = np.genfromtxt("../input/image-Classification/labels.csv", delimiter=",")

# Rows 0-999 form the training split, rows 1000-4999 the test split
X_train, x_test = dataset[:1000, :], dataset[1000:5000, :]
y_train = labels[:1000].astype(int)
y_test = labels[1000:5000].astype(int)

# Sizes used by the later cells
N_train = y_train.shape[0]   # Train Data Size
N_test = y_test.shape[0]     # Test Data Size
D_train = X_train.shape[1]   # Number of Dimensions
# Largest training label, used as the number of classes
# (assumes labels are the integers 1..K -- TODO confirm against the data set)
K = y_train.max()

print(*(arr.shape for arr in (X_train, y_train, x_test, y_test)), N_train, D_train, K)

### 2. Distance and Kernel Functions

$\begin{equation}
    \begin{split}
        d(\boldsymbol{x}_{i}, \boldsymbol{x}_{j}) &= ||\boldsymbol{x}_{i} - \boldsymbol{x}_{j}||_{2} = \sqrt{(\boldsymbol{x}_{i} - \boldsymbol{x}_{j})^{\top} (\boldsymbol{x}_{i} - \boldsymbol{x}_{j})} = \sqrt{\sum\limits_{d = 1}^{D}(x_{id} - x_{jd})^{2}} \\
        k(\boldsymbol{x}_{i}, \boldsymbol{x}_{j}) &= \exp\left(-\dfrac{||\boldsymbol{x}_{i} -\boldsymbol{x}_{j}||_{2}^{2}}{2s^{2}}\right)
    \end{split}
\end{equation}$

In [None]:
# define Gaussian kernel function
def gaussian_kernel(X1, X2, s):
    """Return the Gaussian kernel matrix between the rows of X1 and X2.

    Entry (i, j) equals exp(-||X1[i] - X2[j]||^2 / (2 * s^2)), where the
    norm is the Euclidean distance computed by scipy's ``cdist`` and ``s``
    is the kernel width.
    """
    # Pairwise Euclidean distances, one row per point of X1
    pairwise_distances = dt.cdist(X1, X2)
    # Squared distances scaled by twice the squared kernel width
    scaled_squared = pairwise_distances ** 2 / (2 * s ** 2)
    return np.exp(-scaled_squared)

### 3. Learning Algorithm for Binary Classification & One versus all
#### The function `one_versus_all` takes the data points, the C and s parameters, and the class to compare against all others.
#### It outputs the one-versus-all scores of every training and test data point.
#### Note: Progress messages from the solver function cvx.solvers.qp are silenced.

In [None]:
def one_versus_all(x, y, S, c, class_to_all, test_to_all=None):
    """Train one binary kernel SVM (one class versus the rest) and score data.

    Labels equal to ``class_to_all`` are mapped to +1 and every other label
    to -1. The dual soft-margin problem is solved with ``cvx.solvers.qp``
    on a Gaussian kernel, and the discriminant values are returned.

    Parameters
    ----------
    x : training data points, one row per sample.
    y : training labels, one per row of ``x``.
    S : Gaussian kernel width handed to ``gaussian_kernel``.
    c : regularization parameter C (upper bound on every alpha).
    class_to_all : label treated as the positive class.
    test_to_all : optional new data points to score; ``None`` or an empty
        sequence skips the test scoring.

    Returns
    -------
    (train_predicted, test_predicted)
        Discriminant scores as column vectors, one row per sample.
        ``test_predicted`` is ``None`` when no test points were given.

    Notes
    -----
    The solver's ``show_progress`` option is switched off globally.
    If no alpha ends up strictly between 0 and C, the bias is the mean of
    an empty selection and therefore NaN.
    """
    epsilon = 1e-3                      # Relative tolerance for snapping alphas to 0 or C
    X_compare = np.asarray(x)           # X data points
    y_compare = np.asarray(y)           # Labels
    # Size everything from the data actually passed in, not from a global
    n_samples = X_compare.shape[0]

    # Relabel to +1 / -1 and build the elementwise product K o (y y^T)
    y_train_modified = np.where(y_compare == class_to_all, 1, -1)
    K_train = gaussian_kernel(X_compare, X_compare, S)
    yyK = np.outer(y_train_modified, y_train_modified) * K_train

    # CVXOPT solver inputs: minimize 0.5 a^T P a + q^T a  s.t.  G a <= h,  A a = b
    P = cvx.matrix(yyK)
    q = cvx.matrix(-np.ones((n_samples, 1)))
    # -a <= 0 and a <= C stacked into G and h respectively
    G = cvx.matrix(np.vstack((-np.eye(n_samples), np.eye(n_samples))))
    h = cvx.matrix(np.vstack((np.zeros((n_samples, 1)),
                              c * np.ones((n_samples, 1)))))
    A = cvx.matrix(1.0 * y_train_modified[None, :])  # y^T as a float row vector
    b = cvx.matrix(0.0)

    cvx.solvers.options['show_progress'] = False     # silence solver messages
    result = cvx.solvers.qp(P, q, G, h, A, b)
    alpha = np.reshape(result["x"], n_samples)       # alpha values for the given data set
    alpha[alpha < c * epsilon] = 0                   # snap near-zero values to 0
    alpha[alpha > c * (1 - epsilon)] = c             # snap near-C values to C
    support_indices, = np.where(alpha != 0)
    active_indices, = np.where(np.logical_and(alpha != 0, alpha < c))

    # Bias term, averaged over the samples whose alpha is strictly inside (0, C)
    w0 = np.mean(y_train_modified[active_indices] * (1 - np.matmul(
        yyK[np.ix_(active_indices, support_indices)], alpha[support_indices])))

    # y_i * alpha_i as a column vector, shared by train and test scoring
    signed_alpha = y_train_modified[:, None] * alpha[:, None]

    # Discriminant values on the training set
    train_predicted = np.matmul(K_train, signed_alpha) + w0

    # Discriminant values on the new data points, if any were given
    test_predicted = None
    if test_to_all is not None and len(test_to_all) != 0:
        K_test = gaussian_kernel(test_to_all, X_compare, S)
        test_predicted = np.matmul(K_test, signed_alpha) + w0

    return train_predicted, test_predicted

### 4. Confusion Matrix Results for C = 10

In [None]:
# Calculate SVM scores on the training & test samples for C = 10
SVM_train_predicted = np.empty((K, X_train.shape[0]))
SVM_test_predicted = np.empty((K, x_test.shape[0]))
# Keep the score row of each one-versus-all classifier so the maximum can be
# taken per sample. Iterate over all K classes (labels 1..K) rather than a
# hard-coded 5, so every row of the np.empty buffers is filled.
for klass in range(K):
    train_scores, test_scores = one_versus_all(X_train, y_train, 10, 10, klass + 1, x_test)
    SVM_train_predicted[klass] = np.ravel(train_scores)
    SVM_test_predicted[klass] = np.ravel(test_scores)

print("------------Training Data Predictions Shape and Confusion Matrix ------------")
print(SVM_train_predicted.shape)
# The predicted class is the one whose classifier gives the highest score (+1: labels start at 1)
train_y_predicted = np.argmax(SVM_train_predicted, axis=0) + 1
train_confusion_matrix = pd.crosstab(train_y_predicted, y_train,
                                     rownames=['y_predicted'], colnames=['y_train'])
print(train_confusion_matrix)
print("\n------------Test Data Predictions Shape and Confusion Matrix ------------")
print(SVM_test_predicted.shape)
test_y_predicted = np.argmax(SVM_test_predicted, axis=0) + 1
# Columns are the true test labels, so they are named y_test
test_confusion_matrix = pd.crosstab(test_y_predicted, y_test,
                                    rownames=['y_predicted'], colnames=['y_test'])
print(test_confusion_matrix)



### 5. Evaluation over C = [ 0.1, 1, 10, 100, 1000]

In [None]:
C_values = [10**-1, 10**0, 10**1, 10**2, 10**3]
train_accuracy_values = np.empty(len(C_values))
test_accuracy_values = np.empty(len(C_values))

# For each C value, run the one-versus-all classification on training / test data
for i, C_value in enumerate(C_values):
    SVM_train_predicted = np.empty((K, X_train.shape[0]))
    SVM_test_predicted = np.empty((K, x_test.shape[0]))
    # One classifier per class; labels run from 1 to K, hence klass + 1
    for klass in range(K):
        train_scores, test_scores = one_versus_all(X_train, y_train, 10, C_value, klass + 1, x_test)
        SVM_train_predicted[klass] = np.ravel(train_scores)
        SVM_test_predicted[klass] = np.ravel(test_scores)
    # The predicted class is the one whose classifier gives the highest score
    train_y_predicted = np.argmax(SVM_train_predicted, axis=0) + 1
    test_y_predicted = np.argmax(SVM_test_predicted, axis=0) + 1
    # Accuracy = fraction of samples whose predicted label matches the true one
    train_accuracy_values[i] = np.mean(y_train == train_y_predicted)
    test_accuracy_values[i] = np.mean(y_test == test_y_predicted)

C_values_axis = np.log10(C_values)  # log10 spaces the C values evenly on the plot's x axis
print("-----Train Accuracy per C-----\n", train_accuracy_values)
print("-----Test Accuracy per C-----\n", test_accuracy_values)


### 6. Plotting Accuracy Scores per C

In [None]:
# Accuracy as a function of C, one line per data split
# The x positions are C_values_axis (log10 of the C values), labelled as powers of ten
plt.figure(figsize=(10, 8))
for accuracies, line_format, series_name in (
    (train_accuracy_values, "b.-", "training"),
    (test_accuracy_values, "r.-", "test"),
):
    plt.plot(C_values_axis, accuracies, line_format, label=series_name, markersize=12)
tick_labels = ["10^(-1)", "10^(0)", "10^(1)", "10^(2)", "10^(3)"]
plt.xticks(C_values_axis, tick_labels)
plt.xlabel("Regularization Parameter (C)")
plt.ylabel("Accuracy")
plt.legend(loc="upper left")
plt.grid(True)
plt.show()