## Exploratory Research in Neural Networks and SVMs

In [None]:
import numpy as np
import mnist_reader
import sklearn as sk
import matplotlib.pyplot as plt

## Data Preparation:

In [None]:
# Load the 'train' and 't10k' splits through mnist_reader, in that order.
(X_train, y_train), (X_test, y_test) = (
    mnist_reader.load_mnist('fashion', kind=split_kind)
    for split_kind in ('train', 't10k')
)

# One shape per line: training split first, test split second.
print(X_train.shape, X_test.shape, sep='\n')

In [None]:
# Keeping only the samples whose label is 5 or 7

mask_train = np.isin(y_train, (5, 7))
X_train, y_train = X_train[mask_train], y_train[mask_train]

mask_test = np.isin(y_test, (5, 7))
X_test, y_test = X_test[mask_test], y_test[mask_test]

In [None]:
# Re-encoding the labels as binary: 5 becomes 0, everything else (7) becomes 1

y_train = (y_train != 5).astype(int)
y_test = (y_test != 5).astype(int)

print(X_train.shape, X_test.shape, sep='\n')

In [None]:
# Scaling every sample (row) to unit Euclidean length

# Substitute divisor for rows whose norm is exactly zero (avoids dividing by 0).
epsilon = 1e-10

norm_train = np.linalg.norm(X_train, axis=1, keepdims=True)
norm_train[norm_train == 0] = epsilon
X_train = X_train / norm_train

norm_test = np.linalg.norm(X_test, axis=1, keepdims=True)
norm_test[norm_test == 0] = epsilon
X_test = X_test / norm_test


In [None]:
# Carving a validation set out of the current training data.
# NOTE(review): train_size=0.5 and test_size=0.2 sum to 0.7, so roughly 30% of
# the samples appear to end up in neither split - confirm this is intended.

X_train, X_validation, y_train, y_validation = sk.model_selection.train_test_split(
    X_train,
    y_train,
    train_size=0.5,
    test_size=0.2,
    random_state=1,
)

print(X_train.shape, X_validation.shape, sep='\n')

In [None]:
# Adding label noise: each training label is selected for flipping with
# probability 0.2 (the random draw is not seeded here)

noise_mask_train = np.random.rand(len(y_train)) < 0.2
y_train_noisy = y_train.copy()
# Selected labels equal to 0 become 1; every other selected label becomes 0.
y_train_noisy[noise_mask_train] = (y_train_noisy[noise_mask_train] == 0).astype(int)

## Methods

In [None]:
# k-fold cross-validation training function

def k_fold_training(x, y, model, k):
    """Estimate a model's score with shuffled k-fold cross-validation.

    The sample indices are shuffled once with ``np.random.shuffle`` and cut
    into ``k`` folds of ``len(x) // k`` samples. For every fold the model is
    fitted on all remaining samples and scored on the held-out fold.

    When ``len(x)`` is not a multiple of ``k`` the leftover samples are never
    held out; they are part of the training data of every fold.

    Args:
        x: Indexable array of samples (indexed with an integer index array).
        y: Labels aligned with ``x``.
        model: Object exposing ``fit(x, y)`` and ``score(x, y)``; it is
            re-fitted in place once per fold.
        k: Number of folds, ``2 <= k <= len(x)``.

    Returns:
        The mean of the ``k`` held-out fold scores.

    Raises:
        ValueError: If ``k`` is smaller than 2 or larger than ``len(x)``,
            which would leave an empty training or held-out fold.
    """
    n_samples = len(x)
    if k < 2 or k > n_samples:
        raise ValueError(
            f"k must be between 2 and the number of samples ({n_samples}), got {k}."
        )

    fold_size = n_samples // k

    indices = np.arange(n_samples)
    np.random.shuffle(indices)

    scores = []

    for i in range(k):
        start, stop = i * fold_size, (i + 1) * fold_size
        testing_indices = indices[start:stop]
        training_indices = np.concatenate((indices[:start], indices[stop:]))

        x_training, x_testing = x[training_indices], x[testing_indices]
        y_training, y_testing = y[training_indices], y[testing_indices]

        model.fit(x_training, y_training)

        # Score on the held-out fold; scoring on the training part would
        # report training accuracy rather than a cross-validation estimate.
        scores.append(model.score(x_testing, y_testing))

    return np.mean(scores)

In [None]:
# Function to train svm from a list of C or Gamma values

def train_svm(c_values = None, gamma_values = None, kernel:str = 'linear'):
    """Fit one SVC per hyper-parameter value and collect its scores.

    Every model is fitted on the notebook globals ``X_train`` and
    ``y_train_noisy``. With the linear kernel one model is trained per entry
    of ``c_values``; with the Gaussian kernel one model is trained per entry
    of ``gamma_values``.

    Args:
        c_values: Iterable of ``C`` values; required when ``kernel`` is
            ``'linear'``.
        gamma_values: Iterable of ``gamma`` values; required when ``kernel``
            is ``'gaussian'`` or ``'rbf'``.
        kernel: ``'linear'``, or ``'gaussian'`` / ``'rbf'`` (synonyms).

    Returns:
        ``(training_scores, validation_scores)``: two lists with one score
        per tested value, in input order.

    Raises:
        TypeError: If the value list required by the chosen kernel is None.
        ValueError: If ``kernel`` is not one of the supported names.
    """
    training_scores = []
    validation_scores = []

    if kernel == 'linear':
        if c_values is None:
            raise TypeError("c_values is required when kernel is 'linear'.")

        for c in c_values:
            linear_svm = sk.svm.SVC(kernel = 'linear', C = c)
            linear_svm.fit(X_train, y_train_noisy)

            validation_scores.append(linear_svm.score(X_validation, y_validation))
            # NOTE(review): scored against the clean y_train although the model
            # was fitted on y_train_noisy (train_MLPC scores against the noisy
            # labels) - confirm which one is intended.
            training_scores.append(linear_svm.score(X_train, y_train))

            print(f'Scores for C={c} already processed')

    elif kernel in ('gaussian', 'rbf'):
        if gamma_values is None:
            raise TypeError("gamma_values is required when kernel is 'gaussian' or 'rbf'.")

        for gamma in gamma_values:
            # scikit-learn names its Gaussian kernel 'rbf'; 'gaussian' is not
            # an accepted SVC kernel, so the alias is translated here.
            gaussian_svm = sk.svm.SVC(kernel = 'rbf', gamma = gamma)
            gaussian_svm.fit(X_train, y_train_noisy)

            validation_scores.append(gaussian_svm.score(X_validation, y_validation))
            # NOTE(review): same clean-vs-noisy label question as above.
            training_scores.append(gaussian_svm.score(X_train, y_train))

            print(f'Scores for Gamma={gamma} already processed')

    else:
        raise ValueError("Unsupported kernel. Please use 'linear' or 'gaussian'.")

    return training_scores, validation_scores

In [None]:
# Function that uses "k_fold_training" to compare different values of C or Gamma

def cross_validation(models_list):
    """Run 5-fold ``k_fold_training`` for each model and return the scores.

    Each model is evaluated on the notebook globals ``X_train`` and
    ``y_train_noisy``. A progress line is printed after every model.

    Args:
        models_list: Iterable of models to pass to ``k_fold_training``.

    Returns:
        A list with one ``k_fold_training`` result per model, in input order.
    """
    models_scores = []
    for position, candidate in enumerate(models_list, start=1):
        fold_mean = k_fold_training(X_train, y_train_noisy, candidate, 5)
        models_scores.append(fold_mean)
        print(f'Model number {position} processed')
    return models_scores

In [None]:
# Testing C or Gamma values

def test_svm(c_values = None, gamma_values = None, kernel:str = 'linear'):
    """Fit one SVC per hyper-parameter value on train+validation and test it.

    The notebook globals ``X_train``/``y_train`` and
    ``X_validation``/``y_validation`` are stacked into one training set. Each
    model is fitted on that combined set, then scored on the combined set
    (training score) and on ``X_test``/``y_test`` (test score).

    Args:
        c_values: Iterable of ``C`` values; required when ``kernel`` is
            ``'linear'``.
        gamma_values: Iterable of ``gamma`` values; required when ``kernel``
            is ``'gaussian'`` or ``'rbf'``.
        kernel: ``'linear'``, or ``'gaussian'`` / ``'rbf'`` (synonyms).

    Returns:
        ``(training_scores, test_scores)``: two lists with one score per
        tested value, in input order.

    Raises:
        TypeError: If the value list required by the chosen kernel is None.
        ValueError: If ``kernel`` is not one of the supported names.
    """
    X_combined = np.vstack((X_train, X_validation))
    y_combined = np.concatenate((y_train, y_validation), axis=0)

    training_scores = []
    test_scores = []

    if kernel == 'linear':
        if c_values is None:
            raise TypeError("c_values is required when kernel is 'linear'.")

        for c in c_values:
            linear_svm = sk.svm.SVC(kernel = 'linear', C = c)
            linear_svm.fit(X_combined, y_combined)

            test_scores.append(linear_svm.score(X_test, y_test))
            # Training score is measured on the same data the model was fitted
            # on (the combined set), not on the X_train subset alone.
            training_scores.append(linear_svm.score(X_combined, y_combined))

            print(f'Scores for C={c} were processed')

    elif kernel in ('gaussian', 'rbf'):
        if gamma_values is None:
            raise TypeError("gamma_values is required when kernel is 'gaussian' or 'rbf'.")

        for gamma in gamma_values:
            # scikit-learn names its Gaussian kernel 'rbf'; 'gaussian' is not
            # an accepted SVC kernel, so the alias is translated here.
            gaussian_svm = sk.svm.SVC(kernel = 'rbf', gamma = gamma)
            gaussian_svm.fit(X_combined, y_combined)

            test_scores.append(gaussian_svm.score(X_test, y_test))
            training_scores.append(gaussian_svm.score(X_combined, y_combined))

            print(f'Scores for Gamma={gamma} were processed')

    else:
        raise ValueError("Unsupported kernel. Please use 'linear' or 'gaussian'.")

    return training_scores, test_scores

# Experiments and Analysis
**SVM with linear kernel**

In [None]:
# Preparing the list of C values to be tested:
# ten values starting at 0.001, each 4 times the previous one

C_values = [0.001 * (4 ** power) for power in range(10)]

print(C_values)

In [None]:
# Training one linear SVM per C value

training_scores, validation_scores = train_svm(c_values=C_values, kernel='linear')
print(training_scores)
print(validation_scores)

In [None]:
# Plotting the scores on the training and validation sets against C

plt.figure(figsize=(10, 6))

plt.plot(
    C_values,
    training_scores,
    color='blue',
    linestyle='-',
    marker='o',
    label='Training Score',
)
plt.plot(
    C_values,
    validation_scores,
    color='red',
    linestyle='--',
    marker='o',
    label='Validation Score',
)

plt.title('SVM Scores vs C Values')
plt.xlabel('C Values')
plt.ylabel('Score')
plt.legend()
plt.grid(True)
plt.xscale('log')
plt.show()

In [None]:
# Using K-fold training for the 5 selected C values

best_Cs = [0.512, 1.024, 2.048, 4.096, 8.192]
models = [sk.svm.SVC(kernel = 'linear', C = c) for c in best_Cs]

models_scores = cross_validation(models)

print(models_scores)

In [None]:
# Plotting the k-fold scores returned by cross_validation for each selected C

plt.figure(figsize=(10, 6))

plt.plot(
    best_Cs,
    models_scores,
    color='blue',
    linestyle='-',
    marker='o',
    label='Training Score',
)

plt.title('SVM Scores vs C Values Using K-fold Cross-Validation')
plt.xlabel('C Values')
plt.ylabel('Score')
plt.legend()
plt.grid(True)
plt.xscale('log')
plt.show()

In [None]:
# Testing the selected values of C on the test set

best_Cs_test = [0.512, 1.024, 2.048, 4.096, 8.192, 16.384]

training_scores, test_scores = test_svm(best_Cs_test, kernel='linear')

print(training_scores, test_scores)

In [None]:
# Plotting the training and test scores returned by test_svm against C

plt.figure(figsize=(10, 6))

plt.plot(
    best_Cs_test,
    training_scores,
    color='blue',
    linestyle='-',
    marker='o',
    label='Training Score',
)
plt.plot(
    best_Cs_test,
    test_scores,
    color='red',
    linestyle='--',
    marker='o',
    label='Testing Score',
)

plt.title('SVM Scores vs C Values in the Full Training and Test Sets')
plt.xlabel('C Values')
plt.ylabel('Score')
plt.legend()
plt.grid(True)
plt.xscale('log')
plt.show()

**SVM with Gaussian kernels**

In [None]:
# Preparing the list of C values to be tested:
# ten values starting at 0.001, each 4 times the previous one

C_values = [0.001 * (4 ** power) for power in range(10)]

print(C_values)

In [None]:
# Preparing one RBF-kernel SVM per C value and cross-validating them

gaussian_svms_Cs = [sk.svm.SVC(kernel = 'rbf', C = c) for c in C_values]

gaussian_svms_Cs_scores = cross_validation(gaussian_svms_Cs)

In [None]:
# Plotting the K-fold Cross-validation scores of the Gaussian SVMs against C

plt.figure(figsize=(10, 6))

plt.plot(
    C_values,
    gaussian_svms_Cs_scores,
    color='blue',
    linestyle='-',
    marker='o',
    label='Scores for C values',
)

plt.title('Gaussian SVM Scores vs C Values using K-fold Cross-validation')
plt.xlabel('C Values')
plt.ylabel('Score')
plt.legend()
plt.grid(True)
plt.xscale('log')
plt.show()

In [None]:
# Candidate γ (gamma) values to be tested

γ_values = [
    0.001,
    0.064,
    1.024,
    4.096,
]

In [None]:
# Each γ value is going to be trained with each of the following Cs

test_Cs = [1.024, 2.048, 4.096]

# γ varies slowest: the first len(test_Cs) models share γ_values[0], and so on.
gaussian_svms = [
    sk.svm.SVC(kernel = 'rbf', C = c, gamma = γ)
    for γ in γ_values
    for c in test_Cs
]

gaussian_svms_scores = cross_validation(gaussian_svms)

print(gaussian_svms_scores)

In [None]:
# Plotting γ and C combinations results: one curve per γ, three scores each

plt.figure(figsize=(10, 6))

for curve_scores, curve_label, curve_style, curve_color in (
    (gaussian_svms_scores[0:3], 'Scores for γ = 0.001', '-', 'blue'),
    (gaussian_svms_scores[3:6], 'Scores for γ = 0.064', '--', 'red'),
    (gaussian_svms_scores[6:9], 'Scores for γ = 1.024', ':', 'green'),
    (gaussian_svms_scores[9:], 'Scores for γ = 4.096', '-.', 'purple'),
):
    plt.plot(
        test_Cs,
        curve_scores,
        label=curve_label,
        marker='o',
        linestyle=curve_style,
        color=curve_color,
    )

plt.title('Gaussian SVM Scores vs C Values for each γ value using K-fold Cross-Validation')
plt.xlabel('C Values')
plt.ylabel('Score')
plt.legend()
plt.grid(True)
plt.xscale('log')
plt.show()

In [None]:
# Creating function to test best tuned SVMs

def test_gaussian_svms(models):
    """Fit each model on train+validation and score it on both that set and the test set.

    The notebook globals ``X_train``/``y_train`` and
    ``X_validation``/``y_validation`` are stacked into one training set.

    Args:
        models: Iterable of models exposing ``fit`` and ``score``; each one
            is fitted in place.

    Returns:
        ``(training_scores, test_scores)``: two lists with one score per
        model, in input order. Training scores are measured on the combined
        training set, test scores on ``X_test``/``y_test``.
    """
    X_combined = np.vstack((X_train, X_validation))
    y_combined = np.concatenate((y_train, y_validation), axis=0)

    training_scores = []
    test_scores = []

    for model in models:
        model.fit(X_combined, y_combined)
        test_scores.append(model.score(X_test, y_test))
        # Score on the data the model was actually fitted on (the combined
        # set), not on the X_train subset alone.
        training_scores.append(model.score(X_combined, y_combined))

    return training_scores, test_scores

In [None]:
# Preparing Models for testing: one list of RBF SVMs per C value,
# each list holding one model per γ value

best_Cs_gaussian = [0.256, 1.024, 4.096, 16.384]
best_γs_gaussian = [0.256, 1.024, 4.096, 16.384]

svms_0, svms_1, svms_4, svms_16 = [], [], [], []

# Destination list for each C; any C not listed here falls through to svms_16.
svms_by_C = {0.256: svms_0, 1.024: svms_1, 4.096: svms_4}

for c in best_Cs_gaussian:
    destination = svms_by_C.get(c, svms_16)
    for γ in best_γs_gaussian:
        destination.append(sk.svm.SVC(kernel = 'rbf', C = c, gamma = γ))

print(len(svms_0), len(svms_1), len(svms_4), len(svms_16))

In [None]:
# Fitting and testing every group of Gaussian SVMs (one group per C value)
svms_0_training_scores, svms_0_test_scores = test_gaussian_svms(svms_0)
svms_1_training_scores, svms_1_test_scores = test_gaussian_svms(svms_1)
svms_4_training_scores, svms_4_test_scores = test_gaussian_svms(svms_4)
svms_16_training_scores, svms_16_test_scores = test_gaussian_svms(svms_16)

# One line per C value: training scores followed by test scores
for group_training_scores, group_test_scores in (
    (svms_0_training_scores, svms_0_test_scores),
    (svms_1_training_scores, svms_1_test_scores),
    (svms_4_training_scores, svms_4_test_scores),
    (svms_16_training_scores, svms_16_test_scores),
):
    print(group_training_scores, group_test_scores)

In [None]:
# Plotting the test scores against γ, one curve per C value

plt.figure(figsize=(10, 6))

for curve_scores, curve_label, curve_style, curve_color in (
    (svms_0_test_scores, 'Scores for C = 0.256', '-', 'blue'),
    (svms_1_test_scores, 'Scores for C = 1.024', '--', 'red'),
    (svms_4_test_scores, 'Scores for C = 4.096', ':', 'green'),
    (svms_16_test_scores, 'Scores for C = 16.384', '-.', 'purple'),
):
    plt.plot(
        best_γs_gaussian,
        curve_scores,
        label=curve_label,
        marker='o',
        linestyle=curve_style,
        color=curve_color,
    )

plt.title('Gaussian SVM Scores vs γ Values for each C value in the Test Set')
plt.xlabel('γ Values')
plt.ylabel('Score')
plt.legend()
plt.grid(True)
plt.xscale('log')
plt.show()

In [None]:
# Plotting the training scores against γ, one curve per C value

plt.figure(figsize=(10, 6))

for curve_scores, curve_label, curve_style, curve_color in (
    (svms_0_training_scores, 'Scores for C = 0.256', '-', 'blue'),
    (svms_1_training_scores, 'Scores for C = 1.024', '--', 'red'),
    (svms_4_training_scores, 'Scores for C = 4.096', ':', 'green'),
    (svms_16_training_scores, 'Scores for C = 16.384', '-.', 'purple'),
):
    plt.plot(
        best_γs_gaussian,
        curve_scores,
        label=curve_label,
        marker='o',
        linestyle=curve_style,
        color=curve_color,
    )

plt.title('Gaussian SVM Scores vs γ Values for each C value in the Full Training set')
plt.xlabel('γ Values')
plt.ylabel('Score')
plt.legend()
plt.grid(True)
plt.xscale('log')
plt.show()

**MLPCs**

In [None]:
# Function to train MLPCs from a list of Alpha values

def train_MLPC(alphas = None):
    """Fit one MLPClassifier per alpha value and collect its scores.

    Every classifier is created with only ``alpha`` set and fitted on the
    notebook globals ``X_train`` and ``y_train_noisy``. A progress line is
    printed after each alpha.

    Args:
        alphas: Iterable of ``alpha`` values to try.

    Returns:
        ``(training_scores, validation_scores)``: two lists with one score
        per alpha, in input order. Training scores are measured against
        ``y_train_noisy``, validation scores against ``y_validation``.
    """
    training_scores, validation_scores = [], []

    for alpha in alphas:
        classifier = sk.neural_network.MLPClassifier(alpha = alpha)
        classifier.fit(X_train, y_train_noisy)

        validation_score = classifier.score(X_validation, y_validation)
        training_score = classifier.score(X_train, y_train_noisy)

        validation_scores.append(validation_score)
        training_scores.append(training_score)

        print(f'Scores for alpha={alpha} were processed')

    return training_scores, validation_scores

In [None]:
# Trying a range of alpha values (each one 4 times the previous)

alphas = [
    0.001, 0.004, 0.016, 0.064, 0.256,
    1.024, 4.096, 16.384, 65.536, 262.144,
]

training_scores, validation_scores = train_MLPC(alphas=alphas)

print(training_scores, validation_scores)

In [None]:
# Plotting the validation and training scores of the MLPCs against alpha

plt.figure(figsize=(10, 6))

plt.plot(
    alphas,
    validation_scores,
    color='blue',
    linestyle='-',
    marker='o',
    label='Validation Score',
)
plt.plot(
    alphas,
    training_scores,
    color='red',
    linestyle='--',
    marker='o',
    label='Training Score',
)

plt.title('MLPCs Scores vs Alpha Values in Validation and Training sets')
plt.xlabel('Alpha Values')
plt.ylabel('Score')
plt.legend()
plt.grid(True)
plt.xscale('log')
plt.show()

In [None]:
# Creating models with different activation functions and hidden layer sizes:
# one list per activation, one model per hidden layer configuration

functions = ['logistic', 'tanh', 'relu']
hidden_layer_sizes = [(100,), (50,50), (25,75,25), (25,50,50,25), (25,25,25,25)]

logistic_models, tanh_models, relu_models = [], [], []

# Destination list for each supported activation function.
models_by_activation = {
    'logistic': logistic_models,
    'tanh': tanh_models,
    'relu': relu_models,
}

for function in functions:
    if function not in models_by_activation:
        raise ValueError("Unsupported activation function. Please use 'logistic', 'tanh' or 'relu'.")

    target_models = models_by_activation[function]
    for hidden_layer_size in hidden_layer_sizes:
        target_models.append(
            sk.neural_network.MLPClassifier(hidden_layer_sizes = hidden_layer_size, activation = function)
        )

print(len(logistic_models), len(tanh_models), len(relu_models))

In [None]:
# Training MLPCs using K-fold Cross-Validation (logistic, then tanh, then relu)

logistic_models_scores, tanh_models_scores, relu_models_scores = (
    cross_validation(model_family)
    for model_family in (logistic_models, tanh_models, relu_models)
)

print(logistic_models_scores, tanh_models_scores, relu_models_scores, sep='\n')

In [None]:
# Plotting the cross-validation scores per activation function and
# hidden layer configuration

# One x position per entry of hidden_layer_sizes.
x_axis = np.arange(1, len(hidden_layer_sizes) + 1)

plt.figure(figsize=(10, 6))

plt.plot(x_axis, logistic_models_scores, marker='o', linestyle='-', label='logistic')
plt.plot(x_axis, tanh_models_scores, marker='s', linestyle='--', label='tanh')
plt.plot(x_axis, relu_models_scores, marker='^', linestyle=':', label='relu')

# The x position is only an index into hidden_layer_sizes, not a layer count
# (the last two configurations both have four layers), so each tick is
# labelled with the configuration it stands for.
plt.xticks(x_axis, [str(layer_sizes) for layer_sizes in hidden_layer_sizes])
plt.xlabel('Hidden Layer Sizes')
plt.ylabel('Accuracy')
plt.title('MLPCs Performance with Different Activation Functions and Hidden Layer Configurations')
plt.legend()
plt.grid(True)

plt.show()

In [None]:
# Creating function to test MLPCs in the full training and test sets

def test_mlpcs(models):
    """Fit each model on train+validation and score it on both that set and the test set.

    The notebook globals ``X_train``/``y_train`` and
    ``X_validation``/``y_validation`` are stacked into one training set.

    Args:
        models: Iterable of models exposing ``fit`` and ``score``; each one
            is fitted in place.

    Returns:
        ``(training_scores, test_scores)``: two lists with one score per
        model, in input order. Training scores are measured on the combined
        training set, test scores on ``X_test``/``y_test``.
    """
    X_combined = np.vstack((X_train, X_validation))
    y_combined = np.concatenate((y_train, y_validation), axis=0)

    training_scores = []
    test_scores = []

    for model in models:
        model.fit(X_combined, y_combined)
        test_scores.append(model.score(X_test, y_test))
        # Score on the data the model was actually fitted on (the combined
        # set), not on the X_train subset alone.
        training_scores.append(model.score(X_combined, y_combined))

    return training_scores, test_scores

In [None]:
# Building MLPCs with different values for epochs (max_iter) and learning_rate

max_interations = [50, 100, 200, 300, 500]
learning_rates = ['constant', 'invscaling', 'adaptive']

# One model per max_iter value, everything else fixed.
max_interations_mlpcs = [
    sk.neural_network.MLPClassifier(max_iter = iterations, activation = 'relu', hidden_layer_sizes = (50,50))
    for iterations in max_interations
]

# One model per learning-rate schedule, all using the 'sgd' solver.
learning_rates_mlpcs = [
    sk.neural_network.MLPClassifier(solver='sgd', learning_rate = learning_rate, activation = 'relu', hidden_layer_sizes = (50,50))
    for learning_rate in learning_rates
]

print(len(max_interations_mlpcs), len(learning_rates_mlpcs), sep='\n')

In [None]:
# Fitting and testing both families of MLPCs (max_iter first, then learning rate)
(
    (max_interations_mlpcs_training_scores, max_interations_mlpcs_test_scores),
    (learning_rates_mlpcs_training_scores, learning_rates_mlpcs_test_scores),
) = (
    test_mlpcs(mlpc_family)
    for mlpc_family in (max_interations_mlpcs, learning_rates_mlpcs)
)

print(max_interations_mlpcs_test_scores, learning_rates_mlpcs_test_scores, sep='\n')

In [None]:
# Plotting results: two stacked panels sharing one figure
plt.figure(figsize=(12, 8))

# Top panel: test score against max_iter
plt.subplot(2, 1, 1)
plt.plot(
    max_interations,
    max_interations_mlpcs_test_scores,
    color='blue',
    linestyle='-',
    marker='o',
)
plt.title('MLPC Accuracy for Different Max Iterations')
plt.xlabel('Max Iterations')
plt.ylabel('Accuracy')
plt.grid(True)

# Bottom panel: test score against learning-rate schedule
plt.subplot(2, 1, 2)
plt.plot(
    learning_rates,
    learning_rates_mlpcs_test_scores,
    color='red',
    linestyle='--',
    marker='s',
)
plt.title('MLPC Accuracy for Different Learning Rate Strategies')
plt.xlabel('Learning Rate Strategy')
plt.ylabel('Accuracy')
plt.grid(True)

plt.tight_layout()
plt.show()

**Final Comparison**

In [None]:
# Configuring the parameters of the three models used in the final comparison

best_mlpc = sk.neural_network.MLPClassifier(
    max_iter = 100,
    activation = 'relu',
    hidden_layer_sizes = (50,50),
    solver = 'sgd',
    learning_rate = 'adaptive',
    alpha = 0.256,
)
best_gaussian_svm = sk.svm.SVC(kernel = 'rbf', C = 4.096, gamma = 4.096)
best_linear_svm = sk.svm.SVC(kernel = 'linear', C = 4.096)

# Order matters: later cells label results as MLPC, Gaussian SVM, Linear SVM.
best_models_list = [
    best_mlpc,
    best_gaussian_svm,
    best_linear_svm,
]

In [None]:
# Loading the data again to train on the full, noise-free set
# (same filtering, relabelling and row normalisation as in Data Preparation)

(X_train, y_train), (X_test, y_test) = (
    mnist_reader.load_mnist('fashion', kind=split_kind)
    for split_kind in ('train', 't10k')
)

# Keep only the samples labelled 5 or 7.
mask_train = np.isin(y_train, (5, 7))
X_train, y_train = X_train[mask_train], y_train[mask_train]

mask_test = np.isin(y_test, (5, 7))
X_test, y_test = X_test[mask_test], y_test[mask_test]

# Binary labels: 5 becomes 0, everything else (7) becomes 1.
y_train = (y_train != 5).astype(int)
y_test = (y_test != 5).astype(int)

# Substitute divisor for rows whose norm is exactly zero.
epsilon = 1e-10

norm_train = np.linalg.norm(X_train, axis=1, keepdims=True)
norm_train[norm_train == 0] = epsilon
X_train = X_train / norm_train

norm_test = np.linalg.norm(X_test, axis=1, keepdims=True)
norm_test[norm_test == 0] = epsilon
X_test = X_test / norm_test

print(X_train.shape, X_test.shape, sep='\n')

In [None]:
# Fitting each best model on the full training set and scoring it on the
# test set and on the training set

best_models_training_scores, best_models_test_scores = [], []

for model in best_models_list:
    model.fit(X_train, y_train)

    test_score = model.score(X_test, y_test)
    training_score = model.score(X_train, y_train)

    best_models_test_scores.append(test_score)
    best_models_training_scores.append(training_score)

print(best_models_training_scores, best_models_test_scores, sep='\n')

In [None]:
# Plotting test and training scores of the three best models side by side

plt.figure(figsize=(10, 6))

plt.plot(
    ['Best MLPC', 'Best Gaussian SVM', 'Best Linear SVM'],
    best_models_test_scores,
    color='blue',
    linestyle='-',
    marker='o',
    label = 'Test Set',
)
plt.plot(
    ['Best MLPC', 'Best Gaussian SVM', 'Best Linear SVM'],
    best_models_training_scores,
    color='red',
    linestyle='--',
    marker='o',
    label = 'Training Set',
)

plt.title('Scores by Models in the Full Training and Test sets')
plt.xlabel('Best Models')
plt.ylabel('Score')
plt.legend()
plt.grid(True)
plt.show()