In [3]:
#----------------------------------------------------------------------------- Preparing the dataset-----------------------------------------------------------------------------#
import numpy as np
import time
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler
from keras.datasets import cifar10
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist



# Fetch the CIFAR-10 train/test splits
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Human-readable CIFAR-10 class names, indexed by class id
labels = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# Binary task: keep only the two chosen classes (per `labels`: 3 = cat, 5 = dog)
class_1, class_2 = 3, 5
mask_train = np.isin(y_train.ravel(), [class_1, class_2])
mask_test = np.isin(y_test.ravel(), [class_1, class_2])

# Drop every sample that belongs to neither class
x_train = x_train[mask_train]
x_test = x_test[mask_test]

# Encode the targets as 0 (class_1) and 1 (class_2), as flat 1-D vectors
y_train = (y_train[mask_train].ravel() == class_2).astype(int)
y_test = (y_test[mask_test].ravel() == class_2).astype(int)

# Standardize the flattened images (one row per image);
# the scaler statistics come from the training split only
scaler = StandardScaler()
x_train_flat = scaler.fit_transform(x_train.reshape(len(x_train), -1))
x_test_flat = scaler.transform(x_test.reshape(len(x_test), -1))

# PCA keeping enough components to explain 90% of the training variance;
# the test split is projected with the basis fitted on the training split
pca = PCA(n_components=0.9)
x_train_pca = pca.fit_transform(x_train_flat)
x_test_pca = pca.transform(x_test_flat)


In [None]:
#----------------------------------------------------------------------------- RBF Network (with K-Means)-----------------------------------------------------------------------------#
class RBFNN:
    """RBF network: K-Means centers, Gaussian hidden layer, logistic-regression output.

    Parameters
    ----------
    num_centers : int
        Number of RBF centers (K-Means clusters) in the hidden layer.
    spread : float or None, default=None
        Width (sigma) of the Gaussian basis functions. Must be > 0.
        When None, the width is derived at fit time from the fitted centers
        as ``d_max / sqrt(2 * n_centers)``, where ``d_max`` is the largest
        distance between any two centers.

    Attributes
    ----------
    centers : ndarray or None
        Cluster centers found by K-Means; None until ``fit`` is called.
    classifier : LogisticRegression or None
        Output layer trained on the RBF features; None until ``fit`` is called.
    spread_ : float or None
        Width actually used by the hidden layer; None until ``fit`` is called.
    """

    def __init__(self, num_centers, spread=None):
        self.num_centers = num_centers
        self.spread = spread
        self.centers = None
        self.classifier = None
        self.spread_ = None

    def _resolve_spread(self):
        """Return the Gaussian width to use, validating or deriving it.

        Raises
        ------
        ValueError
            If an explicit ``spread`` is not strictly positive.
        """
        if self.spread is not None:
            if self.spread <= 0:
                raise ValueError(f"spread must be > 0, got {self.spread!r}")
            return float(self.spread)

        n_centers = len(self.centers)
        if n_centers < 2:
            return 1.0  # no pairwise distance to derive a width from
        # NOTE: builds an (n_centers x n_centers) distance matrix, which is
        # memory hungry for very large numbers of centers.
        d_max = cdist(self.centers, self.centers).max()
        if d_max == 0:
            return 1.0  # all centers coincide; avoid a zero width
        return float(d_max / np.sqrt(2 * n_centers))

    def _rbf_features(self, x):
        """Map samples to Gaussian activations, one column per center."""
        # 'sqeuclidean' yields the squared distance directly, avoiding a
        # square root that would immediately be squared again.
        sq_dist = cdist(x, self.centers, metric='sqeuclidean')
        return np.exp(-sq_dist / (2.0 * self.spread_ ** 2))

    def fit(self, x, y):
        """Fit the centers with K-Means, then train the output layer.

        Parameters
        ----------
        x : array-like, one row per sample
        y : array-like of class labels, one per row of ``x``

        Returns
        -------
        self : RBFNN
            The fitted network (allows call chaining).
        """
        # Choose the centers with K-Means from scikit-learn
        kmeans = KMeans(n_clusters=self.num_centers, random_state=100)
        self.centers = kmeans.fit(x).cluster_centers_
        self.spread_ = self._resolve_spread()

        # Train the output layer (logistic regression) on the RBF features;
        # max_iter caps the number of solver iterations
        self.classifier = LogisticRegression(solver='lbfgs', C = 0.8, max_iter=1500)
        self.classifier.fit(self._rbf_features(x), y)
        return self

    def predict(self, x):
        """Predict class labels for the rows of ``x``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.centers is None or self.classifier is None:
            raise RuntimeError("RBFNN.predict() called before fit()")
        return self.classifier.predict(self._rbf_features(x))
    
    

# Setting up and testing our RBF model
num_centers_list = [10, 100, 500, 1000, 3000, 5000, 8000, 10000] # Numbers of centers to evaluate
spreads = [0.1, 1, 10, 50 , 80, 150, 300, 450, 600, 800, 1000, 1200, 1500, 2000]  # Gaussian spreads (widths) to evaluate


for num_centers in num_centers_list:
    print(f"##################################### centers: {num_centers} #####################################")
    # Accuracies collected per spread; they feed the plot below
    train_accuracies = []
    test_accuracies = []

    for spread in spreads:
        print(f"------------------------------- Spread: {spread}-------------------------------")

        # Build the RBF network for this (num_centers, spread) combination
        rbf_net = RBFNN(num_centers=num_centers, spread=spread)

        # Time the fit only, so the value printed as "Training time" does not
        # also include the two prediction passes and the accuracy scoring.
        # perf_counter is a monotonic clock intended for measuring intervals.
        start_time = time.perf_counter()
        rbf_net.fit(x_train_pca, y_train)
        end_time = time.perf_counter() - start_time

        # Making predictions
        y_train_pred = rbf_net.predict(x_train_pca)
        y_test_pred = rbf_net.predict(x_test_pca)

        # Training and testing accuracy, in percent
        train_accuracy = accuracy_score(y_train, y_train_pred)*100
        test_accuracy = accuracy_score(y_test, y_test_pred)*100

        train_accuracies.append(train_accuracy)
        test_accuracies.append(test_accuracy)

        print(f"Training time: {end_time:.2f} seconds")
        print(f"Training accuracy: {train_accuracy:.2f}%")
        print(f"Testing accuracy: {test_accuracy:.2f}%")

    # Plot accuracy against spread for the current number of centers
    fig, ax = plt.subplots()
    ax.plot(spreads, train_accuracies, marker='o', label='Training Accuracy')
    ax.plot(spreads, test_accuracies, marker='s', label='Testing Accuracy')
    ax.set_title(f'Accuracy vs Spread for {num_centers} Centers')
    ax.set_xlabel('Spread')
    ax.set_ylabel('Accuracy (%)')
    ax.set_xscale('log')  # spreads span several orders of magnitude
    ax.set_ylim(0, 100)  # accuracy ranges from 0 to 100
    ax.legend()
    ax.grid(True, which="both", linestyle="--", linewidth=0.5)
    plt.show()
    plt.close(fig)  # release the figure; one is created per loop iteration


In [None]:
#----------------------------------------------------------------------------- RBF Network (with Random Centroids)-----------------------------------------------------------------------------#
class RBFNN:
    """RBF network: randomly sampled centers, Gaussian hidden layer, logistic-regression output.

    Parameters
    ----------
    num_centers : int
        Number of RBF centers, sampled without replacement from the training rows.
    spread : float or None, default=None
        Width (sigma) of the Gaussian basis functions. Must be > 0.
        When None, the width is derived at fit time from the chosen centers
        as ``d_max / sqrt(2 * n_centers)``, where ``d_max`` is the largest
        distance between any two centers.
    random_state : int or None, default=None
        Seed for the center selection. When None, the global ``np.random``
        state is used.

    Attributes
    ----------
    centers : ndarray or None
        Training rows chosen as centers; None until ``fit`` is called.
    classifier : LogisticRegression or None
        Output layer trained on the RBF features; None until ``fit`` is called.
    spread_ : float or None
        Width actually used by the hidden layer; None until ``fit`` is called.
    """

    def __init__(self, num_centers, spread=None, random_state=None):
        self.num_centers = num_centers
        self.spread = spread
        self.random_state = random_state
        self.centers = None
        self.classifier = None
        self.spread_ = None

    def _select_centers(self, x):
        """Return ``num_centers`` distinct rows of ``x`` chosen at random.

        Raises
        ------
        ValueError
            If more centers are requested than there are rows in ``x``.
        """
        n_samples = x.shape[0]
        if self.num_centers > n_samples:
            raise ValueError(
                f"num_centers={self.num_centers} exceeds the number of samples ({n_samples})"
            )
        # A private RandomState makes the draw reproducible without touching
        # the global RNG; without a seed, fall back to the global np.random.
        rng = np.random if self.random_state is None else np.random.RandomState(self.random_state)
        random_indices = rng.choice(n_samples, self.num_centers, replace=False)
        return x[random_indices]

    def _resolve_spread(self):
        """Return the Gaussian width to use, validating or deriving it.

        Raises
        ------
        ValueError
            If an explicit ``spread`` is not strictly positive.
        """
        if self.spread is not None:
            if self.spread <= 0:
                raise ValueError(f"spread must be > 0, got {self.spread!r}")
            return float(self.spread)

        n_centers = len(self.centers)
        if n_centers < 2:
            return 1.0  # no pairwise distance to derive a width from
        # NOTE: builds an (n_centers x n_centers) distance matrix, which is
        # memory hungry for very large numbers of centers.
        d_max = cdist(self.centers, self.centers).max()
        if d_max == 0:
            return 1.0  # all centers coincide; avoid a zero width
        return float(d_max / np.sqrt(2 * n_centers))

    def _rbf_features(self, x):
        """Map samples to Gaussian activations, one column per center."""
        # 'sqeuclidean' yields the squared distance directly, avoiding a
        # square root that would immediately be squared again.
        sq_dist = cdist(x, self.centers, metric='sqeuclidean')
        return np.exp(-sq_dist / (2.0 * self.spread_ ** 2))

    def fit(self, x, y):
        """Sample the centers from ``x``, then train the output layer.

        Parameters
        ----------
        x : ndarray, one row per sample
        y : array-like of class labels, one per row of ``x``

        Returns
        -------
        self : RBFNN
            The fitted network (allows call chaining).
        """
        # Choose the centers by sampling training rows at random (no K-Means here)
        self.centers = self._select_centers(x)
        self.spread_ = self._resolve_spread()

        # Train the output layer (logistic regression) on the RBF features;
        # max_iter caps the number of solver iterations
        self.classifier = LogisticRegression(solver='lbfgs', C = 0.8, max_iter=1500)
        self.classifier.fit(self._rbf_features(x), y)
        return self

    def predict(self, x):
        """Predict class labels for the rows of ``x``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.centers is None or self.classifier is None:
            raise RuntimeError("RBFNN.predict() called before fit()")
        return self.classifier.predict(self._rbf_features(x))
    
    

# Setting up and testing our RBF model
num_centers_list = [10, 100, 500, 1000, 3000, 5000, 8000, 10000] # Numbers of centers to evaluate
spreads = [0.1, 1, 10, 50 , 80, 150, 300, 450, 600, 800, 1000, 1200, 1500, 2000]  # Gaussian spreads (widths) to evaluate
RANDOM_SEED = 100  # seed for the random center selection


for num_centers in num_centers_list:
    print(f"##################################### centers: {num_centers} #####################################")
    # Accuracies collected per spread; they feed the plot below
    train_accuracies = []
    test_accuracies = []

    for spread in spreads:
        print(f"------------------------------- Spread: {spread}-------------------------------")

        # Re-seed the global NumPy RNG before every fit so the random center
        # selection draws the same centers for each spread: runs become
        # reproducible and the spread curves are not confounded by a
        # different random set of centers at every point.
        np.random.seed(RANDOM_SEED)

        # Build the RBF network for this (num_centers, spread) combination
        rbf_net = RBFNN(num_centers=num_centers, spread=spread)

        # Time the fit only, so the value printed as "Training time" does not
        # also include the two prediction passes and the accuracy scoring.
        # perf_counter is a monotonic clock intended for measuring intervals.
        start_time = time.perf_counter()
        rbf_net.fit(x_train_pca, y_train)
        end_time = time.perf_counter() - start_time

        # Making predictions
        y_train_pred = rbf_net.predict(x_train_pca)
        y_test_pred = rbf_net.predict(x_test_pca)

        # Training and testing accuracy, in percent
        train_accuracy = accuracy_score(y_train, y_train_pred)*100
        test_accuracy = accuracy_score(y_test, y_test_pred)*100

        train_accuracies.append(train_accuracy)
        test_accuracies.append(test_accuracy)

        print(f"Training time: {end_time:.2f} seconds")
        print(f"Training accuracy: {train_accuracy:.2f}%")
        print(f"Testing accuracy: {test_accuracy:.2f}%")

    # Plot accuracy against spread for the current number of centers
    fig, ax = plt.subplots()
    ax.plot(spreads, train_accuracies, marker='o', label='Training Accuracy')
    ax.plot(spreads, test_accuracies, marker='s', label='Testing Accuracy')
    ax.set_title(f'Accuracy vs Spread for {num_centers} Centers')
    ax.set_xlabel('Spread')
    ax.set_ylabel('Accuracy (%)')
    ax.set_xscale('log')  # spreads span several orders of magnitude
    ax.set_ylim(0, 100)  # accuracy ranges from 0 to 100
    ax.legend()
    ax.grid(True, which="both", linestyle="--", linewidth=0.5)
    plt.show()
    plt.close(fig)  # release the figure; one is created per loop iteration

        