OSNN Algorithm:

In [None]:
from import_and_save import *
from centers_training import *
from width_update import *
from predict_function import *
from calc_pseudolabels import *
from weight_update import *
from calc_CEL import *
from evaluation_functions import *

import time

The function below is to remove unlabelled samples from the batch if we wish to do supervised learning

In [None]:
def remove_unlabelled_samples(batch):
    '''
    Drop the unlabelled samples from a batch so the model can be trained in a supervised way.

    parameters:
    - batch : 2-D numpy array (or array-like of equal-length rows) with one sample per row.
              The last column holds the label; a value of -1 marks an unlabelled sample.

    returns:
    - a new numpy array containing only the labelled rows, in their original order.
      If no row is labelled, an empty array of shape (0, number_of_columns) is returned,
      so the result keeps the column count of the input.
    '''
    #accept lists of rows as well as arrays (a plain list has no .shape)
    batch = np.asarray(batch)

    #row mask: True wherever the label column is not the "unlabelled" marker
    labelled = batch[:, -1] != -1

    if not labelled.any():
        #keep the column count so the empty result stays 2-D
        return np.empty((0, batch.shape[1]))

    #boolean indexing returns a copy, so the caller's array is never modified
    return batch[labelled]

Below is the main OSNN algorithm

In [None]:
def OSNN(D, N, H, lam, alpha, beta, gamma, type=0, max_steps=10000):
    '''
    The main body of the OSNN algorithm. This model assumes that the number of neurons is less than the chunk size (i.e, H < N)

    parameters:
    - D : dataset, a 2-D numpy array with one sample per row. The last column is the true label,
          the second-to-last column is the assigned label (-1 marks an unlabelled sample) and
          every column before those two is an input attribute.
    - N : chunk size
    - H : number of neurons in the network
    - lam : manifold regularisation term
    - alpha : L2 regularisation term
    - beta : RBF_width
    - gamma : RBFN_width
    - type : 1 for supervised, 0 for semisupervised
    - max_steps : maximum number of time steps to process (default 10000). Pass None to run over
                  the whole dataset. The run always stops at len(D) - 1 steps at the latest,
                  because every step predicts the sample that follows it.

    returns:
    - the predictions made, as a numpy array with one row per predicted sample, structured as
      [predicted_probability, predicted_class, true_label, assigned_label]

    side effects:
    - reseeds numpy's global random generator with 1
    - prints the current time step every 250 steps
    '''

    #fix a seed
    np.random.seed(1)

    #derive the column layout from the dataset instead of hard-coding it
    n_attrs = D.shape[1] - 2 #attribute columns only (assigned label and true label excluded)
    n_batch_cols = D.shape[1] - 1 #attribute columns + assigned label (true label excluded)

    #each step predicts sample t+1, so t must stay below len(D) - 1 to avoid indexing past the data
    last_step = len(D) - 1
    n_steps = last_step if max_steps is None else min(max_steps, last_step)

    #initialise the algorithm
    t = 0
    C = np.empty((0, n_attrs)) #the centers, one row per neuron
    w = np.empty((0, 0)) #the weights; np.append below flattens this into a 1-D array
    widths = None #set as soon as the first center exists
    batch = np.empty((0, n_batch_cols))

    #initialise an array to store predicted values and the labels of the predicted sample
    predictions = np.empty((0,4))

    while t < n_steps: #while there is data remaining (and the step limit is not reached)

        if t%250 == 0: #to track the progress of the algorithm
            print(t)

        #set current batch
        if len(batch) < N: #if the size of the batch is less than the chunk size,
            batch = np.vstack((batch, D[t][:-1])) #append the most recent sample
        else:
            batch = D[t-N+1:t+1,:-1] #otherwise set the batch to be the N most recent samples

        #if we are setting the model to be supervised only, we remove the unlabelled data in the batch
        #NOTE(review): the filtered batch can be shorter than N, so on the next step the newest sample is
        #appended to the filtered batch instead of re-taking the N most recent samples - confirm this is intended
        if type == 1:
            batch = remove_unlabelled_samples(batch)

        #if the batch is empty:
        if len(batch) == 0:
            #if there are no centers or weights, then there's nothing to predict so move to the next time step
            if C.size == 0 or w.size == 0:
                t += 1
            #if there are centers and weights and the batch is empty, then just predict the next sample using the current weights,
            #centers and widths and move to the next time step.
            else:
                prob = predict(D[t+1], C, widths, w)
                s = 0 if prob < 0.5 else 1
                #store probability, prediction, true label (last column), and assigned label (second-to-last column)
                predictions = np.vstack((predictions, [prob, s, D[t+1,-1], D[t+1,-2]]))
                t += 1

        #if the batch is not empty, then see if we need to add centers, add them, else, train them, update, predict, and move to the next time step
        else:

            if len(C) < H:
                #while the number of centers is less than the number of neurons that we assigned
                #this is to ensure that we have enough centers for all the nodes of the network before we begin training

                #add that sample to the set of centers (attribute columns only)
                C = np.vstack((C, D[t][0:n_attrs]))

                #initialise a new weight for that center
                w = np.append(w, np.random.normal(0, 0.1))

            else:
                #train centers
                C = train_centers(C, batch)

            #update the widths using the centers and the RBF width parameter, beta
            widths = update_widths(C, beta)

            #to calculate the pseudolabels and crossentropy loss, we require the predicted values of all samples in the batch, and the centers
            pred = predict_multiple(batch, C, widths, w)
            c_pred = predict_multiple(C, C, widths, w)

            #set learning rate eta = 1 and a stopping point epsilon. (epsilon is a user parameter that'll require experimenting with)
            eta = 1
            epsilon = 0.0039 # =2^-8

            #halve the learning rate until a step lowers the loss; if none does before eta <= epsilon, the weights are kept
            while eta > epsilon:
                #calc pseudolabels using centers, batch,
                mu = pseudolabels_calc(C, batch, pred, c_pred, alpha, gamma)

                #update the weights
                w_new = update_weights(w, batch, pred, mu, alpha, lam, C, widths, eta)
                pred_w_new = predict_multiple(batch, C, widths, w_new)

                #if the loss of the new weights is less than the loss of the current weights, set the new weights as the current weights
                if cross_entropy_loss(batch, pred_w_new, mu, w_new, alpha, lam) < cross_entropy_loss(batch, pred, mu, w, alpha, lam):
                    w = w_new
                    break #breaks out of the 'while eta > epsilon:' loop
                else:
                    eta = eta/2


            prob = predict(D[t+1], C, widths, w)
            s = 0 if prob < 0.5 else 1
            #store probability, prediction, true label (last column), and assigned label (second-to-last column)
            predictions = np.vstack((predictions, [prob, s, D[t+1,-1], D[t+1,-2]]))

            t += 1

    #only the predictions are returned; C, widths and w hold the final model if it is ever needed
    return predictions

Below we test randomly drawn values for N, H, lam, alpha, beta, and gamma, each sampled from an appropriate distribution

First we fix a random seed so that the same parameters are always drawn for both the supervised and semi-supervised tests

Next I draw values for the parameters. The experiment is done with 50 different settings for each parameter, namely, N, H, lambda, alpha, beta, and gamma.

- N : uniformly from [2,160]
  
- H : uniformly from [1, max(⌈N/4⌉, 10)]

- lambda : uniformly from [0,1]

- alpha : uniformly from [0,1]

- beta : exponential distribution with mean = 1

- gamma : exponential distribution with mean = 2

Below are the results of drawing these random parameters with a fixed seed of 70

In [None]:
#seed numpy's global generator so the same 50 settings are drawn on every run
np.random.seed(70)

#50 integer chunk sizes N; randint excludes the upper bound, so the values lie in 2..159
N_params = np.random.randint(2, 160, 50)

#one neuron count H per chunk size, drawn from 1 up to (but excluding) max(ceil(N/4), 10)
H_params = np.array([
    np.random.randint(1, max(np.ceil(chunk / 4), 10))
    for chunk in N_params
])

#50 floats each for lambda and alpha, uniform on [0, 1)
lam_params = np.random.uniform(0, 1, 50)
alpha_params = np.random.uniform(0, 1, 50)

#50 floats for beta from an exponential distribution with mean 1
beta_params = np.random.exponential(scale=1, size=50)

#50 floats for gamma from an exponential distribution with mean 2
gamma_params = np.random.exponential(scale=2, size=50)


#show every drawn setting
print(f"N's : \n {N_params}")
print(f"H's : \n {H_params}")
print(f"lambdas's : \n {lam_params}")
print(f"alpha's : \n {alpha_params}")
print(f"beta's : \n {beta_params}")
print(f"gamma's : \n {gamma_params}")

After testing, the best parameters for each model are the following:

semi-supervised:

N = 83, H = 8, lambda = 0.9972622972412019, alpha = 0.29875430006540116, beta = 0.027522971098209642, gamma = 0.5603235966001504

supervised:

N = 115, H = 28, lam = 0.7027851920001379, alpha = 0.30474592021652713, beta = 0.01810259997923652, gamma = 0.5005044156978288

Run the cell below with the optimal parameters for the supervised model if you wish to do a supervised run, or the cell after it for a semi-supervised run

In [None]:
#optimal parameters for supervised (these names are passed to OSNN in the run cell):
chunk_size = 115 #N : chunk size
num_of_neurons = 28 #H : number of neurons in the network
manifold_reg = 0.7027851920001379 #lam : manifold regularisation term
L2_reg = 0.30474592021652713 #alpha : L2 regularisation term
RBF_width = 0.01810259997923652 #beta : RBF_width
RBFN_width = 0.5005044156978288 #gamma : RBFN_width

In [None]:
#optimal parameters for semisupervised:
#the assignments below sit inside a string literal, so running this cell changes nothing;
#remove the surrounding triple quotes to use these values instead of the supervised ones
'''
chunk_size = 83 #N
num_of_neurons = 8 #H
manifold_reg = 0.9972622972412019 #lam
L2_reg = 0.29875430006540116 #alpha
RBF_width = 0.027522971098209642 #beta
RBFN_width = 0.5603235966001504 #gamma
'''

Choose the dataset to experiment with

In [None]:
#choose dataset: load the comma-delimited CSV that is passed to OSNN as D
#NOTE(review): OSNN reads the last column as the true label and the second-to-last as the assigned label - confirm the file follows that layout
data = import_data("tomcat-Sort-PreProcess-minmax2-withfix.csv", delim=",")

Run the code below. Ensure that the 'type' parameter is 0 for semi-supervised, and 1 for supervised.

In [None]:
#Start the timer
start_time = time.perf_counter()

#run the model; OSNN returns only the predictions array
#type = 1 selects the supervised mode, use type = 0 for semi-supervised
predictions = OSNN(data, chunk_size, num_of_neurons, manifold_reg, L2_reg, RBF_width, RBFN_width, type = 1)

#end the timer
end_time = time.perf_counter()

#calculate the elapsed wall-clock time of the run, in seconds
elapsed_time = end_time - start_time
print(f"Time taken: {elapsed_time} seconds")

Save the results

In [None]:
#save the predictions array to a CSV file
#NOTE(review): the file name says "brackets" and "supervised", while the dataset loaded in this notebook is tomcat -
#confirm the name matches the dataset and the 'type' used for the run before saving
save_to_CSV('brackets_predictions_supervised.csv', predictions)