Mini Project 2: Wen Xuan Wu, Chen Hu, Talise Wang



In [None]:
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
from IPython.core.debugger import set_trace
import warnings
warnings.filterwarnings('ignore')
import random

## **Datasets**

Load Digits Dataset

In [None]:
# Load the scikit-learn handwritten-digits dataset as (features, labels) arrays
from sklearn.datasets import load_digits
digits_x, digits_y = load_digits(return_X_y=True)
# Number of target classes handed to SoftmaxRegression in the cells below
digits_classes=10
print(digits_x.shape)

(1797, 64)


Load Iris Dataset

In [None]:
from sklearn.datasets import fetch_openml
iris_x, iris_y = fetch_openml(name='iris', version=1, return_X_y=True)
# Depending on the scikit-learn version, fetch_openml may hand back pandas
# objects. The k-fold code in this notebook indexes rows positionally with
# x[index_array], which only works on NumPy arrays, so convert explicitly.
iris_x = np.asarray(iris_x, dtype=float)
iris_y = np.asarray(iris_y)
# Map the sorted class names to consecutive integer codes 0..K-1
iris_label_map = {k: v for v, k in enumerate(sorted(list(set(iris_y))))}
vectorize = np.vectorize(lambda map_, x: map_[x])
iris_y = vectorize(iris_label_map, iris_y)
print(iris_x.shape, iris_y.shape, np.unique(iris_y))

(150, 4) (150,) [0 1 2]


Implementing SoftmaxRegression Class

In [None]:
def oneHot(y, num_classes):
  """Return a (len(y), num_classes) float array with a single 1 per row.

  Row ``i`` has its 1 in column ``y[i]``; every other entry is 0.
  ``y`` must contain integer class indices.
  """
  n_samples = len(y)
  encoded = np.zeros((n_samples, num_classes))
  row_index = np.arange(n_samples)
  encoded[row_index, y] = 1
  return encoded

In [None]:

#implement the softmax operation
def softmax(x):
    """Row-wise softmax of a 2-D array of scores.

    Args:
        x: array of shape (N, K); each row holds the K class scores of one sample.

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    # Subtracting each row's maximum does not change the result mathematically
    # (the factor cancels in the ratio) but keeps np.exp from overflowing to
    # inf, which would otherwise turn the whole row into nan.
    shifted = x - np.max(x, axis=1, keepdims=True)
    x_exp = np.exp(shifted)
    partition = x_exp.sum(1, keepdims=True)
    return x_exp / partition

class SoftmaxRegression:
    """Multi-class softmax (multinomial logistic) regression.

    The weights are learned by an external optimizer object; this class only
    supplies the gradient of the mean cross-entropy loss and the prediction rule.
    """

    def __init__(self, num_classes, add_bias=True):
        """Remember the number of classes and whether to append a bias feature."""
        self.num_classes = num_classes
        self.add_bias = add_bias
        # Weight matrix; stays None until fit() has been called
        self.w = None

    def fit(self, xTrained, yTrained, xVal, yVal, optimizer):
        """Fit the weights and return what the optimizer reports.

        Args:
            xTrained, yTrained: training features and integer class labels.
            xVal, yVal: validation features and labels, forwarded to the optimizer.
            optimizer: object whose ``run(gradient, xT, yT, xV, yV, w0, num_classes)``
                returns ``(weights, history, best_dict)``.

        Returns:
            ``(history, best_dict)`` exactly as returned by ``optimizer.run``.
        """
        if self.add_bias:
            # The bias is modelled as an extra constant feature equal to 1
            train_ones = np.ones(xTrained.shape[0])
            xTrained = np.column_stack([xTrained, train_ones])
            val_ones = np.ones(xVal.shape[0])
            xVal = np.column_stack([xVal, val_ones])
        _, n_features = xTrained.shape

        def gradient(x, y, w):
            """Gradient of the cross-entropy loss, averaged over the rows of ``x``."""
            n_rows = x.shape[0]
            targets = oneHot(y, self.num_classes)
            probabilities = softmax(x @ w)
            return np.dot(x.T, probabilities - targets) / n_rows

        # Start from all-zero weights; the optimizer returns the weights it selected
        initial_w = np.zeros((n_features, self.num_classes))
        self.w, history, best_dict = optimizer.run(
            gradient, xTrained, yTrained, xVal, yVal, initial_w, self.num_classes
        )
        return history, best_dict

    def predict(self, x):
        """Return the (N, num_classes) matrix of class probabilities for ``x``."""
        n_rows = x.shape[0]
        if self.add_bias:
            x = np.column_stack([x, np.ones(n_rows)])
        scores = x @ self.w
        return softmax(scores)

Implementing GradientDescent Class

In [None]:
from sklearn.metrics import mean_squared_error
import pandas as pd

In [None]:
class GradientDescent:
  """Mini-batch gradient descent with momentum, L2 regularization and early stopping.

  After every mini-batch update the weights are scored on the validation set.
  The weights with the lowest validation error are kept, and training stops
  once that error has not improved by more than ``epsilon`` for
  ``termination_con`` consecutive updates.

  The "error" tracked here is the mean squared difference between predicted
  and true class indices (see ``error``), not the misclassification rate.
  """

  #initialization function
  def __init__(self, learning_rate = 0.1,momentum_para = 0.95, batch_size = 16, termination_con = 20, l2_regularization = 0.1, record_history=False):
    """Store the hyper-parameters.

    Args:
      learning_rate: step size applied to the momentum-smoothed gradient.
      momentum_para: weight ``beta`` of the running gradient average.
      batch_size: maximum number of samples in one mini-batch.
      termination_con: number of consecutive non-improving updates that stops training.
      l2_regularization: coefficient ``lambda`` of the penalty ``0.5 * lambda * ||w||^2``.
      record_history: when True, per-update metrics are stored and returned by ``run``.
    """
    self.learning_rate = learning_rate
    self.record_history = record_history
    self.batch_size = batch_size
    self.momentum_para = momentum_para
    self.termination_con = termination_con
    self.l2_regularization = l2_regularization
    # Minimum decrease of the validation error that counts as an improvement
    self.epsilon = 1e-6

  #create_mini_batch func
  def create_mini_batch(self, x):
    """Split a random permutation of ``range(len(x))`` into mini-batches.

    Returns:
      List of index arrays, each holding at most ``batch_size`` indices and
      together covering every row of ``x`` exactly once.
    """
    n_samples = len(x)
    shuffled = np.random.permutation(n_samples)
    # np.array_split takes the NUMBER of chunks, so derive it from the
    # requested chunk size (the original passed batch_size directly, which
    # produced batch_size chunks instead of chunks of batch_size samples).
    per_batch = max(1, int(self.batch_size))
    n_batches = max(1, int(np.ceil(n_samples / per_batch)))
    return np.array_split(shuffled, n_batches)

  #helper method to calculate accuracy/err
  def error(self, x, y, w):
    """Mean squared difference between the predicted class indices and ``y``."""
    return mean_squared_error(np.argmax(softmax(np.matmul(x, w)), axis=1), y)

  #accuracy function
  @staticmethod
  def prediction_accuracy(y_predicted, y_actual):
    """Fraction of positions where ``y_predicted`` equals ``y_actual``.

    Declared static because it uses no instance state; the original
    definition had no ``self`` and could not be called on an instance.
    """
    n_accurate = 0
    for i in range(0, len(y_predicted)):
      if (y_predicted[i] == y_actual[i]):
        n_accurate +=1
    return n_accurate/len(y_predicted)

  #implementing cost function
  def cost(self, x, y, w, k):
    """Mean cross-entropy of the softmax model ``x @ w`` on integer labels ``y``.

    Args:
      x: feature matrix of shape (N, D).
      y: integer class indices of length N.
      w: weight matrix of shape (D, K).
      k: number of classes; kept for interface compatibility (the class
        count is taken from the columns of ``w``).

    Returns:
      Scalar average negative log-likelihood (without the L2 penalty).
    """
    z = np.matmul(x, w)
    # Per-row shift for numerical stability; it cancels in the log-softmax
    z = z - np.max(z, axis=1, keepdims=True)
    # The normaliser must be computed per sample (row), not over the whole batch
    log_partition = np.log(np.sum(np.exp(z), axis=1, keepdims=True))
    log_probs = z - log_partition
    true_class_log_probs = log_probs[np.arange(len(y)), y]
    return -np.mean(true_class_log_probs)

  #implementing run function
  def run(self,gradient_fn, xT, yT, xV, yV, w, num_classes):
    """Optimize ``w`` and return the weights with the lowest validation error.

    Args:
      gradient_fn: callable ``(x_batch, y_batch, w) -> gradient`` of the data loss.
      xT, yT: training features and labels.
      xV, yV: validation features and labels.
      w: initial weight matrix (not modified in place).
      num_classes: number of classes, forwarded to ``cost``.

    Returns:
      ``(best_w, history_df, best_dict)`` where ``history_df`` holds one row per
      update (empty unless ``record_history`` is set) and ``best_dict`` holds
      the validation error, training error, cost and iteration number of the
      best update.
    """
    validation_best = np.inf
    train_best = np.inf
    cost_best = np.inf
    best_iteration = 0
    it = 1
    # Number of consecutive updates without a validation improvement
    stale_updates = 0
    Vw = 0
    beta = self.momentum_para
    Opt_w = w.copy()
    termination = False
    #initialize history
    history = {
        'iteration_no':[],
        'val_error':[],
        'cost':[],
        'train_error':[]
    }

    #Task 2.2 termination condition
    while not termination:
      # Every pass of the while-loop is one epoch over freshly shuffled batches
      for mini_batch in self.create_mini_batch(xT):
        x_train_mini = xT[mini_batch]
        y_train_mini = yT[mini_batch]

        #we calculate grad with momentum
        grad = gradient_fn(x_train_mini, y_train_mini, w)
        # Gradient of the penalty 0.5 * lambda * ||w||^2 is lambda * w.
        # (The original added the scalar penalty value to every entry.)
        grad = grad + self.l2_regularization * w
        Vw = beta*Vw + (1-beta)*grad
        w = w - self.learning_rate*Vw

        #track the error and cost
        validation_error = self.error(xV, yV, w)
        train_error = self.error(x_train_mini, y_train_mini, w)
        cur_cost = self.cost(x_train_mini, y_train_mini, w, num_classes)

        if(self.record_history):
          history['iteration_no'].append(it)
          history['val_error'].append(validation_error)
          history['cost'].append(cur_cost)
          history["train_error"].append(train_error)

        #used to count the iteration numbers
        it += 1

        # Keep the weights whenever the validation error improves by more
        # than epsilon; otherwise count towards the early-stopping limit.
        if validation_best - validation_error > self.epsilon :
          validation_best = validation_error
          train_best = train_error
          cost_best = cur_cost
          best_iteration = it - 1
          Opt_w = w.copy()
          stale_updates = 0
        else:
          stale_updates += 1
          if stale_updates >= self.termination_con:
            termination = True
            break

    best_dict = {
        'val_err_best': validation_best,
        'train_err_best': train_best,
        'cost_best': cost_best,
        'last_iteration': best_iteration
    }

    history_df = pd.DataFrame(data=history, columns=['iteration_no','val_error', 'train_error', 'cost'])
    #return the best w
    return Opt_w, history_df, best_dict

       
    


Analysis for the hyper-parameters of the optimization procedure

In [None]:
# imports
import math
import random
import plotly.express as px
import plotly.graph_objects as go

Defining mean squared error function

In [None]:
# mean squared error function
def valid_err_calc(xV, yV, w):
  """Mean squared difference between predicted class indices and ``yV``.

  The predicted class of each row of ``xV`` is the arg-max of ``softmax(xV @ w)``.
  """
  class_probabilities = softmax(np.matmul(xV, w))
  predicted_classes = np.argmax(class_probabilities, axis=1)
  return mean_squared_error(predicted_classes, yV)

Defining the function to get average number of iterations, cost, val/train error

In [None]:
# get the average number of iterations, cost, validation error, training error
def get_CVT(k_fold_info_dict):
  """Average the per-fold "best" statistics collected during cross-validation.

  Args:
    k_fold_info_dict: mapping with the list-valued keys ``'last_iteration'``,
      ``'cost_best'``, ``'val_err_best'`` and ``'train_err_best'``.

  Returns:
    Dict with keys ``'iterations'``, ``'cost'``, ``'val_err'`` and
    ``'train_err'``, each the ``np.average`` of the corresponding list.
  """
  # (output key, input key) pairs, in the order the result dict is built
  key_pairs = (
      ('iterations', 'last_iteration'),
      ('cost', 'cost_best'),
      ('val_err', 'val_err_best'),
      ('train_err', 'train_err_best'),
  )
  return {out_key: np.average(k_fold_info_dict[in_key]) for out_key, in_key in key_pairs}

Defining the accuracy function

In [None]:
# accuracy function
def prediction_accuracy(y_predicted, y_actual):
  """Return the fraction of positions at which the two label sequences agree.

  Positions are compared by index over ``range(len(y_predicted))``.
  """
  n_total = len(y_predicted)
  n_matching = sum(1 for idx in range(n_total) if y_predicted[idx] == y_actual[idx])
  return n_matching / n_total

Defining the kFold function

In [None]:
def kFold(x,y,k=5):
  """Yield ``k`` shuffled train/held-out splits of ``(x, y)``.

  The row indices are shuffled once and cut into ``k`` nearly equal folds.
  Each fold is held out in turn while the remaining folds form the training
  part.

  Yields:
    ``((x_train, y_train), (x_held_out, y_held_out))`` for every fold.
  """
  row_indices = list(range(len(x)))
  np.random.shuffle(row_indices)
  folds = np.array_split(row_indices, k)
  for fold_no, held_out_idx in enumerate(folds):
    # Training indices are all folds except the one currently held out
    other_folds = folds[:fold_no] + folds[fold_no + 1:]
    train_idx = np.concatenate(other_folds).flatten()
    yield (x[train_idx], y[train_idx]), (x[held_out_idx], y[held_out_idx])

Defining boundary and run_5_fold function

In [None]:
def boundary(list):
    """Turn a 2-D array of class probabilities into class indices.

    Returns, for every row, the column index of its largest entry.
    """
    return np.argmax(list, axis=1)

def run_5_fold(x, y, model, optimizer):
    """Fit and evaluate ``model`` on each of 5 shuffled folds of ``(x, y)``.

    For every fold the model is re-fitted with ``optimizer``; the held-out
    fold is passed to ``fit`` as the validation set and is also used for the
    reported validation predictions.

    Returns:
        ``(dict_info, history_list)``: ``dict_info`` maps each statistic name
        to a list with one entry per fold (predicted probabilities, predicted
        and true classes, accuracies and the optimizer's "best" values);
        ``history_list`` holds the history returned by ``fit`` for each fold.
    """
    per_fold_keys = (
        'val_y_predicted', 'val_class_predicted', 'val_class_true',
        'val_accuracy_list', 'val_err_best',
        'train_y_predicted', 'train_class_predicted', 'train_class_true',
        'train_accuracy_list', 'train_err_best',
        'cost_best', 'last_iteration',
    )
    dict_info = {key: [] for key in per_fold_keys}
    history_list = []

    for (x_train, y_train), (x_test, y_test) in kFold(x, y, 5):
        history, best_dict = model.fit(x_train, y_train, x_test, y_test, optimizer)
        history_list.append(history)

        # Statistics reported by the optimizer for its best update
        for key in ('val_err_best', 'train_err_best', 'cost_best', 'last_iteration'):
            dict_info[key].append(best_dict[key])

        # Class probabilities on the held-out and training parts
        val_probabilities = model.predict(x_test)
        dict_info['val_y_predicted'].append(val_probabilities)

        train_probabilities = model.predict(x_train)
        dict_info['train_y_predicted'].append(train_probabilities)

        # Most probable class per sample
        val_classes = boundary(val_probabilities)
        train_classes = boundary(train_probabilities)

        dict_info['val_class_predicted'].append(val_classes)
        dict_info['val_class_true'].append(y_test)

        dict_info['train_class_predicted'].append(train_classes)
        dict_info['train_class_true'].append(y_train)

        dict_info['val_accuracy_list'].append(prediction_accuracy(val_classes, y_test))
        dict_info['train_accuracy_list'].append(prediction_accuracy(train_classes, y_train))

    return dict_info, history_list

Hyper-parameters of the optimization procedure

In [None]:
# Hyper-parameter grids explored by the searches below
# batch sizes
batch_sizes = np.array([8, 16, 32, 64, 128, 256])
# learning rates: 15 evenly spaced values
learning_rates = np.linspace(0.01, 0.2, 15)
# momentum parameters: 12 evenly spaced values
# (nb_momentums is not used to build the grid below)
nb_momentums = 8
momentums = np.linspace(0.85, 0.99, 12)



Analysis of the Digit Dataset

In [None]:
# Test the SoftmaxRegression model on the digits dataset with hand-picked
# hyper-parameters. info_dict and history_list from this run feed the two
# plots and the accuracy cell that follow.
model = SoftmaxRegression(digits_classes)
# Our 3 Hyper-Parameters:
# Batch size
bs = 8
# Learning Rate
lr = 0.0472
# momentum
momentum = 0.939
optimizer = GradientDescent(learning_rate=lr, momentum_para=momentum, batch_size=bs, termination_con=20, l2_regularization = 0.1, record_history=True)
info_dict, history_list = run_5_fold(digits_x, digits_y, model, optimizer)

Plot the cost graph

In [None]:
# One line per cross-validation fold: mini-batch cost recorded at every update
fig = go.Figure()
for i in range(0, len(history_list)):
  fig.add_trace(go.Scatter(x=history_list[i]['iteration_no'], y=history_list[i]['cost'],
                    mode='lines',
                    name='run ' + str(i+1)))

# Add the title and axis titles
fig.update_layout(title='Cost at each iteration for Softmax Regression of the Digits Dataset',
                   xaxis_title='Number of Iterations',
                   yaxis_title='Cost',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

Plot the Validation Error graph

In [None]:
# One line per cross-validation fold: validation error recorded at every update
fig = go.Figure()
for i in range(0, len(history_list)):
  fig.add_trace(go.Scatter(x=history_list[i]['iteration_no'], y=history_list[i]['val_error'],
                    mode='lines',
                    name='run ' + str(i+1)))


# Add the title and axis titles
fig.update_layout(title='Validation Error at each iteration for Softmax Regression of the Digits Dataset',
                   xaxis_title='Number of Iterations',
                   yaxis_title='Validation Error',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

In [None]:
# Mean validation accuracy over the folds of the most recent run_5_fold call
np.average(info_dict['val_accuracy_list'])


0.9588115134633242

Find the best hyper-parameters combination using simple grid search

In [None]:
# Grid Search
# Cartesian product of batch_sizes, learning_rates and momentums.
# Every combination is scored with 5-fold cross-validation.
model = SoftmaxRegression(digits_classes)
grid_columns = ['batch_size', 'learning_rate', 'momentum_param', 'cost', 'val_err', 'train_err', 'iterations',
                'val_accuracy', 'train_accuracy', 'val_accuracy_list', 'train_accuracy_list']
# Rows are collected in a plain list and turned into a DataFrame once:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call, which made the loop quadratic.
grid_rows = []

for bs in batch_sizes:
  for lr in learning_rates:
    for momentum in momentums:

      optimizer = GradientDescent(learning_rate=lr, momentum_para=momentum, batch_size=bs, termination_con =20, l2_regularization = 0.1,record_history=True)
      info_dict, history_list  = run_5_fold(digits_x, digits_y, model, optimizer)

      # Fold-averaged iterations / cost / errors of the best update
      row_dict = get_CVT(info_dict)
      row_dict['batch_size'] = bs
      row_dict['learning_rate'] = lr
      row_dict['momentum_param'] = momentum

      row_dict['val_accuracy'] = np.average(info_dict['val_accuracy_list'])
      row_dict['train_accuracy'] = np.average(info_dict['train_accuracy_list'])

      row_dict['val_accuracy_list'] = info_dict['val_accuracy_list']
      row_dict['train_accuracy_list'] = info_dict['train_accuracy_list']

      grid_rows.append(row_dict)

grid_df = pd.DataFrame(grid_rows, columns=grid_columns)


Plot the grid search graph

In [None]:
# 3-D scatter of every grid point, coloured by its fold-averaged cost
fig = px.scatter_3d(grid_df, x='batch_size', y='learning_rate', z='momentum_param',
              color='cost', opacity=0.7, log_x=True, title="Grid Search by cost",
              width=500, height=500)

fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))

In [None]:
# Grid row(s) with the lowest fold-averaged cost
grid_df[grid_df.cost == grid_df.cost.min()]

Unnamed: 0,batch_size,learning_rate,momentum_param,cost,val_err,train_err,iterations,train_accuracy,train_accuracy_list,val_accuracy,val_accuracy_list
910,256,0.01,0.977273,40.74689,2.124386,3.133333,64.4,0.882171,"[0.9053583855254002, 0.9046624913013221, 0.899...",0.878674,"[0.8861111111111111, 0.8944444444444445, 0.894..."


In [None]:
# 3-D scatter of every grid point, coloured by its fold-averaged validation error
fig = px.scatter_3d(grid_df, x='batch_size', y='learning_rate', z='momentum_param',
              color='val_err', opacity=0.7, log_x=True,title="Grid Search by Validation Error for the Digits Dataset")

fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))

In [None]:
# Grid row(s) with the lowest fold-averaged validation error
lowest_error = grid_df[grid_df.val_err == grid_df.val_err.min()]
lowest_error

Unnamed: 0,batch_size,learning_rate,momentum_param,cost,val_err,train_err,iterations,train_accuracy,train_accuracy_list,val_accuracy,val_accuracy_list
39,8,0.050714,0.888182,6163.050586,0.629918,0.67193,43.4,0.973428,"[0.97633959638135, 0.9735560194850382, 0.96731...",0.963829,"[0.9583333333333334, 0.9694444444444444, 0.961..."


In [None]:
# 3-D scatter of every grid point, coloured by its fold-averaged validation accuracy
fig = px.scatter_3d(grid_df, x='batch_size', y='learning_rate', z='momentum_param',
              color='val_accuracy', opacity=0.7, log_x=True,
              title="Grid Search by Validation Accuracy")

fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))

In [None]:
# Grid row(s) with the highest fold-averaged validation accuracy. The best
# hyper-parameters used by the following cells are read from the first row.
highest_accuracy = grid_df[grid_df.val_accuracy == grid_df.val_accuracy.max()]
saved_accuracy_data1 = highest_accuracy
highest_accuracy

Unnamed: 0,batch_size,learning_rate,momentum_param,cost,val_err,train_err,iterations,train_accuracy,train_accuracy_list,val_accuracy,val_accuracy_list
24,8,0.037143,0.85,10348.966159,0.686137,0.428808,48.2,0.972316,"[0.97633959638135, 0.9742519137091162, 0.96453...",0.968841,"[0.9555555555555556, 0.975, 0.9805013927576601..."


Get the best hyper-parameters

In [None]:
# Take the hyper-parameters from the first row of highest_accuracy
# (if several grid points tie, only the first one is used)
best_bs = highest_accuracy['batch_size'].values[0]
best_lr = highest_accuracy['learning_rate'].values[0]
best_momentum = highest_accuracy['momentum_param'].values[0]

print(best_bs)
print(best_lr)
print(best_momentum)

8
0.037142857142857144
0.85


Test Run

In [None]:
# Re-run 5-fold cross-validation with the best grid-search hyper-parameters.
# info_dict1 / history_list1 feed the plots and confusion matrix below.
model = SoftmaxRegression(digits_classes)
# We will choose the model with the highest accuracy
# Our 3 Hyper-Parameters:
# Batch size
bs = highest_accuracy['batch_size'].values[0]
# Learning Rate
lr = highest_accuracy['learning_rate'].values[0]
# momentum
momentum = highest_accuracy['momentum_param'].values[0]


optimizer = GradientDescent(learning_rate=lr, momentum_para=momentum, batch_size=bs, termination_con=20, l2_regularization = 0.1,record_history=True)
info_dict1, history_list1 = run_5_fold(digits_x, digits_y, model, optimizer)

Plot the cost graph

In [None]:
# One line per cross-validation fold of the best-hyper-parameter run
fig = go.Figure()
# Iterate over history_list1 itself: the original bounded the loop with the
# length of history_list, a different run's result.
for i in range(0, len(history_list1)):
  fig.add_trace(go.Scatter(x=history_list1[i]['iteration_no'], y=history_list1[i]['cost'],
                    mode='lines',
                    name='run ' + str(i+1)))


# Add the title and axis titles
fig.update_layout(title='Cost at each iteration for Softmax Regression of the Digits Dataset',
                   xaxis_title='Number of Iterations',
                   yaxis_title='Cost',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

Plot the validation error graph

In [None]:
# One line per cross-validation fold of the best-hyper-parameter run
fig = go.Figure()

# Iterate over history_list1 itself: the original bounded the loop with the
# length of history_list, a different run's result.
for i in range(0, len(history_list1)):

  fig.add_trace(go.Scatter(x=history_list1[i]['iteration_no'], y=history_list1[i]['val_error'],
                    mode='lines',
                    name='run ' + str(i+1)))


# Add the title and axis titles
fig.update_layout(title='Validation Error at each iteration for Softmax Regression of the Digits Dataset',
                   xaxis_title='Number of Iterations',
                   yaxis_title='Validation Error',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

Confusion Matrix

In [None]:
# Flatten the per-fold true and predicted validation classes into one array each
y_true = np.concatenate((info_dict1['val_class_true']), axis=None)
y_predict = np.concatenate((info_dict1['val_class_predicted']), axis=None)

In [None]:
from sklearn.metrics import confusion_matrix
# Confusion matrix of the pooled validation predictions (true labels passed first)
confusionM = confusion_matrix(y_true, y_predict)

In [None]:
import plotly.figure_factory as ff

# Axis tick labels: one string per class index
x = [str(i) for i in range(0, confusionM.shape[0])]
y = [str(i) for i in range(0, confusionM.shape[1])]

# Annotated heatmap of the confusion matrix computed above
fig = ff.create_annotated_heatmap(confusionM, x=x, y=y, colorscale='Viridis')
fig.update_layout(
    margin=dict(l=0, r=0, b=0, t=150),
    title="Confusion Matrix for the Classification of the Digits Dataset Using Softmax Regression",
    xaxis_title="Predicted Labels",
    yaxis_title="True Labels"
)



fig.show()

Variation of Momentum 

In [None]:
# Sweep the momentum parameter while holding batch size and learning rate at
# the best grid-search values; one summary row per momentum value.
model = SoftmaxRegression(digits_classes)

sweep_columns = ['batch_size', 'learning_rate', 'momentum_param', 'cost', 'val_err', 'train_err', 'iterations', 'val_accuracy', 'train_accuracy']
# Rows are collected in a list and converted once; DataFrame.append was
# removed in pandas 2.0.
sweep_rows = []

bs = best_bs
lr = best_lr
for momentum in momentums:

  optimizer = GradientDescent(learning_rate=lr, momentum_para=momentum, batch_size=bs, termination_con=20, l2_regularization = 0.1,record_history=True)
  info_dict2, history_list2  = run_5_fold(digits_x, digits_y, model, optimizer)

  row_dict = get_CVT(info_dict2)
  row_dict['batch_size'] = bs
  row_dict['learning_rate'] = lr
  row_dict['momentum_param'] = momentum

  # Each accuracy column is filled from its own list (the original assigned
  # the training accuracies to 'val_accuracy' and vice versa).
  row_dict['val_accuracy'] = np.average(info_dict2['val_accuracy_list'])
  row_dict['train_accuracy'] = np.average(info_dict2['train_accuracy_list'])

  sweep_rows.append(row_dict)

grid_df2 = pd.DataFrame(sweep_rows, columns=sweep_columns)

Training Accuracy Graph

In [None]:
# Fold-averaged training accuracy for every momentum value of the sweep
fig = go.Figure()

fig.add_trace(go.Scatter(x=grid_df2['momentum_param'], y=grid_df2['train_accuracy'],
                    mode='lines'))


# Add the title and axis titles. The original title ended after
# "Batch Size of "; the fixed values are now filled in, as in the
# validation-accuracy plot of this section.
fig.update_layout(title='Training Accuracy vs Momentum with Batch Size of ' + str(best_bs) + ' and Learning Rate of ' + str(best_lr),
                   xaxis_title='Momentum',
                   yaxis_title='Training Accuracy',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

Validation Accuracy Graph

In [None]:
# Fold-averaged validation accuracy for every momentum value of the sweep
fig = go.Figure()


fig.add_trace(go.Scatter(x=grid_df2['momentum_param'], y=grid_df2['val_accuracy'],
                    mode='lines'))


# Add the title and axis titles
fig.update_layout(title='Validation Accuracy vs Momentum with Batch Size of ' + str(best_bs) + ' and Learning Rate of ' + str(best_lr),
                   xaxis_title='Momentum',
                   yaxis_title='Validation Accuracy',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

In [None]:
# Training and validation accuracy on the same axes for the momentum sweep
fig = go.Figure()

fig.add_trace(go.Scatter(x=grid_df2['momentum_param'], y=grid_df2['train_accuracy'],
                    mode='lines', name="Training Accuracy"))
fig.add_trace(go.Scatter(x=grid_df2['momentum_param'], y=grid_df2['val_accuracy'],
                    mode='lines', name="Validation Accuracy"))


# Add the title and axis titles. Both accuracies share the y-axis, so it is
# labelled generically (it previously read 'Validation Accuracy').
fig.update_layout(title='Accuracies vs Momentum' ,
                   xaxis_title='Momentum',
                   yaxis_title='Accuracy',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

Cost Graph

In [None]:
# Fold-averaged best cost for every momentum value of the sweep
fig = go.Figure()

fig.add_trace(go.Scatter(x=grid_df2['momentum_param'], y=grid_df2['cost'],
                    mode='lines'))


# Add the title and axis titles
fig.update_layout(title='Cost vs Momentum with Batch Size of ' + str(best_bs) + ' and Learning Rate of ' + str(best_lr),
                   xaxis_title='Momentum',
                   yaxis_title='Cost',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

Measure The Runtime of the Algorithm

In [None]:
import timeit
# times = timeit.Timer(run_5_fold(digits_x, digits_y, model, optimizer)).timeit(1);

In [None]:
# Time one full 5-fold run per momentum value (batch size and learning rate
# fixed at the best grid-search values); results are in seconds.
model = SoftmaxRegression(digits_classes)

runtime_list = []

bs = best_bs
lr = best_lr
for momentum in momentums:
  
  optimizer = GradientDescent(learning_rate=lr, momentum_para=momentum, batch_size=bs, termination_con=20, l2_regularization = 0.1,record_history=True)
  # timeit(1) executes the run exactly once and returns the elapsed time
  times = timeit.Timer(lambda: run_5_fold(digits_x, digits_y, model, optimizer)).timeit(1);
  runtime_list.append(times)

In [None]:
# Wall-clock time of one 5-fold run for every momentum value
fig = go.Figure()

fig.add_trace(go.Scatter(x=momentums, y=runtime_list,
                    mode='lines'))


# Add the title and axis titles
fig.update_layout(title='Runtime vs Momentum',
                   xaxis_title='Momentum',
                   yaxis_title='Runtime (in seconds)',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

Combined graph: accuracies and runtime vs momentum

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Accuracies on the primary y-axis and runtime on a secondary y-axis,
# all against the momentum values of the sweep.
# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(x=momentums, y=grid_df2['train_accuracy'], name="Training Accuracy"),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=momentums, y=grid_df2['val_accuracy'], name="Validation Accuracy"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=momentums, y=runtime_list, name="Runtime"),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="Accuracies and Runtime vs Momentum",
    title_x=0.5
)

# Set x-axis title
fig.update_xaxes(title_text="Momentum")

# Set y-axes titles
fig.update_yaxes(title_text="Accuracy", secondary_y=False)
fig.update_yaxes(title_text="Runtime (in seconds)", secondary_y=True)

fig.show()

Variation of Batch Size

In [None]:
# Sweep the batch size while holding learning rate and momentum at the best
# grid-search values; one summary row per batch size.
model = SoftmaxRegression(digits_classes)

sweep_columns = ['batch_size', 'learning_rate', 'momentum_param', 'cost', 'val_err', 'train_err', 'iterations', 'val_accuracy', 'train_accuracy']
# Rows are collected in a list and converted once; DataFrame.append was
# removed in pandas 2.0.
sweep_rows = []

lr = best_lr
momentum = best_momentum
for bs in batch_sizes:

  optimizer = GradientDescent(learning_rate=lr, momentum_para=momentum, batch_size=bs, termination_con=20, l2_regularization = 0.1,record_history=True)
  info_dict2, history_list2  = run_5_fold(digits_x, digits_y, model, optimizer)

  row_dict = get_CVT(info_dict2)
  row_dict['batch_size'] = bs
  row_dict['learning_rate'] = lr
  row_dict['momentum_param'] = momentum

  # Each accuracy column is filled from its own list (the original assigned
  # the training accuracies to 'val_accuracy' and vice versa).
  row_dict['val_accuracy'] = np.average(info_dict2['val_accuracy_list'])
  row_dict['train_accuracy'] = np.average(info_dict2['train_accuracy_list'])

  sweep_rows.append(row_dict)

grid_df2 = pd.DataFrame(sweep_rows, columns=sweep_columns)

Training Accuracy Graph

In [None]:
# Fold-averaged training accuracy for every batch size of the sweep
fig = go.Figure()

fig.add_trace(go.Scatter(x=grid_df2['batch_size'], y=grid_df2['train_accuracy'],
                    mode='lines'))


# Add the title and axis titles
fig.update_layout(title='Training Accuracy vs Batch Size with Momentum Parameter of ' + str(momentum) + ' and Learning Rate of ' + str(best_lr),
                   xaxis_title='Batch Size',
                   yaxis_title='Training Accuracy',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

Validation Accuracy Graph

In [None]:
# Fold-averaged validation accuracy for every batch size of the sweep
fig = go.Figure()

fig.add_trace(go.Scatter(x=grid_df2['batch_size'], y=grid_df2['val_accuracy'],
                    mode='lines'))


# Add the title and axis titles
fig.update_layout(title='Validation Accuracy vs Batch Size with Momentum Parameter of ' + str(momentum) + ' and Learning Rate of ' + str(best_lr),
                   xaxis_title='Batch Size',
                   yaxis_title='Validation Accuracy',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

In [None]:
# Training and validation accuracy on the same axes for the batch-size sweep
fig = go.Figure()

fig.add_trace(go.Scatter(x=grid_df2['batch_size'], y=grid_df2['train_accuracy'],
                    mode='lines',
                    name="Training Accuracy"))

fig.add_trace(go.Scatter(x=grid_df2['batch_size'], y=grid_df2['val_accuracy'],
                    mode='lines',
                    name="Validation Accuracy"))


# Add the title and axis titles. Both accuracies share the y-axis, so it is
# labelled generically (it previously read 'Validation Accuracy').
fig.update_layout(title='Accuracies vs Batch Size',
                   xaxis_title='Batch Size',
                   yaxis_title='Accuracy',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

Cost Graph


In [None]:
# Fold-averaged best cost for every batch size of the sweep
fig = go.Figure()

fig.add_trace(go.Scatter(x=grid_df2['batch_size'], y=grid_df2['cost'],
                    mode='lines'))


# Add the title and axis titles
fig.update_layout(title='Cost vs Batch Size with Momentum Parameter of ' + str(momentum) + ' and Learning Rate of ' + str(best_lr),
                   xaxis_title='Batch Size',
                   yaxis_title='Cost',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

Measure the Runtime of the Algorithm vs batch_sizes

In [None]:
# Time one full 5-fold run per batch size (momentum and learning rate fixed
# at the best grid-search values); results are in seconds.
model = SoftmaxRegression(digits_classes)

runtime_list = []

momentum = best_momentum
lr = best_lr
for bs in batch_sizes:
  
  optimizer = GradientDescent(learning_rate=lr, momentum_para=momentum, batch_size=bs, termination_con=20, l2_regularization = 0.1,record_history=True)
  # timeit(1) executes the run exactly once and returns the elapsed time
  times = timeit.Timer(lambda: run_5_fold(digits_x, digits_y, model, optimizer)).timeit(1);
  runtime_list.append(times)

In [None]:
# Runtimes in seconds, one per entry of batch_sizes
print (runtime_list)

[0.5352209210000183, 0.4493936889998622, 0.39024341000003915, 0.3691168390000712, 0.31273521200000687, 0.28093342400006804]


In [None]:
# Wall-clock time of one 5-fold run for every batch size
fig = go.Figure()

fig.add_trace(go.Scatter(x=batch_sizes, y=runtime_list,
                    mode='lines'))


# Add the title and axis titles
fig.update_layout(title='Runtime vs Batch Size',
                   xaxis_title='Batch Size',
                   yaxis_title='Runtime (in seconds)',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

Combined graph: accuracies and runtime vs batch size

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Accuracies on the primary y-axis and runtime on a secondary y-axis,
# all against the batch sizes of the sweep.
# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(x=batch_sizes, y=grid_df2['train_accuracy'], name="Training Accuracy"),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=batch_sizes, y=grid_df2['val_accuracy'], name="Validation Accuracy"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=batch_sizes, y=runtime_list, name="Runtime"),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="Accuracies and Runtime vs Batch Size",
    title_x=0.5
)

# Set x-axis title
fig.update_xaxes(title_text="Batch Size")

# Set y-axes titles
fig.update_yaxes(title_text="Accuracy", secondary_y=False)
fig.update_yaxes(title_text="Runtime (in seconds)", secondary_y=True)

fig.show()

Variation of Learning Rate

In [None]:
# Sweep the learning rate while holding batch size and momentum at the best
# grid-search values; one summary row per learning rate.
model = SoftmaxRegression(digits_classes)

sweep_columns = ['batch_size', 'learning_rate', 'momentum_param', 'cost', 'val_err', 'train_err', 'iterations', 'val_accuracy', 'train_accuracy']
# Rows are collected in a list and converted once; DataFrame.append was
# removed in pandas 2.0.
sweep_rows = []

bs = best_bs
momentum = best_momentum
for lr in learning_rates:

  optimizer = GradientDescent(learning_rate=lr, momentum_para=momentum, batch_size=bs, termination_con=20, l2_regularization = 0.1,record_history=True)
  info_dict2, history_list2  = run_5_fold(digits_x, digits_y, model, optimizer)

  row_dict = get_CVT(info_dict2)
  row_dict['batch_size'] = bs
  row_dict['learning_rate'] = lr
  row_dict['momentum_param'] = momentum

  # Each accuracy column is filled from its own list (the original assigned
  # the training accuracies to 'val_accuracy' and vice versa).
  row_dict['val_accuracy'] = np.average(info_dict2['val_accuracy_list'])
  row_dict['train_accuracy'] = np.average(info_dict2['train_accuracy_list'])

  sweep_rows.append(row_dict)

grid_df2 = pd.DataFrame(sweep_rows, columns=sweep_columns)

Training Accuracy Graph

In [None]:
# Fold-averaged training accuracy for every learning rate of the sweep
fig = go.Figure()

fig.add_trace(go.Scatter(x=grid_df2['learning_rate'], y=grid_df2['train_accuracy'],
                    mode='lines'))


# Add the title and axis titles
fig.update_layout(title='Training Accuracy vs Learning Rate with Batch Size of ' + str(best_bs) + ' and Momentum Parameter of ' + str(momentum),
                   xaxis_title='Learning Rate',
                   yaxis_title='Training Accuracy',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

Training and Validation Accuracy Graph

In [None]:
# Training and validation accuracy on the same axes for the learning-rate sweep
fig = go.Figure()

fig.add_trace(go.Scatter(x=grid_df2['learning_rate'], y=grid_df2['train_accuracy'],
                    mode='lines',
                    name="Training Accuracy"))

fig.add_trace(go.Scatter(x=grid_df2['learning_rate'], y=grid_df2['val_accuracy'],
                    mode='lines',
                    name="Validation Accuracy"))


# Add the title and axis titles. The figure shows both accuracies, so the
# title and y-axis no longer name only one of them.
fig.update_layout(title='Accuracies vs Learning Rate',
                   xaxis_title='Learning Rate',
                   yaxis_title='Accuracy',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

In [None]:
# Fold-averaged validation accuracy for every learning rate of the sweep
fig = go.Figure()

fig.add_trace(go.Scatter(x=grid_df2['learning_rate'], y=grid_df2['val_accuracy'],
                    mode='lines'))


# Add the title and axis titles. Only the validation accuracy is plotted, so
# the title and y-axis now say so (they read 'Accuracies' / 'Training Accuracy').
fig.update_layout(title='Validation Accuracy vs Learning Rate with Batch Size of ' + str(best_bs) + ' and Momentum Parameter of ' + str(momentum),
                   xaxis_title='Learning Rate',
                   yaxis_title='Validation Accuracy',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

Cost Graph

In [None]:
# Fold-averaged best cost for every learning rate of the sweep
fig = go.Figure()

fig.add_trace(go.Scatter(x=grid_df2['learning_rate'], y=grid_df2['cost'],
                    mode='lines'))


# Add the title and axis titles. The y-axis shows the cost (it was
# labelled 'Training Accuracy').
fig.update_layout(title='Cost vs Learning Rate with Batch Size of ' + str(best_bs) + ' and Momentum Parameter of ' + str(momentum),
                   xaxis_title='Learning Rate',
                   yaxis_title='Cost',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

Measure the Runtime of the Algorithm vs learning_rates

In [None]:
# Time one full 5-fold run per learning rate (batch size and momentum fixed
# at the best grid-search values); results are in seconds.
model = SoftmaxRegression(digits_classes)

runtime_list = []

bs = best_bs
momentum = best_momentum
for lr in learning_rates:

  optimizer = GradientDescent(learning_rate=lr, momentum_para=momentum, batch_size=bs, termination_con=20, l2_regularization = 0.1,record_history=True)
  # timeit(1) executes the run exactly once and returns the elapsed time
  t = timeit.Timer(lambda: run_5_fold(digits_x, digits_y, model, optimizer))
  runtime_list.append(t.timeit(1))

In [None]:
# Runtimes in seconds, one per entry of learning_rates
print (runtime_list)

[0.6861076470004264, 0.7803795830004674, 0.7044862220000141, 0.7038769740001953, 0.5979141540001365, 0.5459299459998874, 0.5255852719992617, 0.47090466800000286, 0.45789466699989134, 0.4538694080001733, 0.43734076200053096, 0.4402171609999641]


In [None]:
# Wall-clock time of one 5-fold run for every learning rate
fig = go.Figure()

# runtime_list was measured over learning_rates, so that is the x-axis
# (the original plotted it against the momentum grid).
fig.add_trace(go.Scatter(x=learning_rates, y=runtime_list,
                    mode='lines'))


# Add the title and axis titles
fig.update_layout(title='Runtime vs Learning Rate with Batch Size of ' + str(best_bs) + ' and Momentum Parameter of ' + str(momentum),
                   xaxis_title='Learning Rate',
                   yaxis_title='Runtime (in seconds)',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

Combined graph: accuracies and runtime vs learning rate

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Single panel with two y-axes: accuracies on the primary axis, runtime on the secondary one
fig = make_subplots(specs=[[{"secondary_y": True}]])

# (values, legend name, use secondary axis?)
for series, label, on_secondary in [
    (grid_df2['train_accuracy'], "Training Accuracy", False),
    (grid_df2['val_accuracy'], "Validation Accuracy", False),
    (runtime_list, "Runtime", True),
]:
    fig.add_trace(
        go.Scatter(x=learning_rates, y=series, name=label),
        secondary_y=on_secondary,
    )

# Centred figure title
fig.update_layout(title_text="Accuracies and Runtime vs Learning Rate", title_x=0.5)

# Axis titles
fig.update_xaxes(title_text="Learning Rate")
fig.update_yaxes(title_text="Accuracy", secondary_y=False)
fig.update_yaxes(title_text="Runtime (in seconds)", secondary_y=True)

fig.show()

In [None]:
model = SoftmaxRegression(digits_classes)

# We will choose the model with the highest accuracy
# Our 3 Hyper-Parameters:
# Batch size
bs = 8
# Learning Rate
lr = 0.0507
# momentum
momentum = 0.901

optimizer = GradientDescent(learning_rate=lr, momentum_para=momentum, batch_size=bs, termination_con=20, l2_regularization = 0.1,record_history=True)
info_dict1, history_list1 = run_5_fold(digits_x, digits_y, model, optimizer)

row_dict = get_CVT(info_dict1)
row_dict['batch_size'] = bs
row_dict['learning_rate'] = lr
row_dict['momentum_param'] = momentum

# Average the per-run accuracies; each summary key reads the list of the same name
row_dict['val_accuracy'] = np.average(info_dict1['val_accuracy_list'])
row_dict['train_accuracy'] = np.average(info_dict1['train_accuracy_list'])


# Cost curves: one trace per entry of history_list1 (the histories of this run)
fig = go.Figure()

for i in range(0, len(history_list1)):

  fig.add_trace(go.Scatter(x=history_list1[i]['iteration_no'], y=history_list1[i]['cost'],
                    mode='lines',
                    name='run ' + str(i+1)))


# Add the title and axis titles
fig.update_layout(title='Cost at each iteration for Softmax Regression of the Digits Dataset',
                   xaxis_title='Number of Iterations',
                   yaxis_title='Cost',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

# Validation-error curves, again one trace per entry of history_list1
fig = go.Figure()

for i in range(0, len(history_list1)):

  fig.add_trace(go.Scatter(x=history_list1[i]['iteration_no'], y=history_list1[i]['val_error'],
                    mode='lines',
                    name='run ' + str(i+1)))


# Add the title and axis titles
fig.update_layout(title='Validation Error at each iteration for Softmax Regression of the Digits Dataset',
                   xaxis_title='Number of Iterations',
                   yaxis_title='Validation Error',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()


print(row_dict['val_accuracy'])
print(row_dict['train_accuracy'])

0.970367585072827
0.9643887341380377


In [None]:
model = SoftmaxRegression(digits_classes)

# We will choose the model with the highest accuracy
# Our 3 Hyper-Parameters:
# Batch size
bs = 8
# Learning Rate
lr = 0.0472
# momentum
momentum = 0.8882

optimizer = GradientDescent(learning_rate=lr, momentum_para=momentum, batch_size=bs, termination_con=20, l2_regularization = 0.1,record_history=True)
info_dict1, history_list1 = run_5_fold(digits_x, digits_y, model, optimizer)

row_dict = get_CVT(info_dict1)
row_dict['batch_size'] = bs
row_dict['learning_rate'] = lr
row_dict['momentum_param'] = momentum

# Average the per-run accuracies; each summary key reads the list of the same name
row_dict['val_accuracy'] = np.average(info_dict1['val_accuracy_list'])
row_dict['train_accuracy'] = np.average(info_dict1['train_accuracy_list'])


# Cost curves: one trace per entry of history_list1 (the histories of this run)
fig = go.Figure()

for i in range(0, len(history_list1)):

  fig.add_trace(go.Scatter(x=history_list1[i]['iteration_no'], y=history_list1[i]['cost'],
                    mode='lines',
                    name='run ' + str(i+1)))


# Add the title and axis titles
fig.update_layout(title='Cost at each iteration for Softmax Regression of the Digits Dataset',
                   xaxis_title='Number of Iterations',
                   yaxis_title='Cost',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

# Validation-error curves, again one trace per entry of history_list1
fig = go.Figure()

for i in range(0, len(history_list1)):

  fig.add_trace(go.Scatter(x=history_list1[i]['iteration_no'], y=history_list1[i]['val_error'],
                    mode='lines',
                    name='run ' + str(i+1)))


# Add the title and axis titles
fig.update_layout(title='Validation Error at each iteration for Softmax Regression of the Digits Dataset',
                   xaxis_title='Number of Iterations',
                   yaxis_title='Validation Error',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()


print(row_dict['val_accuracy'])
print(row_dict['train_accuracy'])

0.9705057960536314
0.9632698854843701


In [None]:
model = SoftmaxRegression(digits_classes)

# We will choose the model with the highest accuracy
# Our 3 Hyper-Parameters:
# Batch size
bs = 8
# Learning Rate
lr = 0.01818
# momentum
momentum = 0.9009

optimizer = GradientDescent(learning_rate=lr, momentum_para=momentum, batch_size=bs, termination_con=20, l2_regularization = 0.1,record_history=True)
info_dict1, history_list1 = run_5_fold(digits_x, digits_y, model, optimizer)

row_dict = get_CVT(info_dict1)
row_dict['batch_size'] = bs
row_dict['learning_rate'] = lr
row_dict['momentum_param'] = momentum

# Average the per-run accuracies; each summary key reads the list of the same name
row_dict['val_accuracy'] = np.average(info_dict1['val_accuracy_list'])
row_dict['train_accuracy'] = np.average(info_dict1['train_accuracy_list'])


# Cost curves: one trace per entry of history_list1 (the histories of this run)
fig = go.Figure()

for i in range(0, len(history_list1)):

  fig.add_trace(go.Scatter(x=history_list1[i]['iteration_no'], y=history_list1[i]['cost'],
                    mode='lines',
                    name='run ' + str(i+1)))


# Add the title and axis titles
fig.update_layout(title='Cost at each iteration for Softmax Regression of the Digits Dataset',
                   xaxis_title='Number of Iterations',
                   yaxis_title='Cost',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

# Validation-error curves, again one trace per entry of history_list1
fig = go.Figure()

for i in range(0, len(history_list1)):

  fig.add_trace(go.Scatter(x=history_list1[i]['iteration_no'], y=history_list1[i]['val_error'],
                    mode='lines',
                    name='run ' + str(i+1)))


# Add the title and axis titles
fig.update_layout(title='Validation Error at each iteration for Softmax Regression of the Digits Dataset',
                   xaxis_title='Number of Iterations',
                   yaxis_title='Validation Error',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()


print(row_dict['val_accuracy'])
print(row_dict['train_accuracy'])

0.9688371017118611
0.9593763540699474


Analysis of the Iris Dataset

Test Run

In [None]:
# Hyper-parameters for a first trial run on the Iris data
bs = 8          # batch size
lr = 0.01       # learning rate
momentum = 0.9  # momentum

model = SoftmaxRegression(3)

optimizer = GradientDescent(
    learning_rate=lr,
    momentum_para=momentum,
    batch_size=bs,
    termination_con=20,
    l2_regularization=0.1,
    record_history=True,
)
info_dict, history_list = run_5_fold(iris_x, iris_y, model, optimizer)

In [None]:
# Cost per iteration, one line per entry of history_list
fig = go.Figure()

for i in range(len(history_list)):
  history = history_list[i]
  run_trace = go.Scatter(
      x=history['iteration_no'],
      y=history['cost'],
      mode='lines',
      name='run ' + str(i+1),
  )
  fig.add_trace(run_trace)

# Title, axis titles and figure size
fig.update_layout(
    title='Cost at each iteration for Softmax Regression of the Iris Dataset',
    xaxis_title='Number of Iterations',
    yaxis_title='Cost',
    width=800,
    height=300,
    margin=dict(l=50, r=50, b=30, t=30, pad=2),
)

fig.show()

In [None]:
# Validation error per iteration, one line per entry of history_list
fig = go.Figure()

for i in range(len(history_list)):
  history = history_list[i]
  run_trace = go.Scatter(
      x=history['iteration_no'],
      y=history['val_error'],
      mode='lines',
      name='run ' + str(i+1),
  )
  fig.add_trace(run_trace)

# Title, axis titles and figure size
fig.update_layout(
    title='Validation Error at each iteration for Softmax Regression of the Iris Dataset',
    xaxis_title='Number of Iterations',
    yaxis_title='Validation Error',
    width=800,
    height=300,
    margin=dict(l=50, r=50, b=30, t=30, pad=2),
)

fig.show()

Grid search to find the best hyper-parameters combination for Iris dataset

In [None]:
# Grid Search
# Cartesian product of batch_sizes, learning_rates and momentums

model = SoftmaxRegression(3)

grid_columns = ['batch_size', 'learning_rate', 'momentum_param', 'cost', 'val_err', 'train_err', 'iterations']
# Rows are gathered in a plain list and converted to a DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copies the whole frame on every call.
grid_rows = []

for bs in batch_sizes:
  for lr in learning_rates:
    for momentum in momentums:

      optimizer = GradientDescent(learning_rate=lr, momentum_para=momentum, batch_size=bs, termination_con =20, l2_regularization = 0.1,record_history=True)
      info_dict, history_list  = run_5_fold(iris_x, iris_y, model, optimizer)

      # Summary returned by get_CVT, extended with the hyper-parameters of this combination
      row_dict = get_CVT(info_dict)
      row_dict['batch_size'] = bs
      row_dict['learning_rate'] = lr
      row_dict['momentum_param'] = momentum

      # Averages of the per-run accuracies
      row_dict['val_accuracy'] = np.average(info_dict['val_accuracy_list'])
      row_dict['train_accuracy'] = np.average(info_dict['train_accuracy_list'])

      # Keep the raw per-run accuracies as well
      row_dict['val_accuracy_list'] = info_dict['val_accuracy_list']
      row_dict['train_accuracy_list'] = info_dict['train_accuracy_list']

      # Store a copy so later changes to row_dict cannot alter a stored row
      grid_rows.append(dict(row_dict))

grid_df = pd.DataFrame(grid_rows)
# Put the declared columns first (created as NaN if absent); any extra keys follow
grid_df = grid_df.reindex(columns=grid_columns + [c for c in grid_df.columns if c not in grid_columns])


Plot simple grid search graph

In [None]:
# 3-D scatter of the grid-search results, coloured by the 'cost' column
scatter_options = dict(
    x='batch_size',
    y='learning_rate',
    z='momentum_param',
    color='cost',
    opacity=0.7,
    log_x=True,
    title="Grid Search by cost",
    width=500,
    height=500,
)
fig = px.scatter_3d(grid_df, **scatter_options)

# Strip the outer margins; as the last expression this also displays the figure
fig.update_layout(margin={'l': 0, 'r': 0, 'b': 0, 't': 0})

In [None]:
# 3-D scatter of the Iris grid-search results, coloured by the 'val_err' column.
# grid_df is filled by the Iris grid search, so the title names the Iris dataset.
fig = px.scatter_3d(grid_df, x='batch_size', y='learning_rate', z='momentum_param',
              color='val_err', opacity=0.7, log_x=True,title="Grid Search by Validation Error for the Iris Dataset")

fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))

In [None]:
# 3-D scatter of the grid-search results, coloured by the 'val_accuracy' column
scatter_options = dict(
    x='batch_size',
    y='learning_rate',
    z='momentum_param',
    color='val_accuracy',
    opacity=0.7,
    log_x=True,
    title="Grid Search by Validation Accuracy",
)
fig = px.scatter_3d(grid_df, **scatter_options)

# Strip the outer margins; as the last expression this also displays the figure
fig.update_layout(margin={'l': 0, 'r': 0, 'b': 0, 't': 0})

In [None]:
# Rows of the grid search whose validation accuracy equals the maximum (ties are all kept)
best_val_accuracy = grid_df['val_accuracy'].max()
highest_accuracy = grid_df[grid_df['val_accuracy'] == best_val_accuracy]

saved_accuracy_data2 = highest_accuracy
highest_accuracy

Unnamed: 0,batch_size,learning_rate,momentum_param,cost,val_err,train_err,iterations,train_accuracy,train_accuracy_list,val_accuracy,val_accuracy_list
329,16,0.172857,0.913636,44.059171,0.006667,0.053571,35.0,0.943333,"[0.9666666666666667, 0.9166666666666666, 0.9, ...",0.993333,"[1.0, 1.0, 1.0, 1.0, 0.9666666666666667]"


Get the best hyper-parameters

In [None]:
# Take the hyper-parameters from the first row of highest_accuracy
best_bs, best_lr, best_momentum = (
    highest_accuracy[column].values[0]
    for column in ('batch_size', 'learning_rate', 'momentum_param')
)

for best_value in (best_bs, best_lr, best_momentum):
  print(best_value)

8
0.05090909090909091
0.9136363636363636


Test Run

In [None]:
# This run feeds the Iris plots and the 3-class confusion matrix that follow, so the
# model is sized for the Iris classes and trained on the Iris data.
model = SoftmaxRegression(len(iris_label_map))

# We will choose the model with the highest accuracy
# Our 3 Hyper-Parameters:
# Batch size
# bs = highest_accuracy['batch_size'].values[0]
bs = 16
# Learning Rate
# lr = highest_accuracy['learning_rate'].values[0]
lr = 0.0883
# momentum
# momentum = highest_accuracy['momentum_param'].values[0]
momentum = 0.8881

optimizer = GradientDescent(learning_rate=lr, momentum_para=momentum, batch_size=bs, termination_con=20, l2_regularization = 0.1,record_history=True)
info_dict1, history_list1 = run_5_fold(iris_x, iris_y, model, optimizer)

In [None]:
# Cost per iteration for the run stored in history_list1.
# The loop is bounded by history_list1, the same list that is indexed below.
fig = go.Figure()

for i in range(0, len(history_list1)):

  fig.add_trace(go.Scatter(x=history_list1[i]['iteration_no'], y=history_list1[i]['cost'],
                    mode='lines',
                    name='run ' + str(i+1)))


# Add the title and axis titles
fig.update_layout(title='Cost at each iteration for Softmax Regression of the Iris Dataset',
                   xaxis_title='Number of Iterations',
                   yaxis_title='Cost',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

In [None]:
# Training error per iteration for the run stored in history_list1.
# The traces read the 'train_error' key, so the title and y-axis say "Training Error".
fig = go.Figure()

for i in range(0, len(history_list1)):

  fig.add_trace(go.Scatter(x=history_list1[i]['iteration_no'], y=history_list1[i]['train_error'],
                    mode='lines',
                    name='run ' + str(i+1)))


# Add the title and axis titles
fig.update_layout(title='Training Error at each iteration for Softmax Regression of the Iris Dataset',
                   xaxis_title='Number of Iterations',
                   yaxis_title='Training Error',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

In [None]:
# Validation error per iteration for the run stored in history_list1.
# The loop is bounded by history_list1, the same list that is indexed below.
fig = go.Figure()

for i in range(0, len(history_list1)):

  fig.add_trace(go.Scatter(x=history_list1[i]['iteration_no'], y=history_list1[i]['val_error'],
                    mode='lines',
                    name='run ' + str(i+1)))


# Add the title and axis titles
fig.update_layout(title='Validation Error at each iteration for Softmax Regression of the Iris Dataset',
                   xaxis_title='Number of Iterations',
                   yaxis_title='Validation Error',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

Confusion Matrix

In [None]:

# Flatten the stored validation labels into single 1-D arrays
# (axis=None flattens every piece before joining).
true_label_parts = info_dict1['val_class_true']
predicted_label_parts = info_dict1['val_class_predicted']

y_true = np.concatenate(true_label_parts, axis=None)
y_predict = np.concatenate(predicted_label_parts, axis=None)

In [None]:
from sklearn.metrics import confusion_matrix

# Count matrix of true labels against predicted labels over the validation data
confusionM = confusion_matrix(y_true=y_true, y_pred=y_predict)

In [None]:
import plotly.figure_factory as ff


# One tick label per class, derived from the matrix itself: create_annotated_heatmap
# raises PlotlyError when the number of labels does not match the matrix dimensions.
x = [str(i) for i in range(0, confusionM.shape[1])]
y = [str(i) for i in range(0, confusionM.shape[0])]

fig = ff.create_annotated_heatmap(confusionM, x=x, y=y, colorscale='Viridis')
fig.update_layout(
    margin=dict(l=0, r=0, b=0, t=150),
    title="Confusion Matrix for the Classification of the Iris Dataset Using Softmax Regression",
    xaxis_title="Predicted Labels",
    yaxis_title="True Labels"
)



fig.show()

PlotlyError: ignored

Variation of Momentum

In [None]:
# Sweep the momentum parameter on the Iris data with batch size and learning rate
# fixed at the selected best values.
model = SoftmaxRegression(3)

grid_columns = ['batch_size', 'learning_rate', 'momentum_param', 'cost', 'val_err', 'train_err', 'iterations', 'val_accuracy', 'train_accuracy']
# Rows are gathered in a list and converted once: DataFrame.append was removed in
# pandas 2.0 and re-copies the whole frame on every call.
grid_rows = []

bs = best_bs
lr = best_lr
for momentum in momentums:

  optimizer = GradientDescent(learning_rate=lr, momentum_para=momentum, batch_size=bs, termination_con=20, l2_regularization = 0.1,record_history=True)
  info_dict2, history_list2  = run_5_fold(iris_x, iris_y, model, optimizer)


  row_dict = get_CVT(info_dict2)
  row_dict['batch_size'] = bs
  row_dict['learning_rate'] = lr
  row_dict['momentum_param'] = momentum

  # Each summary key averages the per-run list of the same name
  row_dict['val_accuracy'] = np.average(info_dict2['val_accuracy_list'])
  row_dict['train_accuracy'] = np.average(info_dict2['train_accuracy_list'])

  # Store a copy so later changes to row_dict cannot alter a stored row
  grid_rows.append(dict(row_dict))

grid_df2 = pd.DataFrame(grid_rows)
# Put the declared columns first (created as NaN if absent); any extra keys follow
grid_df2 = grid_df2.reindex(columns=grid_columns + [c for c in grid_df2.columns if c not in grid_columns])

Training Accuracy Graph

In [None]:
# 'train_accuracy' column of grid_df2 against the swept momentum values
plot_title = 'Training Accuracy vs Momentum with Batch Size of ' + str(best_bs) + ' and Learning Rate of ' + str(best_lr)

fig = go.Figure(data=[
    go.Scatter(x=grid_df2['momentum_param'], y=grid_df2['train_accuracy'], mode='lines'),
])

# Title, axis titles and figure size
fig.update_layout(
    title=plot_title,
    xaxis_title='Momentum',
    yaxis_title='Training Accuracy',
    width=800,
    height=300,
    margin=dict(l=50, r=50, b=30, t=30, pad=2),
)

fig.show()

Validation Accuracy Graph

In [None]:
# 'val_accuracy' column of grid_df2 against the swept momentum values
plot_title = 'Validation Accuracy vs Momentum with Batch Size of ' + str(best_bs) + ' and Learning Rate of ' + str(best_lr)

fig = go.Figure(data=[
    go.Scatter(x=grid_df2['momentum_param'], y=grid_df2['val_accuracy'], mode='lines'),
])

# Title, axis titles and figure size
fig.update_layout(
    title=plot_title,
    xaxis_title='Momentum',
    yaxis_title='Validation Accuracy',
    width=800,
    height=300,
    margin=dict(l=50, r=50, b=30, t=30, pad=2),
)

fig.show()

In [None]:
# Training and validation accuracy on one figure, against the swept momentum values
fig = go.Figure()

fig.add_trace(go.Scatter(x=grid_df2['momentum_param'], y=grid_df2['train_accuracy'],
                    mode='lines',
                    name="Training Accuracy"))

fig.add_trace(go.Scatter(x=grid_df2['momentum_param'], y=grid_df2['val_accuracy'],
                    mode='lines',
                    name="Validation Accuracy"))


# Add the title and axis titles; both series share the y-axis, so it is labelled
# generically and the legend tells them apart
fig.update_layout(title='Training and Validation Accuracy vs Momentum with Batch Size of ' + str(best_bs) + ' and Learning Rate of ' + str(best_lr),
                   xaxis_title='Momentum',
                   yaxis_title='Accuracy',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

Cost Graph

In [None]:
# 'cost' column of grid_df2 against the swept momentum values
plot_title = 'Cost vs Momentum with Batch Size of ' + str(best_bs) + ' and Learning Rate of ' + str(best_lr)

fig = go.Figure(data=[
    go.Scatter(x=grid_df2['momentum_param'], y=grid_df2['cost'], mode='lines'),
])

# Title, axis titles and figure size
fig.update_layout(
    title=plot_title,
    xaxis_title='Momentum',
    yaxis_title='Cost',
    width=800,
    height=300,
    margin=dict(l=50, r=50, b=30, t=30, pad=2),
)

fig.show()

Measure the Runtime of the Algorithm as a Function of momentum

In [None]:
# Time one full 5-fold run on the Iris data per momentum value, with batch size
# and learning rate held at the selected best values.
model = SoftmaxRegression(len(iris_label_map))
runtime_list = []

bs = best_bs
lr = best_lr
for momentum in momentums:
  optimizer = GradientDescent(
      learning_rate=lr,
      momentum_para=momentum,
      batch_size=bs,
      termination_con=20,
      l2_regularization=0.1,
      record_history=True,
  )
  # a single timed call of run_5_fold
  times = timeit.timeit(lambda: run_5_fold(iris_x, iris_y, model, optimizer), number=1)
  runtime_list.append(times)

In [None]:
# One timing (in seconds) per swept momentum value
print(runtime_list)

[0.11333281699990039, 0.14009372700002132, 0.11652521999985765, 0.10441480000008596, 0.11637977700002011, 0.13236733099984122, 0.14017445999979827, 0.13282978200004436, 0.10529661100008525, 0.15005657999972755, 0.12411780000002182, 0.07072798399985913]


In [None]:
# Measured runtimes against the swept momentum values
plot_title = 'Runtime vs Momentum with Batch Size of ' + str(best_bs) + ' and Learning Rate of ' + str(best_lr)

fig = go.Figure(data=[
    go.Scatter(x=momentums, y=runtime_list, mode='lines'),
])

# Title, axis titles and figure size
fig.update_layout(
    title=plot_title,
    xaxis_title='Momentum',
    yaxis_title='Runtime (in seconds)',
    width=800,
    height=300,
    margin=dict(l=50, r=50, b=30, t=30, pad=2),
)

fig.show()

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Single panel with two y-axes: accuracies on the primary axis, runtime on the secondary one
fig = make_subplots(specs=[[{"secondary_y": True}]])

# (values, legend name, use secondary axis?)
for series, label, on_secondary in [
    (grid_df2['train_accuracy'], "Training Accuracy", False),
    (grid_df2['val_accuracy'], "Validation Accuracy", False),
    (runtime_list, "Runtime", True),
]:
    fig.add_trace(
        go.Scatter(x=momentums, y=series, name=label),
        secondary_y=on_secondary,
    )

# Centred figure title
fig.update_layout(title_text="Accuracies and Runtime vs Momentum", title_x=0.5)

# Axis titles
fig.update_xaxes(title_text="Momentum")
fig.update_yaxes(title_text="Accuracy", secondary_y=False)
fig.update_yaxes(title_text="Runtime (in seconds)", secondary_y=True)

fig.show()

Variation of Batch Size

In [None]:
# Sweep the batch size on the Iris data with learning rate and momentum fixed
# at the selected best values.
model = SoftmaxRegression(len(iris_label_map))

grid_columns = ['batch_size', 'learning_rate', 'momentum_param', 'cost', 'val_err', 'train_err', 'iterations', 'val_accuracy', 'train_accuracy']
# Rows are gathered in a list and converted once: DataFrame.append was removed in
# pandas 2.0 and re-copies the whole frame on every call.
grid_rows = []

lr = best_lr
momentum = best_momentum
for bs in batch_sizes:

  optimizer = GradientDescent(learning_rate=lr, momentum_para=momentum, batch_size=bs, termination_con=20, l2_regularization = 0.1,record_history=True)
  info_dict2, history_list2  = run_5_fold(iris_x, iris_y, model, optimizer)


  row_dict = get_CVT(info_dict2)
  row_dict['batch_size'] = bs
  row_dict['learning_rate'] = lr
  row_dict['momentum_param'] = momentum

  # Each summary key averages the per-run list of the same name
  row_dict['val_accuracy'] = np.average(info_dict2['val_accuracy_list'])
  row_dict['train_accuracy'] = np.average(info_dict2['train_accuracy_list'])

  # Store a copy so later changes to row_dict cannot alter a stored row
  grid_rows.append(dict(row_dict))

grid_df2 = pd.DataFrame(grid_rows)
# Put the declared columns first (created as NaN if absent); any extra keys follow
grid_df2 = grid_df2.reindex(columns=grid_columns + [c for c in grid_df2.columns if c not in grid_columns])

Training Accuracy Graph

In [None]:
# 'train_accuracy' column of grid_df2 against the swept batch sizes
plot_title = 'Training Accuracy vs Batch Size with Momentum Parameter of ' + str(momentum) + ' and Learning Rate of ' + str(best_lr)

fig = go.Figure(data=[
    go.Scatter(x=grid_df2['batch_size'], y=grid_df2['train_accuracy'], mode='lines'),
])

# Title, axis titles and figure size
fig.update_layout(
    title=plot_title,
    xaxis_title='Batch Size',
    yaxis_title='Training Accuracy',
    width=800,
    height=300,
    margin=dict(l=50, r=50, b=30, t=30, pad=2),
)

fig.show()

Validation Accuracy Graph

In [None]:
# 'val_accuracy' column of grid_df2 against the swept batch sizes
plot_title = 'Validation Accuracy vs Batch Size with Momentum Parameter of ' + str(momentum) + ' and Learning Rate of ' + str(best_lr)

fig = go.Figure(data=[
    go.Scatter(x=grid_df2['batch_size'], y=grid_df2['val_accuracy'], mode='lines'),
])

# Title, axis titles and figure size
fig.update_layout(
    title=plot_title,
    xaxis_title='Batch Size',
    yaxis_title='Validation Accuracy',
    width=800,
    height=300,
    margin=dict(l=50, r=50, b=30, t=30, pad=2),
)

fig.show()

In [None]:
# Training and validation accuracy on one figure, against the swept batch sizes
fig = go.Figure()

fig.add_trace(go.Scatter(x=grid_df2['batch_size'], y=grid_df2['train_accuracy'],
                    mode='lines',
                    name="Training Accuracy"))
fig.add_trace(go.Scatter(x=grid_df2['batch_size'], y=grid_df2['val_accuracy'],
                    mode='lines',
                    name="Validation Accuracy"))


# Add the title and axis titles; both series share the y-axis, so it is labelled
# generically and the legend tells them apart
fig.update_layout(title='Training and Validation Accuracy vs Batch Size with Momentum Parameter of ' + str(momentum) + ' and Learning Rate of ' + str(best_lr),
                   xaxis_title='Batch Size',
                   yaxis_title='Accuracy',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

Cost Graph

In [None]:
# 'cost' column of grid_df2 against the swept batch sizes
plot_title = 'Cost vs Batch Size with Momentum Parameter of ' + str(momentum) + ' and Learning Rate of ' + str(best_lr)

fig = go.Figure(data=[
    go.Scatter(x=grid_df2['batch_size'], y=grid_df2['cost'], mode='lines'),
])

# Title, axis titles and figure size
fig.update_layout(
    title=plot_title,
    xaxis_title='Batch Size',
    yaxis_title='Cost',
    width=800,
    height=300,
    margin=dict(l=50, r=50, b=30, t=30, pad=2),
)

fig.show()

Measure the Runtime of the Algorithm vs Batch Size

In [None]:
# Time one full 5-fold run per batch size.  This belongs to the Iris analysis and its
# timings are plotted next to the Iris accuracies, so the Iris model and data are timed.
model = SoftmaxRegression(len(iris_label_map))

runtime_list = []

momentum = best_momentum
lr = best_lr
for bs in batch_sizes:

  optimizer = GradientDescent(learning_rate=lr, momentum_para=momentum, batch_size=bs, termination_con=20, l2_regularization = 0.1,record_history=True)
  # time a single call of run_5_fold
  times = timeit.Timer(lambda: run_5_fold(iris_x, iris_y, model, optimizer)).timeit(1)
  runtime_list.append(times)

In [None]:
# Measured runtimes against the swept batch sizes
plot_title = 'Runtime vs Batch Size with Momentum Parameter of ' + str(momentum) + ' and Learning Rate of ' + str(best_lr)

fig = go.Figure(data=[
    go.Scatter(x=batch_sizes, y=runtime_list, mode='lines'),
])

# Title, axis titles and figure size
fig.update_layout(
    title=plot_title,
    xaxis_title='Batch Size',
    yaxis_title='Runtime (in seconds)',
    width=800,
    height=300,
    margin=dict(l=50, r=50, b=30, t=30, pad=2),
)

fig.show()

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Single panel with two y-axes: accuracies on the primary axis, runtime on the secondary one
fig = make_subplots(specs=[[{"secondary_y": True}]])

# (values, legend name, use secondary axis?)
for series, label, on_secondary in [
    (grid_df2['train_accuracy'], "Training Accuracy", False),
    (grid_df2['val_accuracy'], "Validation Accuracy", False),
    (runtime_list, "Runtime", True),
]:
    fig.add_trace(
        go.Scatter(x=batch_sizes, y=series, name=label),
        secondary_y=on_secondary,
    )

# Centred figure title
fig.update_layout(title_text="Accuracies and Runtime vs Batch Size", title_x=0.5)

# Axis titles
fig.update_xaxes(title_text="Batch Size")
fig.update_yaxes(title_text="Accuracy", secondary_y=False)
fig.update_yaxes(title_text="Runtime (in seconds)", secondary_y=True)

fig.show()

Variation of Learning Rate

In [None]:
# Sweep the learning rate on the Iris data with batch size and momentum fixed
# at the selected best values.
model = SoftmaxRegression(len(iris_label_map))

grid_columns = ['batch_size', 'learning_rate', 'momentum_param', 'cost', 'val_err', 'train_err', 'iterations', 'val_accuracy', 'train_accuracy']
# Rows are gathered in a list and converted once: DataFrame.append was removed in
# pandas 2.0 and re-copies the whole frame on every call.
grid_rows = []

bs = best_bs
momentum = best_momentum
for lr in learning_rates:

  optimizer = GradientDescent(learning_rate=lr, momentum_para=momentum, batch_size=bs, termination_con=20, l2_regularization = 0.1,record_history=True)
  info_dict2, history_list2  = run_5_fold(iris_x, iris_y, model, optimizer)


  row_dict = get_CVT(info_dict2)
  row_dict['batch_size'] = bs
  row_dict['learning_rate'] = lr
  row_dict['momentum_param'] = momentum

  # Each summary key averages the per-run list of the same name
  row_dict['val_accuracy'] = np.average(info_dict2['val_accuracy_list'])
  row_dict['train_accuracy'] = np.average(info_dict2['train_accuracy_list'])

  # Store a copy so later changes to row_dict cannot alter a stored row
  grid_rows.append(dict(row_dict))

grid_df2 = pd.DataFrame(grid_rows)
# Put the declared columns first (created as NaN if absent); any extra keys follow
grid_df2 = grid_df2.reindex(columns=grid_columns + [c for c in grid_df2.columns if c not in grid_columns])

Training Accuracy

In [None]:
# 'train_accuracy' column of grid_df2 against the swept learning rates
plot_title = 'Training Accuracy vs Learning Rate with Batch Size of ' + str(best_bs) + ' and Momentum Parameter of ' + str(momentum)

fig = go.Figure(data=[
    go.Scatter(x=grid_df2['learning_rate'], y=grid_df2['train_accuracy'], mode='lines'),
])

# Title, axis titles and figure size
fig.update_layout(
    title=plot_title,
    xaxis_title='Learning Rate',
    yaxis_title='Training Accuracy',
    width=800,
    height=300,
    margin=dict(l=50, r=50, b=30, t=30, pad=2),
)

fig.show()

Validation Accuracy Graph

In [None]:
# 'val_accuracy' column of grid_df2 against the swept learning rates
fig = go.Figure()

fig.add_trace(go.Scatter(x=grid_df2['learning_rate'], y=grid_df2['val_accuracy'],
                    mode='lines'))


# Add the title and axis titles; the y-axis carries validation accuracy, matching the title
fig.update_layout(title='Validation Accuracy vs Learning Rate with Batch Size of ' + str(best_bs) + ' and Momentum Parameter of ' + str(momentum),
                   xaxis_title='Learning Rate',
                   yaxis_title='Validation Accuracy',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

In [None]:
# Training and validation accuracy on one figure, against the swept learning rates
fig = go.Figure()

fig.add_trace(go.Scatter(x=grid_df2['learning_rate'], y=grid_df2['train_accuracy'],
                    mode='lines',
                    name="Training Accuracy"))
fig.add_trace(go.Scatter(x=grid_df2['learning_rate'], y=grid_df2['val_accuracy'],
                    mode='lines',
                    name="Validation Accuracy"))


# Add the title and axis titles; both series share the y-axis, so it is labelled
# generically and the legend tells them apart
fig.update_layout(title='Training and Validation Accuracy vs Learning Rate with Batch Size of ' + str(best_bs) + ' and Momentum Parameter of ' + str(momentum),
                   xaxis_title='Learning Rate',
                   yaxis_title='Accuracy',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

Cost Graph

In [None]:
# 'cost' column of grid_df2 against the swept learning rates
fig = go.Figure()

fig.add_trace(go.Scatter(x=grid_df2['learning_rate'], y=grid_df2['cost'],
                    mode='lines'))


# Add the title and axis titles; the y-axis carries the 'cost' column, so it is labelled 'Cost'
fig.update_layout(title='Cost vs Learning Rate with Batch Size of ' + str(best_bs) + ' and Momentum Parameter of ' + str(momentum),
                   xaxis_title='Learning Rate',
                   yaxis_title='Cost',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

Measure the Runtime of the Algorithm vs Learning Rate

In [None]:
# Time one full 5-fold run on the Iris data per learning rate, with batch size
# and momentum held at the selected best values.
model = SoftmaxRegression(len(iris_label_map))
runtime_list = []

bs = best_bs
momentum = best_momentum
for lr in learning_rates:
  optimizer = GradientDescent(
      learning_rate=lr,
      momentum_para=momentum,
      batch_size=bs,
      termination_con=20,
      l2_regularization=0.1,
      record_history=True,
  )
  # the timer wraps a single call of run_5_fold
  t = timeit.Timer(lambda: run_5_fold(iris_x, iris_y, model, optimizer))
  elapsed_seconds = t.timeit(number=1)
  runtime_list.append(elapsed_seconds)

In [None]:
# Measured runtimes against the swept learning rates
plot_title = 'Runtime vs Learning Rate with Batch Size of ' + str(best_bs) + ' and Momentum Parameter of ' + str(momentum)

fig = go.Figure(data=[
    go.Scatter(x=learning_rates, y=runtime_list, mode='lines'),
])

# Title, axis titles and figure size
fig.update_layout(
    title=plot_title,
    xaxis_title='Learning Rate',
    yaxis_title='Runtime (in seconds)',
    width=800,
    height=300,
    margin=dict(l=50, r=50, b=30, t=30, pad=2),
)

fig.show()

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Single panel with two y-axes: accuracies on the primary axis, runtime on the secondary one
fig = make_subplots(specs=[[{"secondary_y": True}]])

# (values, legend name, use secondary axis?)
for series, label, on_secondary in [
    (grid_df2['train_accuracy'], "Training Accuracy", False),
    (grid_df2['val_accuracy'], "Validation Accuracy", False),
    (runtime_list, "Runtime", True),
]:
    fig.add_trace(
        go.Scatter(x=learning_rates, y=series, name=label),
        secondary_y=on_secondary,
    )

# Centred figure title
fig.update_layout(title_text="Accuracies and Runtime vs Learning Rate", title_x=0.5)

# Axis titles
fig.update_xaxes(title_text="Learning Rate")
fig.update_yaxes(title_text="Accuracy", secondary_y=False)
fig.update_yaxes(title_text="Runtime (in seconds)", secondary_y=True)

fig.show()

In [None]:
model = SoftmaxRegression(digits_classes)

# We will choose the model with the highest accuracy
# Our 3 Hyper-Parameters:
# Batch size
bs = 8
# Learning Rate
lr = 0.0509
# momentum
momentum = 0.8881

optimizer = GradientDescent(learning_rate=lr, momentum_para=momentum, batch_size=bs, termination_con=20, l2_regularization = 0.1,record_history=True)
info_dict1, history_list1 = run_5_fold(digits_x, digits_y, model, optimizer)

row_dict = get_CVT(info_dict1)
row_dict['batch_size'] = bs
row_dict['learning_rate'] = lr
row_dict['momentum_param'] = momentum

# Average the per-run accuracies; each summary key reads the list of the same name
row_dict['val_accuracy'] = np.average(info_dict1['val_accuracy_list'])
row_dict['train_accuracy'] = np.average(info_dict1['train_accuracy_list'])


# Cost curves: one trace per entry of history_list1 (the histories of this run)
fig = go.Figure()

for i in range(0, len(history_list1)):

  fig.add_trace(go.Scatter(x=history_list1[i]['iteration_no'], y=history_list1[i]['cost'],
                    mode='lines',
                    name='run ' + str(i+1)))


# Add the title and axis titles
fig.update_layout(title='Cost at each iteration for Softmax Regression of the Digits Dataset',
                   xaxis_title='Number of Iterations',
                   yaxis_title='Cost',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()

# Validation-error curves, again one trace per entry of history_list1
fig = go.Figure()

for i in range(0, len(history_list1)):

  fig.add_trace(go.Scatter(x=history_list1[i]['iteration_no'], y=history_list1[i]['val_error'],
                    mode='lines',
                    name='run ' + str(i+1)))


# Add the title and axis titles
fig.update_layout(title='Validation Error at each iteration for Softmax Regression of the Digits Dataset',
                   xaxis_title='Number of Iterations',
                   yaxis_title='Validation Error',
                  width=800, height=300, margin=dict(l=50,r=50,b=30,t=30,pad=2))

fig.show()


print(row_dict['val_accuracy'])
print(row_dict['train_accuracy'])

0.9716192268121558
0.9610507582791705


Comparison Against KNN Classifier for Both Iris and Digits Datasets

Implementing functions

In [None]:
import plotly.graph_objects as go
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

def KNN_5(x,y,a):
  """Return the average validation error of a KNN classifier over 5 folds.

  Args:
    x: feature matrix, split into folds by kFold.
    y: labels, split into folds by kFold.
    a: number of neighbours passed to KNeighborsClassifier (n_neighbors).

  Returns:
    The average (np.average) of the per-fold values of
    mean_squared_error(predictions, y_val).
  """
  knn = KNeighborsClassifier(n_neighbors=a)
  val_err = []
  for (x_train, y_train), (x_val, y_val) in kFold(x,y,5):
    knn.fit(x_train,y_train)
    # Predict once per fold and reuse the result for the error computation.
    y_pred = knn.predict(x_val)
    # NOTE(review): assuming mean_squared_error is the usual mean of squared
    # differences, applying it to class labels weights a mistake by the
    # numeric distance between labels; a misclassification rate may be what
    # is intended here -- confirm.
    val_err.append(mean_squared_error(y_pred, y_val))

  return np.average(val_err)

def find_hyperparameter(x,y, n):
  """Search k = 1 .. n-1 for the KNN setting with the lowest average error.

  Args:
    x: feature matrix forwarded to KNN_5.
    y: labels forwarded to KNN_5.
    n: exclusive upper bound on the number of neighbours to try.

  Returns:
    A tuple (min_err, best_k, avg_err): the lowest average validation error,
    the k (1-based) that achieved it (the smallest such k on ties), and the
    list of average errors for every k tried, in order.

  Raises:
    ValueError: if n <= 1, because no value of k would be evaluated.
  """
  if n <= 1:
    raise ValueError("n must be greater than 1 so that at least one k is evaluated")

  avg_err = [KNN_5(x, y, a) for a in range(1, n)]
  # np.argmin returns the first index of the minimum; k starts at 1, hence +1.
  best_index = int(np.argmin(avg_err))
  return avg_err[best_index], best_index + 1, avg_err



Find the best hyper-parameter for Digits dataset

In [None]:
# Evaluate k = 1..19 on the Digits dataset and report the best k.
digits_search = find_hyperparameter(digits_x, digits_y, 20)
min_val_err, k1, avg_err = digits_search
print(avg_err)
print("k = ", k1, "has the lowerst validation err: ", min_val_err)

In [None]:
def plot_KNN_avg_err(data, title):
  """Show a line plot of `data` against k = 1 .. len(data).

  Args:
    data: sequence of values, one per k, plotted on the y axis.
    title: figure title.
  """
  k_values = np.arange(1, len(data) + 1)
  fig = go.Figure()
  fig.add_trace(go.Scatter(x=k_values, y=data, mode='lines'))
  fig.update_layout(
      title=title,
      xaxis_title="k neighbours",
      yaxis_title="average validation err",
      autosize=False,
      width=500,
      height=250,
      margin=dict(l=50, r=30, b=30, t=30, pad=5),
  )
  # One tick per k, starting at k = 1.
  fig.update_xaxes(tick0=1, dtick=1)
  fig.show()

Plot the validation error of KNN for different values of k (Digits dataset)

In [None]:
# avg_err holds the per-k average validation errors returned by
# find_hyperparameter for the Digits dataset.
plot_KNN_avg_err(avg_err, "KNN Average Validation Error for Digits dataset")

Find the best hyper-parameter for the Iris dataset

In [None]:
# Evaluate k = 1..19 on the Iris dataset and report the best k.
iris_search = find_hyperparameter(iris_x, iris_y, 20)
min_val_err2, k2, avg_err2 = iris_search
print("k = ", k2, "has the lowerst validation err: ", min_val_err2)

k =  11 has the lowerst validation err:  0.026666666666666665


Plot the validation error of KNN for different values of k (Iris dataset)

In [None]:

# avg_err2 holds the per-k average validation errors returned by
# find_hyperparameter for the Iris dataset.
plot_KNN_avg_err(avg_err2, "KNN Average Validation Error for Iris Dataset")

5-fold cross-validation for accuracy

In [None]:
def KNN_5_accuracy(x,y,n):
  """Return per-fold and average accuracies of a KNN classifier over 5 folds.

  Args:
    x: feature matrix, split into folds by kFold.
    y: labels, split into folds by kFold.
    n: number of neighbours passed to KNeighborsClassifier (n_neighbors).

  Returns:
    A tuple (train_acc, val_acc, avg_train_acc, avg_val_acc): the per-fold
    training scores, the per-fold validation scores, and the np.average of
    each list. Scores come from KNeighborsClassifier.score.
  """
  knn = KNeighborsClassifier(n_neighbors=n)
  train_acc = []
  val_acc = []
  for (x_train, y_train), (x_val, y_val) in kFold(x,y,5):
    knn.fit(x_train,y_train)
    # score() predicts internally, so no separate predict() call is needed.
    train_acc.append(knn.score(x_train,y_train))
    val_acc.append(knn.score(x_val, y_val))

  avg_train_acc = np.average(train_acc)
  avg_val_acc = np.average(val_acc)

  return train_acc, val_acc, avg_train_acc, avg_val_acc


In [None]:
def plot_KNN_accuracy(data, title):
  """Show a line plot of per-fold accuracies.

  Args:
    data: sequence of accuracies, one per cross-validation fold; the i-th
      value is plotted at x = i + 1.
    title: figure title.
  """
  fig = go.Figure(data=go.Scatter(x=np.arange(1,len(data)+1),y=data,mode='lines'))
  # The x axis indexes folds and the y axis is an accuracy, so label them as such.
  fig.update_layout(title=title, xaxis_title="fold", yaxis_title="accuracy", autosize=False, width=500, height=250, margin=dict(l=10,r=30,b=30,t=30,pad=5))
  fig.update_xaxes(tick0=1,dtick=1)
  fig.show()

Average training/validation accuracy for the best hyper-parameter of KNN classification (Digits dataset)

In [None]:
# 5-fold accuracies of KNN on the Digits dataset using the selected k1.
digits_knn_scores = KNN_5_accuracy(digits_x, digits_y, k1)
train_acc1, val_acc1, avg_train_acc1, avg_val_acc1 = digits_knn_scores

# Per-fold lists: validation first, then training.
for fold_scores in (val_acc1, train_acc1):
  print(fold_scores)

for prefix, average in (("average trainning accuracy for k=", avg_train_acc1),
                        ("average validation accuracy for k=", avg_val_acc1)):
  print(prefix, k1, "is: ", average)


[0.9888888888888889, 0.9833333333333333, 0.9749303621169917, 0.9916434540389972, 0.9944289693593314]
[0.9909533750869868, 0.9923451635351427, 0.9930458970792768, 0.9895688456189151, 0.9881780250347705]
average trainning accuracy for k= 5 is:  0.9908182612710185
average validation accuracy for k= 5 is:  0.9866450015475084


In [None]:
# train_acc1 / val_acc1 are the per-fold accuracies returned by
# KNN_5_accuracy for the Digits dataset.
plot_KNN_accuracy(train_acc1, "trainning accuracy for 5-fold cross validation for Digits ")
plot_KNN_accuracy(val_acc1, "validation accuracy for 5-fold cross validation for Digits")

Average training/validation accuracy for the best hyper-parameter of KNN classification (Iris dataset)

In [None]:
# 5-fold accuracies of KNN on the Iris dataset using the selected k2.
iris_knn_scores = KNN_5_accuracy(iris_x, iris_y, k2)
train_acc2, val_acc2, avg_train_acc2, avg_val_acc2 = iris_knn_scores

# Per-fold lists: training first, then validation.
for fold_scores in (train_acc2, val_acc2):
  print(fold_scores)

for prefix, average in (("average trainning accuracy for k=", avg_train_acc2),
                        ("average validation accuracy for k=", avg_val_acc2)):
  print(prefix, k2, "is: ", average)

[0.9916666666666667, 0.9833333333333333, 0.9583333333333334, 0.9583333333333334, 0.975]
[0.9333333333333333, 0.9666666666666667, 0.9666666666666667, 0.9666666666666667, 1.0]
average trainning accuracy for k= 11 is:  0.9733333333333334
average validation accuracy for k= 11 is:  0.9666666666666668


In [None]:
# train_acc2 / val_acc2 are the per-fold accuracies returned by
# KNN_5_accuracy for the Iris dataset; the titles name the dataset so these
# figures can be told apart from the Digits ones.
plot_KNN_accuracy(train_acc2, "trainning accuracy for 5-fold cross validation for Iris")
plot_KNN_accuracy(val_acc2, "validation accuracy for 5-fold cross validation for Iris")

Comparing by confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix
import plotly.figure_factory as ff


# Confusion matrix of the KNN classifier (k = k1) on the Digits dataset.
# NOTE(review): the classifier is fitted and evaluated on the same data, so
# this matrix reflects training performance, not held-out performance --
# confirm this is intended.
knn = KNeighborsClassifier(n_neighbors = k1) 
knn.fit(digits_x, digits_y)
y_pred=knn.predict(digits_x)
confusionM = confusion_matrix(digits_y, y_pred)

# Axis tick labels: one per digit class.
x = [str(i) for i in range(0, 10)]
y = [str(i) for i in range(0, 10)]

fig = ff.create_annotated_heatmap(confusionM, x=x, y=y, colorscale='Viridis')
fig.update_layout(
    margin=dict(l=10, r=10, b=10, t=150),
    title="Confusion Matrix for the Classification of the Digits Dataset Using KNN", title_x=0.5,
    xaxis_title="Predicted Labels",
    yaxis_title="True Labels"
)

fig.show()


In [None]:
# Confusion matrix of the KNN classifier (k = k2) on the Iris dataset.
# NOTE(review): the classifier is fitted and evaluated on the same data, so
# this matrix reflects training performance, not held-out performance --
# confirm this is intended.
knn = KNeighborsClassifier(n_neighbors = k2) 
knn.fit(iris_x, iris_y)
y_pred=knn.predict(iris_x)
confusionM2 = confusion_matrix(iris_y, y_pred)

# Axis tick labels: one per Iris class.
x = [str(i) for i in range(0, 3)]
y = [str(i) for i in range(0, 3)]

fig = ff.create_annotated_heatmap(confusionM2, x=x, y=y, colorscale='Viridis')
fig.update_layout(
    margin=dict(l=10, r=10, b=0, t=150),
    title="Confusion Matrix for the Classification of the Iris Dataset Using KNN", title_x=0.5,
    xaxis_title="Predicted Labels",
    yaxis_title="True Labels"
)

# Display explicitly instead of relying on the cell's last-expression output.
fig.show()

Bar Chart for the Training and Validation Accuracies of Both models

Accuracy vs Model: Iris dataset

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Bar heights are drawn on top of a baseline of 90 (bottom=90), so each value
# is an offset above 90 on the y axis.
# Row 0 = training accuracy, row 1 = validation accuracy;
# column 0 = softmax regressor, column 1 = KNN classifier.
data = [[7.02, 7.3 ],
[9.33,6.7]]

bar_width = 0.35
X = np.arange(2)
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.bar(X + 0.00, data[0], color = 'b', width = bar_width, bottom=90, label='Trainning Accuracy')
ax.bar(X + bar_width, data[1], color = 'g', width = bar_width, bottom=90, label='Validation Accuracy')
ax.set_title('Accuracy vs Model: Iris dataset')
ax.legend()
ax.set_ylabel('Accuracy')
# Fix the tick positions at the centre of each pair of bars before labelling
# them, so the labels do not depend on the automatically chosen ticks.
ax.set_xticks(X + bar_width / 2)
ax.set_xticklabels(('Softmax regressor', 'KNN classifier'))
plt.show()

Accuracy vs Model: Digits dataset

In [None]:
# Bar heights are drawn on top of a baseline of 90 (bottom=90), so each value
# is an offset above 90 on the y axis.
# Row 0 = training accuracy, row 1 = validation accuracy;
# column 0 = softmax regressor, column 1 = KNN classifier.
data = [[7.02, 9.1 ],
[6.77,8.7]]

bar_width = 0.35
X = np.arange(2)
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.bar(X + 0.00, data[0], color = 'b', width = bar_width, bottom=90, label='Trainning Accuracy')
ax.bar(X + bar_width, data[1], color = 'g', width = bar_width, bottom=90, label='Validation Accuracy')
ax.set_title('Accuracy vs Model: Digits dataset')
ax.legend()
ax.set_ylabel('Accuracy')
# Fix the tick positions at the centre of each pair of bars before labelling
# them, so the labels do not depend on the automatically chosen ticks.
ax.set_xticks(X + bar_width / 2)
ax.set_xticklabels(('Softmax regressor', 'KNN classifier'))
plt.show()