import libraries

In [1]:
import numpy as np
import math
import random
import matplotlib.pyplot as plt
import pandas as pd;
from scipy.stats import zscore
import torch as torch
import torch.nn as nn

Excercise #3: 

Batching Data

In [2]:
fulldataset = pd.read_csv('./iris.csv')

np_full_input=fulldataset[list(fulldataset.columns)[0:-1]].apply(zscore).to_numpy();
np_full_output = pd.get_dummies(fulldataset['variety']).to_numpy();


n_classes = 3;
n_features = np_full_input.shape[1];

x_train=np_full_input[0:99,:]
y_train=np_full_output[0:99,:]
x_train.shape

#Partition off 1/3rd of the dataset for testing
x_test = np_full_input[100:149,:]
y_test = np_full_input[100:149,:]

Now to create our tensor of training and testing variables.

In [3]:
t_x_train=torch.tensor(x_train,requires_grad=False,dtype=torch.float64,device='cpu');
t_y_train=torch.tensor(y_train,requires_grad=False,dtype=torch.float64,device='cpu');
t_x_test=torch.tensor(x_train,requires_grad=False,dtype=torch.float64,device='cpu');
t_y_test=torch.tensor(y_train,requires_grad=False,dtype=torch.float64,device='cpu');

Lets divide this training data into 5 batches of size 20 each.

In [4]:
batch_size = 50
num_batches = math.ceil(t_x_test.size()[0]/batch_size)
x_test_batches = [t_x_test[batch_size*j:batch_size*(j+1),:] for j in range(num_batches)]
y_test_batches = [t_y_test[batch_size*j:batch_size*(j+1),:] for j in range(num_batches)]

Initialize variables for Gradient Descent:

In [5]:
init_std_dev = 0.01;
initialW=init_std_dev*np.random.randn(n_features,n_classes)

Creating variables for weights:

In [6]:
W = torch.tensor(initialW,requires_grad=True,device='cpu');
b = torch.zeros((1,n_classes),requires_grad=True,device='cpu');

Here lets define our CrossEntropyLoss and Softmax functions.

In [7]:
lossModel = nn.CrossEntropyLoss()
sm = nn.Softmax(dim=0)

Lets run a loop!

In [17]:
rate = 0.00005
batch_sizes = [5,10,25,50]
for batch_size in batch_sizes:
    W = torch.tensor(initialW,requires_grad=True,device='cpu');
    b = torch.zeros((1,n_classes),requires_grad=True,device='cpu');
    num_batches = math.ceil(t_x_test.size()[0]/batch_size)
    x_test_batches = [t_x_test[batch_size*j:batch_size*(j+1),:] for j in range(num_batches)]
    y_test_batches = [t_y_test[batch_size*j:batch_size*(j+1),:] for j in range(num_batches)]
    batch_x = x_test_batches[0]
    batch_y = y_test_batches[0]
    optimizer = torch.optim.Adam([W,b],lr=rate)
    iteration_limit = 100000; #Desired maximum iterations
    tol = 0.95 # Desired Error
    i = 0
    accuracy = 0
    while accuracy < tol and i < iteration_limit:
        # clear previous gradient calculations
        optimizer.zero_grad();
        # Load one of n batches
        batch_number = random.randint(0,num_batches-1)
        batch_x = x_test_batches[batch_number]
        batch_y = y_test_batches[batch_number]
        # calculate model predictions
        linear_predictions = torch.matmul(batch_x,W)+b
        activations = 1.0 / (1.0 + torch.exp(-linear_predictions));
        norm_predictions = sm(linear_predictions)
        #calculate loss
        loss = lossModel(linear_predictions, batch_y)
        risk = torch.mean(loss)
        #calculate gradients of risk w.r.t. W,b and propagate them back
        loss.backward();
        # use the gradient to change W, b
        optimizer.step();
        #calculate accuracy (on the training set!)
        true_class = np.argmax(batch_y.detach().cpu().numpy(),axis=1)
        pred_class = np.argmax(activations.detach().cpu().numpy(),axis=1)
        accuracy = np.count_nonzero(true_class == pred_class)/pred_class.shape[0];
        i = i+1
    # calculate error (on the testing set!)
    linear_predictions = torch.matmul(t_x_test,W)+b
    activations = 1.0 / (1.0 + torch.exp(-linear_predictions));
    prediction_error = np.abs(np.mean(t_y_test.detach().numpy()-activations.detach().numpy()))
    print('End of loop results:')
    print('Completed in '+str(i)+' iterations with '+str(round(accuracy*100,4))+' percent accuracy on training data and an error of '+str(round(prediction_error,4))+' on testing data.')
    print('--------------------------------')

End of loop results:
Completed in 86 iterations with 100.0 percent accuracy on testing data and an error of 0.1633 on training data.
--------------------------------
End of loop results:
Completed in 127 iterations with 100.0 percent accuracy on testing data and an error of 0.1632 on training data.
--------------------------------
End of loop results:
Completed in 125 iterations with 96.0 percent accuracy on testing data and an error of 0.1631 on training data.
--------------------------------
End of loop results:
Completed in 137 iterations with 96.0 percent accuracy on testing data and an error of 0.1631 on training data.
--------------------------------


I found while running the model through the testing data. None of the models gave particularly good accuracy. I believe this is because in a real world application, the likleyhood of a linear model matching the input and output of a dataset exactly are incredibly low. Before partitioning our dataset, we could use accuracy as a metric since we are directly fitting our model to the dataset, but after dividing it we can't expect perfect accuracy on the testing data.

As a result, the prediction error must now be used as our metric to evaluate the "goodness" of our linear models.

It seems that Batching slows down convergence to the actual value somewhat. However oddly enough, it enhances the precision of the model compared to using one large batch as the dataset. As you can see we have attained 100% accuracy and much lower prediction error compared to training the model on one large batch.

I suspect this precision increase has more to do with partitioning the dataset between testing and trading data than batching. By setting aside some data for testing, we help avoid overfitting of our approximation.