## Assignment 2

In this task we implement a simple neural network with a single hidden layer, trained as a softmax (multinomial logistic regression) classifier on the CIFAR10 dataset.
Our implementation consists of 2 main classes:

* LogisticRegression 

    Contains the implementation of a logistic regression classifier with 1 hidden layer, built on top of PyTorch's `nn.Module`.


* ModelEvaluator 

    Class consisting of basic functionalities for training, testing and visualizing loss.

For the extra task we implement another class named CrossValidation. This class supports tuning hyperparameters with grid search and k-fold cross validation.

In [1]:
#importing basic stuff
import copy

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from matplotlib import pyplot as plt
from sklearn.utils import shuffle as skshuffle

Implement logistic regression classifier using pytorch.

In [2]:
class LogisticRegression(torch.nn.Module):
    '''
    Classifier made of two linear layers with a ReLU in between.

    The cross-entropy criterion used for training is stored on the
    instance as ``loss``.
    '''

    def __init__(self, n_in, n_hidden, n_out):
        '''
        n_in: Number of Inputs
        n_hidden: Number of Hidden Units
        n_out: Number of Output Units
        '''
        super().__init__()
        # Keep the layer sizes around for later inspection.
        self.n_in = n_in
        self.n_out = n_out
        self.n_hidden = n_hidden
        # input -> hidden and hidden -> output projections
        self.fc1 = nn.Linear(n_in, n_hidden)
        self.fc2 = nn.Linear(n_hidden, n_out)
        self.nonlin = nn.ReLU()
        self.loss = torch.nn.CrossEntropyLoss()

    def forward(self, X):
        '''
        forward pass: returns the output of the second linear layer
        (no softmax is applied here)
        '''
        hidden = self.fc1(X)
        activated = self.nonlin(hidden)
        return self.fc2(activated)



Basic functionality for training and evaluating the model is implemented in the ModelEvaluator class.

In [3]:
class ModelEvaluator:
    '''
    Trains a classification model, measures its accuracy and plots the
    training loss collected per epoch.
    '''

    def __init__(self, model, epochs, lr, use_gpu=False, optim='adam'):
        '''
        model: instance of pytorch model class; its ``loss`` attribute is
               used as the training criterion
        epochs: number of training epochs
        lr: learning rate
        use_gpu: to use gpu (falls back to cpu when CUDA is unavailable)
        optim: optimizer used for training, 'sgd' or 'adam'

        Raises ValueError when ``optim`` names an unsupported optimizer.
        '''
        self.epochs = epochs
        self.lr = lr
        self.use_gpu = use_gpu
        self.epoch_loss = []
        # Grab the criterion before the model may be wrapped below: the
        # nn.DataParallel wrapper does not expose the wrapped model's
        # custom attributes such as ``loss``.
        self.criterion = getattr(model, 'loss', None)
        # Always define the device so train/test can move batches
        # unconditionally (a move to the current device is a no-op).
        self.device = torch.device(
            'cuda:0' if use_gpu and torch.cuda.is_available() else 'cpu')
        self.model = model.to(self.device)
        # Check device.type instead of comparing against a string:
        # a torch.device object is not a str.
        if self.device.type == 'cuda' and torch.cuda.device_count() > 1:
            self.model = nn.DataParallel(self.model)
        # The ``optim`` argument shadows the ``torch.optim`` module alias,
        # so the optimizers are referenced through ``torch.optim``.
        if optim == 'adam':
            self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
        elif optim == 'sgd':
            self.optimizer = torch.optim.SGD(self.model.parameters(), lr=lr, momentum=0.9)
        else:
            raise ValueError('Optimizer Not Supported')

    def _prepare_batch(self, data, labels):
        '''
        move one batch to the target device and flatten every sample
        '''
        data = data.to(self.device)
        labels = labels.to(self.device)
        # One row per sample; equivalent to reshape(-1, 32*32*3) for
        # 3x32x32 images but not tied to that image size.
        data = data.reshape(data.size(0), -1)
        # NOTE(review): inputs are divided by 255 here. If the dataset
        # transform already scales pixels to [0, 1] (torchvision's
        # ToTensor does), this scales them twice -- confirm the intent.
        return data / 255, labels

    def train(self, trainloader, testloader, validation=False):
        '''
        method for training

        trainloader: iterable of (data, labels) training batches
        testloader: iterable of (data, labels) batches, evaluated every
                    500 iterations when ``validation`` is True
        validation: whether to report accuracy on ``testloader`` while training

        Appends the summed batch loss of every epoch to ``self.epoch_loss``.
        '''
        if self.criterion is None:
            raise AttributeError("model must define a 'loss' criterion to be trained")
        iter_ = 0
        for epoch in range(self.epochs):
            print('Epoch-{}'.format(epoch+1))
            print('-----------------')
            self.model.train()
            loss_batch = []
            for train_data, train_labels in trainloader:
                train_data, train_labels = self._prepare_batch(train_data, train_labels)
                train_preds = self.model(train_data)
                loss = self.criterion(train_preds, train_labels)
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                iter_ += 1
                # Store a plain float so the autograd graph of each batch
                # is not kept alive for the whole epoch.
                loss_value = loss.item()
                print('Iter-{0}, training loss{1:.2f}'.format(iter_, loss_value))
                if validation and iter_ % 500 == 0:
                    acc_test = self.test(testloader)
                    print('Accuracy on Test Set {:.2f}'.format(acc_test))
                loss_batch.append(loss_value)
            self.epoch_loss.append(float(sum(loss_batch)))

    def test(self, testloader):
        '''
        method for testing

        testloader: iterable of (data, labels) batches
        Returns the accuracy in percent as a float (0.0 for an empty loader).
        '''
        correct_ = 0
        total_ = 0
        # Evaluate in eval mode, then restore whatever mode was active so
        # a validation pass in the middle of training does not leak.
        was_training = self.model.training
        self.model.eval()
        try:
            with torch.no_grad():
                for test_data, test_labels in testloader:
                    test_data, test_labels = self._prepare_batch(test_data, test_labels)
                    test_preds = self.model(test_data)
                    _, test_pred_labels = torch.max(test_preds, 1)
                    total_ += test_labels.size(0)
                    correct_ += (test_pred_labels == test_labels).sum().item()
        finally:
            self.model.train(was_training)
        if total_ == 0:
            return 0.0
        return 100.0 * correct_ / total_

    def plot_loss(self):
        '''
        to visualize loss
        '''
        plt.plot(range(len(self.epoch_loss)), self.epoch_loss)
        # epoch_loss holds one value per epoch, not per iteration.
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.show()

# Load and Prepare Dataset

In [4]:
# CIFAR10 train and test splits stored under ./data (downloaded on demand);
# every image goes through ToTensor() when it is accessed.
trainset = dsets.CIFAR10(
    './data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
testset = dsets.CIFAR10(
    './data',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)
# Human-readable class names (presumably in label-index order -- confirm).
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
# Parameters
# Flattened size of one sample: product of the first image tensor's dimensions.
n_in = np.prod(trainset[0][0].numpy().shape)
# One output unit per class name.
n_out = len(classes)
# Samples per batch for the data loaders.
batch_size = 100
# Number of passes over the training data.
epochs = 30

In [6]:
# Hyperparameters
lr = 0.001  # learning rate handed to ModelEvaluator
n_hidden = 512  # hidden units handed to LogisticRegression

In [7]:
# Data Loader
# Both loaders share the batch size, shuffle their samples and use two workers.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

In [None]:
# Model
# Build the network from the sizes and hyperparameters defined above.
model = LogisticRegression(n_in, n_hidden, n_out)
# use_gpu=True requests GPU execution; the optimizer defaults to 'adam'.
modeleval = ModelEvaluator(model, epochs, lr, use_gpu=True)
# validation is left at its default (False), so testloader is not
# evaluated while training.
modeleval.train(trainloader, testloader)

Epoch-1
-----------------
Iter-1, training loss2.30
Iter-2, training loss2.30
Iter-3, training loss2.31
Iter-4, training loss2.30
Iter-5, training loss2.30
Iter-6, training loss2.30
Iter-7, training loss2.30
Iter-8, training loss2.30
Iter-9, training loss2.30
Iter-10, training loss2.30
Iter-11, training loss2.30
Iter-12, training loss2.30
Iter-13, training loss2.30
Iter-14, training loss2.29
Iter-15, training loss2.30
Iter-16, training loss2.30
Iter-17, training loss2.30
Iter-18, training loss2.29
Iter-19, training loss2.31
Iter-20, training loss2.29
Iter-21, training loss2.29
Iter-22, training loss2.30
Iter-23, training loss2.30
Iter-24, training loss2.30
Iter-25, training loss2.30
Iter-26, training loss2.30
Iter-27, training loss2.29
Iter-28, training loss2.29
Iter-29, training loss2.28
Iter-30, training loss2.29
Iter-31, training loss2.29
Iter-32, training loss2.30
Iter-33, training loss2.30
Iter-34, training loss2.29
Iter-35, training loss2.28
Iter-36, training loss2.29
Iter-37, tr

Iter-298, training loss2.02
Iter-299, training loss2.06
Iter-300, training loss2.03
Iter-301, training loss2.11
Iter-302, training loss2.03
Iter-303, training loss2.03
Iter-304, training loss1.94
Iter-305, training loss2.06
Iter-306, training loss2.01
Iter-307, training loss1.91
Iter-308, training loss2.00
Iter-309, training loss1.95
Iter-310, training loss1.95
Iter-311, training loss1.94
Iter-312, training loss1.91
Iter-313, training loss2.01
Iter-314, training loss1.89
Iter-315, training loss2.03
Iter-316, training loss1.93
Iter-317, training loss1.89
Iter-318, training loss2.00
Iter-319, training loss2.04
Iter-320, training loss2.03
Iter-321, training loss1.96
Iter-322, training loss2.04
Iter-323, training loss1.94
Iter-324, training loss2.01
Iter-325, training loss1.98
Iter-326, training loss1.87
Iter-327, training loss1.95
Iter-328, training loss2.08
Iter-329, training loss1.98
Iter-330, training loss1.90
Iter-331, training loss1.97
Iter-332, training loss1.90
Iter-333, training l

Iter-591, training loss1.81
Iter-592, training loss1.98
Iter-593, training loss1.88
Iter-594, training loss1.84
Iter-595, training loss2.01
Iter-596, training loss1.89
Iter-597, training loss1.98
Iter-598, training loss1.95
Iter-599, training loss2.07
Iter-600, training loss1.84
Iter-601, training loss1.96
Iter-602, training loss1.93
Iter-603, training loss1.92
Iter-604, training loss2.01
Iter-605, training loss1.87
Iter-606, training loss1.86
Iter-607, training loss1.92
Iter-608, training loss1.91
Iter-609, training loss1.95
Iter-610, training loss1.93
Iter-611, training loss1.94
Iter-612, training loss1.88
Iter-613, training loss1.91
Iter-614, training loss1.92
Iter-615, training loss1.96
Iter-616, training loss1.82
Iter-617, training loss1.97
Iter-618, training loss1.88
Iter-619, training loss2.01
Iter-620, training loss1.85
Iter-621, training loss1.82
Iter-622, training loss1.93
Iter-623, training loss1.93
Iter-624, training loss1.93
Iter-625, training loss1.92
Iter-626, training l

Iter-886, training loss1.87
Iter-887, training loss2.03
Iter-888, training loss1.84
Iter-889, training loss1.85
Iter-890, training loss1.84
Iter-891, training loss1.75
Iter-892, training loss1.76
Iter-893, training loss1.81
Iter-894, training loss1.81
Iter-895, training loss1.87
Iter-896, training loss1.92
Iter-897, training loss1.78
Iter-898, training loss1.80
Iter-899, training loss1.89
Iter-900, training loss1.86
Iter-901, training loss1.82
Iter-902, training loss1.96
Iter-903, training loss1.76
Iter-904, training loss1.86
Iter-905, training loss1.86
Iter-906, training loss1.87
Iter-907, training loss1.79
Iter-908, training loss1.75
Iter-909, training loss2.07
Iter-910, training loss1.88
Iter-911, training loss1.88
Iter-912, training loss1.96
Iter-913, training loss1.81
Iter-914, training loss1.85
Iter-915, training loss1.99
Iter-916, training loss1.84
Iter-917, training loss1.86
Iter-918, training loss1.83
Iter-919, training loss1.89
Iter-920, training loss1.97
Iter-921, training l

Iter-1172, training loss1.86
Iter-1173, training loss1.94
Iter-1174, training loss1.97
Iter-1175, training loss1.88
Iter-1176, training loss1.89
Iter-1177, training loss1.96
Iter-1178, training loss1.64
Iter-1179, training loss1.89
Iter-1180, training loss1.82
Iter-1181, training loss1.97
Iter-1182, training loss1.84
Iter-1183, training loss1.76
Iter-1184, training loss1.88
Iter-1185, training loss1.74
Iter-1186, training loss1.94
Iter-1187, training loss1.97
Iter-1188, training loss1.83
Iter-1189, training loss1.89
Iter-1190, training loss1.94
Iter-1191, training loss1.94
Iter-1192, training loss1.84
Iter-1193, training loss1.83
Iter-1194, training loss1.88
Iter-1195, training loss1.87
Iter-1196, training loss1.91
Iter-1197, training loss1.99
Iter-1198, training loss1.97
Iter-1199, training loss1.93
Iter-1200, training loss1.86
Iter-1201, training loss1.91
Iter-1202, training loss1.49
Iter-1203, training loss1.93
Iter-1204, training loss1.82
Iter-1205, training loss1.92
Iter-1206, tra

Iter-1455, training loss1.79
Iter-1456, training loss1.93
Iter-1457, training loss1.70
Iter-1458, training loss1.87
Iter-1459, training loss1.79
Iter-1460, training loss1.80
Iter-1461, training loss1.85
Iter-1462, training loss1.84
Iter-1463, training loss1.65
Iter-1464, training loss1.79
Iter-1465, training loss1.91
Iter-1466, training loss1.85
Iter-1467, training loss1.83
Iter-1468, training loss1.90
Iter-1469, training loss1.85
Iter-1470, training loss1.77
Iter-1471, training loss1.94
Iter-1472, training loss1.74
Iter-1473, training loss1.82
Iter-1474, training loss1.85
Iter-1475, training loss1.89
Iter-1476, training loss1.76
Iter-1477, training loss1.76
Iter-1478, training loss1.71
Iter-1479, training loss1.80
Iter-1480, training loss1.83
Iter-1481, training loss1.75
Iter-1482, training loss1.71
Iter-1483, training loss1.98
Iter-1484, training loss1.89
Iter-1485, training loss1.77
Iter-1486, training loss1.88
Iter-1487, training loss1.76
Iter-1488, training loss1.77
Iter-1489, tra

Iter-1737, training loss1.75
Iter-1738, training loss1.84
Iter-1739, training loss1.60
Iter-1740, training loss1.67
Iter-1741, training loss1.80
Iter-1742, training loss1.67
Iter-1743, training loss1.70
Iter-1744, training loss1.96
Iter-1745, training loss1.84
Iter-1746, training loss1.70
Iter-1747, training loss1.93
Iter-1748, training loss1.73
Iter-1749, training loss1.87
Iter-1750, training loss1.76
Iter-1751, training loss1.75
Iter-1752, training loss1.84
Iter-1753, training loss1.90
Iter-1754, training loss1.74
Iter-1755, training loss1.99
Iter-1756, training loss1.88
Iter-1757, training loss1.75
Iter-1758, training loss1.71
Iter-1759, training loss1.85
Iter-1760, training loss1.89
Iter-1761, training loss1.74
Iter-1762, training loss1.68
Iter-1763, training loss1.75
Iter-1764, training loss1.78
Iter-1765, training loss1.83
Iter-1766, training loss1.89
Iter-1767, training loss1.69
Iter-1768, training loss1.89
Iter-1769, training loss1.89
Iter-1770, training loss1.76
Iter-1771, tra

Iter-2021, training loss1.72
Iter-2022, training loss1.75
Iter-2023, training loss1.80
Iter-2024, training loss1.68
Iter-2025, training loss1.64
Iter-2026, training loss2.00
Iter-2027, training loss1.92
Iter-2028, training loss1.85
Iter-2029, training loss1.82
Iter-2030, training loss1.86
Iter-2031, training loss1.91
Iter-2032, training loss1.53
Iter-2033, training loss1.86
Iter-2034, training loss1.82
Iter-2035, training loss1.83
Iter-2036, training loss1.78
Iter-2037, training loss1.78
Iter-2038, training loss1.91
Iter-2039, training loss1.82
Iter-2040, training loss1.82
Iter-2041, training loss1.75
Iter-2042, training loss1.74
Iter-2043, training loss1.59
Iter-2044, training loss1.72
Iter-2045, training loss1.84
Iter-2046, training loss1.87
Iter-2047, training loss1.67
Iter-2048, training loss2.03
Iter-2049, training loss1.92
Iter-2050, training loss1.71
Iter-2051, training loss1.93
Iter-2052, training loss1.81
Iter-2053, training loss1.79
Iter-2054, training loss1.77
Iter-2055, tra

Iter-2305, training loss1.80
Iter-2306, training loss1.78
Iter-2307, training loss1.90
Iter-2308, training loss1.74
Iter-2309, training loss1.66
Iter-2310, training loss1.69
Iter-2311, training loss1.72
Iter-2312, training loss1.75
Iter-2313, training loss1.62
Iter-2314, training loss1.70
Iter-2315, training loss1.87
Iter-2316, training loss1.75
Iter-2317, training loss1.73
Iter-2318, training loss1.82
Iter-2319, training loss1.62
Iter-2320, training loss1.73
Iter-2321, training loss1.75
Iter-2322, training loss1.82
Iter-2323, training loss1.92
Iter-2324, training loss1.58
Iter-2325, training loss1.80
Iter-2326, training loss1.79
Iter-2327, training loss1.74
Iter-2328, training loss1.77
Iter-2329, training loss1.80
Iter-2330, training loss1.69
Iter-2331, training loss1.90
Iter-2332, training loss1.73
Iter-2333, training loss1.49
Iter-2334, training loss1.84
Iter-2335, training loss1.79
Iter-2336, training loss1.74
Iter-2337, training loss1.72
Iter-2338, training loss1.84
Iter-2339, tra

Iter-2591, training loss1.63
Iter-2592, training loss1.72
Iter-2593, training loss1.71
Iter-2594, training loss1.78
Iter-2595, training loss1.71
Iter-2596, training loss1.63
Iter-2597, training loss1.75
Iter-2598, training loss1.82
Iter-2599, training loss1.86
Iter-2600, training loss1.69
Iter-2601, training loss1.65
Iter-2602, training loss1.78
Iter-2603, training loss1.71
Iter-2604, training loss1.68
Iter-2605, training loss1.75
Iter-2606, training loss1.70
Iter-2607, training loss1.86
Iter-2608, training loss1.69
Iter-2609, training loss1.63
Iter-2610, training loss1.79
Iter-2611, training loss1.76
Iter-2612, training loss1.79
Iter-2613, training loss1.81
Iter-2614, training loss1.70
Iter-2615, training loss1.74
Iter-2616, training loss1.75
Iter-2617, training loss1.71
Iter-2618, training loss1.64
Iter-2619, training loss1.77
Iter-2620, training loss1.65
Iter-2621, training loss1.72
Iter-2622, training loss1.79
Iter-2623, training loss1.74
Iter-2624, training loss1.73
Iter-2625, tra

Iter-2876, training loss1.85
Iter-2877, training loss1.71
Iter-2878, training loss1.95
Iter-2879, training loss1.65
Iter-2880, training loss1.86
Iter-2881, training loss1.62
Iter-2882, training loss1.65
Iter-2883, training loss1.73
Iter-2884, training loss1.86
Iter-2885, training loss1.66
Iter-2886, training loss1.91
Iter-2887, training loss1.86
Iter-2888, training loss1.65
Iter-2889, training loss1.75
Iter-2890, training loss1.67
Iter-2891, training loss1.73
Iter-2892, training loss1.80
Iter-2893, training loss1.66
Iter-2894, training loss1.58
Iter-2895, training loss1.64
Iter-2896, training loss1.85
Iter-2897, training loss1.81
Iter-2898, training loss1.59
Iter-2899, training loss1.74
Iter-2900, training loss1.83
Iter-2901, training loss1.74
Iter-2902, training loss1.81
Iter-2903, training loss1.72
Iter-2904, training loss1.63
Iter-2905, training loss1.62
Iter-2906, training loss1.71
Iter-2907, training loss1.76
Iter-2908, training loss1.61
Iter-2909, training loss1.82
Iter-2910, tra

Iter-3163, training loss1.90
Iter-3164, training loss1.74
Iter-3165, training loss1.69
Iter-3166, training loss1.78
Iter-3167, training loss1.68
Iter-3168, training loss1.81
Iter-3169, training loss1.70
Iter-3170, training loss1.82
Iter-3171, training loss1.66
Iter-3172, training loss1.70
Iter-3173, training loss1.65
Iter-3174, training loss1.63
Iter-3175, training loss1.78
Iter-3176, training loss1.69
Iter-3177, training loss1.82
Iter-3178, training loss1.74
Iter-3179, training loss1.66
Iter-3180, training loss1.59
Iter-3181, training loss1.76
Iter-3182, training loss1.79
Iter-3183, training loss1.63
Iter-3184, training loss1.81
Iter-3185, training loss1.66
Iter-3186, training loss1.70
Iter-3187, training loss1.60
Iter-3188, training loss1.77
Iter-3189, training loss1.66
Iter-3190, training loss1.70
Iter-3191, training loss1.59
Iter-3192, training loss1.55
Iter-3193, training loss1.57
Iter-3194, training loss1.61
Iter-3195, training loss1.62
Iter-3196, training loss1.62
Iter-3197, tra

In [None]:
# Plot the training loss recorded per epoch, then report accuracy on the test set.
modeleval.plot_loss()
accuracy_test = modeleval.test(testloader)
print('Accuracy of model on test set {0:.2f}'.format(accuracy_test))

## K-fold Cross Validation

In [None]:
class CrossValidation:
    '''
    Grid search over learning rate and hidden-layer size, scoring every
    combination with k-fold cross validation on the training set.

    gridsearchCV relies on the notebook-level names ``n_in``, ``n_out``,
    ``epochs``, ``LogisticRegression`` and ``ModelEvaluator``.
    '''

    def __init__(self, k, batch_size, trainset, use_gpu):
        '''
        k: number of folds (at least 2)
        batch_size: batch size for training
        trainset: training dataset; must support len() and indexing
        use_gpu: boolean variable to use gpus

        Raises ValueError when ``k`` is smaller than 2.
        '''
        if k < 2:
            # With fewer than two folds there is no training split left.
            raise ValueError('k must be at least 2 for k-fold cross validation')
        self.k = k
        self.nm_samples = len(trainset)
        self.indices = list(range(self.nm_samples))
        self.trainset = trainset
        self.batch_size = batch_size
        self.use_gpu = use_gpu

    def kfold(self):
        '''
        k-fold split

        Yields (train_idx, valid_idx) once per fold. Fold i validates on
        every k-th sample starting at position i and trains on the rest.
        '''
        for i in range(self.k):
            valid_idx = self.indices[i::self.k]
            train_idx = [idx for j, idx in enumerate(self.indices) if j % self.k != i]
            yield train_idx, valid_idx

    def trainloader_sampling(self):
        '''
        k-fold samples

        Yields one (train_sampler, valid_sampler) pair per fold.
        '''
        for train_idx, valid_idx in self.kfold():
            train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
            valid_sampler = torch.utils.data.SubsetRandomSampler(valid_idx)
            yield train_sampler, valid_sampler

    def gridsearchCV(self, parameters):
        '''
        find best parameters by doing grid search with k-fold cross validation

        parameters: dict with the candidate values under 'lr' and 'n_hidden'
        Returns (accuracy_mat, best_model, best_lr, best_n_hidden) where
        accuracy_mat[i, j] is the mean validation accuracy for
        parameters['lr'][i] and parameters['n_hidden'][j]. The three
        ``best_*`` values are None when the grid is empty.
        '''
        accuracy_mat = np.zeros((len(parameters['lr']), len(parameters['n_hidden'])))
        # Track the best score explicitly so the best_* results are always
        # bound, even when every mean accuracy is 0.
        best_acc = float('-inf')
        best_model, best_lr, best_n_hidden = None, None, None
        for ii, lr in enumerate(parameters['lr']):
            for jj, n_hidden in enumerate(parameters['n_hidden']):
                fold_accuracy = []
                for train_sampler, valid_sampler in self.trainloader_sampling():
                    trainloader = torch.utils.data.DataLoader(self.trainset, batch_size=self.batch_size, sampler=train_sampler, num_workers=2)
                    validloader = torch.utils.data.DataLoader(self.trainset, batch_size=self.batch_size, sampler=valid_sampler, num_workers=2)

                    # A fresh model per fold so folds do not share weights.
                    model = LogisticRegression(n_in, n_hidden, n_out)
                    modeleval = ModelEvaluator(model, epochs, lr, use_gpu=self.use_gpu)
                    modeleval.train(trainloader, validloader, validation=True)
                    accuracy_valid = modeleval.test(validloader)
                    print('Accuracy of model on validation set {0:.2f}'.format(accuracy_valid))
                    # float() accepts both plain numbers and 0-dim tensors.
                    fold_accuracy.append(float(accuracy_valid))
                mean_acc = float(np.mean(fold_accuracy))
                accuracy_mat[ii, jj] = mean_acc
                if mean_acc > best_acc:
                    best_acc = mean_acc
                    # The copy is of the model trained on the last fold.
                    best_model, best_lr, best_n_hidden = copy.deepcopy(model), lr, n_hidden
        return accuracy_mat, best_model, best_lr, best_n_hidden

## Run k-fold and evaluate with best parameter

In [None]:
# Candidate hyperparameter values explored by the grid search
parameters = {'lr':[0.00001, 0.0001, 0.001, 0.01], 'n_hidden': [512, 256, 128]}
# k fold cross validation
k = 3
cv = CrossValidation(k=k, batch_size=batch_size, trainset=trainset, use_gpu=True)
accuracy_mat, best_model, best_lr, best_n_hidden = cv.gridsearchCV(parameters)
# There is no ``self`` at notebook top level; reuse the GPU setting given to ``cv``.
bestmodeleval = ModelEvaluator(best_model, epochs, best_lr, use_gpu=cv.use_gpu)
# Visualization accuracy vs parameters
# NOTE(review): heatmap and annotate_heatmap are not defined in the visible
# part of this notebook -- they must be defined or imported before this cell runs.
fig, ax = plt.subplots()
lr_ = [str(lr) for lr in parameters['lr']]
hidden_ = [str(n_hidden) for n_hidden in parameters['n_hidden']]
im, cbar = heatmap(accuracy_mat, lr_, hidden_, ax=ax,
                   cmap='YlGn', cbarlabel='lr vs hidden_')
texts = annotate_heatmap(im, valfmt='{x:.1f} t')
fig.tight_layout()
plt.show()

# Train the selected model on the full training loader and score it on the test set.
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=True, num_workers=2)
bestmodeleval.train(trainloader, testloader, validation=False)
accuracy_test = bestmodeleval.test(testloader)
# ``g`` keeps small learning rates such as 1e-05 readable (``.2f`` printed
# them as 0.00); the hidden-unit count is an integer.
print('Accuracy of best model on test set with lr= {0:g}, hidden units= {1:d}, is {2:.2f}'.format(best_lr, best_n_hidden, accuracy_test))