In [1]:
import pickle, gzip, numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import math


def plot_images(X):
    if X.ndim == 1:
        X = np.array([X])
    num_images = X.shape[0]
    num_rows = math.floor(math.sqrt(num_images))
    num_cols = math.ceil(num_images/num_rows)
    for i in range(num_images):
        reshaped_image = X[i,:].reshape(28,28)
        plt.subplot(num_rows, num_cols, i+1)
        plt.imshow(reshaped_image, cmap = cm.Greys_r)
        plt.axis('off')
    plt.show()


def pick_examples_of(X, Y, labels, total_count):
    bool_arr = None
    for label in labels:
        bool_arr_for_label = (Y == label)
        if bool_arr is None:
            bool_arr = bool_arr_for_label
        else:
            bool_arr |= bool_arr_for_label
    filtered_x = X[bool_arr]
    filtered_y = Y[bool_arr]
    return (filtered_x[:total_count], filtered_y[:total_count])


def extract_training_and_test_examples_with_labels(train_x, train_y, test_x, test_y, labels, training_count, test_count):
    filtered_train_x, filtered_train_y = pick_examples_of(train_x, train_y, labels, training_count)
    filtered_test_x, filtered_test_y = pick_examples_of(test_x, test_y, labels, test_count)
    return (filtered_train_x, filtered_train_y, filtered_test_x, filtered_test_y)

def write_pickle_data(data, file_name):
    f = gzip.open(file_name, 'wb')
    pickle.dump(data, f)
    f.close()

def read_pickle_data(file_name):
    f = gzip.open(file_name, 'rb')
    data = pickle.load(f, encoding='latin1')
    f.close()
    return data

def get_MNIST_data():
    """
    Reads mnist dataset from file

    Returns:
        train_x - 2D Numpy array (n, d) where each row is an image
        train_y - 1D Numpy array (n, ) where each row is a label
        test_x  - 2D Numpy array (n, d) where each row is an image
        test_y  - 1D Numpy array (n, ) where each row is a label

    """
    train_set, valid_set, test_set = read_pickle_data('Datasets/mnist.pkl.gz')
    train_x, train_y = train_set
    valid_x, valid_y = valid_set
    train_x = np.vstack((train_x, valid_x))
    train_y = np.append(train_y, valid_y)
    test_x, test_y = test_set
    return (train_x, train_y, test_x, test_y)

def load_train_and_test_pickle(file_name):
    train_x, train_y, test_x, test_y = read_pickle_data(file_name)
    return train_x, train_y, test_x, test_y

# returns the feature set in a numpy ndarray
def load_CSV(filename):
    stuff = np.asarray(np.loadtxt(open(filename, 'rb'), delimiter=','))
    return stuff


In [2]:
import numpy as np
import math

"""
 ==================================
 Problem 3: Neural Network Basics
 ==================================
    Generates a neural network with the following architecture:
        Fully connected neural network.
        Input vector takes in two features.
        One hidden layer with three neurons whose activation function is ReLU.
        One output neuron whose activation function is the identity function.
"""


def rectified_linear_unit(x):
    """ Returns the ReLU of x, or the maximum between 0 and x."""
    return np.maximum(0, x)

def rectified_linear_unit_derivative(x):
    """Returns the derivative of ReLU."""
    if x > 0:
        return 1
    else:
        return 0

def output_layer_activation(x):
    """ Linear function, returns input as is. """
    return x

def output_layer_activation_derivative(x):
    """ Returns the derivative of a linear function: 1. """
    return 1

class NeuralNetwork():
    """
        Contains the following functions:
            -train: tunes parameters of the neural network based on error obtained from forward propagation.
            -predict: predicts the label of a feature vector based on the class's parameters.
            -train_neural_network: trains a neural network over all the data points for the specified number of epochs during initialization of the class.
            -test_neural_network: uses the parameters specified at the time in order to test that the neural network classifies the points given in testing_points within a margin of error.
    """

    def __init__(self):

        # DO NOT CHANGE PARAMETERS (Initialized to floats instead of ints)
        self.input_to_hidden_weights = np.matrix('1. 1.; 1. 1.; 1. 1.')
        self.hidden_to_output_weights = np.matrix('1. 1. 1.')
        self.biases = np.matrix('0.; 0.; 0.')
        self.learning_rate = .001
        self.epochs_to_train = 10
        self.training_points = [((2,1), 10), ((3,3), 21), ((4,5), 32), ((6, 6), 42)]
        self.testing_points = [(1,1), (2,2), (3,3), (5,5), (10,10)]

    def train(self, x1, x2, y):

        ### Forward propagation ###
        input_values = np.matrix([[x1], [x2]])  # 2 by 1

        # Calculate the input and activation of the hidden layer
        hidden_layer_weighted_input = (
                self.input_to_hidden_weights.dot(input_values) + self.biases
        )  # (3 by 1 matrix)
        ReLU_vec = np.vectorize(rectified_linear_unit)  # Vectorize ReLU function
        hidden_layer_activation = ReLU_vec(
            hidden_layer_weighted_input
        )  # (3 by 1 matrix)

        output = self.hidden_to_output_weights.dot(hidden_layer_activation)
        activated_output = output_layer_activation(output)

        ### Backpropagation ###

        # Compute gradients
        output_layer_error = -(y - activated_output)  # dC/df(u1)

        output_derivative_vec = np.vectorize(
            output_layer_activation_derivative
        )  # Vectorize derivative of output activation
        hidden_layer_error = (
                np.multiply(
                    output_derivative_vec(activated_output),
                    self.hidden_to_output_weights.transpose(),
                )
                * output_layer_error
        )  # (3 by 1 matrix)

        ReLU_derivative_vec = np.vectorize(
            rectified_linear_unit_derivative
        )  # Vectorize ReLU derivative
        bias_gradients = np.multiply(
            hidden_layer_error, ReLU_derivative_vec(hidden_layer_weighted_input)
        )  # dC/db

        hidden_to_output_weight_gradients = np.multiply(
            hidden_layer_activation, output_layer_error
        ).transpose()  # dC/dV
        input_to_hidden_weight_gradients = bias_gradients.dot(
            input_values.transpose()
        )  # dC/dW

        # Use gradients to adjust weights and biases using gradient descent
        self.biases = self.biases - self.learning_rate * bias_gradients
        self.input_to_hidden_weights = (
                self.input_to_hidden_weights
                - self.learning_rate * input_to_hidden_weight_gradients
        )
        self.hidden_to_output_weights = (
                self.hidden_to_output_weights
                - self.learning_rate * hidden_to_output_weight_gradients
        )

    def predict(self, x1, x2):

        input_values = np.matrix([[x1], [x2]])

        # Compute output for a single input(should be same as the forward propagation in training)
        hidden_layer_weighted_input = (
                self.input_to_hidden_weights.dot(input_values) + self.biases
        )
        ReLU_vec = np.vectorize(rectified_linear_unit)
        hidden_layer_activation = ReLU_vec(hidden_layer_weighted_input)

        output = self.hidden_to_output_weights.dot(hidden_layer_activation)
        activated_output = output_layer_activation(output)

        return activated_output.item()

    # Run this to train your neural network once you complete the train method
    def train_neural_network(self):

        for epoch in range(self.epochs_to_train):
            for x,y in self.training_points:
                self.train(x[0], x[1], y)

    # Run this to test your neural network implementation for correctness after it is trained
    def test_neural_network(self):

        for point in self.testing_points:
            print("Point,", point, "Prediction,", self.predict(point[0], point[1]))
            if abs(self.predict(point[0], point[1]) - 7*point[0]) < 0.1:
                print("Test Passed")
            else:
                print("Point ", point[0], point[1], " failed to be predicted correctly.")
                return

x = NeuralNetwork()

x.train_neural_network()

In [3]:
x.test_neural_network()

Point, (1, 1) Prediction, 7.038453196038151
Test Passed
Point, (2, 2) Prediction, 14.042814826755537
Test Passed
Point, (3, 3) Prediction, 21.047176457472922
Test Passed
Point, (5, 5) Prediction, 35.055899718907696
Test Passed
Point, (10, 10) Prediction, 70.07770787249461
Test Passed


In [5]:
"""Training utilities."""

from tqdm import tqdm
import numpy as np
import torch
import torch.nn.functional as F
import torch.nn as nn


class Flatten(nn.Module):
    """A custom layer that views an input as 1D."""

    def forward(self, input):
        return input.view(input.size(0), -1)

# Helpers
def batchify_data(x_data, y_data, batch_size):
    """Takes a set of data points and labels and groups them into batches."""
    # Only take batch_size chunks (i.e. drop the remainder)
    N = int(len(x_data) / batch_size) * batch_size
    batches = []
    for i in range(0, N, batch_size):
        batches.append({
            'x': torch.tensor(x_data[i:i+batch_size], dtype=torch.float32),
            'y': torch.tensor(y_data[i:i+batch_size], dtype=torch.long
                              )})
    return batches

def compute_accuracy(predictions, y):
    """Computes the accuracy of predictions against the gold labels, y."""
    return np.mean(np.equal(predictions.numpy(), y.numpy()))


# Training Procedure
def train_model(train_data, dev_data, model, lr=0.01, momentum=0.9, nesterov=False, n_epochs=30):
    """Train a model for N epochs given data and hyper-params."""
    # We optimize with SGD
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum, nesterov=nesterov)

    for epoch in range(1, 11):
        print("-------------\nEpoch {}:\n".format(epoch))


        # Run **training***
        loss, acc = run_epoch(train_data, model.train(), optimizer)
        print('Train loss: {:.6f} | Train accuracy: {:.6f}'.format(loss, acc))

        # Run **validation**
        val_loss, val_acc = run_epoch(dev_data, model.eval(), optimizer)
        print('Val loss:   {:.6f} | Val accuracy:   {:.6f}'.format(val_loss, val_acc))
        # Save model
        torch.save(model, 'mnist_model_fully_connected.pt')
    return val_acc

def run_epoch(data, model, optimizer):
    """Train model for one pass of train data, and return loss, acccuracy"""
    # Gather losses
    losses = []
    batch_accuracies = []

    # If model is in train mode, use optimizer.
    is_training = model.training

    # Iterate through batches
    for batch in tqdm(data):
        # Grab x and y
        x, y = batch['x'], batch['y']

        # Get output predictions
        out = model(x)

        # Predict and store accuracy
        predictions = torch.argmax(out, dim=1)
        batch_accuracies.append(compute_accuracy(predictions, y))

        # Compute loss
        loss = F.cross_entropy(out, y)
        losses.append(loss.data.item())

        # If training, do an update.
        if is_training:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Calculate epoch level scores
    avg_loss = np.mean(losses)
    avg_accuracy = np.mean(batch_accuracies)
    return avg_loss, avg_accuracy


#  Training and Testing Accuracy Over Time 

In [7]:
import _pickle as cPickle, gzip
import numpy as np
from tqdm import tqdm
import torch
import torch.autograd as autograd
import torch.nn.functional as F
import torch.nn as nn
import sys
sys.path.append("..")
import warnings
# Suppress warnings:
def warn(*args, **kwargs):
    pass

warnings.warn = warn

def main():
    # Load the dataset
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Split into train and dev
    dev_split_index = int(9 * len(X_train) / 10)
    X_dev = X_train[dev_split_index:]
    y_dev = y_train[dev_split_index:]
    X_train = X_train[:dev_split_index]
    y_train = y_train[:dev_split_index]

    permutation = np.array([i for i in range(len(X_train))])
    np.random.shuffle(permutation)
    X_train = [X_train[i] for i in permutation]
    y_train = [y_train[i] for i in permutation]

    # Split dataset into batches
    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    #################################
    ## Model specification TODO
    model = nn.Sequential(
        nn.Linear(784, 10),
        nn.ReLU(),
        nn.Linear(10, 10),
    )
    lr=0.1
    momentum=0
    ##################################

    train_model(train_batches, dev_batches, model, lr=lr, momentum=momentum)

    ## Evaluate the model on test data
    loss, accuracy = run_epoch(test_batches, model.eval(), None)

    print ("Loss on test set:"  + str(loss) + " Accuracy on test set: " + str(accuracy))


if __name__ == '__main__':
    # Specify seed for deterministic behavior, then shuffle. Do not change seed for official submissions to edx
    np.random.seed(12321)  # for reproducibility
    torch.manual_seed(12321)  # for reproducibility
    main()

-------------
Epoch 1:


100%|██████████| 1687/1687 [00:03<00:00, 492.02it/s]


Train loss: 0.445684 | Train accuracy: 0.870758


100%|██████████| 187/187 [00:00<00:00, 2047.95it/s]


Val loss:   0.260342 | Val accuracy:   0.923630
-------------
Epoch 2:


100%|██████████| 1687/1687 [00:03<00:00, 547.49it/s]


Train loss: 0.303323 | Train accuracy: 0.912289


100%|██████████| 187/187 [00:00<00:00, 1977.97it/s]


Val loss:   0.239971 | Val accuracy:   0.929479
-------------
Epoch 3:


100%|██████████| 1687/1687 [00:03<00:00, 525.10it/s]


Train loss: 0.281178 | Train accuracy: 0.918920


100%|██████████| 187/187 [00:00<00:00, 1554.71it/s]


Val loss:   0.232849 | Val accuracy:   0.932152
-------------
Epoch 4:


100%|██████████| 1687/1687 [00:03<00:00, 524.37it/s]


Train loss: 0.266577 | Train accuracy: 0.923088


100%|██████████| 187/187 [00:00<00:00, 1722.85it/s]


Val loss:   0.231824 | Val accuracy:   0.930983
-------------
Epoch 5:


100%|██████████| 1687/1687 [00:03<00:00, 524.77it/s]


Train loss: 0.255165 | Train accuracy: 0.926460


100%|██████████| 187/187 [00:00<00:00, 2042.38it/s]


Val loss:   0.230423 | Val accuracy:   0.931317
-------------
Epoch 6:


100%|██████████| 1687/1687 [00:03<00:00, 537.30it/s]


Train loss: 0.245930 | Train accuracy: 0.928442


100%|██████████| 187/187 [00:00<00:00, 2265.56it/s]


Val loss:   0.229930 | Val accuracy:   0.931150
-------------
Epoch 7:


100%|██████████| 1687/1687 [00:03<00:00, 542.40it/s]


Train loss: 0.238349 | Train accuracy: 0.930702


100%|██████████| 187/187 [00:00<00:00, 2212.25it/s]


Val loss:   0.231337 | Val accuracy:   0.930648
-------------
Epoch 8:


100%|██████████| 1687/1687 [00:03<00:00, 464.30it/s]


Train loss: 0.232549 | Train accuracy: 0.932276


100%|██████████| 187/187 [00:00<00:00, 1632.62it/s]


Val loss:   0.229597 | Val accuracy:   0.932487
-------------
Epoch 9:


100%|██████████| 1687/1687 [00:03<00:00, 454.26it/s]


Train loss: 0.227751 | Train accuracy: 0.933888


100%|██████████| 187/187 [00:00<00:00, 1825.35it/s]


Val loss:   0.229607 | Val accuracy:   0.932487
-------------
Epoch 10:


100%|██████████| 1687/1687 [00:03<00:00, 522.31it/s]


Train loss: 0.223966 | Train accuracy: 0.935147


100%|██████████| 187/187 [00:00<00:00, 1590.55it/s]


Val loss:   0.228388 | Val accuracy:   0.932487


100%|██████████| 312/312 [00:00<00:00, 1846.79it/s]


Loss on test set:0.26722689023993623 Accuracy on test set: 0.9204727564102564


# Improving Accuracy 

In [8]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

def main(modification):
    # Set seeds
    np.random.seed(12321)
    torch.manual_seed(12321)

    # Load the dataset
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Split into train and dev
    dev_split_index = int(9 * len(X_train) / 10)
    X_dev = X_train[dev_split_index:]
    y_dev = y_train[dev_split_index:]
    X_train = X_train[:dev_split_index]
    y_train = y_train[:dev_split_index]

    permutation = np.array([i for i in range(len(X_train))])
    np.random.shuffle(permutation)
    X_train = [X_train[i] for i in permutation]
    y_train = [y_train[i] for i in permutation]

    # Set default values
    batch_size = 32
    lr = 0.1
    momentum = 0
    activation_function = nn.ReLU()

    # Make necessary changes based on the modification
    if modification == "batch size 64":
        batch_size = 64
    elif modification == "learning rate 0.01":
        lr = 0.01
    elif modification == "momentum 0.9":
        momentum = 0.9
    elif modification == "LeakyReLU activation":
        activation_function = nn.LeakyReLU()

    # Split dataset into batches
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Model
    model = nn.Sequential(
        nn.Linear(784, 10),
        activation_function,
        nn.Linear(10, 10),
    )

    # Train
    val_accuracy = train_model(train_batches, dev_batches, model, lr=lr, momentum=momentum)

    return val_accuracy


if __name__ == '__main__':
    best_accuracy = 0
    best_modification = "baseline"

    modifications = ["baseline", "batch size 64", "learning rate 0.01", "momentum 0.9", "LeakyReLU activation"]

    for modification in modifications:
        val_accuracy = main(modification)
        print(modification, "achieved validation accuracy of:", val_accuracy)

        # Update best accuracy if this run is better
        if val_accuracy > best_accuracy:
            best_accuracy = val_accuracy
            best_modification = modification

    print("Best modification is:", best_modification, "with validation accuracy:", best_accuracy)

-------------
Epoch 1:


100%|██████████| 1687/1687 [00:02<00:00, 596.04it/s]


Train loss: 0.445684 | Train accuracy: 0.870758


100%|██████████| 187/187 [00:00<00:00, 2160.79it/s]


Val loss:   0.260342 | Val accuracy:   0.923630
-------------
Epoch 2:


100%|██████████| 1687/1687 [00:02<00:00, 605.29it/s]


Train loss: 0.303323 | Train accuracy: 0.912289


100%|██████████| 187/187 [00:00<00:00, 1738.69it/s]


Val loss:   0.239971 | Val accuracy:   0.929479
-------------
Epoch 3:


100%|██████████| 1687/1687 [00:02<00:00, 577.10it/s]


Train loss: 0.281178 | Train accuracy: 0.918920


100%|██████████| 187/187 [00:00<00:00, 2239.13it/s]


Val loss:   0.232849 | Val accuracy:   0.932152
-------------
Epoch 4:


100%|██████████| 1687/1687 [00:03<00:00, 540.18it/s]


Train loss: 0.266577 | Train accuracy: 0.923088


100%|██████████| 187/187 [00:00<00:00, 2077.11it/s]


Val loss:   0.231824 | Val accuracy:   0.930983
-------------
Epoch 5:


100%|██████████| 1687/1687 [00:03<00:00, 543.07it/s]


Train loss: 0.255165 | Train accuracy: 0.926460


100%|██████████| 187/187 [00:00<00:00, 1806.16it/s]


Val loss:   0.230423 | Val accuracy:   0.931317
-------------
Epoch 6:


100%|██████████| 1687/1687 [00:03<00:00, 551.40it/s]


Train loss: 0.245930 | Train accuracy: 0.928442


100%|██████████| 187/187 [00:00<00:00, 2112.22it/s]


Val loss:   0.229930 | Val accuracy:   0.931150
-------------
Epoch 7:


100%|██████████| 1687/1687 [00:03<00:00, 545.18it/s]


Train loss: 0.238349 | Train accuracy: 0.930702


100%|██████████| 187/187 [00:00<00:00, 2093.37it/s]


Val loss:   0.231337 | Val accuracy:   0.930648
-------------
Epoch 8:


100%|██████████| 1687/1687 [00:03<00:00, 532.15it/s]


Train loss: 0.232549 | Train accuracy: 0.932276


100%|██████████| 187/187 [00:00<00:00, 1979.58it/s]


Val loss:   0.229597 | Val accuracy:   0.932487
-------------
Epoch 9:


100%|██████████| 1687/1687 [00:03<00:00, 562.20it/s]


Train loss: 0.227751 | Train accuracy: 0.933888


100%|██████████| 187/187 [00:00<00:00, 2229.69it/s]


Val loss:   0.229607 | Val accuracy:   0.932487
-------------
Epoch 10:


100%|██████████| 1687/1687 [00:03<00:00, 551.86it/s]


Train loss: 0.223966 | Train accuracy: 0.935147


100%|██████████| 187/187 [00:00<00:00, 2211.23it/s]


Val loss:   0.228388 | Val accuracy:   0.932487
baseline achieved validation accuracy of: 0.9324866310160428
-------------
Epoch 1:


100%|██████████| 843/843 [00:01<00:00, 536.61it/s]


Train loss: 0.528406 | Train accuracy: 0.851016


100%|██████████| 93/93 [00:00<00:00, 2268.54it/s]


Val loss:   0.270610 | Val accuracy:   0.922715
-------------
Epoch 2:


100%|██████████| 843/843 [00:01<00:00, 521.91it/s]


Train loss: 0.312326 | Train accuracy: 0.910569


100%|██████████| 93/93 [00:00<00:00, 2042.36it/s]


Val loss:   0.245788 | Val accuracy:   0.927923
-------------
Epoch 3:


100%|██████████| 843/843 [00:01<00:00, 510.45it/s]


Train loss: 0.287699 | Train accuracy: 0.917927


100%|██████████| 93/93 [00:00<00:00, 2066.83it/s]


Val loss:   0.234042 | Val accuracy:   0.930948
-------------
Epoch 4:


100%|██████████| 843/843 [00:01<00:00, 496.56it/s]


Train loss: 0.273053 | Train accuracy: 0.922505


100%|██████████| 93/93 [00:00<00:00, 1956.31it/s]


Val loss:   0.225923 | Val accuracy:   0.935316
-------------
Epoch 5:


100%|██████████| 843/843 [00:01<00:00, 492.56it/s]


Train loss: 0.262140 | Train accuracy: 0.924655


100%|██████████| 93/93 [00:00<00:00, 1660.54it/s]


Val loss:   0.220798 | Val accuracy:   0.936828
-------------
Epoch 6:


100%|██████████| 843/843 [00:01<00:00, 498.62it/s]


Train loss: 0.253124 | Train accuracy: 0.927157


100%|██████████| 93/93 [00:00<00:00, 1915.60it/s]


Val loss:   0.216004 | Val accuracy:   0.937164
-------------
Epoch 7:


100%|██████████| 843/843 [00:01<00:00, 504.40it/s]


Train loss: 0.244414 | Train accuracy: 0.929660


100%|██████████| 93/93 [00:00<00:00, 1575.86it/s]


Val loss:   0.213142 | Val accuracy:   0.937668
-------------
Epoch 8:


100%|██████████| 843/843 [00:01<00:00, 465.22it/s]


Train loss: 0.237167 | Train accuracy: 0.931606


100%|██████████| 93/93 [00:00<00:00, 1660.83it/s]


Val loss:   0.210919 | Val accuracy:   0.939684
-------------
Epoch 9:


100%|██████████| 843/843 [00:01<00:00, 504.61it/s]


Train loss: 0.230904 | Train accuracy: 0.933459


100%|██████████| 93/93 [00:00<00:00, 2103.96it/s]


Val loss:   0.209108 | Val accuracy:   0.941364
-------------
Epoch 10:


100%|██████████| 843/843 [00:01<00:00, 514.87it/s]


Train loss: 0.225577 | Train accuracy: 0.934794


100%|██████████| 93/93 [00:00<00:00, 1840.74it/s]


Val loss:   0.208171 | Val accuracy:   0.940020
batch size 64 achieved validation accuracy of: 0.9400201612903226
-------------
Epoch 1:


100%|██████████| 1687/1687 [00:02<00:00, 571.90it/s]


Train loss: 1.060107 | Train accuracy: 0.720417


100%|██████████| 187/187 [00:00<00:00, 1722.62it/s]


Val loss:   0.422035 | Val accuracy:   0.895388
-------------
Epoch 2:


100%|██████████| 1687/1687 [00:03<00:00, 546.42it/s]


Train loss: 0.430563 | Train accuracy: 0.882576


100%|██████████| 187/187 [00:00<00:00, 2075.27it/s]


Val loss:   0.317500 | Val accuracy:   0.914940
-------------
Epoch 3:


100%|██████████| 1687/1687 [00:03<00:00, 539.64it/s]


Train loss: 0.367536 | Train accuracy: 0.897044


100%|██████████| 187/187 [00:00<00:00, 2308.22it/s]


Val loss:   0.285166 | Val accuracy:   0.922293
-------------
Epoch 4:


100%|██████████| 1687/1687 [00:03<00:00, 503.23it/s]


Train loss: 0.340316 | Train accuracy: 0.904157


100%|██████████| 187/187 [00:00<00:00, 2065.77it/s]


Val loss:   0.268281 | Val accuracy:   0.926303
-------------
Epoch 5:


100%|██████████| 1687/1687 [00:03<00:00, 536.96it/s]


Train loss: 0.323848 | Train accuracy: 0.907880


100%|██████████| 187/187 [00:00<00:00, 2238.73it/s]


Val loss:   0.257622 | Val accuracy:   0.927640
-------------
Epoch 6:


100%|██████████| 1687/1687 [00:03<00:00, 532.99it/s]


Train loss: 0.312422 | Train accuracy: 0.910862


100%|██████████| 187/187 [00:00<00:00, 2211.88it/s]


Val loss:   0.250134 | Val accuracy:   0.928476
-------------
Epoch 7:


100%|██████████| 1687/1687 [00:03<00:00, 541.55it/s]


Train loss: 0.303781 | Train accuracy: 0.913289


100%|██████████| 187/187 [00:00<00:00, 2211.59it/s]


Val loss:   0.244547 | Val accuracy:   0.930983
-------------
Epoch 8:


100%|██████████| 1687/1687 [00:03<00:00, 533.02it/s]


Train loss: 0.296870 | Train accuracy: 0.915142


100%|██████████| 187/187 [00:00<00:00, 2065.72it/s]


Val loss:   0.240068 | Val accuracy:   0.931818
-------------
Epoch 9:


100%|██████████| 1687/1687 [00:03<00:00, 534.20it/s]


Train loss: 0.291028 | Train accuracy: 0.916938


100%|██████████| 187/187 [00:00<00:00, 2087.72it/s]


Val loss:   0.236431 | Val accuracy:   0.933322
-------------
Epoch 10:


100%|██████████| 1687/1687 [00:03<00:00, 540.99it/s]


Train loss: 0.285981 | Train accuracy: 0.918179


100%|██████████| 187/187 [00:00<00:00, 2337.50it/s]


Val loss:   0.233266 | Val accuracy:   0.934659
learning rate 0.01 achieved validation accuracy of: 0.9346590909090909
-------------
Epoch 1:


100%|██████████| 1687/1687 [00:03<00:00, 507.00it/s]


Train loss: 0.610020 | Train accuracy: 0.823633


100%|██████████| 187/187 [00:00<00:00, 2351.00it/s]


Val loss:   0.518295 | Val accuracy:   0.846925
-------------
Epoch 2:


100%|██████████| 1687/1687 [00:03<00:00, 499.04it/s]


Train loss: 0.522522 | Train accuracy: 0.858569


100%|██████████| 187/187 [00:00<00:00, 2265.32it/s]


Val loss:   0.387432 | Val accuracy:   0.894886
-------------
Epoch 3:


100%|██████████| 1687/1687 [00:03<00:00, 517.91it/s]


Train loss: 0.497169 | Train accuracy: 0.865664


100%|██████████| 187/187 [00:00<00:00, 2350.60it/s]


Val loss:   0.573270 | Val accuracy:   0.865307
-------------
Epoch 4:


100%|██████████| 1687/1687 [00:03<00:00, 504.78it/s]


Train loss: 0.503275 | Train accuracy: 0.866609


100%|██████████| 187/187 [00:00<00:00, 2124.46it/s]


Val loss:   0.399847 | Val accuracy:   0.897727
-------------
Epoch 5:


100%|██████████| 1687/1687 [00:03<00:00, 476.96it/s]


Train loss: 0.507508 | Train accuracy: 0.866164


100%|██████████| 187/187 [00:00<00:00, 2212.42it/s]


Val loss:   0.421162 | Val accuracy:   0.880013
-------------
Epoch 6:


100%|██████████| 1687/1687 [00:03<00:00, 504.76it/s]


Train loss: 0.545011 | Train accuracy: 0.860903


100%|██████████| 187/187 [00:00<00:00, 2136.17it/s]


Val loss:   0.413657 | Val accuracy:   0.901905
-------------
Epoch 7:


100%|██████████| 1687/1687 [00:03<00:00, 505.30it/s]


Train loss: 0.495312 | Train accuracy: 0.875259


100%|██████████| 187/187 [00:00<00:00, 2671.39it/s]


Val loss:   0.403087 | Val accuracy:   0.902741
-------------
Epoch 8:


100%|██████████| 1687/1687 [00:03<00:00, 504.45it/s]


Train loss: 0.470934 | Train accuracy: 0.883558


100%|██████████| 187/187 [00:00<00:00, 2443.16it/s]


Val loss:   0.467854 | Val accuracy:   0.892714
-------------
Epoch 9:


100%|██████████| 1687/1687 [00:03<00:00, 499.04it/s]


Train loss: 0.487413 | Train accuracy: 0.878501


100%|██████████| 187/187 [00:00<00:00, 2186.38it/s]


Val loss:   0.487316 | Val accuracy:   0.886531
-------------
Epoch 10:


100%|██████████| 1687/1687 [00:03<00:00, 482.46it/s]


Train loss: 0.516685 | Train accuracy: 0.872907


100%|██████████| 187/187 [00:00<00:00, 2027.73it/s]


Val loss:   0.481226 | Val accuracy:   0.885862
momentum 0.9 achieved validation accuracy of: 0.8858622994652406
-------------
Epoch 1:


100%|██████████| 1687/1687 [00:03<00:00, 550.93it/s]


Train loss: 0.444938 | Train accuracy: 0.871128


100%|██████████| 187/187 [00:00<00:00, 2160.77it/s]


Val loss:   0.261497 | Val accuracy:   0.922627
-------------
Epoch 2:


100%|██████████| 1687/1687 [00:03<00:00, 519.58it/s]


Train loss: 0.303264 | Train accuracy: 0.913011


100%|██████████| 187/187 [00:00<00:00, 2444.26it/s]


Val loss:   0.241508 | Val accuracy:   0.928810
-------------
Epoch 3:


100%|██████████| 1687/1687 [00:03<00:00, 528.91it/s]


Train loss: 0.281472 | Train accuracy: 0.918809


100%|██████████| 187/187 [00:00<00:00, 2389.12it/s]


Val loss:   0.235257 | Val accuracy:   0.929646
-------------
Epoch 4:


100%|██████████| 1687/1687 [00:03<00:00, 535.87it/s]


Train loss: 0.267219 | Train accuracy: 0.923551


100%|██████████| 187/187 [00:00<00:00, 2350.77it/s]


Val loss:   0.235469 | Val accuracy:   0.930481
-------------
Epoch 5:


100%|██████████| 1687/1687 [00:03<00:00, 543.44it/s]


Train loss: 0.256387 | Train accuracy: 0.926052


100%|██████████| 187/187 [00:00<00:00, 2293.34it/s]


Val loss:   0.233859 | Val accuracy:   0.930816
-------------
Epoch 6:


100%|██████████| 1687/1687 [00:03<00:00, 521.91it/s]


Train loss: 0.247782 | Train accuracy: 0.928534


100%|██████████| 187/187 [00:00<00:00, 2211.42it/s]


Val loss:   0.229305 | Val accuracy:   0.930816
-------------
Epoch 7:


100%|██████████| 1687/1687 [00:03<00:00, 525.62it/s]


Train loss: 0.240420 | Train accuracy: 0.930553


100%|██████████| 187/187 [00:00<00:00, 2105.52it/s]


Val loss:   0.229741 | Val accuracy:   0.931317
-------------
Epoch 8:


100%|██████████| 1687/1687 [00:03<00:00, 526.85it/s]


Train loss: 0.234297 | Train accuracy: 0.932758


100%|██████████| 187/187 [00:00<00:00, 2136.51it/s]


Val loss:   0.228470 | Val accuracy:   0.933322
-------------
Epoch 9:


100%|██████████| 1687/1687 [00:03<00:00, 520.11it/s]


Train loss: 0.228930 | Train accuracy: 0.934610


100%|██████████| 187/187 [00:00<00:00, 2252.88it/s]


Val loss:   0.228252 | Val accuracy:   0.932487
-------------
Epoch 10:


100%|██████████| 1687/1687 [00:03<00:00, 524.54it/s]


Train loss: 0.224515 | Train accuracy: 0.935147


100%|██████████| 187/187 [00:00<00:00, 2065.41it/s]


Val loss:   0.227615 | Val accuracy:   0.931985
LeakyReLU activation achieved validation accuracy of: 0.9319852941176471
Best modification is: batch size 64 with validation accuracy: 0.9400201612903226


In [9]:
def main(modification):
    # Set seeds
    np.random.seed(12321)
    torch.manual_seed(12321)

    # Load the dataset
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Split into train and dev
    dev_split_index = int(9 * len(X_train) / 10)
    X_dev = X_train[dev_split_index:]
    y_dev = y_train[dev_split_index:]
    X_train = X_train[:dev_split_index]
    y_train = y_train[:dev_split_index]

    permutation = np.array([i for i in range(len(X_train))])
    np.random.shuffle(permutation)
    X_train = [X_train[i] for i in permutation]
    y_train = [y_train[i] for i in permutation]

    # Set default values
    batch_size = 32
    lr = 0.1
    momentum = 0
    activation_function = nn.ReLU()

    # Make necessary changes based on the modification
    if modification == "batch size 64":
        batch_size = 64
    elif modification == "learning rate 0.01":
        lr = 0.01
    elif modification == "momentum 0.9":
        momentum = 0.9
    elif modification == "LeakyReLU activation":
        activation_function = nn.LeakyReLU()

    # Split dataset into batches
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Model
    model = nn.Sequential(
        nn.Linear(784, 10),
        activation_function,
        nn.Linear(10, 10),
    )

    # Train
    val_accuracy = train_model(train_batches, dev_batches, model, lr=lr, momentum=momentum)
    _, test_accuracy = run_epoch(test_batches, model.eval(), None)

    return val_accuracy, test_accuracy


if __name__ == '__main__':
    best_val_accuracy = 0
    best_test_accuracy = 0
    best_modification_val = "baseline"
    best_modification_test = "baseline"

    modifications = ["baseline", "batch size 64", "learning rate 0.01", "momentum 0.9", "LeakyReLU activation"]

    for modification in modifications:
        val_accuracy, test_accuracy = main(modification)
        print(modification, "achieved validation accuracy of:", val_accuracy, "and test accuracy of:", test_accuracy)

        # Update best accuracies if this run is better
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            best_modification_val = modification
        if test_accuracy > best_test_accuracy:
            best_test_accuracy = test_accuracy
            best_modification_test = modification

    print("Best modification based on validation accuracy is:", best_modification_val, "with accuracy:", best_val_accuracy)
    print("Best modification based on test accuracy is:", best_modification_test, "with accuracy:", best_test_accuracy)

-------------
Epoch 1:


100%|██████████| 1687/1687 [00:02<00:00, 595.73it/s]


Train loss: 0.445684 | Train accuracy: 0.870758


100%|██████████| 187/187 [00:00<00:00, 2688.38it/s]


Val loss:   0.260342 | Val accuracy:   0.923630
-------------
Epoch 2:


100%|██████████| 1687/1687 [00:02<00:00, 577.48it/s]


Train loss: 0.303323 | Train accuracy: 0.912289


100%|██████████| 187/187 [00:00<00:00, 2507.80it/s]


Val loss:   0.239971 | Val accuracy:   0.929479
-------------
Epoch 3:


100%|██████████| 1687/1687 [00:03<00:00, 509.18it/s]


Train loss: 0.281178 | Train accuracy: 0.918920


100%|██████████| 187/187 [00:00<00:00, 2185.90it/s]


Val loss:   0.232849 | Val accuracy:   0.932152
-------------
Epoch 4:


100%|██████████| 1687/1687 [00:03<00:00, 510.87it/s]


Train loss: 0.266577 | Train accuracy: 0.923088


100%|██████████| 187/187 [00:00<00:00, 1878.45it/s]


Val loss:   0.231824 | Val accuracy:   0.930983
-------------
Epoch 5:


100%|██████████| 1687/1687 [00:03<00:00, 529.01it/s]


Train loss: 0.255165 | Train accuracy: 0.926460


100%|██████████| 187/187 [00:00<00:00, 2351.53it/s]


Val loss:   0.230423 | Val accuracy:   0.931317
-------------
Epoch 6:


100%|██████████| 1687/1687 [00:03<00:00, 541.88it/s]


Train loss: 0.245930 | Train accuracy: 0.928442


100%|██████████| 187/187 [00:00<00:00, 1402.16it/s]


Val loss:   0.229930 | Val accuracy:   0.931150
-------------
Epoch 7:


100%|██████████| 1687/1687 [00:03<00:00, 526.45it/s]


Train loss: 0.238349 | Train accuracy: 0.930702


100%|██████████| 187/187 [00:00<00:00, 1776.38it/s]


Val loss:   0.231337 | Val accuracy:   0.930648
-------------
Epoch 8:


100%|██████████| 1687/1687 [00:03<00:00, 529.03it/s]


Train loss: 0.232549 | Train accuracy: 0.932276


100%|██████████| 187/187 [00:00<00:00, 1878.24it/s]


Val loss:   0.229597 | Val accuracy:   0.932487
-------------
Epoch 9:


100%|██████████| 1687/1687 [00:03<00:00, 543.20it/s]


Train loss: 0.227751 | Train accuracy: 0.933888


100%|██████████| 187/187 [00:00<00:00, 2211.59it/s]


Val loss:   0.229607 | Val accuracy:   0.932487
-------------
Epoch 10:


100%|██████████| 1687/1687 [00:03<00:00, 528.91it/s]


Train loss: 0.223966 | Train accuracy: 0.935147


100%|██████████| 187/187 [00:00<00:00, 2383.55it/s]


Val loss:   0.228388 | Val accuracy:   0.932487


100%|██████████| 312/312 [00:00<00:00, 2032.97it/s]


baseline achieved validation accuracy of: 0.9324866310160428 and test accuracy of: 0.9204727564102564
-------------
Epoch 1:


100%|██████████| 843/843 [00:01<00:00, 531.44it/s]


Train loss: 0.528406 | Train accuracy: 0.851016


100%|██████████| 93/93 [00:00<00:00, 1937.78it/s]


Val loss:   0.270610 | Val accuracy:   0.922715
-------------
Epoch 2:


100%|██████████| 843/843 [00:01<00:00, 494.98it/s]


Train loss: 0.312326 | Train accuracy: 0.910569


100%|██████████| 93/93 [00:00<00:00, 1759.74it/s]


Val loss:   0.245788 | Val accuracy:   0.927923
-------------
Epoch 3:


100%|██████████| 843/843 [00:01<00:00, 504.68it/s]


Train loss: 0.287699 | Train accuracy: 0.917927


100%|██████████| 93/93 [00:00<00:00, 1456.56it/s]


Val loss:   0.234042 | Val accuracy:   0.930948
-------------
Epoch 4:


100%|██████████| 843/843 [00:01<00:00, 502.53it/s]


Train loss: 0.273053 | Train accuracy: 0.922505


100%|██████████| 93/93 [00:00<00:00, 1877.57it/s]


Val loss:   0.225923 | Val accuracy:   0.935316
-------------
Epoch 5:


100%|██████████| 843/843 [00:01<00:00, 494.51it/s]


Train loss: 0.262140 | Train accuracy: 0.924655


100%|██████████| 93/93 [00:00<00:00, 2077.43it/s]


Val loss:   0.220798 | Val accuracy:   0.936828
-------------
Epoch 6:


100%|██████████| 843/843 [00:01<00:00, 486.95it/s]


Train loss: 0.253124 | Train accuracy: 0.927157


100%|██████████| 93/93 [00:00<00:00, 1690.42it/s]


Val loss:   0.216004 | Val accuracy:   0.937164
-------------
Epoch 7:


100%|██████████| 843/843 [00:01<00:00, 478.98it/s]


Train loss: 0.244414 | Train accuracy: 0.929660


100%|██████████| 93/93 [00:00<00:00, 1704.17it/s]


Val loss:   0.213142 | Val accuracy:   0.937668
-------------
Epoch 8:


100%|██████████| 843/843 [00:01<00:00, 448.82it/s]


Train loss: 0.237167 | Train accuracy: 0.931606


100%|██████████| 93/93 [00:00<00:00, 1754.55it/s]


Val loss:   0.210919 | Val accuracy:   0.939684
-------------
Epoch 9:


100%|██████████| 843/843 [00:01<00:00, 494.70it/s]


Train loss: 0.230904 | Train accuracy: 0.933459


100%|██████████| 93/93 [00:00<00:00, 1974.60it/s]


Val loss:   0.209108 | Val accuracy:   0.941364
-------------
Epoch 10:


100%|██████████| 843/843 [00:01<00:00, 497.30it/s]


Train loss: 0.225577 | Train accuracy: 0.934794


100%|██████████| 93/93 [00:00<00:00, 2213.87it/s]


Val loss:   0.208171 | Val accuracy:   0.940020


100%|██████████| 156/156 [00:00<00:00, 2054.38it/s]


batch size 64 achieved validation accuracy of: 0.9400201612903226 and test accuracy of: 0.9314903846153846
-------------
Epoch 1:


100%|██████████| 1687/1687 [00:03<00:00, 551.93it/s]


Train loss: 1.060107 | Train accuracy: 0.720417


100%|██████████| 187/187 [00:00<00:00, 2272.79it/s]


Val loss:   0.422035 | Val accuracy:   0.895388
-------------
Epoch 2:


100%|██████████| 1687/1687 [00:03<00:00, 539.29it/s]


Train loss: 0.430563 | Train accuracy: 0.882576


100%|██████████| 187/187 [00:00<00:00, 2225.92it/s]


Val loss:   0.317500 | Val accuracy:   0.914940
-------------
Epoch 3:


100%|██████████| 1687/1687 [00:03<00:00, 508.91it/s]


Train loss: 0.367536 | Train accuracy: 0.897044


100%|██████████| 187/187 [00:00<00:00, 2633.30it/s]


Val loss:   0.285166 | Val accuracy:   0.922293
-------------
Epoch 4:


100%|██████████| 1687/1687 [00:03<00:00, 520.73it/s]


Train loss: 0.340316 | Train accuracy: 0.904157


100%|██████████| 187/187 [00:00<00:00, 2042.87it/s]


Val loss:   0.268281 | Val accuracy:   0.926303
-------------
Epoch 5:


100%|██████████| 1687/1687 [00:03<00:00, 502.94it/s]


Train loss: 0.323848 | Train accuracy: 0.907880


100%|██████████| 187/187 [00:00<00:00, 2366.91it/s]


Val loss:   0.257622 | Val accuracy:   0.927640
-------------
Epoch 6:


100%|██████████| 1687/1687 [00:03<00:00, 528.67it/s]


Train loss: 0.312422 | Train accuracy: 0.910862


100%|██████████| 187/187 [00:00<00:00, 2321.41it/s]


Val loss:   0.250134 | Val accuracy:   0.928476
-------------
Epoch 7:


100%|██████████| 1687/1687 [00:03<00:00, 522.99it/s]


Train loss: 0.303781 | Train accuracy: 0.913289


100%|██████████| 187/187 [00:00<00:00, 2331.07it/s]


Val loss:   0.244547 | Val accuracy:   0.930983
-------------
Epoch 8:


100%|██████████| 1687/1687 [00:03<00:00, 541.60it/s]


Train loss: 0.296870 | Train accuracy: 0.915142


100%|██████████| 187/187 [00:00<00:00, 2337.25it/s]


Val loss:   0.240068 | Val accuracy:   0.931818
-------------
Epoch 9:


100%|██████████| 1687/1687 [00:03<00:00, 532.12it/s]


Train loss: 0.291028 | Train accuracy: 0.916938


100%|██████████| 187/187 [00:00<00:00, 2265.13it/s]


Val loss:   0.236431 | Val accuracy:   0.933322
-------------
Epoch 10:


100%|██████████| 1687/1687 [00:03<00:00, 525.51it/s]


Train loss: 0.285981 | Train accuracy: 0.918179


100%|██████████| 187/187 [00:00<00:00, 2136.25it/s]


Val loss:   0.233266 | Val accuracy:   0.934659


100%|██████████| 312/312 [00:00<00:00, 1758.55it/s]


learning rate 0.01 achieved validation accuracy of: 0.9346590909090909 and test accuracy of: 0.9206730769230769
-------------
Epoch 1:


100%|██████████| 1687/1687 [00:03<00:00, 499.85it/s]


Train loss: 0.610020 | Train accuracy: 0.823633


100%|██████████| 187/187 [00:00<00:00, 2212.07it/s]


Val loss:   0.518295 | Val accuracy:   0.846925
-------------
Epoch 2:


100%|██████████| 1687/1687 [00:03<00:00, 492.88it/s]


Train loss: 0.522522 | Train accuracy: 0.858569


100%|██████████| 187/187 [00:00<00:00, 2508.85it/s]


Val loss:   0.387432 | Val accuracy:   0.894886
-------------
Epoch 3:


100%|██████████| 1687/1687 [00:03<00:00, 498.73it/s]


Train loss: 0.497169 | Train accuracy: 0.865664


100%|██████████| 187/187 [00:00<00:00, 2577.81it/s]


Val loss:   0.573270 | Val accuracy:   0.865307
-------------
Epoch 4:


100%|██████████| 1687/1687 [00:03<00:00, 488.39it/s]


Train loss: 0.503275 | Train accuracy: 0.866609


100%|██████████| 187/187 [00:00<00:00, 1999.21it/s]


Val loss:   0.399847 | Val accuracy:   0.897727
-------------
Epoch 5:


100%|██████████| 1687/1687 [00:03<00:00, 495.05it/s]


Train loss: 0.507508 | Train accuracy: 0.866164


100%|██████████| 187/187 [00:00<00:00, 2728.92it/s]


Val loss:   0.421162 | Val accuracy:   0.880013
-------------
Epoch 6:


100%|██████████| 1687/1687 [00:03<00:00, 488.31it/s]


Train loss: 0.545011 | Train accuracy: 0.860903


100%|██████████| 187/187 [00:00<00:00, 2037.41it/s]


Val loss:   0.413657 | Val accuracy:   0.901905
-------------
Epoch 7:


100%|██████████| 1687/1687 [00:03<00:00, 491.23it/s]


Train loss: 0.495312 | Train accuracy: 0.875259


100%|██████████| 187/187 [00:00<00:00, 2532.82it/s]


Val loss:   0.403087 | Val accuracy:   0.902741
-------------
Epoch 8:


100%|██████████| 1687/1687 [00:03<00:00, 494.14it/s]


Train loss: 0.470934 | Train accuracy: 0.883558


100%|██████████| 187/187 [00:00<00:00, 2364.24it/s]


Val loss:   0.467854 | Val accuracy:   0.892714
-------------
Epoch 9:


100%|██████████| 1687/1687 [00:03<00:00, 494.79it/s]


Train loss: 0.487413 | Train accuracy: 0.878501


100%|██████████| 187/187 [00:00<00:00, 2065.25it/s]


Val loss:   0.487316 | Val accuracy:   0.886531
-------------
Epoch 10:


100%|██████████| 1687/1687 [00:03<00:00, 493.94it/s]


Train loss: 0.516685 | Train accuracy: 0.872907


100%|██████████| 187/187 [00:00<00:00, 1936.82it/s]


Val loss:   0.481226 | Val accuracy:   0.885862


100%|██████████| 312/312 [00:00<00:00, 2195.60it/s]


momentum 0.9 achieved validation accuracy of: 0.8858622994652406 and test accuracy of: 0.8729967948717948
-------------
Epoch 1:


100%|██████████| 1687/1687 [00:03<00:00, 520.64it/s]


Train loss: 0.444938 | Train accuracy: 0.871128


100%|██████████| 187/187 [00:00<00:00, 562.79it/s]


Val loss:   0.261497 | Val accuracy:   0.922627
-------------
Epoch 2:


100%|██████████| 1687/1687 [00:03<00:00, 448.33it/s]


Train loss: 0.303264 | Train accuracy: 0.913011


100%|██████████| 187/187 [00:00<00:00, 1751.18it/s]


Val loss:   0.241508 | Val accuracy:   0.928810
-------------
Epoch 3:


100%|██████████| 1687/1687 [00:03<00:00, 492.62it/s]


Train loss: 0.281472 | Train accuracy: 0.918809


100%|██████████| 187/187 [00:00<00:00, 2233.20it/s]


Val loss:   0.235257 | Val accuracy:   0.929646
-------------
Epoch 4:


100%|██████████| 1687/1687 [00:03<00:00, 470.61it/s]


Train loss: 0.267219 | Train accuracy: 0.923551


100%|██████████| 187/187 [00:00<00:00, 1775.17it/s]


Val loss:   0.235469 | Val accuracy:   0.930481
-------------
Epoch 5:


100%|██████████| 1687/1687 [00:03<00:00, 524.66it/s]


Train loss: 0.256387 | Train accuracy: 0.926052


100%|██████████| 187/187 [00:00<00:00, 2367.21it/s]


Val loss:   0.233859 | Val accuracy:   0.930816
-------------
Epoch 6:


100%|██████████| 1687/1687 [00:03<00:00, 522.63it/s]


Train loss: 0.247782 | Train accuracy: 0.928534


100%|██████████| 187/187 [00:00<00:00, 2088.14it/s]


Val loss:   0.229305 | Val accuracy:   0.930816
-------------
Epoch 7:


100%|██████████| 1687/1687 [00:03<00:00, 525.26it/s]


Train loss: 0.240420 | Train accuracy: 0.930553


100%|██████████| 187/187 [00:00<00:00, 2264.95it/s]


Val loss:   0.229741 | Val accuracy:   0.931317
-------------
Epoch 8:


100%|██████████| 1687/1687 [00:03<00:00, 485.58it/s]


Train loss: 0.234297 | Train accuracy: 0.932758


100%|██████████| 187/187 [00:00<00:00, 2215.45it/s]


Val loss:   0.228470 | Val accuracy:   0.933322
-------------
Epoch 9:


100%|██████████| 1687/1687 [00:03<00:00, 510.97it/s]


Train loss: 0.228930 | Train accuracy: 0.934610


100%|██████████| 187/187 [00:00<00:00, 2211.59it/s]


Val loss:   0.228252 | Val accuracy:   0.932487
-------------
Epoch 10:


100%|██████████| 1687/1687 [00:03<00:00, 510.24it/s]


Train loss: 0.224515 | Train accuracy: 0.935147


100%|██████████| 187/187 [00:00<00:00, 2322.19it/s]


Val loss:   0.227615 | Val accuracy:   0.931985


100%|██████████| 312/312 [00:00<00:00, 2276.32it/s]

LeakyReLU activation achieved validation accuracy of: 0.9319852941176471 and test accuracy of: 0.9207732371794872
Best modification based on validation accuracy is: batch size 64 with accuracy: 0.9400201612903226
Best modification based on test accuracy is: batch size 64 with accuracy: 0.9314903846153846





# Improving Accuracy - Hidden 128

In [10]:
def main(modification):
    # Load the dataset
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Split into train and dev
    dev_split_index = int(9 * len(X_train) / 10)
    X_dev = X_train[dev_split_index:]
    y_dev = y_train[dev_split_index:]
    X_train = X_train[:dev_split_index]
    y_train = y_train[:dev_split_index]

    permutation = np.array([i for i in range(len(X_train))])
    np.random.shuffle(permutation)
    X_train = [X_train[i] for i in permutation]
    y_train = [y_train[i] for i in permutation]

    # Modify the model's architecture
    model = nn.Sequential(
        nn.Linear(784, 128),
        nn.ReLU() if modification != "LeakyReLU activation" else nn.LeakyReLU(),
        nn.Linear(128, 10),
    )

    # Hyperparameters
    batch_size = 32
    lr = 0.1
    momentum = 0

    if modification == "batch size 64":
        batch_size = 64
    elif modification == "learning rate 0.01":
        lr = 0.01
    elif modification == "momentum 0.9":
        momentum = 0.9

    # Split dataset into batches
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    val_acc = train_model(train_batches, dev_batches, model, lr=lr, momentum=momentum)

    return val_acc


if __name__ == '__main__':
    np.random.seed(12321)  # for reproducibility
    torch.manual_seed(12321)  # for reproducibility

    modifications = [
        "baseline (no modifications)",
        "batch size 64",
        "learning rate 0.01",
        "momentum 0.9",
        "LeakyReLU activation"
    ]

    best_modification = None
    best_val_acc = 0

    for modification in modifications:
        val_acc = main(modification)
        print(f"Validation accuracy with {modification}: {val_acc}")
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_modification = modification

    print(f"\nBest modification is: {best_modification} with validation accuracy: {best_val_acc}")

-------------
Epoch 1:


100%|██████████| 1687/1687 [00:03<00:00, 459.61it/s]


Train loss: 0.366369 | Train accuracy: 0.897081


100%|██████████| 187/187 [00:00<00:00, 1649.68it/s]


Val loss:   0.178036 | Val accuracy:   0.947861
-------------
Epoch 2:


100%|██████████| 1687/1687 [00:04<00:00, 412.80it/s]


Train loss: 0.174344 | Train accuracy: 0.948966


100%|██████████| 187/187 [00:00<00:00, 1497.23it/s]


Val loss:   0.124726 | Val accuracy:   0.965241
-------------
Epoch 3:


100%|██████████| 1687/1687 [00:04<00:00, 414.72it/s]


Train loss: 0.122234 | Train accuracy: 0.965397


100%|██████████| 187/187 [00:00<00:00, 1517.13it/s]


Val loss:   0.103370 | Val accuracy:   0.971424
-------------
Epoch 4:


100%|██████████| 1687/1687 [00:04<00:00, 411.57it/s]


Train loss: 0.094549 | Train accuracy: 0.973251


100%|██████████| 187/187 [00:00<00:00, 1361.59it/s]


Val loss:   0.091535 | Val accuracy:   0.974432
-------------
Epoch 5:


100%|██████████| 1687/1687 [00:04<00:00, 413.54it/s]


Train loss: 0.076735 | Train accuracy: 0.978401


100%|██████████| 187/187 [00:00<00:00, 1389.93it/s]


Val loss:   0.084775 | Val accuracy:   0.975100
-------------
Epoch 6:


100%|██████████| 1687/1687 [00:04<00:00, 405.59it/s]


Train loss: 0.064088 | Train accuracy: 0.982124


100%|██████████| 187/187 [00:00<00:00, 1707.10it/s]


Val loss:   0.079929 | Val accuracy:   0.976103
-------------
Epoch 7:


100%|██████████| 1687/1687 [00:04<00:00, 403.41it/s]


Train loss: 0.054388 | Train accuracy: 0.984996


100%|██████████| 187/187 [00:00<00:00, 1489.60it/s]


Val loss:   0.076648 | Val accuracy:   0.976604
-------------
Epoch 8:


100%|██████████| 1687/1687 [00:04<00:00, 411.93it/s]


Train loss: 0.046479 | Train accuracy: 0.987348


100%|██████████| 187/187 [00:00<00:00, 1558.52it/s]


Val loss:   0.074663 | Val accuracy:   0.977440
-------------
Epoch 9:


100%|██████████| 1687/1687 [00:04<00:00, 412.70it/s]


Train loss: 0.040037 | Train accuracy: 0.989275


100%|██████████| 187/187 [00:00<00:00, 1229.66it/s]


Val loss:   0.073389 | Val accuracy:   0.977106
-------------
Epoch 10:


100%|██████████| 1687/1687 [00:04<00:00, 396.66it/s]


Train loss: 0.034451 | Train accuracy: 0.991090


100%|██████████| 187/187 [00:00<00:00, 1604.50it/s]


Val loss:   0.073004 | Val accuracy:   0.977607
Validation accuracy with baseline (no modifications): 0.9776069518716578
-------------
Epoch 1:


100%|██████████| 843/843 [00:02<00:00, 344.52it/s]


Train loss: 0.469601 | Train accuracy: 0.872720


100%|██████████| 93/93 [00:00<00:00, 1103.07it/s]


Val loss:   0.224432 | Val accuracy:   0.936660
-------------
Epoch 2:


100%|██████████| 843/843 [00:02<00:00, 341.78it/s]


Train loss: 0.241642 | Train accuracy: 0.930605


100%|██████████| 93/93 [00:00<00:00, 1044.69it/s]


Val loss:   0.163731 | Val accuracy:   0.956485
-------------
Epoch 3:


100%|██████████| 843/843 [00:02<00:00, 325.57it/s]


Train loss: 0.181935 | Train accuracy: 0.947731


100%|██████████| 93/93 [00:00<00:00, 1024.37it/s]


Val loss:   0.132678 | Val accuracy:   0.964886
-------------
Epoch 4:


100%|██████████| 843/843 [00:02<00:00, 332.08it/s]


Train loss: 0.146086 | Train accuracy: 0.957722


100%|██████████| 93/93 [00:00<00:00, 1112.82it/s]


Val loss:   0.114258 | Val accuracy:   0.968918
-------------
Epoch 5:


100%|██████████| 843/843 [00:02<00:00, 335.94it/s]


Train loss: 0.122010 | Train accuracy: 0.964746


100%|██████████| 93/93 [00:00<00:00, 970.67it/s]


Val loss:   0.102470 | Val accuracy:   0.971774
-------------
Epoch 6:


100%|██████████| 843/843 [00:02<00:00, 336.84it/s]


Train loss: 0.104632 | Train accuracy: 0.970066


100%|██████████| 93/93 [00:00<00:00, 1118.22it/s]


Val loss:   0.094430 | Val accuracy:   0.973118
-------------
Epoch 7:


100%|██████████| 843/843 [00:02<00:00, 338.54it/s]


Train loss: 0.091448 | Train accuracy: 0.973903


100%|██████████| 93/93 [00:00<00:00, 1146.28it/s]


Val loss:   0.088523 | Val accuracy:   0.974966
-------------
Epoch 8:


100%|██████████| 843/843 [00:02<00:00, 330.75it/s]


Train loss: 0.081047 | Train accuracy: 0.977258


100%|██████████| 93/93 [00:00<00:00, 1004.87it/s]


Val loss:   0.084345 | Val accuracy:   0.975806
-------------
Epoch 9:


100%|██████████| 843/843 [00:02<00:00, 330.84it/s]


Train loss: 0.072541 | Train accuracy: 0.979797


100%|██████████| 93/93 [00:00<00:00, 1140.48it/s]


Val loss:   0.081153 | Val accuracy:   0.977151
-------------
Epoch 10:


100%|██████████| 843/843 [00:02<00:00, 323.31it/s]


Train loss: 0.065400 | Train accuracy: 0.982058


100%|██████████| 93/93 [00:00<00:00, 963.34it/s]


Val loss:   0.078718 | Val accuracy:   0.978999
Validation accuracy with batch size 64: 0.9789986559139785
-------------
Epoch 1:


100%|██████████| 1687/1687 [00:04<00:00, 398.92it/s]


Train loss: 0.956302 | Train accuracy: 0.775600


100%|██████████| 187/187 [00:00<00:00, 1441.80it/s]


Val loss:   0.387736 | Val accuracy:   0.900067
-------------
Epoch 2:


100%|██████████| 1687/1687 [00:04<00:00, 415.93it/s]


Train loss: 0.398657 | Train accuracy: 0.890634


100%|██████████| 187/187 [00:00<00:00, 1398.03it/s]


Val loss:   0.296485 | Val accuracy:   0.915775
-------------
Epoch 3:


100%|██████████| 1687/1687 [00:04<00:00, 415.67it/s]


Train loss: 0.339753 | Train accuracy: 0.903805


100%|██████████| 187/187 [00:00<00:00, 1448.50it/s]


Val loss:   0.265835 | Val accuracy:   0.925134
-------------
Epoch 4:


100%|██████████| 1687/1687 [00:04<00:00, 419.22it/s]


Train loss: 0.309340 | Train accuracy: 0.912178


100%|██████████| 187/187 [00:00<00:00, 1501.71it/s]


Val loss:   0.246282 | Val accuracy:   0.931150
-------------
Epoch 5:


100%|██████████| 1687/1687 [00:04<00:00, 416.30it/s]


Train loss: 0.287015 | Train accuracy: 0.918883


100%|██████████| 187/187 [00:00<00:00, 1604.65it/s]


Val loss:   0.230829 | Val accuracy:   0.936330
-------------
Epoch 6:


100%|██████████| 1687/1687 [00:04<00:00, 397.98it/s]


Train loss: 0.268372 | Train accuracy: 0.924348


100%|██████████| 187/187 [00:00<00:00, 1369.60it/s]


Val loss:   0.217386 | Val accuracy:   0.939004
-------------
Epoch 7:


100%|██████████| 1687/1687 [00:04<00:00, 406.38it/s]


Train loss: 0.251822 | Train accuracy: 0.929220


100%|██████████| 187/187 [00:00<00:00, 1276.13it/s]


Val loss:   0.205207 | Val accuracy:   0.942179
-------------
Epoch 8:


100%|██████████| 1687/1687 [00:04<00:00, 410.85it/s]


Train loss: 0.236815 | Train accuracy: 0.933165


100%|██████████| 187/187 [00:00<00:00, 1489.37it/s]


Val loss:   0.194136 | Val accuracy:   0.946858
-------------
Epoch 9:


100%|██████████| 1687/1687 [00:04<00:00, 415.34it/s]


Train loss: 0.223120 | Train accuracy: 0.937333


100%|██████████| 187/187 [00:00<00:00, 1513.72it/s]


Val loss:   0.184103 | Val accuracy:   0.949699
-------------
Epoch 10:


100%|██████████| 1687/1687 [00:04<00:00, 406.86it/s]


Train loss: 0.210722 | Train accuracy: 0.940464


100%|██████████| 187/187 [00:00<00:00, 1410.68it/s]


Val loss:   0.175199 | Val accuracy:   0.953041
Validation accuracy with learning rate 0.01: 0.9530414438502673
-------------
Epoch 1:


100%|██████████| 1687/1687 [00:04<00:00, 383.55it/s]


Train loss: 0.283897 | Train accuracy: 0.914790


100%|██████████| 187/187 [00:00<00:00, 1483.23it/s]


Val loss:   0.151576 | Val accuracy:   0.955047
-------------
Epoch 2:


100%|██████████| 1687/1687 [00:04<00:00, 379.11it/s]


Train loss: 0.161007 | Train accuracy: 0.953486


100%|██████████| 187/187 [00:00<00:00, 1500.96it/s]


Val loss:   0.149666 | Val accuracy:   0.959893
-------------
Epoch 3:


100%|██████████| 1687/1687 [00:04<00:00, 378.71it/s]


Train loss: 0.132469 | Train accuracy: 0.962619


100%|██████████| 187/187 [00:00<00:00, 1400.24it/s]


Val loss:   0.155296 | Val accuracy:   0.964071
-------------
Epoch 4:


100%|██████████| 1687/1687 [00:04<00:00, 381.73it/s]


Train loss: 0.115378 | Train accuracy: 0.966916


100%|██████████| 187/187 [00:00<00:00, 1359.65it/s]


Val loss:   0.168488 | Val accuracy:   0.965241
-------------
Epoch 5:


100%|██████████| 1687/1687 [00:04<00:00, 379.61it/s]


Train loss: 0.107321 | Train accuracy: 0.970436


100%|██████████| 187/187 [00:00<00:00, 1297.86it/s]


Val loss:   0.188538 | Val accuracy:   0.962567
-------------
Epoch 6:


100%|██████████| 1687/1687 [00:04<00:00, 379.50it/s]


Train loss: 0.093257 | Train accuracy: 0.974641


100%|██████████| 187/187 [00:00<00:00, 1402.79it/s]


Val loss:   0.153951 | Val accuracy:   0.969586
-------------
Epoch 7:


100%|██████████| 1687/1687 [00:04<00:00, 376.49it/s]


Train loss: 0.087206 | Train accuracy: 0.976289


100%|██████████| 187/187 [00:00<00:00, 1533.52it/s]


Val loss:   0.224246 | Val accuracy:   0.955047
-------------
Epoch 8:


100%|██████████| 1687/1687 [00:04<00:00, 379.50it/s]


Train loss: 0.075348 | Train accuracy: 0.979735


100%|██████████| 187/187 [00:00<00:00, 1410.84it/s]


Val loss:   0.152848 | Val accuracy:   0.972594
-------------
Epoch 9:


100%|██████████| 1687/1687 [00:04<00:00, 373.08it/s]


Train loss: 0.074434 | Train accuracy: 0.980513


100%|██████████| 187/187 [00:00<00:00, 1583.88it/s]


Val loss:   0.177508 | Val accuracy:   0.969586
-------------
Epoch 10:


100%|██████████| 1687/1687 [00:04<00:00, 353.14it/s]


Train loss: 0.083954 | Train accuracy: 0.978920


100%|██████████| 187/187 [00:00<00:00, 1495.29it/s]


Val loss:   0.163362 | Val accuracy:   0.972594
Validation accuracy with momentum 0.9: 0.9725935828877005
-------------
Epoch 1:


100%|██████████| 1687/1687 [00:04<00:00, 399.23it/s]


Train loss: 0.369002 | Train accuracy: 0.896766


100%|██████████| 187/187 [00:00<00:00, 1399.20it/s]


Val loss:   0.174498 | Val accuracy:   0.950702
-------------
Epoch 2:


100%|██████████| 1687/1687 [00:04<00:00, 392.88it/s]


Train loss: 0.175108 | Train accuracy: 0.949096


100%|██████████| 187/187 [00:00<00:00, 1396.95it/s]


Val loss:   0.121336 | Val accuracy:   0.963737
-------------
Epoch 3:


100%|██████████| 1687/1687 [00:04<00:00, 393.59it/s]


Train loss: 0.124588 | Train accuracy: 0.964175


100%|██████████| 187/187 [00:00<00:00, 1330.38it/s]


Val loss:   0.100364 | Val accuracy:   0.970755
-------------
Epoch 4:


100%|██████████| 1687/1687 [00:04<00:00, 392.75it/s]


Train loss: 0.097126 | Train accuracy: 0.971918


100%|██████████| 187/187 [00:00<00:00, 1318.27it/s]


Val loss:   0.088784 | Val accuracy:   0.974599
-------------
Epoch 5:


100%|██████████| 1687/1687 [00:04<00:00, 391.50it/s]


Train loss: 0.079124 | Train accuracy: 0.977345


100%|██████████| 187/187 [00:00<00:00, 1421.73it/s]


Val loss:   0.082072 | Val accuracy:   0.976771
-------------
Epoch 6:


100%|██████████| 1687/1687 [00:04<00:00, 384.60it/s]


Train loss: 0.066315 | Train accuracy: 0.981143


100%|██████████| 187/187 [00:00<00:00, 1156.68it/s]


Val loss:   0.078112 | Val accuracy:   0.977440
-------------
Epoch 7:


100%|██████████| 1687/1687 [00:04<00:00, 393.92it/s]


Train loss: 0.056483 | Train accuracy: 0.983866


100%|██████████| 187/187 [00:00<00:00, 1260.71it/s]


Val loss:   0.074875 | Val accuracy:   0.978443
-------------
Epoch 8:


100%|██████████| 1687/1687 [00:04<00:00, 376.76it/s]


Train loss: 0.048569 | Train accuracy: 0.986589


100%|██████████| 187/187 [00:00<00:00, 1188.15it/s]


Val loss:   0.072209 | Val accuracy:   0.980281
-------------
Epoch 9:


100%|██████████| 1687/1687 [00:04<00:00, 389.63it/s]


Train loss: 0.041984 | Train accuracy: 0.988811


100%|██████████| 187/187 [00:00<00:00, 1200.22it/s]


Val loss:   0.071080 | Val accuracy:   0.979612
-------------
Epoch 10:


100%|██████████| 1687/1687 [00:04<00:00, 390.57it/s]


Train loss: 0.036325 | Train accuracy: 0.990738


100%|██████████| 187/187 [00:00<00:00, 1306.89it/s]


Val loss:   0.070506 | Val accuracy:   0.979779
Validation accuracy with LeakyReLU activation: 0.9797794117647058

Best modification is: LeakyReLU activation with validation accuracy: 0.9797794117647058


# Convolutional Neural Networks

In [11]:
import _pickle as c_pickle, gzip
import numpy as np
from tqdm import tqdm
import torch
import torch.autograd as autograd
import torch.nn.functional as F
import torch.nn as nn
import sys
sys.path.append("..")


def main():
    # Load the dataset
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # We need to rehape the data back into a 1x28x28 image
    X_train = np.reshape(X_train, (X_train.shape[0], 1, 28, 28))
    X_test = np.reshape(X_test, (X_test.shape[0], 1, 28, 28))

    # Split into train and dev
    dev_split_index = int(9 * len(X_train) / 10)
    X_dev = X_train[dev_split_index:]
    y_dev = y_train[dev_split_index:]
    X_train = X_train[:dev_split_index]
    y_train = y_train[:dev_split_index]

    permutation = np.array([i for i in range(len(X_train))])
    np.random.shuffle(permutation)
    X_train = [X_train[i] for i in permutation]
    y_train = [y_train[i] for i in permutation]

    # Split dataset into batches
    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    #################################
    ## Model specification TODO
    model = nn.Sequential(
        nn.Conv2d(1, 32, (3, 3)),
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),
        nn.Conv2d(32, 64, (3,3)),
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),
        Flatten(),
        nn.Linear(1600,128),
        nn.Dropout(p = 0.5),
        nn.Linear(128,10)
    )
    ##################################

    train_model(train_batches, dev_batches, model, nesterov=True)

    ## Evaluate the model on test data
    loss, accuracy = run_epoch(test_batches, model.eval(), None)

    print ("Loss on test set:"  + str(loss) + " Accuracy on test set: " + str(accuracy))


if __name__ == '__main__':
    # Specify seed for deterministic behavior, then shuffle. Do not change seed for official submissions to edx
    np.random.seed(12321)  # for reproducibility
    torch.manual_seed(12321)
    main()


-------------
Epoch 1:


100%|██████████| 1687/1687 [00:38<00:00, 44.11it/s]


Train loss: 0.244168 | Train accuracy: 0.923292


100%|██████████| 187/187 [00:01<00:00, 119.05it/s]


Val loss:   0.059497 | Val accuracy:   0.982955
-------------
Epoch 2:


100%|██████████| 1687/1687 [00:38<00:00, 44.20it/s]


Train loss: 0.078097 | Train accuracy: 0.976586


100%|██████████| 187/187 [00:01<00:00, 117.72it/s]


Val loss:   0.043238 | Val accuracy:   0.988302
-------------
Epoch 3:


100%|██████████| 1687/1687 [00:39<00:00, 42.95it/s]


Train loss: 0.057475 | Train accuracy: 0.982902


100%|██████████| 187/187 [00:01<00:00, 110.85it/s]


Val loss:   0.041741 | Val accuracy:   0.987801
-------------
Epoch 4:


100%|██████████| 1687/1687 [00:45<00:00, 36.78it/s]


Train loss: 0.045078 | Train accuracy: 0.986292


100%|██████████| 187/187 [00:01<00:00, 105.04it/s]


Val loss:   0.033974 | Val accuracy:   0.988636
-------------
Epoch 5:


100%|██████████| 1687/1687 [00:42<00:00, 39.68it/s]


Train loss: 0.039271 | Train accuracy: 0.987830


100%|██████████| 187/187 [00:01<00:00, 104.52it/s]


Val loss:   0.033391 | Val accuracy:   0.989973
-------------
Epoch 6:


100%|██████████| 1687/1687 [00:41<00:00, 40.29it/s]


Train loss: 0.033232 | Train accuracy: 0.989867


100%|██████████| 187/187 [00:01<00:00, 109.97it/s]


Val loss:   0.036721 | Val accuracy:   0.989973
-------------
Epoch 7:


100%|██████████| 1687/1687 [00:41<00:00, 40.92it/s]


Train loss: 0.028218 | Train accuracy: 0.990923


100%|██████████| 187/187 [00:01<00:00, 113.15it/s]


Val loss:   0.038467 | Val accuracy:   0.987801
-------------
Epoch 8:


100%|██████████| 1687/1687 [00:40<00:00, 41.69it/s]


Train loss: 0.024801 | Train accuracy: 0.992090


100%|██████████| 187/187 [00:01<00:00, 101.20it/s]


Val loss:   0.031256 | Val accuracy:   0.991477
-------------
Epoch 9:


100%|██████████| 1687/1687 [00:39<00:00, 42.29it/s]


Train loss: 0.022789 | Train accuracy: 0.992405


100%|██████████| 187/187 [00:01<00:00, 110.68it/s]


Val loss:   0.035994 | Val accuracy:   0.990475
-------------
Epoch 10:


100%|██████████| 1687/1687 [00:40<00:00, 41.69it/s]


Train loss: 0.020146 | Train accuracy: 0.993646


100%|██████████| 187/187 [00:01<00:00, 111.14it/s]


Val loss:   0.035877 | Val accuracy:   0.990976


100%|██████████| 312/312 [00:02<00:00, 105.63it/s]

Loss on test set:0.030044354157752924 Accuracy on test set: 0.9895833333333334





# Overlapping, multi-digit MNIST

In [12]:
"""Training utilities."""

from tqdm import tqdm
import numpy as np
import torch
import torch.nn.functional as F
import torch.nn as nn


class Flatten(nn.Module):
    """A custom layer that views an input as 1D."""

    def forward(self, input):
        return input.view(input.size(0), -1)


def batchify_data(x_data, y_data, batch_size):
    """Takes a set of data points and labels and groups them into batches."""
    # Only take batch_size chunks (i.e. drop the remainder)
    N = int(len(x_data) / batch_size) * batch_size
    batches = []
    for i in range(0, N, batch_size):
        batches.append({
            'x': torch.tensor(x_data[i:i + batch_size],
                              dtype=torch.float32),
            'y': torch.tensor([y_data[0][i:i + batch_size],
                               y_data[1][i:i + batch_size]],
                              dtype=torch.int64)
        })
    return batches


def compute_accuracy(predictions, y):
    """Computes the accuracy of predictions against the gold labels, y."""
    return np.mean(np.equal(predictions.numpy(), y.numpy()))


def train_model(train_data, dev_data, model, lr=0.01, momentum=0.9, nesterov=False, n_epochs=30):
    """Train a model for N epochs given data and hyper-params."""
    # We optimize with SGD
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum, nesterov=nesterov)

    for epoch in range(1, n_epochs + 1):
        print("-------------\nEpoch {}:\n".format(epoch))

        # Run **training***
        loss, acc = run_epoch(train_data, model.train(), optimizer)
        print('Train | loss1: {:.6f}  accuracy1: {:.6f} | loss2: {:.6f}  accuracy2: {:.6f}'.format(loss[0], acc[0], loss[1], acc[1]))

        # Run **validation**
        val_loss, val_acc = run_epoch(dev_data, model.eval(), optimizer)
        print('Valid | loss1: {:.6f}  accuracy1: {:.6f} | loss2: {:.6f}  accuracy2: {:.6f}'.format(val_loss[0], val_acc[0], val_loss[1], val_acc[1]))

        # Save model
        torch.save(model, 'mnist_model_fully_connected.pt')


def run_epoch(data, model, optimizer):
    """Train model for one pass of train data, and return loss, acccuracy"""
    # Gather losses
    losses_first_label = []
    losses_second_label = []
    batch_accuracies_first = []
    batch_accuracies_second = []

    # If model is in train mode, use optimizer.
    is_training = model.training

    # Iterate through batches
    for batch in tqdm(data):
        # Grab x and y
        x, y = batch['x'], batch['y']

        # Get output predictions for both the upper and lower numbers
        out1, out2 = model(x)

        # Predict and store accuracy
        predictions_first_label = torch.argmax(out1, dim=1)
        predictions_second_label = torch.argmax(out2, dim=1)
        batch_accuracies_first.append(compute_accuracy(predictions_first_label, y[0]))
        batch_accuracies_second.append(compute_accuracy(predictions_second_label, y[1]))

        # Compute both losses
        loss1 = F.cross_entropy(out1, y[0])
        loss2 = F.cross_entropy(out2, y[1])
        losses_first_label.append(loss1.data.item())
        losses_second_label.append(loss2.data.item())

        # If training, do an update.
        if is_training:
            optimizer.zero_grad()
            joint_loss = 0.5 * (loss1 + loss2)
            joint_loss.backward()
            optimizer.step()

    # Calculate epoch level scores
    avg_loss = np.mean(losses_first_label), np.mean(losses_second_label)
    avg_accuracy = np.mean(batch_accuracies_first), np.mean(batch_accuracies_second)
    return avg_loss, avg_accuracy


In [13]:
import gzip, _pickle, numpy as np
num_classes = 10
img_rows, img_cols = 42, 28

def get_data(path_to_data_dir, use_mini_dataset):
    if use_mini_dataset:
        exten = '_mini'
    else:
        exten = ''
    f = gzip.open(path_to_data_dir + 'train_multi_digit' + exten + '.pkl.gz', 'rb')
    X_train = _pickle.load(f, encoding='latin1')
    f.close()
    X_train =  np.reshape(X_train, (len(X_train), 1, img_rows, img_cols))
    f = gzip.open(path_to_data_dir + 'test_multi_digit' + exten +'.pkl.gz', 'rb')
    X_test = _pickle.load(f, encoding='latin1')
    f.close()
    X_test =  np.reshape(X_test, (len(X_test),1, img_rows, img_cols))
    f = gzip.open(path_to_data_dir + 'train_labels' + exten +'.txt.gz', 'rb')
    y_train = np.loadtxt(f)
    f.close()
    f = gzip.open(path_to_data_dir +'test_labels' + exten + '.txt.gz', 'rb')
    y_test = np.loadtxt(f)
    f.close()
    return X_train, y_train, X_test, y_test


In [14]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
path_to_data_dir = 'Datasets/'
use_mini_dataset = True

batch_size = 64
nb_classes = 10
nb_epoch = 30
num_classes = 10
img_rows, img_cols = 42, 28 # input image dimensions

class MLP(nn.Module):

    def __init__(self, input_dimension):
        super(MLP, self).__init__()
        self.flatten = Flatten()
        self.linear1 = nn.Linear(input_dimension, 64)
        self.linear2 = nn.Linear(64, 20)

    def forward(self, x):
        xf = self.flatten(x)

        xl1 = self.linear1(xf)
        xl2 = self.linear2(xl1)
        out_first_digit = xl2[:,:10]
        out_second_digit = xl2[:,10:]

        return out_first_digit, out_second_digit

def main():
    X_train, y_train, X_test, y_test = get_data(path_to_data_dir, use_mini_dataset)

    # Split into train and dev
    dev_split_index = int(9 * len(X_train) / 10)
    X_dev = X_train[dev_split_index:]
    y_dev = [y_train[0][dev_split_index:], y_train[1][dev_split_index:]]
    X_train = X_train[:dev_split_index]
    y_train = [y_train[0][:dev_split_index], y_train[1][:dev_split_index]]

    permutation = np.array([i for i in range(len(X_train))])
    np.random.shuffle(permutation)
    X_train = [X_train[i] for i in permutation]
    y_train = [[y_train[0][i] for i in permutation], [y_train[1][i] for i in permutation]]

    # Split dataset into batches
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Load model
    input_dimension = img_rows * img_cols
    model = MLP(input_dimension) # TODO add proper layers to MLP class above

    # Train
    train_model(train_batches, dev_batches, model)

    ## Evaluate the model on test data
    loss, acc = run_epoch(test_batches, model.eval(), None)
    print('Test loss1: {:.6f}  accuracy1: {:.6f}  loss2: {:.6f}   accuracy2: {:.6f}'.format(loss[0], acc[0], loss[1], acc[1]))

if __name__ == '__main__':
    # Specify seed for deterministic behavior, then shuffle. Do not change seed for official submissions to edx
    np.random.seed(12321)  # for reproducibility
    torch.manual_seed(12321)  # for reproducibility
    main()


  'y': torch.tensor([y_data[0][i:i + batch_size],


-------------
Epoch 1:


100%|██████████| 562/562 [00:01<00:00, 322.60it/s]


Train | loss1: 0.776068  accuracy1: 0.792538 | loss2: 0.798555  accuracy2: 0.777441


100%|██████████| 62/62 [00:00<00:00, 1077.76it/s]


Valid | loss1: 0.430175  accuracy1: 0.878780 | loss2: 0.457375  accuracy2: 0.860887
-------------
Epoch 2:


100%|██████████| 562/562 [00:01<00:00, 321.24it/s]


Train | loss1: 0.396823  accuracy1: 0.886927 | loss2: 0.426801  accuracy2: 0.870357


100%|██████████| 62/62 [00:00<00:00, 1070.43it/s]


Valid | loss1: 0.382405  accuracy1: 0.889113 | loss2: 0.403938  accuracy2: 0.875504
-------------
Epoch 3:


100%|██████████| 562/562 [00:01<00:00, 306.24it/s]


Train | loss1: 0.360463  accuracy1: 0.896769 | loss2: 0.390284  accuracy2: 0.883285


100%|██████████| 62/62 [00:00<00:00, 866.84it/s]


Valid | loss1: 0.367611  accuracy1: 0.894405 | loss2: 0.386381  accuracy2: 0.880544
-------------
Epoch 4:


100%|██████████| 562/562 [00:01<00:00, 303.12it/s]


Train | loss1: 0.342956  accuracy1: 0.901246 | loss2: 0.372160  accuracy2: 0.889290


100%|██████████| 62/62 [00:00<00:00, 789.51it/s]


Valid | loss1: 0.360734  accuracy1: 0.895161 | loss2: 0.376846  accuracy2: 0.886341
-------------
Epoch 5:


100%|██████████| 562/562 [00:01<00:00, 294.38it/s]


Train | loss1: 0.331562  accuracy1: 0.904443 | loss2: 0.360041  accuracy2: 0.893127


100%|██████████| 62/62 [00:00<00:00, 956.40it/s]


Valid | loss1: 0.356971  accuracy1: 0.896421 | loss2: 0.370512  accuracy2: 0.887853
-------------
Epoch 6:


100%|██████████| 562/562 [00:01<00:00, 296.47it/s]


Train | loss1: 0.323069  accuracy1: 0.906973 | loss2: 0.350922  accuracy2: 0.895880


100%|██████████| 62/62 [00:00<00:00, 843.17it/s]


Valid | loss1: 0.354749  accuracy1: 0.899950 | loss2: 0.365907  accuracy2: 0.890625
-------------
Epoch 7:


100%|██████████| 562/562 [00:01<00:00, 295.18it/s]


Train | loss1: 0.316259  accuracy1: 0.909169 | loss2: 0.343622  accuracy2: 0.898354


100%|██████████| 62/62 [00:00<00:00, 707.19it/s]


Valid | loss1: 0.353413  accuracy1: 0.900202 | loss2: 0.362405  accuracy2: 0.893397
-------------
Epoch 8:


100%|██████████| 562/562 [00:01<00:00, 293.93it/s]


Train | loss1: 0.310557  accuracy1: 0.910698 | loss2: 0.337544  accuracy2: 0.900328


100%|██████████| 62/62 [00:00<00:00, 873.31it/s]


Valid | loss1: 0.352648  accuracy1: 0.899698 | loss2: 0.359679  accuracy2: 0.894153
-------------
Epoch 9:


100%|██████████| 562/562 [00:01<00:00, 292.69it/s]


Train | loss1: 0.305646  accuracy1: 0.912172 | loss2: 0.332348  accuracy2: 0.901968


100%|██████████| 62/62 [00:00<00:00, 903.01it/s]


Valid | loss1: 0.352283  accuracy1: 0.898942 | loss2: 0.357529  accuracy2: 0.894909
-------------
Epoch 10:


100%|██████████| 562/562 [00:01<00:00, 296.95it/s]


Train | loss1: 0.301333  accuracy1: 0.913201 | loss2: 0.327817  accuracy2: 0.903025


100%|██████████| 62/62 [00:00<00:00, 873.29it/s]


Valid | loss1: 0.352215  accuracy1: 0.898438 | loss2: 0.355823  accuracy2: 0.895665
-------------
Epoch 11:


100%|██████████| 562/562 [00:01<00:00, 306.22it/s]


Train | loss1: 0.297492  accuracy1: 0.914202 | loss2: 0.323807  accuracy2: 0.904220


100%|██████████| 62/62 [00:00<00:00, 789.34it/s]


Valid | loss1: 0.352375  accuracy1: 0.899194 | loss2: 0.354468  accuracy2: 0.897429
-------------
Epoch 12:


100%|██████████| 562/562 [00:01<00:00, 306.55it/s]


Train | loss1: 0.294033  accuracy1: 0.915036 | loss2: 0.320212  accuracy2: 0.904665


100%|██████████| 62/62 [00:00<00:00, 1027.56it/s]


Valid | loss1: 0.352714  accuracy1: 0.898438 | loss2: 0.353398  accuracy2: 0.896925
-------------
Epoch 13:


100%|██████████| 562/562 [00:01<00:00, 303.03it/s]


Train | loss1: 0.290891  accuracy1: 0.916342 | loss2: 0.316959  accuracy2: 0.905333


100%|██████████| 62/62 [00:00<00:00, 987.12it/s]


Valid | loss1: 0.353195  accuracy1: 0.898185 | loss2: 0.352561  accuracy2: 0.898185
-------------
Epoch 14:


100%|██████████| 562/562 [00:01<00:00, 302.13it/s]


Train | loss1: 0.288017  accuracy1: 0.916787 | loss2: 0.313989  accuracy2: 0.906611


100%|██████████| 62/62 [00:00<00:00, 946.21it/s]


Valid | loss1: 0.353787  accuracy1: 0.898942 | loss2: 0.351919  accuracy2: 0.898185
-------------
Epoch 15:


100%|██████████| 562/562 [00:01<00:00, 293.74it/s]


Train | loss1: 0.285370  accuracy1: 0.917427 | loss2: 0.311259  accuracy2: 0.907724


100%|██████████| 62/62 [00:00<00:00, 785.99it/s]


Valid | loss1: 0.354469  accuracy1: 0.898942 | loss2: 0.351440  accuracy2: 0.898185
-------------
Epoch 16:


100%|██████████| 562/562 [00:01<00:00, 298.66it/s]


Train | loss1: 0.282918  accuracy1: 0.918038 | loss2: 0.308734  accuracy2: 0.908697


100%|██████████| 62/62 [00:00<00:00, 911.72it/s]


Valid | loss1: 0.355223  accuracy1: 0.897933 | loss2: 0.351099  accuracy2: 0.897933
-------------
Epoch 17:


100%|██████████| 562/562 [00:01<00:00, 296.85it/s]


Train | loss1: 0.280636  accuracy1: 0.918789 | loss2: 0.306384  accuracy2: 0.909419


100%|██████████| 62/62 [00:00<00:00, 875.20it/s]


Valid | loss1: 0.356033  accuracy1: 0.899194 | loss2: 0.350877  accuracy2: 0.898438
-------------
Epoch 18:


100%|██████████| 562/562 [00:01<00:00, 302.08it/s]


Train | loss1: 0.278502  accuracy1: 0.919623 | loss2: 0.304189  accuracy2: 0.910365


100%|██████████| 62/62 [00:00<00:00, 694.76it/s]


Valid | loss1: 0.356888  accuracy1: 0.897933 | loss2: 0.350755  accuracy2: 0.899194
-------------
Epoch 19:


100%|██████████| 562/562 [00:01<00:00, 288.60it/s]


Train | loss1: 0.276497  accuracy1: 0.920485 | loss2: 0.302129  accuracy2: 0.911032


100%|██████████| 62/62 [00:00<00:00, 987.06it/s]


Valid | loss1: 0.357780  accuracy1: 0.896925 | loss2: 0.350721  accuracy2: 0.899446
-------------
Epoch 20:


100%|██████████| 562/562 [00:01<00:00, 300.52it/s]


Train | loss1: 0.274608  accuracy1: 0.920930 | loss2: 0.300188  accuracy2: 0.911727


100%|██████████| 62/62 [00:00<00:00, 668.55it/s]


Valid | loss1: 0.358699  accuracy1: 0.897429 | loss2: 0.350763  accuracy2: 0.899194
-------------
Epoch 21:


100%|██████████| 562/562 [00:01<00:00, 298.57it/s]


Train | loss1: 0.272821  accuracy1: 0.921319 | loss2: 0.298353  accuracy2: 0.912200


100%|██████████| 62/62 [00:00<00:00, 1000.44it/s]


Valid | loss1: 0.359640  accuracy1: 0.896169 | loss2: 0.350871  accuracy2: 0.898942
-------------
Epoch 22:


100%|██████████| 562/562 [00:01<00:00, 303.55it/s]


Train | loss1: 0.271126  accuracy1: 0.921625 | loss2: 0.296614  accuracy2: 0.912978


100%|██████████| 62/62 [00:00<00:00, 984.98it/s]


Valid | loss1: 0.360598  accuracy1: 0.895917 | loss2: 0.351037  accuracy2: 0.899446
-------------
Epoch 23:


100%|██████████| 562/562 [00:01<00:00, 297.54it/s]


Train | loss1: 0.269513  accuracy1: 0.922070 | loss2: 0.294962  accuracy2: 0.913089


100%|██████████| 62/62 [00:00<00:00, 1127.05it/s]


Valid | loss1: 0.361567  accuracy1: 0.895413 | loss2: 0.351254  accuracy2: 0.899194
-------------
Epoch 24:


100%|██████████| 562/562 [00:01<00:00, 305.41it/s]


Train | loss1: 0.267976  accuracy1: 0.922375 | loss2: 0.293387  accuracy2: 0.913562


100%|██████████| 62/62 [00:00<00:00, 789.37it/s]


Valid | loss1: 0.362544  accuracy1: 0.894909 | loss2: 0.351516  accuracy2: 0.899194
-------------
Epoch 25:


100%|██████████| 562/562 [00:01<00:00, 300.11it/s]


Train | loss1: 0.266507  accuracy1: 0.922848 | loss2: 0.291884  accuracy2: 0.914229


100%|██████████| 62/62 [00:00<00:00, 822.37it/s]


Valid | loss1: 0.363526  accuracy1: 0.894909 | loss2: 0.351818  accuracy2: 0.898690
-------------
Epoch 26:


100%|██████████| 562/562 [00:01<00:00, 298.39it/s]


Train | loss1: 0.265100  accuracy1: 0.923376 | loss2: 0.290447  accuracy2: 0.914591


100%|██████████| 62/62 [00:00<00:00, 931.73it/s]


Valid | loss1: 0.364511  accuracy1: 0.894153 | loss2: 0.352155  accuracy2: 0.899194
-------------
Epoch 27:


100%|██████████| 562/562 [00:01<00:00, 307.76it/s]


Train | loss1: 0.263751  accuracy1: 0.923682 | loss2: 0.289070  accuracy2: 0.915008


100%|██████████| 62/62 [00:00<00:00, 984.54it/s]


Valid | loss1: 0.365495  accuracy1: 0.893649 | loss2: 0.352524  accuracy2: 0.899194
-------------
Epoch 28:


100%|██████████| 562/562 [00:02<00:00, 268.88it/s]


Train | loss1: 0.262455  accuracy1: 0.924266 | loss2: 0.287748  accuracy2: 0.915453


100%|██████████| 62/62 [00:00<00:00, 960.72it/s]


Valid | loss1: 0.366478  accuracy1: 0.892893 | loss2: 0.352921  accuracy2: 0.899446
-------------
Epoch 29:


100%|██████████| 562/562 [00:01<00:00, 299.45it/s]


Train | loss1: 0.261209  accuracy1: 0.924600 | loss2: 0.286478  accuracy2: 0.915897


100%|██████████| 62/62 [00:00<00:00, 879.00it/s]


Valid | loss1: 0.367456  accuracy1: 0.892893 | loss2: 0.353343  accuracy2: 0.898438
-------------
Epoch 30:


100%|██████████| 562/562 [00:02<00:00, 273.01it/s]


Train | loss1: 0.260008  accuracy1: 0.924766 | loss2: 0.285256  accuracy2: 0.916315


100%|██████████| 62/62 [00:00<00:00, 985.30it/s]


Valid | loss1: 0.368430  accuracy1: 0.892389 | loss2: 0.353787  accuracy2: 0.898185


100%|██████████| 62/62 [00:00<00:00, 762.45it/s]


Test loss1: 0.400817  accuracy1: 0.892389  loss2: 0.374367   accuracy2: 0.893901


In [15]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
path_to_data_dir = 'Datasets/'
use_mini_dataset = True

batch_size = 64
nb_classes = 10
nb_epoch = 30
num_classes = 10
img_rows, img_cols = 42, 28 # input image dimensions



class CNN(nn.Module):

    def __init__(self, input_dimension):
        super(CNN, self).__init__()
        self.conv2d_1 = nn.Conv2d(1, 32, (3, 3))
        self.relu = nn.ReLU()
        self.maxpool2d = nn.MaxPool2d((2,2))
        self.conv2d_2 = nn.Conv2d(32, 64, (3, 3))
        self.flatten = Flatten()
        self.linear1 = nn.Linear(2880, 64)
        self.dropout = nn.Dropout(p = 0.5)
        self.linear2 = nn.Linear(64, 20)

    def forward(self, x):
        x = self.conv2d_1(x)
        x = self.relu(x)
        x = self.maxpool2d(x)
        x = self.conv2d_2(x)
        x = self.relu(x)
        x = self.maxpool2d(x)
        x = self.flatten(x)
        x = self.linear1(x)
        x = self.dropout(x)
        x = self.linear2(x)

        out_first_digit = x[:,:10]
        out_second_digit = x[:,10:]

        return out_first_digit, out_second_digit

def main():
    X_train, y_train, X_test, y_test = get_data(path_to_data_dir, use_mini_dataset)

    # Split into train and dev
    dev_split_index = int(9 * len(X_train) / 10)
    X_dev = X_train[dev_split_index:]
    y_dev = [y_train[0][dev_split_index:], y_train[1][dev_split_index:]]
    X_train = X_train[:dev_split_index]
    y_train = [y_train[0][:dev_split_index], y_train[1][:dev_split_index]]

    permutation = np.array([i for i in range(len(X_train))])
    np.random.shuffle(permutation)
    X_train = [X_train[i] for i in permutation]
    y_train = [[y_train[0][i] for i in permutation], [y_train[1][i] for i in permutation]]

    # Split dataset into batches
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Load model
    input_dimension = img_rows * img_cols
    model = CNN(input_dimension) # TODO add proper layers to CNN class above

    # Train
    train_model(train_batches, dev_batches, model)

    ## Evaluate the model on test data
    loss, acc = run_epoch(test_batches, model.eval(), None)
    print('Test loss1: {:.6f}  accuracy1: {:.6f}  loss2: {:.6f}   accuracy2: {:.6f}'.format(loss[0], acc[0], loss[1], acc[1]))

if __name__ == '__main__':
    # Specify seed for deterministic behavior, then shuffle. Do not change seed for official submissions to edx
    np.random.seed(12321)  # for reproducibility
    torch.manual_seed(12321)  # for reproducibility
    main()


  'y': torch.tensor([y_data[0][i:i + batch_size],


-------------
Epoch 1:


100%|██████████| 562/562 [00:31<00:00, 17.72it/s]


Train | loss1: 0.821761  accuracy1: 0.731067 | loss2: 0.868580  accuracy2: 0.705377


100%|██████████| 62/62 [00:01<00:00, 45.00it/s]


Valid | loss1: 0.213145  accuracy1: 0.937248 | loss2: 0.250191  accuracy2: 0.922631
-------------
Epoch 2:


100%|██████████| 562/562 [00:32<00:00, 17.51it/s]


Train | loss1: 0.277179  accuracy1: 0.912589 | loss2: 0.326753  accuracy2: 0.890986


100%|██████████| 62/62 [00:01<00:00, 44.41it/s]


Valid | loss1: 0.140533  accuracy1: 0.955897 | loss2: 0.161719  accuracy2: 0.947077
-------------
Epoch 3:


100%|██████████| 562/562 [00:36<00:00, 15.45it/s]


Train | loss1: 0.202401  accuracy1: 0.938000 | loss2: 0.244852  accuracy2: 0.919790


100%|██████████| 62/62 [00:01<00:00, 37.42it/s]


Valid | loss1: 0.124026  accuracy1: 0.960938 | loss2: 0.130536  accuracy2: 0.959173
-------------
Epoch 4:


100%|██████████| 562/562 [00:41<00:00, 13.65it/s]


Train | loss1: 0.162752  accuracy1: 0.951485 | loss2: 0.197930  accuracy2: 0.935109


100%|██████████| 62/62 [00:01<00:00, 34.75it/s]


Valid | loss1: 0.096914  accuracy1: 0.970766 | loss2: 0.104911  accuracy2: 0.968246
-------------
Epoch 5:


100%|██████████| 562/562 [00:42<00:00, 13.36it/s]


Train | loss1: 0.135745  accuracy1: 0.958574 | loss2: 0.168264  accuracy2: 0.945257


100%|██████████| 62/62 [00:01<00:00, 34.26it/s]


Valid | loss1: 0.090426  accuracy1: 0.973538 | loss2: 0.088420  accuracy2: 0.970766
-------------
Epoch 6:


100%|██████████| 562/562 [00:38<00:00, 14.54it/s]


Train | loss1: 0.115893  accuracy1: 0.964135 | loss2: 0.148435  accuracy2: 0.952597


100%|██████████| 62/62 [00:02<00:00, 26.66it/s]


Valid | loss1: 0.087679  accuracy1: 0.973034 | loss2: 0.081316  accuracy2: 0.971018
-------------
Epoch 7:


100%|██████████| 562/562 [00:38<00:00, 14.67it/s]


Train | loss1: 0.103232  accuracy1: 0.967554 | loss2: 0.129388  accuracy2: 0.957156


100%|██████████| 62/62 [00:01<00:00, 38.94it/s]


Valid | loss1: 0.078792  accuracy1: 0.976310 | loss2: 0.077865  accuracy2: 0.972278
-------------
Epoch 8:


100%|██████████| 562/562 [00:36<00:00, 15.20it/s]


Train | loss1: 0.090754  accuracy1: 0.972031 | loss2: 0.119306  accuracy2: 0.960659


100%|██████████| 62/62 [00:01<00:00, 42.42it/s]


Valid | loss1: 0.075274  accuracy1: 0.977319 | loss2: 0.072465  accuracy2: 0.975806
-------------
Epoch 9:


100%|██████████| 562/562 [00:34<00:00, 16.16it/s]


Train | loss1: 0.082897  accuracy1: 0.973532 | loss2: 0.106868  accuracy2: 0.964885


100%|██████████| 62/62 [00:01<00:00, 40.28it/s]


Valid | loss1: 0.075246  accuracy1: 0.977571 | loss2: 0.073204  accuracy2: 0.975050
-------------
Epoch 10:


100%|██████████| 562/562 [00:33<00:00, 16.83it/s]


Train | loss1: 0.077798  accuracy1: 0.976201 | loss2: 0.097446  accuracy2: 0.966859


100%|██████████| 62/62 [00:01<00:00, 43.65it/s]


Valid | loss1: 0.070118  accuracy1: 0.980595 | loss2: 0.066994  accuracy2: 0.979335
-------------
Epoch 11:


100%|██████████| 562/562 [00:33<00:00, 16.92it/s]


Train | loss1: 0.069524  accuracy1: 0.977730 | loss2: 0.089316  accuracy2: 0.969473


100%|██████████| 62/62 [00:01<00:00, 43.52it/s]


Valid | loss1: 0.072023  accuracy1: 0.978327 | loss2: 0.069930  accuracy2: 0.978831
-------------
Epoch 12:


100%|██████████| 562/562 [00:33<00:00, 16.99it/s]


Train | loss1: 0.063524  accuracy1: 0.979899 | loss2: 0.082349  accuracy2: 0.972781


100%|██████████| 62/62 [00:01<00:00, 41.82it/s]


Valid | loss1: 0.072556  accuracy1: 0.980091 | loss2: 0.068335  accuracy2: 0.976058
-------------
Epoch 13:


100%|██████████| 562/562 [00:33<00:00, 16.86it/s]


Train | loss1: 0.059833  accuracy1: 0.980232 | loss2: 0.075553  accuracy2: 0.975395


100%|██████████| 62/62 [00:01<00:00, 42.51it/s]


Valid | loss1: 0.071178  accuracy1: 0.979587 | loss2: 0.057537  accuracy2: 0.980343
-------------
Epoch 14:


100%|██████████| 562/562 [00:33<00:00, 16.90it/s]


Train | loss1: 0.054093  accuracy1: 0.983152 | loss2: 0.070396  accuracy2: 0.976173


100%|██████████| 62/62 [00:01<00:00, 43.32it/s]


Valid | loss1: 0.073383  accuracy1: 0.978831 | loss2: 0.063471  accuracy2: 0.978831
-------------
Epoch 15:


100%|██████████| 562/562 [00:33<00:00, 16.95it/s]


Train | loss1: 0.051979  accuracy1: 0.983485 | loss2: 0.067457  accuracy2: 0.977258


100%|██████████| 62/62 [00:01<00:00, 43.86it/s]


Valid | loss1: 0.073695  accuracy1: 0.979335 | loss2: 0.059446  accuracy2: 0.980595
-------------
Epoch 16:


100%|██████████| 562/562 [00:33<00:00, 16.98it/s]


Train | loss1: 0.047138  accuracy1: 0.985126 | loss2: 0.061043  accuracy2: 0.978620


100%|██████████| 62/62 [00:01<00:00, 43.75it/s]


Valid | loss1: 0.075594  accuracy1: 0.978075 | loss2: 0.060993  accuracy2: 0.979839
-------------
Epoch 17:


100%|██████████| 562/562 [00:33<00:00, 16.72it/s]


Train | loss1: 0.043262  accuracy1: 0.985626 | loss2: 0.060172  accuracy2: 0.979315


100%|██████████| 62/62 [00:01<00:00, 42.75it/s]


Valid | loss1: 0.065323  accuracy1: 0.981351 | loss2: 0.059420  accuracy2: 0.980847
-------------
Epoch 18:


100%|██████████| 562/562 [00:33<00:00, 16.67it/s]


Train | loss1: 0.042116  accuracy1: 0.986071 | loss2: 0.055030  accuracy2: 0.980900


100%|██████████| 62/62 [00:01<00:00, 44.44it/s]


Valid | loss1: 0.066362  accuracy1: 0.981855 | loss2: 0.059717  accuracy2: 0.981099
-------------
Epoch 19:


100%|██████████| 562/562 [00:33<00:00, 16.63it/s]


Train | loss1: 0.038711  accuracy1: 0.987600 | loss2: 0.051651  accuracy2: 0.982318


100%|██████████| 62/62 [00:01<00:00, 42.38it/s]


Valid | loss1: 0.071354  accuracy1: 0.980091 | loss2: 0.057783  accuracy2: 0.979587
-------------
Epoch 20:


100%|██████████| 562/562 [00:33<00:00, 16.58it/s]


Train | loss1: 0.036706  accuracy1: 0.987934 | loss2: 0.051406  accuracy2: 0.982345


100%|██████████| 62/62 [00:01<00:00, 43.24it/s]


Valid | loss1: 0.068575  accuracy1: 0.982863 | loss2: 0.057086  accuracy2: 0.980343
-------------
Epoch 21:


100%|██████████| 562/562 [00:33<00:00, 16.55it/s]


Train | loss1: 0.034477  accuracy1: 0.988740 | loss2: 0.044964  accuracy2: 0.983819


100%|██████████| 62/62 [00:01<00:00, 42.88it/s]


Valid | loss1: 0.081492  accuracy1: 0.979839 | loss2: 0.060810  accuracy2: 0.979839
-------------
Epoch 22:


100%|██████████| 562/562 [00:33<00:00, 16.66it/s]


Train | loss1: 0.032713  accuracy1: 0.989518 | loss2: 0.042870  accuracy2: 0.984514


100%|██████████| 62/62 [00:01<00:00, 43.17it/s]


Valid | loss1: 0.071866  accuracy1: 0.982107 | loss2: 0.053185  accuracy2: 0.982107
-------------
Epoch 23:


100%|██████████| 562/562 [00:34<00:00, 16.46it/s]


Train | loss1: 0.029622  accuracy1: 0.990186 | loss2: 0.040322  accuracy2: 0.985571


100%|██████████| 62/62 [00:01<00:00, 44.21it/s]


Valid | loss1: 0.069615  accuracy1: 0.984123 | loss2: 0.054695  accuracy2: 0.980091
-------------
Epoch 24:


100%|██████████| 562/562 [00:33<00:00, 16.67it/s]


Train | loss1: 0.028448  accuracy1: 0.990770 | loss2: 0.036762  accuracy2: 0.986405


100%|██████████| 62/62 [00:01<00:00, 44.11it/s]


Valid | loss1: 0.075392  accuracy1: 0.982107 | loss2: 0.058017  accuracy2: 0.982611
-------------
Epoch 25:


100%|██████████| 562/562 [00:34<00:00, 16.49it/s]


Train | loss1: 0.026269  accuracy1: 0.990964 | loss2: 0.041088  accuracy2: 0.985487


100%|██████████| 62/62 [00:01<00:00, 43.75it/s]


Valid | loss1: 0.079950  accuracy1: 0.982359 | loss2: 0.050753  accuracy2: 0.982611
-------------
Epoch 26:


100%|██████████| 562/562 [00:34<00:00, 16.44it/s]


Train | loss1: 0.026327  accuracy1: 0.990742 | loss2: 0.035131  accuracy2: 0.987266


100%|██████████| 62/62 [00:01<00:00, 42.40it/s]


Valid | loss1: 0.073666  accuracy1: 0.983619 | loss2: 0.050447  accuracy2: 0.983367
-------------
Epoch 27:


100%|██████████| 562/562 [00:34<00:00, 16.30it/s]


Train | loss1: 0.024586  accuracy1: 0.992021 | loss2: 0.030691  accuracy2: 0.989324


100%|██████████| 62/62 [00:01<00:00, 41.61it/s]


Valid | loss1: 0.077667  accuracy1: 0.983367 | loss2: 0.052397  accuracy2: 0.984123
-------------
Epoch 28:


100%|██████████| 562/562 [00:35<00:00, 15.85it/s]


Train | loss1: 0.024190  accuracy1: 0.991854 | loss2: 0.031082  accuracy2: 0.989101


100%|██████████| 62/62 [00:01<00:00, 41.61it/s]


Valid | loss1: 0.074259  accuracy1: 0.982611 | loss2: 0.048608  accuracy2: 0.984123
-------------
Epoch 29:


100%|██████████| 562/562 [00:34<00:00, 16.19it/s]


Train | loss1: 0.022324  accuracy1: 0.992048 | loss2: 0.030676  accuracy2: 0.988907


100%|██████████| 62/62 [00:01<00:00, 41.96it/s]


Valid | loss1: 0.074200  accuracy1: 0.984879 | loss2: 0.056889  accuracy2: 0.982359
-------------
Epoch 30:


100%|██████████| 562/562 [00:34<00:00, 16.34it/s]


Train | loss1: 0.022990  accuracy1: 0.991937 | loss2: 0.029823  accuracy2: 0.989268


100%|██████████| 62/62 [00:01<00:00, 42.33it/s]


Valid | loss1: 0.075581  accuracy1: 0.983619 | loss2: 0.055958  accuracy2: 0.983115


100%|██████████| 62/62 [00:01<00:00, 42.70it/s]

Test loss1: 0.073952  accuracy1: 0.981351  loss2: 0.086194   accuracy2: 0.975050





In [16]:
class ModifiedCNN(nn.Module):
    def __init__(self, input_dimension):
        super(ModifiedCNN, self).__init__()

        self.conv1 = nn.Conv2d(1, 32, (3, 3))
        self.bn1 = nn.BatchNorm2d(32)

        self.conv2 = nn.Conv2d(32, 64, (3, 3))
        self.bn2 = nn.BatchNorm2d(64)

        self.flatten = Flatten()

        self.fc1 = nn.Linear(2880, 128)
        self.dropout1 = nn.Dropout(0.5)

        self.fc2 = nn.Linear(128, 64)
        self.dropout2 = nn.Dropout(0.5)

        self.fc3 = nn.Linear(64, 20)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.max_pool2d(x, (2, 2))

        x = F.relu(self.bn2(self.conv2(x)))
        x = F.max_pool2d(x, (2, 2))

        x = self.flatten(x)

        x = F.relu(self.fc1(x))
        x = self.dropout1(x)

        x = F.relu(self.fc2(x))
        x = self.dropout2(x)

        x = self.fc3(x)

        out_first_digit = x[:,:10]
        out_second_digit = x[:,10:]

        return out_first_digit, out_second_digit

def main():
    # Load the data
    X_train, y_train, X_test, y_test = get_data(path_to_data_dir, use_mini_dataset)

    # Split into train and dev
    dev_split_index = int(9 * len(X_train) / 10)
    X_dev = X_train[dev_split_index:]
    y_dev = [y_train[0][dev_split_index:], y_train[1][dev_split_index:]]
    X_train = X_train[:dev_split_index]
    y_train = [y_train[0][:dev_split_index], y_train[1][:dev_split_index]]

    permutation = np.array([i for i in range(len(X_train))])
    np.random.shuffle(permutation)
    X_train = [X_train[i] for i in permutation]
    y_train = [[y_train[0][i] for i in permutation], [y_train[1][i] for i in permutation]]

    # Split dataset into batches
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Load the modified model
    model = ModifiedCNN(img_rows * img_cols)

    # Use Adam optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # Adjust learning rate if necessary

    # Train the model
    train_model(train_batches, dev_batches, model)

    # Evaluate the model on test data
    loss, acc = run_epoch(test_batches, model.eval(), None)
    print('Test loss1: {:.6f}  accuracy1: {:.6f}  loss2: {:.6f}   accuracy2: {:.6f}'.format(loss[0], acc[0], loss[1], acc[1]))


In [17]:
if __name__ == '__main__':
    main()


  'y': torch.tensor([y_data[0][i:i + batch_size],


-------------
Epoch 1:


100%|██████████| 562/562 [00:39<00:00, 14.08it/s]


Train | loss1: 1.241333  accuracy1: 0.564530 | loss2: 1.290240  accuracy2: 0.536421


100%|██████████| 62/62 [00:01<00:00, 37.53it/s]


Valid | loss1: 0.283263  accuracy1: 0.942036 | loss2: 0.357643  accuracy2: 0.902974
-------------
Epoch 2:


100%|██████████| 562/562 [00:46<00:00, 12.00it/s]


Train | loss1: 0.630281  accuracy1: 0.792844 | loss2: 0.716535  accuracy2: 0.751279


100%|██████████| 62/62 [00:02<00:00, 22.45it/s]


Valid | loss1: 0.132607  accuracy1: 0.964214 | loss2: 0.220109  accuracy2: 0.925655
-------------
Epoch 3:


100%|██████████| 562/562 [00:53<00:00, 10.57it/s]


Train | loss1: 0.472122  accuracy1: 0.847031 | loss2: 0.541769  accuracy2: 0.817643


100%|██████████| 62/62 [00:01<00:00, 32.20it/s]


Valid | loss1: 0.131917  accuracy1: 0.963206 | loss2: 0.187460  accuracy2: 0.948085
-------------
Epoch 4:


100%|██████████| 562/562 [00:47<00:00, 11.74it/s]


Train | loss1: 0.417061  accuracy1: 0.864741 | loss2: 0.468995  accuracy2: 0.842666


100%|██████████| 62/62 [00:01<00:00, 34.16it/s]


Valid | loss1: 0.104210  accuracy1: 0.970262 | loss2: 0.116579  accuracy2: 0.964970
-------------
Epoch 5:


100%|██████████| 562/562 [00:46<00:00, 12.13it/s]


Train | loss1: 0.359774  accuracy1: 0.881673 | loss2: 0.418837  accuracy2: 0.860320


100%|██████████| 62/62 [00:01<00:00, 36.76it/s]


Valid | loss1: 0.097808  accuracy1: 0.972530 | loss2: 0.112082  accuracy2: 0.966986
-------------
Epoch 6:


100%|██████████| 562/562 [00:42<00:00, 13.21it/s]


Train | loss1: 0.331044  accuracy1: 0.892377 | loss2: 0.378947  accuracy2: 0.872470


100%|██████████| 62/62 [00:01<00:00, 37.66it/s]


Valid | loss1: 0.110347  accuracy1: 0.971018 | loss2: 0.102389  accuracy2: 0.967490
-------------
Epoch 7:


100%|██████████| 562/562 [00:41<00:00, 13.49it/s]


Train | loss1: 0.303019  accuracy1: 0.903192 | loss2: 0.342501  accuracy2: 0.884870


100%|██████████| 62/62 [00:01<00:00, 37.49it/s]


Valid | loss1: 0.081353  accuracy1: 0.978075 | loss2: 0.092702  accuracy2: 0.972026
-------------
Epoch 8:


100%|██████████| 562/562 [00:42<00:00, 13.36it/s]


Train | loss1: 0.278186  accuracy1: 0.910587 | loss2: 0.320227  accuracy2: 0.893989


100%|██████████| 62/62 [00:01<00:00, 38.73it/s]


Valid | loss1: 0.075271  accuracy1: 0.979587 | loss2: 0.094211  accuracy2: 0.972782
-------------
Epoch 9:


100%|██████████| 562/562 [00:41<00:00, 13.49it/s]


Train | loss1: 0.260021  accuracy1: 0.915063 | loss2: 0.290509  accuracy2: 0.900411


100%|██████████| 62/62 [00:01<00:00, 36.51it/s]


Valid | loss1: 0.089141  accuracy1: 0.976815 | loss2: 0.097636  accuracy2: 0.970262
-------------
Epoch 10:


100%|██████████| 562/562 [00:42<00:00, 13.37it/s]


Train | loss1: 0.239426  accuracy1: 0.921680 | loss2: 0.270260  accuracy2: 0.906500


100%|██████████| 62/62 [00:01<00:00, 38.05it/s]


Valid | loss1: 0.073748  accuracy1: 0.981603 | loss2: 0.084649  accuracy2: 0.973286
-------------
Epoch 11:


100%|██████████| 562/562 [00:41<00:00, 13.43it/s]


Train | loss1: 0.221769  accuracy1: 0.928575 | loss2: 0.254015  accuracy2: 0.911115


100%|██████████| 62/62 [00:01<00:00, 37.68it/s]


Valid | loss1: 0.079522  accuracy1: 0.980091 | loss2: 0.079017  accuracy2: 0.978075
-------------
Epoch 12:


100%|██████████| 562/562 [00:41<00:00, 13.42it/s]


Train | loss1: 0.207294  accuracy1: 0.931884 | loss2: 0.238316  accuracy2: 0.917232


100%|██████████| 62/62 [00:01<00:00, 37.94it/s]


Valid | loss1: 0.082568  accuracy1: 0.977823 | loss2: 0.072548  accuracy2: 0.979335
-------------
Epoch 13:


100%|██████████| 562/562 [00:42<00:00, 13.33it/s]


Train | loss1: 0.191614  accuracy1: 0.938612 | loss2: 0.223063  accuracy2: 0.922209


100%|██████████| 62/62 [00:01<00:00, 37.91it/s]


Valid | loss1: 0.065614  accuracy1: 0.982107 | loss2: 0.069658  accuracy2: 0.980091
-------------
Epoch 14:


100%|██████████| 562/562 [00:42<00:00, 13.37it/s]


Train | loss1: 0.181158  accuracy1: 0.940781 | loss2: 0.213059  accuracy2: 0.925684


100%|██████████| 62/62 [00:01<00:00, 35.76it/s]


Valid | loss1: 0.068596  accuracy1: 0.982611 | loss2: 0.065431  accuracy2: 0.982359
-------------
Epoch 15:


100%|██████████| 562/562 [00:42<00:00, 13.32it/s]


Train | loss1: 0.180175  accuracy1: 0.940419 | loss2: 0.200227  accuracy2: 0.931022


100%|██████████| 62/62 [00:01<00:00, 37.62it/s]


Valid | loss1: 0.066408  accuracy1: 0.983367 | loss2: 0.061693  accuracy2: 0.983115
-------------
Epoch 16:


100%|██████████| 562/562 [00:42<00:00, 13.26it/s]


Train | loss1: 0.169696  accuracy1: 0.943783 | loss2: 0.195270  accuracy2: 0.930827


100%|██████████| 62/62 [00:01<00:00, 37.80it/s]


Valid | loss1: 0.066919  accuracy1: 0.983871 | loss2: 0.066840  accuracy2: 0.981855
-------------
Epoch 17:


100%|██████████| 562/562 [00:42<00:00, 13.15it/s]


Train | loss1: 0.157478  accuracy1: 0.947370 | loss2: 0.186531  accuracy2: 0.934831


100%|██████████| 62/62 [00:01<00:00, 38.34it/s]


Valid | loss1: 0.072588  accuracy1: 0.984375 | loss2: 0.061137  accuracy2: 0.983115
-------------
Epoch 18:


100%|██████████| 562/562 [00:42<00:00, 13.19it/s]


Train | loss1: 0.153925  accuracy1: 0.947731 | loss2: 0.187742  accuracy2: 0.935109


100%|██████████| 62/62 [00:01<00:00, 37.56it/s]


Valid | loss1: 0.073087  accuracy1: 0.983619 | loss2: 0.066391  accuracy2: 0.981099
-------------
Epoch 19:


100%|██████████| 562/562 [00:42<00:00, 13.27it/s]


Train | loss1: 0.147145  accuracy1: 0.950039 | loss2: 0.179782  accuracy2: 0.935721


100%|██████████| 62/62 [00:01<00:00, 36.58it/s]


Valid | loss1: 0.068694  accuracy1: 0.985635 | loss2: 0.061901  accuracy2: 0.982863
-------------
Epoch 20:


100%|██████████| 562/562 [00:42<00:00, 13.30it/s]


Train | loss1: 0.140600  accuracy1: 0.953931 | loss2: 0.172266  accuracy2: 0.939752


100%|██████████| 62/62 [00:01<00:00, 36.64it/s]


Valid | loss1: 0.069987  accuracy1: 0.984375 | loss2: 0.059295  accuracy2: 0.984879
-------------
Epoch 21:


100%|██████████| 562/562 [00:42<00:00, 13.19it/s]


Train | loss1: 0.140443  accuracy1: 0.953097 | loss2: 0.160626  accuracy2: 0.942226


100%|██████████| 62/62 [00:01<00:00, 37.76it/s]


Valid | loss1: 0.073888  accuracy1: 0.983871 | loss2: 0.060814  accuracy2: 0.984375
-------------
Epoch 22:


100%|██████████| 562/562 [00:43<00:00, 13.05it/s]


Train | loss1: 0.135930  accuracy1: 0.954682 | loss2: 0.156117  accuracy2: 0.944061


100%|██████████| 62/62 [00:01<00:00, 36.26it/s]


Valid | loss1: 0.070600  accuracy1: 0.986139 | loss2: 0.064387  accuracy2: 0.983619
-------------
Epoch 23:


100%|██████████| 562/562 [00:42<00:00, 13.19it/s]


Train | loss1: 0.133319  accuracy1: 0.956100 | loss2: 0.154345  accuracy2: 0.945146


100%|██████████| 62/62 [00:01<00:00, 37.38it/s]


Valid | loss1: 0.066708  accuracy1: 0.985887 | loss2: 0.072817  accuracy2: 0.981855
-------------
Epoch 24:


100%|██████████| 562/562 [00:43<00:00, 12.96it/s]


Train | loss1: 0.127875  accuracy1: 0.955850 | loss2: 0.142592  accuracy2: 0.948009


100%|██████████| 62/62 [00:01<00:00, 37.76it/s]


Valid | loss1: 0.071355  accuracy1: 0.984123 | loss2: 0.066471  accuracy2: 0.982107
-------------
Epoch 25:


100%|██████████| 562/562 [00:43<00:00, 13.06it/s]


Train | loss1: 0.124030  accuracy1: 0.958880 | loss2: 0.144795  accuracy2: 0.947398


100%|██████████| 62/62 [00:01<00:00, 37.35it/s]


Valid | loss1: 0.068735  accuracy1: 0.985383 | loss2: 0.053494  accuracy2: 0.986391
-------------
Epoch 26:


100%|██████████| 562/562 [00:43<00:00, 12.88it/s]


Train | loss1: 0.120742  accuracy1: 0.960187 | loss2: 0.139710  accuracy2: 0.949288


100%|██████████| 62/62 [00:01<00:00, 36.35it/s]


Valid | loss1: 0.064421  accuracy1: 0.983871 | loss2: 0.056840  accuracy2: 0.985131
-------------
Epoch 27:


100%|██████████| 562/562 [00:43<00:00, 12.94it/s]


Train | loss1: 0.118031  accuracy1: 0.959825 | loss2: 0.138857  accuracy2: 0.951457


100%|██████████| 62/62 [00:01<00:00, 37.56it/s]


Valid | loss1: 0.077371  accuracy1: 0.984627 | loss2: 0.064628  accuracy2: 0.984879
-------------
Epoch 28:


100%|██████████| 562/562 [00:42<00:00, 13.13it/s]


Train | loss1: 0.113583  accuracy1: 0.961799 | loss2: 0.130712  accuracy2: 0.952152


100%|██████████| 62/62 [00:01<00:00, 37.41it/s]


Valid | loss1: 0.074155  accuracy1: 0.984375 | loss2: 0.064454  accuracy2: 0.984627
-------------
Epoch 29:


100%|██████████| 562/562 [00:43<00:00, 13.02it/s]


Train | loss1: 0.114393  accuracy1: 0.960854 | loss2: 0.130432  accuracy2: 0.953014


100%|██████████| 62/62 [00:01<00:00, 37.51it/s]


Valid | loss1: 0.069517  accuracy1: 0.986139 | loss2: 0.059857  accuracy2: 0.986391
-------------
Epoch 30:


100%|██████████| 562/562 [00:46<00:00, 11.97it/s]


Train | loss1: 0.119076  accuracy1: 0.958964 | loss2: 0.121931  accuracy2: 0.955655


100%|██████████| 62/62 [00:01<00:00, 34.03it/s]


Valid | loss1: 0.073040  accuracy1: 0.985383 | loss2: 0.065144  accuracy2: 0.985383


100%|██████████| 62/62 [00:01<00:00, 36.10it/s]


Test loss1: 0.095378  accuracy1: 0.982863  loss2: 0.093674   accuracy2: 0.981099
