In [1]:
import torch
import torch.nn as nn
from pathlib import Path
import gzip
import numpy as np
# from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
from sys import stdout
from matplotlib import pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# torch.rand(3,3)
# torch.cuda.is_available()
# torch.cuda.device_count()
# torch.cuda.get_device_name(torch.cuda.current_device())

# Load data

In [3]:
def to_categorical(y, num_classes):
    """ 1-hot encodes a tensor """
    return np.eye(num_classes, dtype='uint8')[y]

# print(to_categorical(y=3, num_classes=4))

In [4]:
# load train data
data_dir = Path('C:/Amin/Workspace/Data/MNIST')

# load train data
f = gzip.open(data_dir/'train-images-idx3-ubyte.gz','r')
image_size = 28
num_images = 60000


f.read(16)
buf = f.read(image_size * image_size * num_images)
train_data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
train_data = train_data.reshape(num_images, image_size, image_size, 1)

# image = np.asarray(train_data[2]).squeeze()
# plt.imshow(image, cmap="Greys")
# plt.show()

# labels
f = gzip.open(data_dir/'train-labels-idx1-ubyte.gz','r')
f.read(8)

buf = f.read(num_images)
train_labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int64)
# train_labels = to_categorical(y=train_labels, num_classes=10)
# print(train_labels, type(train_labels), np.shape(train_labels))

In [5]:
# load test data
data_dir = Path('C:/Amin/Workspace/Data/MNIST')

# load test data
f = gzip.open(data_dir/'t10k-images-idx3-ubyte.gz','r')
image_size = 28
num_images = 10000


f.read(16)
buf = f.read(image_size * image_size * num_images)
test_data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
test_data = test_data.reshape(num_images, image_size, image_size, 1)

# image = np.asarray(test_data[2]).squeeze()
# plt.imshow(image, cmap="Greys")
# plt.show()

# labels
f = gzip.open(data_dir/'t10k-labels-idx1-ubyte.gz','r')
f.read(8)

buf = f.read(num_images)
test_labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int64)
# test_labels = to_categorical(y=test_labels, num_classes=10)
# print(test_labels)

# define model

In [6]:
    
class MyConvNet(nn.Module):
    # define model elements
    def __init__(self, in_channels):
        super().__init__() # MyConvNet, self
        
        #first hidden layer
        self.hidden_1 = nn.Conv2d(in_channels=in_channels, out_channels=32, kernel_size=3, stride=1, padding=1)
        nn.init.kaiming_uniform_(self.hidden_1.weight, nonlinearity='relu')
        self.Norm_1 = nn.BatchNorm2d(num_features=32)
        self.activation_1 = nn.ReLU()
        self.pool_1 = nn.MaxPool2d(kernel_size=(2,2), stride=(2,2))
        
        # second hidden layer
        self.hidden_2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1)
        nn.init.kaiming_uniform_(self.hidden_2.weight, nonlinearity='relu')
        self.Norm_2 = nn.BatchNorm2d(num_features=32)
        self.activation_2 = nn.ReLU()
        self.pool_2 = nn.MaxPool2d(kernel_size=(2,2), stride=(2,2))
        
        # third hidden layer 
        self.hidden_3 = nn.Linear(in_features=7*7*32, out_features=100)
        nn.init.kaiming_uniform_(self.hidden_3.weight, nonlinearity='relu')
        self.Norm_3 = nn.BatchNorm1d(num_features=100)
        self.activation_3 = nn.ReLU()
        
        # output layer
        self.hidden_4 = nn.Linear(in_features=100, out_features=10)
        nn.init.xavier_uniform_(self.hidden_4.weight)
        self.activation_4 = nn.Softmax(dim=1)

        
    def forward(self, X):
        # input to first hidden layer
        out_hidden_1 = self.hidden_1(X)
        out_Norm_1 = self.Norm_1(out_hidden_1)
        out_activation_1 = self.activation_1(out_Norm_1)
        out_pool_1 = self.pool_1(out_activation_1)
        
        # second hidden layer
        out_hidden_2 = self.hidden_2(out_pool_1)
        out_Norm_2 = self.Norm_2(out_hidden_2)
        out_activation_2 = self.activation_2(out_Norm_2)
        out_pool_2 = self.pool_2(out_activation_2)
        
        # flatten
        out_flatten = out_pool_2.view(out_pool_2.size(0), -1)
        
        
        
        # third hidden layer
        out_hidden_3 = self.hidden_3(out_flatten)
        out_Norm_3 = self.Norm_3(out_hidden_3)
        out_activation_3 = self.activation_3(out_Norm_3)
        
        # output layer
        out_hidden_4 = self.hidden_4(out_activation_3)
        out_activation_4 = self.activation_4(out_hidden_4)
        
        return out_activation_4
    

In [7]:
model = MyConvNet(in_channels=1)

# Train model

In [8]:
# define the optimization
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# enumerate epochs
for epoch in range(10):
    print("\nEpoch ", epoch, ": ")
    
    train_data_record_indices = range(0, train_data.shape[0])
    train_data_record_indices_shuffled = shuffle(train_data_record_indices)
    
    batch_size = 64
    num_batches = int(train_data.shape[0]/batch_size)
    chunk_indices = np.array_split(train_data_record_indices_shuffled, num_batches)
            
            
    # enumerate mini batches
    for i in range(num_batches):  # range(num_batches)  
#         print("batch: ", i, "out of ", num_batches)
        a = int((i*10/num_batches))+1
        b = "=" * a
        stdout.write("\r"+ "["  + b + "] " + str(int(i*100/num_batches)+1) + " %")     
        del b
        stdout.flush()
        
        inputs, targets = train_data[chunk_indices[i]], train_labels[chunk_indices[i]]
        inputs = np.reshape(inputs, (inputs.shape[0], 1, 28, 28))
        inputs = torch.Tensor(inputs)
        targets = torch.Tensor(targets)
        targets = targets.type(torch.LongTensor)
        # clear the gradients
        optimizer.zero_grad()
        # compute the model output
        yhat = model(inputs)
        # calculate loss
        loss = criterion(yhat, targets)
        # credit assignment
        loss.backward()
        # update model weights
        optimizer.step()


Epoch  0 : 
Epoch  1 : 
Epoch  2 : 
Epoch  3 : 
Epoch  4 : 
Epoch  5 : 
Epoch  6 : 
Epoch  7 : 
Epoch  8 : 
Epoch  9 : 

# evaluate

In [9]:
inputs, targets = test_data, test_labels
inputs = np.reshape(inputs, (inputs.shape[0], 1, 28, 28))
inputs = torch.Tensor(inputs)
targets = torch.Tensor(targets)
targets = targets.type(torch.LongTensor)


# evaluate the model on the test set
yhat = model(inputs)
# retrieve numpy array
yhat = yhat.detach().numpy()
actual = targets.numpy()
# convert to class labels
yhat = np.argmax(yhat, axis=1)
# reshape for stacking
actual = actual.reshape((len(actual), 1))
yhat = yhat.reshape((len(yhat), 1))


acc = accuracy_score(actual, yhat)
print("accuracy_score: ", acc)

accuracy_score:  0.9905
