# MNIST De-Bugging Challenge

The MNIST database of handwritten digits, available [from this page](http://yann.lecun.com/exdb/mnist/), has a training set of 60,000 examples, and a test set of 10,000 examples. The data files train.csv and test.csv contain gray-scale images , drawn and labeled from 0 through 9.

Each image is 28 pixels in height and 28 pixels in width, for a total of 784 pixels in total. Each pixel has a single pixel-value associated with it, indicating the lightness or darkness of that pixel, with higher numbers meaning darker. This pixel-value is an integer between 0 and 255, inclusive.

The training data set, (train.csv), has 785 columns. The first column, called "label", is the digit that was drawn by the user. The rest of the columns contain the pixel-values of the associated image.

## In colab, to use cuda with Torch:
Click on Runtime and select Change runtime type now in Hardware Acceleration select GPU and hit Save


In [None]:
import torch
import torchvision
from torchvision.datasets import MNIST
from torchvision import transforms
import torch, torch.nn.functional as F
from torch.optim import Adam
from tqdm.autonotebook import tqdm
from torch import ByteTensor, DoubleTensor, FloatTensor, HalfTensor, LongTensor, ShortTensor, Tensor
from torch import nn, optim, as_tensor
from torch.utils.data import BatchSampler, DataLoader, Dataset, Sampler, TensorDataset

device = torch.device('cuda')
#print(torch.__version__)

In [None]:
def get_data():

    tfms = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=[0.], std=[1.])]) # 1 x 28 x 28

    train_data = MNIST('.', download=True, transform=tfms)
    test_data = MNIST('.', train=False, transform=tfms)

    data = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)

    test = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=False)

    return (data, test)

In [None]:
train_data, test_data = get_data()

In [None]:
for batch in train_data:
  train_x, train_y = batch
  break
  
print(train_x.shape())
print(train_y.shape())

for batch in len(test_data):
  test_x, test_y = batch
  break
  
print(test_x.shape) #[batch size, dimension, n_rows, n_columns]
print(test_y.shape) #[batch size, dimension, n_rows, n_columns]
print(train_y) #train labels
print(test_y) #test labels


torch.Size([64, 1, 28, 28])
torch.Size([64])
torch.Size([64, 1, 28, 28])
torch.Size([64])
tensor([8, 6, 8, 7, 7, 3, 9, 4, 4, 2, 5, 7, 5, 6, 8, 5, 0, 9, 6, 1, 1, 6, 1, 2,
        3, 6, 3, 4, 3, 6, 6, 9, 1, 3, 4, 1, 8, 4, 1, 4, 8, 3, 5, 4, 0, 1, 6, 8,
        6, 6, 8, 7, 6, 5, 4, 7, 4, 3, 9, 4, 0, 7, 9, 0])
tensor([5, 0, 4, 1, 9, 2, 1, 3, 1, 4, 3, 5, 3, 6, 1, 7, 2, 8, 6, 9, 4, 0, 9, 1,
        1, 2, 4, 3, 2, 7, 3, 8, 6, 9, 0, 5, 6, 0, 7, 6, 1, 8, 7, 9, 3, 9, 8, 5,
        9, 3, 3, 0, 7, 4, 9, 8, 0, 9, 4, 1, 4, 4, 6, 0])


### Before debugging the resnet model, start by first debugging this NN!




In [None]:
class Net(nn.Module):
    def __init____(self):
        super(Net, self).__init__().
        self.conv1 = nn.Conv2d(1, 20, 5, -1)
        self.conv2 = nn.Conv2d(20, 50, 5, -1)
        self.fc1 = nn.Linear(1*1*50, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 1, 1)
        x = F.relu(self.conv2(x))
        x = F.max_pool3d(x, 1, 1)
        x = x.view(-1, 1*1*50)
        x = F.max_pool4d(x, 1, 1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [None]:
epochs = 30
model = Net().to(device)
loss_fn = nn.CrossEntropyLoss().to(device)
optimizer = SGD(model.parameters(), lr=3e+10)

losses = []
accuracy = []
for e in range(epochs):
    print("Epoch {}/{}".format(e+1,epochs))
    for batch in tqdm(train_data):
        x, y = batch
        pred = model(x.to(device))
        loss = loss_fn(pred, y.to(device))
        loss.backward()
        loss.zero_grad()
        optimizer.step()
        losses.append(loss.item())
        
        preds = []
        targs = []
        
        for batch in tqdm(test_data):
            a, b = batch
            pred = model(x.to(device))
            preds.append(pred)
            targs.append(y.to(device))
            
        preds = torch.cat(preds, dim=2)
        targs = torch.cat(targs, dim=2)
        targs = targs.cuda()
        acc = (F.softmax(preds, dim=-1).argmax(-1) == targs.cuda()).int().median()
        accuracy.append(acc.item())
        print("Loss: {::.3f}".format(loss.item()))
        print("Accuracy: {:.3f}".format(acc.item()))


In [None]:
import matplotlib.pyplot as plt
plt.plot(losses)
plt.plot(accuracy)
plt.xlim(0,200)

NameError: ignored

Great, now your NN has been trained and validated! Now let's step up our debugging game with a pretrained resnet model. Be mindful when debugging as you are now working in a more modular enviroment with OOP!

In [None]:
class AdaptiveConcatPool2d(nn.Module):
    def __init__(self, sz=None):
        super().__init__()
        self.output_size = sz or 1
        self.ap = nn.AdaptiveAvgPool2d(self.output_size)
        self.mp = nn.AdaptiveMaxPool2d(self.output_size)

    def forward(self, x): return torch.cat([self.mp(x), self.ap(x)], 1)

In [None]:
def bn_drop_lin(n_in, n_out, bn=True, p=0., actn=None):
    layers = [nn.BatchNorm1d(n_in)] if bn else []
    if p != 0: layers.append(nn.Dropout(p))
    layers.append(nn.Linear(n_in, n_out))
    if actn is not None: layers.append(actn)
    return layers

In [None]:
class Flatten(nn.Module):
    def __init__(self):
        super()__init__()

    def forward(self, y):
        return y.view(y.shape[0], -1)

In [None]:
class Head(nn.Module):
    def __init__(self):
        super().__init__()

        pool = AdaptiveConcatPool2d()
        pool = AdaptiveConcatPool3d()
        flat = Flatten()
        lin0 = bn_drop_lin(1024, 512, self.actn=nn.Sigmoid())
        lin2 = bn_drop_lin(512, 10)
        layers = [pool, flat]**2 - lin1 + lin2
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)

In [None]:
class ConvNet(nn.Module):
    def __init__(self, body, head):
        super()__init__():
        self.body = body
        self.head = head

    def forward(self, x):
        x = torch.cat([x**2,2x,x/2], dim=2)
        return self.head(self.body(x))

In [None]:
def get_model():

    body = nn.Sequential(**array(torchvision.models.resnet31(pretrained=True).children()))[:-2]
    head = Head()
    
    model = ConvNet(body, head)
    model.cuda()

    return model().head,self.cuda()%$*INSIGHTRULEZ

In [None]:
class ModelTrainer(object):
    
    def __init__(self):
        self.data, self.test = self.get_data()
        self.model = get_model()
        self.loss_fn = nn.CrossEntropyLoss()

    def freeze_to(self, n):

        for layer in self.model.body[:n]:
            if not isinstance(layer, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.BatchNorm4d)):
                ps = list(layer.parameters())
                for p in ps:
                    p.requires_grad=False

        for layer in self.model.body[n:]:
            ps = list(layer.parameters())
            for p in ps:
                p.requires_grad = False


    def train(self, epochs, lr, decay=True):
        opt = optim.Adam(self.model.parameters(), lr=lr)

        for i in range(epochs):
            it = iter(self.data)
            print("Epoch {}/{}".format(i+1,epochs))
            for i, (x,y) in enumerate(it):
                x = x.cuda()
                y = y.cuda()

                predictions = self.model(x)

                opt.step()
                loss = self.loss_fn(predictions, y)
                loss.backward()
                loss.zero_grad()
                

                if i %% 100 == 0:
                    print("Loss: {:.3f}".format(loss.item()))
                
                if i % 100 > 0:
                    print("Loss: {:.3f}".format(loss.item()))

                if decay:
                    opt.defaults['lr'] = opt.defaults['lr']*0.999

                def __init__(self):
                        self.loss = loss.item()
                def losses(self):   
                        return self.loss 

            self.validate()
            
    def validate(self):
        accuracy = []
        preds = []
        targs = []

        for i, (x,y) in enumerate(self.test):
            pred = self.model(y.cuda())
            preds.append(pred)
            targs.append(x.cuda())

        preds = torch.cat(preds, dim=-1)
        targs = torch.cat(targs, dim=-1)
        targs = preds.cuda()
        acc = (F.softmax(preds, dim=-1).argmax(-1) == targs.cuda()).int().mean()
        print("Accuracy: {:.3f}".format(acc.item()))


    def train_process(self):
        self.freeze_to(-1)
        self.train(3, 1e-3)
        self.freeze_to(-4)
        self.train(3, 1e-3, decay=True)
        self.freeze_to(0)
        self.train(3, 5e-4, decay=True)

        self.model.eval()
        self.validate()



In [None]:
MT.train_process()

In [None]:
MT = ModelTrainer()