# Contents
<p>
    <div class = "lev1 toc-item">
        <a href = "#Import-Data" data-toc-modified-id="Import-Data-1">
            <span class="toc-item-num">1&nbsp;&nbsp;</span>
            Import Data
        </a>
    </div>
    <div class = "lev1 toc-item">
        <a href = "#Build-Model" data-toc-modified-id="Build-Model-2">
            <span class="toc-item-num">2&nbsp;&nbsp;</span>
            Build Model
        </a>
    </div>
    <div class = "lev1 toc-item">
        <a href = "#Other-Functions" data-toc-modified-id="Other-Functions-3">
            <span class="toc-item-num">3&nbsp;&nbsp;</span>
            Other Functions
        </a>
    </div>
    <div class = "lev1 toc-item">
        <a href = "#Criterion-and-Optimizer" data-toc-modified-id="Criterion-and-Optimizer-4">
            <span class="toc-item-num">4&nbsp;&nbsp;</span>
            Criterion and Optimizer
        </a>
    </div>
    <div class = "lev1 toc-item">
        <a href = "#Training-the-Model" data-toc-modified-id="Training-the-Model-5">
            <span class="toc-item-num">5&nbsp;&nbsp;</span>
            Training the Model
        </a>
    </div>

In [1]:
import numpy as np
import torch
from torch import nn
import torchvision
import torch.nn.functional as F
from torchvision import transforms
from tqdm import trange
import tqdm as tq
import pandas as pd
import sys

# Import Data

In [2]:
# Per-channel mean/std normaliser. It is defined here but is not part of the
# `tfms` pipeline below.
# NOTE(review): these are three-channel statistics, while the model's first
# convolution takes a single input channel — confirm before adding it to `tfms`.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# The only preprocessing applied to the images is conversion to a tensor.
tfms = transforms.Compose([transforms.ToTensor()])

In [3]:
# Training split first, then the held-out split; both are read from ../data/
# with the shared `tfms` transform and no download is attempted.
data, test = (
    torchvision.datasets.MNIST(root='../data/', train=is_train, transform=tfms, download=False)
    for is_train in (True, False)
)

In [4]:
# Training batches: 64 samples, reshuffled every epoch, loaded by 8 workers.
data_l = torch.utils.data.DataLoader(
    dataset=data,
    shuffle=True,
    batch_size=64,
    num_workers=8,
)
# Evaluation batches: 32 samples in dataset order, loaded by 8 workers.
test_l = torch.utils.data.DataLoader(
    dataset=test,
    batch_size=32,
    num_workers=8,
)

# Build Model

In [5]:
def conv3(in_c, out_c, k=3, p=1, s=1):
    """Create a bias-free 2-D convolution with a square kernel.

    Args:
        in_c: number of input channels.
        out_c: number of output channels.
        k: kernel side length; the kernel is ``(k, k)``.
        p: padding applied on both spatial axes, ``(p, p)``.
        s: stride, passed to ``nn.Conv2d`` unchanged.

    Returns:
        An ``nn.Conv2d`` built with ``bias=False``.
    """
    kernel = (k, k)
    pad = (p, p)
    return nn.Conv2d(in_c, out_c, kernel, stride=s, padding=pad, bias=False)

In [6]:
class ConvNet(nn.Module):
    """Small convolutional classifier for single-channel images.

    The forward pass applies two conv -> ReLU -> max-pool stages, adaptively
    average-pools the result to an 8x8 grid, flattens it and feeds a single
    linear layer with ``c`` outputs.

    NOTE(review): ``self.conv3`` (32 -> 64 channels) is constructed and
    initialised but never called in ``forward``; the linear layer is sized for
    the 32-channel output of ``conv2``. Confirm whether a third stage was
    intended.
    """

    def __init__(self, c):
        """Build the layers; ``c`` is the number of outputs of the final linear layer."""
        super().__init__()
        self.conv1 = conv3(1, 16)
        self.conv2 = conv3(16, 32)
        self.conv3 = conv3(32, 64)
        self.max = nn.MaxPool2d(kernel_size=2)
        self.relu = nn.ReLU()
        self.ada = nn.AdaptiveAvgPool2d(8)
        # 32 channels on an 8x8 grid after the adaptive pooling.
        self.lin = nn.Linear(32 * 8 * 8, c)

        # Re-draw every convolution's weights with nn.init.normal_ (default
        # arguments), visiting the layers in module-registration order.
        conv_layers = [layer for layer in self.modules() if isinstance(layer, nn.Conv2d)]
        for layer in conv_layers:
            nn.init.normal_(layer.weight)

    def forward(self, x):
        """Return the ``c`` raw scores for each sample in the batch ``x``."""
        stage1 = self.conv1(x)
        stage1 = self.relu(stage1)
        stage1 = self.max(stage1)

        stage2 = self.conv2(stage1)
        stage2 = self.relu(stage2)
        stage2 = self.max(stage2)

        pooled = self.ada(stage2)
        # Keep the batch dimension, collapse everything else.
        flat = pooled.view(pooled.shape[0], -1)
        return self.lin(flat)

In [7]:
# Instantiate the classifier with 10 outputs on its final linear layer.
model = ConvNet(c=10)

In [8]:
# Move the model to the GPU only when one is actually present, using the same
# torch.cuda.is_available() check that train() applies to its input batches.
# On a CPU-only machine the model is left where it is instead of raising.
if torch.cuda.is_available():
    model = model.cuda()

# Other Functions

In [9]:
def V(x, rg):
    """Wrap ``x`` in ``torch.autograd.Variable`` with ``requires_grad=rg``.

    Args:
        x: the tensor to wrap.
        rg: value forwarded as ``requires_grad``.

    Returns:
        The object produced by ``torch.autograd.Variable``.
    """
    wrapped = torch.autograd.Variable(x, requires_grad=rg)
    return wrapped

In [10]:
def accuracy(preds, targ):
    """Return the fraction of rows whose highest-scoring column equals the target.

    Args:
        preds: 2-D tensor of scores; the maximum is taken along ``dim=1``.
        targ: tensor of target indices, compared element-wise with the
            predicted indices.

    Returns:
        A scalar float tensor: the mean of the element-wise matches.
    """
    _, predicted = preds.max(dim=1)
    hits = predicted.eq(targ)
    return hits.float().mean()

In [12]:
def close_tqdm():
    """Close every progress bar currently registered on ``tq.tqdm._instances``.

    This is a best-effort cleanup: a bar whose ``close()`` raises is skipped
    so that the remaining bars are still closed, rather than aborting the
    whole sweep on the first failure.

    Returns:
        None in all cases, including when there is nothing to close.
    """
    instances = getattr(tq.tqdm, '_instances', None)
    if not instances:
        return None

    while instances:
        try:
            bar = instances.pop()
        except (KeyError, IndexError):
            # The container emptied between the truthiness check and the pop.
            break
        try:
            bar.close()
        except Exception:
            # Deliberately ignored: cleaning up stale bars must never break
            # the caller, and the other bars still need closing.
            continue
    return None

def tqdm(*args, **kwargs):
    """Create a ``tq.tqdm`` progress bar after closing any bars left open.

    All positional and keyword arguments are forwarded to ``tq.tqdm``. The
    bar writes to ``sys.stdout`` unless the caller supplies its own ``file``
    argument; previously an explicit ``file=`` raised ``TypeError`` because
    the keyword was passed twice.

    Returns:
        Whatever ``tq.tqdm(*args, **kwargs)`` returns.
    """
    close_tqdm()
    kwargs.setdefault('file', sys.stdout)
    return tq.tqdm(*args, **kwargs)

# Criterion and Optimizer

In [11]:
# Loss: cross-entropy over the model's raw scores.
crit = nn.CrossEntropyLoss()
# Optimiser: SGD over every parameter of `model`, learning rate 0.01.
opt = torch.optim.SGD(params=model.parameters(), lr=0.01)

# Training the Model

In [15]:
def _evaluate(model, val, device):
    """Return the percentage of samples in ``val`` that ``model`` classifies correctly.

    The model is switched to eval mode for the pass and restored to its
    previous mode afterwards; no autograd graph is built. The denominator is
    ``len(val.dataset)``; an empty dataset yields ``0.0``.
    """
    total = len(val.dataset)
    if total == 0:
        return 0.0

    was_training = model.training
    model.eval()
    correct = 0
    try:
        with torch.no_grad():
            for (*inputs, targ) in val:
                scores = model(inputs[0].to(device))
                pred = torch.max(scores, dim=1)[1]
                correct += (pred == targ.to(device)).sum().item()
    finally:
        # Put the model back in whatever mode the caller had it in.
        model.train(was_training)
    return correct * 100.0 / total


def train(loader, val, model, crit, opt, epochs, eval_every=100):
    """Train ``model`` for ``epochs`` passes over ``loader`` with a live progress bar.

    Each batch is unpacked as ``(*inputs, target)``; the first input and the
    target are moved to the device the model's parameters live on, and the
    loss is computed there (the logits are no longer copied to the CPU on
    every step). The bar shows the mean loss of the current epoch and the
    most recent validation accuracy in percent.

    Args:
        loader: iterable of training batches that also supports ``len()``.
        val: iterable of validation batches exposing a ``dataset`` attribute
            whose length is the number of validation samples.
        model: the ``nn.Module`` being trained.
        crit: callable ``crit(output, target)`` returning the loss tensor.
        opt: optimiser providing ``zero_grad()`` and ``step()``.
        epochs: number of passes over ``loader``.
        eval_every: run validation after every this many training steps
            (default 100); a falsy value disables validation.

    Returns:
        None.
    """
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    # Last measured validation accuracy; carried across epochs so the bar
    # always shows the latest value.
    acc = 0.
    for _ in range(1, epochs + 1):
        # Mean training loss of the current epoch only.
        avg_l = 0.
        t = tqdm(iter(loader), total=len(loader))
        for i, (*inp, targ) in enumerate(t, start=1):
            inp = inp[0].to(device)
            targ = targ.to(device)

            opt.zero_grad()
            loss = crit(model(inp), targ)
            loss.backward()
            opt.step()

            # Incremental mean: after i steps avg_l is the exact average of
            # the i losses seen so far in this epoch.
            avg_l += (loss.item() - avg_l) / i
            t.set_description('Loss = {l:.3f}; '
                              'Accuracy: {a:.3f} '.format(l=avg_l, a=acc))
            # No manual t.update(): iterating over the bar already advances it.

            if eval_every and i % eval_every == 0:
                acc = _evaluate(model, val, device)

In [16]:
# Two passes over the training loader, validating against the test loader.
train(data_l, test_l, model, crit, opt, epochs=2)

Loss = 0.101; Accuracy: 97.020 : 100%|██████████| 938/938 [00:11<00:00, 84.13it/s] 
Loss = 0.087; Accuracy: 97.390 : 100%|██████████| 938/938 [00:11<00:00, 71.39it/s] 
