In [None]:
%pylab inline
gray()

# Install Torch

Follow the instructions **appropriate for your system**
https://pytorch.org/get-started/locally/ 

On _your laptops_ which probably DO NOT have CUDA
```bash
conda install -y pytorch-cpu torchvision-cpu -c pytorch
```

**The next example is for a GPU system, do NOT do both(!)**

On _my_ system (GTX1080 GPU, NVIDIA Cuda v9.0)
```bash
conda install -y pytorch torchvision -c pytorch
```


**Restart the notebook** so that the Python interpreter can pick up the changes made during the installation of torchvision. 

In [None]:
import torch

In [None]:
import torchvision

# An Example

In [None]:
from torchvision.datasets import MNIST

In [None]:
training_data = MNIST('./mndist', train=True,  download=True)

In [None]:
training_data

In [None]:
??torch.utils.data.Dataset

In [None]:
training_data[0]

In [None]:
im, target = training_data[0]

In [None]:
# Show the first training sample with its label as the title and tick marks
# only at the first and last pixel of each axis.
# PIL reports size as (width, height): width bounds the x axis, height the y axis.
width, height = im.size
imshow(im);
title(int(target));
xticks([0, width - 1]);
yticks([0, height - 1]);

In [None]:
from torchvision import transforms

In [None]:
??transforms.ToTensor

In [None]:
??transforms.Compose

In [None]:
transform = transforms.Compose(
    [transforms.ToTensor()])

In [None]:
training_data = MNIST('./mndist', train=True,  download=True, transform=transform)

In [None]:
data, target = training_data[0]
#data
print(type(data))

In [None]:
train_loader = torch.utils.data.DataLoader(training_data, batch_size=4, shuffle=True, num_workers=2)

In [None]:
!nvidia-smi

In [None]:
train_iter = iter(train_loader)
# Use the built-in next(): DataLoader iterators implement __next__, while the
# Python-2-style .next() alias is not available on current PyTorch releases.
images, targets = next(train_iter)

In [None]:
images.shape

In [None]:
targets.shape

In [None]:
def show_batch(images, targets):
    """Plot a batch of images on a near-square grid of subplots.

    images:  indexable batch; each item is converted with .numpy() and
             squeezed before being displayed.
    targets: per-image values shown as the x-axis label of each subplot,
             or None to omit the labels.
    """
    count = len(images)
    if count == 0:
        return  # nothing to draw; also avoids dividing by a zero column count
    # subplot() needs integer grid dimensions, but ceil() returns floats.
    ncols = int(ceil(sqrt(count)))
    nrows = int(ceil(count / ncols))
    for i in range(count):
        subplot(nrows, ncols, i+1)
        imshow(images[i].numpy().squeeze())
        xticks([]); yticks([]); 
        if targets is not None:
            xlabel(str(targets[i].numpy()))

In [None]:
show_batch(images, targets)

In [None]:
import torch.nn as nn
import torch.nn.functional as F

In [None]:
x = images
x.shape

In [None]:
x = x.reshape(x.shape[0], -1)
x.shape

In [None]:
fc = nn.Linear(784, 10)
fc

In [None]:
W, b = fc.parameters()
W.shape, b.shape

In [None]:
imshow(W.detach().numpy())
xlim(0,50)

In [None]:
a = fc(x)
a.shape

In [None]:
y = a.argmax(1)
y.shape

In [None]:
y

In [None]:
def show_batch(images, targets, predictions=None):
    """Plot a batch of images on a near-square grid of subplots.

    images:      indexable batch; each item is converted with .numpy() and
                 squeezed before being displayed.
    targets:     per-image true values, or None.
    predictions: per-image predicted values, or None. When given, each subplot
                 is labelled "P:<prediction>, T:<target>" (targets must then be
                 provided as well); otherwise only "T:<target>" is shown when
                 targets is not None.
    """
    count = len(images)
    if count == 0:
        return  # nothing to draw; also avoids dividing by a zero column count
    # subplot() needs integer grid dimensions, but ceil() returns floats.
    ncols = int(ceil(sqrt(count)))
    nrows = int(ceil(count / ncols))
    for i in range(count):
        subplot(nrows, ncols, i+1)
        imshow(images[i].numpy().squeeze())
        xticks([]); yticks([]); 
        if predictions is not None:
            xlabel("P:{}, T:{}".format( predictions[i].numpy(), targets[i].numpy()))
        elif targets is not None:
            xlabel("T:{}".format(targets[i].numpy()))


In [None]:
show_batch(images, targets, y)

In [None]:
class MyNet(nn.Module):
    """Single-layer linear classifier: flatten each sample, apply one nn.Linear.

    Args:
        in_features: number of values per flattened sample (default 784).
        num_classes: number of scores produced per sample (default 10).

    The defaults reproduce the original fixed 784 -> 10 layer, and the layer
    keeps the attribute name ``fc`` so existing state dicts still load.
    """
    def __init__(self, in_features=784, num_classes=10):
        super().__init__()
        self.fc = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return one row of raw (un-normalised) class scores per sample in x."""
        # Keep the batch dimension and collapse everything else into one axis.
        x = x.reshape(x.shape[0], -1)
        a = self.fc(x)
        return a  # Will learn to treat 'a' as the natural parameters of a multinomial distr. 

In [None]:
net = MyNet()

In [None]:
net

In [None]:
net.state_dict()

In [None]:
criterion = nn.CrossEntropyLoss()

In [None]:
import torch.optim as optim

optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9) # Will discuss lr and momentum later

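While the next cell is running, open up a terminal and type `nvidia-smi` to make sure you are using 100% of your GPU. This is not so important on such a small dataset / model, but for larger problems the (linear) time savings add up. (Note: the cells in this notebook never move the model or the batches to a CUDA device, so as written the loop runs on the CPU.)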

In [None]:
# `epoch` keeps its name because the checkpoint cell further down reads it.
for epoch in range(2):  # two full passes over the training set
    running_loss = 0.0  # loss accumulated since the last progress report

    # Count batches from 1 so the counter is exactly what gets printed.
    for step, (inputs, labels) in enumerate(train_loader, start=1):
        # Reset the parameter gradients before processing this batch.
        optimizer.zero_grad()

        outputs = net(inputs)               # forward pass: predict
        loss = criterion(outputs, labels)   # score the predictions
        loss.backward()                     # how each parameter affected the loss
        optimizer.step()                    # update the parameters

        running_loss += loss.item()
        if step % 2000 != 0:
            continue

        # Every 2000 mini-batches: report the mean loss, then start over.
        print('[%d, %5d] loss: %.3f' %
              (epoch + 1, step, running_loss / 2000))
        running_loss = 0.0

print('Finished Training')


In [None]:
# Built-in next() instead of the Python-2-style .next() alias, which current
# DataLoader iterators do not provide.
images, targets = next(iter(train_loader))

In [None]:
y = net(images).argmax(1)

In [None]:
show_batch(images, targets, y)
savefig('examples.jpg')

**ALERT: This was ALL on TRAINING data -- we NEED TEST!!!**

In [None]:
test_data = MNIST('./mndist', train=False,  download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=4, num_workers=2)

In [None]:
# Confusion matrix over the test set: rows index the predicted class, columns
# the true class. Batch-specific loop names are used so the notebook-level
# `x` and `y` from earlier cells are not overwritten.
cm = np.zeros((10,10), dtype=np.int64)
with torch.no_grad():  # evaluation only, no gradients needed
    for batch_images, batch_targets in test_loader:
        batch_preds = net(batch_images).argmax(1).numpy()
        # np.add.at accumulates correctly even when the same (pred, true)
        # pair occurs several times within one batch.
        np.add.at(cm, (batch_preds, batch_targets.numpy()), 1)

In [None]:
import pandas as pd
df_cm = pd.DataFrame(cm, index = arange(10), columns = arange(10))
df_cm

Install $\texttt{seaborn}$ to plot confusion matrices (and many other things) easily.   

In [None]:
import seaborn as sn

In [None]:
figure(figsize = (10,7))
sn.heatmap(df_cm, fmt='d', annot=True);
ylabel('pred')
xlabel('true');
savefig('cm.jpg')

In [None]:
acc = np.trace(cm)/ np.sum(cm)
acc

In [None]:
def report_from_cm(cm, labels, beta=1):
    """Build a per-class precision / recall / F-measure table from a confusion matrix.

    Args:
        cm: square 2-D array indexed as cm[predicted, true]; row/column i
            belongs to the i-th entry of `labels`.
        labels: iterable of class labels, used as the row index of the result.
        beta: weight of recall relative to precision in the F-measure.

    Returns:
        DataFrame with one row per label plus a final 'avg/total' row holding
        the unweighted mean of precision / recall / fmeasure and the summed
        support. A metric whose denominator is zero is reported as NaN and
        is skipped by the averages.
    """
    labels = list(labels)  # materialise once: labels is needed several times below
    metric_cols = ['precision', 'recall', 'fmeasure']
    df = pd.DataFrame(index=labels + ['avg/total'], columns=metric_cols + ['support'])
    nan = float('nan')
    beta_sq = beta ** 2
    for i, label in enumerate(labels):
        tp = cm[i, i]
        predicted = cm[i, :].sum()   # tp + fp: everything predicted as class i
        support = cm[:, i].sum()     # tp + fn: everything that truly is class i
        precision = tp / predicted if predicted else nan
        recall = tp / support if support else nan
        denom = beta_sq * precision + recall
        fmeasure = (1 + beta_sq) * precision * recall / denom if denom else nan
        # Assign through a single .loc call: chained df[col][row] = value
        # writes are not guaranteed to reach the original frame.
        df.loc[label, 'precision'] = precision
        df.loc[label, 'recall'] = recall
        df.loc[label, 'fmeasure'] = fmeasure
        df.loc[label, 'support'] = support
    # Summaries are computed over the class rows only.
    for col in metric_cols:
        df.loc['avg/total', col] = df.loc[labels, col].astype(float).mean()
    df.loc['avg/total', 'support'] = df.loc[labels, 'support'].sum()
    return df

In [None]:
report_from_cm(cm, range(10))

In [None]:
net.state_dict()

In [None]:
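# Convention: the extension 'pth' seems to come from PyTorcH. The '.tar' suffix is a naming convention only; torch.save chooses its own serialization format regardless of the file name.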

In [None]:
torch.save(dict(epoch=epoch, 
                params=net.state_dict(),
                optimizer=optimizer.state_dict()), 
           'mnist-linear.pth.tar')

In [None]:
state = torch.load('mnist-linear.pth.tar')
state

In [None]:
# To resume later
net.load_state_dict(state['params'])

In [None]:
torch.save??

I typically write a class for doing the training. 

* I calculate test-loss every epoch
* I produce a plot of the running train/test losses
* I look for indications that there was a problem (NaNs, dead layers) after each epoch
* I save the model to a file (checkpoint.pth.tar) every epoch
    * If disk space is not an issue, I save all of the checkpoints. This lets me go back and visualize how the net changed during learning
* I save the best model to a file (best.pth.tar) if it changes at each epoch
* I save the entire history to a log file
* I use progress bars from https://pypi.org/project/tqdm/ for each epoch


A training _curriculum_ will train multiple times, on increasingly complex versions of the problem. 
* Start with small images [8x8] and then increase the size
* Start with one layer, then add more
* Start with a pretrained model and freeze only the output, or input, layers initially. 

Next:
- Look at https://github.com/pytorch/tnt
- Look at https://github.com/ncullen93/torchsample