Reading classics [Deep Learning Models](https://nbviewer.jupyter.org/github/rasbt/deeplearning-models/tree/master/pytorch_ipynb/)

## Code Modules & Functions

In [1]:
import numpy as np,pandas as pd,pylab as pl
import h5py,torch
from torchvision.datasets import MNIST as tmnist
from torchvision import transforms
from torch.utils.data import DataLoader as tdl
from torch.utils.data import Dataset as tds
import torch.nn.functional as tnnf
from sklearn.datasets import make_classification
from IPython.core.magic import register_line_magic
dev=torch.device("cuda:0" if torch.cuda.is_available() 
                 else "cpu")

In [2]:
def model_acc(model,data_loader,num_features):
    correct_preds,num_examples=0,0    
    for features,targets in data_loader:
        features=features.view(-1,num_features).to(dev)
        targets=targets.to(dev)
        logits,probs=model(features)
        _,pred_labels=torch.max(probs,1)
        num_examples+=targets.size(0)
        correct_preds+=(pred_labels==targets).sum()        
    return correct_preds.float()/num_examples*100

In [3]:
@register_line_magic
def print_acc(t):
    if t=='test':
        print('Test accuracy: %.4f%%'%\
        (model_acc(model,test_loader,num_features)))
    if t=='train':
        print('Train accuracy: %.4f%%'%\
        (model_acc(model,train_loader,num_features)))

In [4]:
@register_line_magic
def print_acc2(t):
    if t=='test':
        print('Test accuracy: %.4f%%'%\
        (model_acc(model,test_loader2,num_features2)))
    if t=='train':
        print('Train accuracy: %.4f%%'%\
        (model_acc(model,train_loader2,num_features2)))

In [5]:
@register_line_magic
def train_run(epochs):
    epochs=int(epochs)
    for epoch in range(epochs):
        for batch_ids,(features,targets) in enumerate(train_loader):        
            features=features.view(-1,num_features).to(dev)
            targets=targets.to(dev)
            logits,probs=model(features)
            cost=tnnf.cross_entropy(logits,targets)
            optimizer.zero_grad(); cost.backward()
            optimizer.step()
            if not batch_ids%300:
                print ('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f' 
                       %(epoch+1,epochs,batch_ids, 
                         len(train)//batch_size,cost))           
        with torch.set_grad_enabled(False):
            print('Epoch: %03d/%03d train accuracy: %.2f%%'%\
                  (epoch+1,epochs,model_acc(model,train_loader,
                                            num_features)))

In [6]:
@register_line_magic
def train_run2(epochs):
    epochs=int(epochs)
    for epoch in range(epochs):
        for batch_ids,(features,targets) in enumerate(train_loader2):        
            features=features.view(-1,num_features2).to(dev)
            targets=targets.to(dev)
            logits,probs=model(features)
            cost=tnnf.cross_entropy(logits,targets.long())
            optimizer.zero_grad(); cost.backward()
            optimizer.step()
            if not batch_ids%300:
                print ('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f' 
                       %(epoch+1,epochs,batch_ids, 
                         len(train2)//batch_size2,cost))           
        with torch.set_grad_enabled(False):
            print('Epoch: %03d/%03d train accuracy: %.2f%%'%\
                  (epoch+1,epochs,model_acc(model,train_loader2,
                                            num_features2)))

## Data

In [7]:
random_seed=1; batch_size=64
train=tmnist(root='data',train=True,download=True,
            transform=transforms.ToTensor())
test=tmnist(root='data',train=False, 
            transform=transforms.ToTensor())
train_loader=tdl(dataset=train,shuffle=True, 
                 batch_size=batch_size)
test_loader=tdl(dataset=test,shuffle=False, 
                batch_size=batch_size)
for images,labels in train_loader:  
    print('Image dimensions: %s'%str(images.shape))
    print('Label dimensions: %s'%str(labels.shape))
    break

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw
Processing...



Done!
Image dimensions: torch.Size([64, 1, 28, 28])
Label dimensions: torch.Size([64])




In [8]:
fpath='../input/classification-of-handwritten-letters/'
f='LetterColorImages_123.h5'
f=h5py.File(fpath+f,'r')
keys=list(f.keys()); print(keys)
X=np.array(f[keys[1]],dtype='float32')/255
y=np.array(f[keys[2]],dtype='int32')-1
N=len(y); n=int(.2*N); batch_size=16
shuffle_ids=np.arange(N)
np.random.RandomState(23).shuffle(shuffle_ids)
X,y=X[shuffle_ids],y[shuffle_ids]
X_test,X_train=X[:n],X[n:]
y_test,y_train=y[:n],y[n:]
X_train.shape,y_train.shape

['backgrounds', 'images', 'labels']


((11352, 32, 32, 3), (11352,))

In [9]:
random_seed=1; batch_size2=64
class TData(tds):
    def __init__(self,X,y):   
        self.X=torch.tensor(X,dtype=torch.float32)
        self.y=torch.tensor(y,dtype=torch.int32)
    def __getitem__(self,index):
        train_img,train_lbl=self.X[index],self.y[index]
        return train_img,train_lbl
    def __len__(self):
        return self.y.shape[0]
train2=TData(X_train,y_train)
test2=TData(X_test,y_test)
train_loader2=tdl(dataset=train2,batch_size=batch_size2,shuffle=True)
test_loader2=tdl(dataset=test2,batch_size=batch_size2,shuffle=False)
for images,labels in train_loader2:  
    print('Image dimensions: %s'%str(images.shape))
    print('Label dimensions: %s'%str(labels.shape))
    break

Image dimensions: torch.Size([64, 32, 32, 3])
Label dimensions: torch.Size([64])


## MLP

In [10]:
num_features=784; num_classes=10
hidden1=512; hidden2=256; hidden3=128
class MLP(torch.nn.Module):
    def __init__(self,num_features,num_classes):
        super(MLP,self).__init__()
        self.linear1=torch.nn.Linear(num_features,hidden1)
        self.linear1.weight.detach().normal_(0.,.1)
        self.linear1.bias.detach().zero_()
        self.linear2=torch.nn.Linear(hidden1,hidden2)
        self.linear2.weight.detach().normal_(0.,.1)
        self.linear2.bias.detach().zero_()
        self.linear3=torch.nn.Linear(hidden2,hidden3)
        self.linear3.weight.detach().normal_(0.,.1)
        self.linear3.bias.detach().zero_()
        self.linear_out=torch.nn.Linear(hidden3,num_classes)
        self.linear_out.weight.detach().normal_(0.,.1)
        self.linear_out.bias.detach().zero_()        
    def forward(self,x):
        y=self.linear1(x); y=tnnf.relu(y)
        y=self.linear2(y); y=tnnf.relu(y)
        y=self.linear3(y); y=tnnf.relu(y)
        logits=self.linear_out(y)
        probs=tnnf.log_softmax(logits,dim=1)
        return logits,probs   
torch.manual_seed(random_seed)
model=MLP(num_features=num_features,
          num_classes=num_classes)
model=model.to(dev); learning_rate=.01
optimizer=torch.optim.SGD(model.parameters(),lr=learning_rate) 

In [11]:
%train_run 20

Epoch: 001/020 | Batch 000/3750 | Cost: 3.0801
Epoch: 001/020 | Batch 300/3750 | Cost: 0.5278
Epoch: 001/020 | Batch 600/3750 | Cost: 0.3894
Epoch: 001/020 | Batch 900/3750 | Cost: 0.2452
Epoch: 001/020 train accuracy: 91.56%
Epoch: 002/020 | Batch 000/3750 | Cost: 0.4965
Epoch: 002/020 | Batch 300/3750 | Cost: 0.1878
Epoch: 002/020 | Batch 600/3750 | Cost: 0.1800
Epoch: 002/020 | Batch 900/3750 | Cost: 0.2359
Epoch: 002/020 train accuracy: 93.44%
Epoch: 003/020 | Batch 000/3750 | Cost: 0.1883
Epoch: 003/020 | Batch 300/3750 | Cost: 0.3404
Epoch: 003/020 | Batch 600/3750 | Cost: 0.3519
Epoch: 003/020 | Batch 900/3750 | Cost: 0.1873
Epoch: 003/020 train accuracy: 94.70%
Epoch: 004/020 | Batch 000/3750 | Cost: 0.0820
Epoch: 004/020 | Batch 300/3750 | Cost: 0.1872
Epoch: 004/020 | Batch 600/3750 | Cost: 0.2102
Epoch: 004/020 | Batch 900/3750 | Cost: 0.1166
Epoch: 004/020 train accuracy: 95.37%
Epoch: 005/020 | Batch 000/3750 | Cost: 0.1772
Epoch: 005/020 | Batch 300/3750 | Cost: 0.0971
Ep

In [12]:
%print_acc train
%print_acc test

Train accuracy: 99.0717%
Test accuracy: 97.1700%


In [13]:
num_features2=3072; num_classes2=33
hidden1=1024; hidden2=256; hidden3=256
class MLP2(torch.nn.Module):
    def __init__(self,num_features,num_classes):
        super(MLP2,self).__init__()
        self.linear1=torch.nn.Linear(num_features,hidden1)
        self.linear1.weight.detach().normal_(0.,.1)
        self.linear1.bias.detach().zero_()
        self.linear2=torch.nn.Linear(hidden1,hidden2)
        self.linear2.weight.detach().normal_(0.,.1)
        self.linear2.bias.detach().zero_()
        self.linear3=torch.nn.Linear(hidden2,hidden3)
        self.linear3.weight.detach().normal_(0.,.1)
        self.linear3.bias.detach().zero_()
        self.linear_out=torch.nn.Linear(hidden3,num_classes)
        self.linear_out.weight.detach().normal_(0.,.1)
        self.linear_out.bias.detach().zero_()        
    def forward(self,x):
        y=self.linear1(x); y=tnnf.relu(y)
        y=self.linear2(y); y=tnnf.relu(y)
        y=self.linear3(y); y=tnnf.relu(y)
        logits=self.linear_out(y)
        probs=tnnf.log_softmax(logits,dim=1)
        return logits,probs   
torch.manual_seed(random_seed)
model=MLP2(num_features=num_features2,
           num_classes=num_classes2)
model=model.to(dev); learning_rate=.001
optimizer=torch.optim.SGD(model.parameters(),lr=learning_rate)

In [14]:
%train_run2 30

Epoch: 001/030 | Batch 000/177 | Cost: 18.8860
Epoch: 001/030 train accuracy: 4.49%
Epoch: 002/030 | Batch 000/177 | Cost: 3.6310
Epoch: 002/030 train accuracy: 5.16%
Epoch: 003/030 | Batch 000/177 | Cost: 3.6160
Epoch: 003/030 train accuracy: 5.30%
Epoch: 004/030 | Batch 000/177 | Cost: 3.8010
Epoch: 004/030 train accuracy: 7.14%
Epoch: 005/030 | Batch 000/177 | Cost: 3.5244
Epoch: 005/030 train accuracy: 4.70%
Epoch: 006/030 | Batch 000/177 | Cost: 3.7635
Epoch: 006/030 train accuracy: 6.37%
Epoch: 007/030 | Batch 000/177 | Cost: 3.6686
Epoch: 007/030 train accuracy: 10.59%
Epoch: 008/030 | Batch 000/177 | Cost: 3.1434
Epoch: 008/030 train accuracy: 5.98%
Epoch: 009/030 | Batch 000/177 | Cost: 3.6903
Epoch: 009/030 train accuracy: 11.55%
Epoch: 010/030 | Batch 000/177 | Cost: 2.9752
Epoch: 010/030 train accuracy: 13.68%
Epoch: 011/030 | Batch 000/177 | Cost: 3.0289
Epoch: 011/030 train accuracy: 12.86%
Epoch: 012/030 | Batch 000/177 | Cost: 3.1778
Epoch: 012/030 train accuracy: 14.05

In [15]:
%print_acc2 train
%print_acc2 test

Train accuracy: 22.2428%
Test accuracy: 20.8950%


## MLP with Dropouts

In [16]:
num_features=784; num_classes=10
hidden1=1024; hidden2=256; hidden3=64
dropout_prob=.1
class MLPD(torch.nn.Module):
    def __init__(self,num_features,num_classes):
        super(MLPD,self).__init__()
        self.linear1=torch.nn.Linear(num_features,hidden1)
        self.linear1.weight.detach().normal_(0.,.1)
        self.linear1.bias.detach().zero_()
        self.linear2=torch.nn.Linear(hidden1,hidden2)
        self.linear2.weight.detach().normal_(0.,.1)
        self.linear2.bias.detach().zero_()
        self.linear3=torch.nn.Linear(hidden2,hidden3)
        self.linear3.weight.detach().normal_(0.,.1)
        self.linear3.bias.detach().zero_()
        self.linear_out=torch.nn.Linear(hidden3,num_classes)
        self.linear_out.weight.detach().normal_(0.,.1)
        self.linear_out.bias.detach().zero_()        
    def forward(self,x):
        y=self.linear1(x); y=tnnf.relu(y)
        y=tnnf.dropout(y,p=dropout_prob,
                       training=self.training)
        y=self.linear2(y); y=tnnf.relu(y)
        y=tnnf.dropout(y,p=dropout_prob,
                       training=self.training)
        y=self.linear3(y); y=tnnf.relu(y)
        logits=self.linear_out(y)
        probs=tnnf.log_softmax(logits,dim=1)
        return logits,probs   
torch.manual_seed(random_seed)
model=MLPD(num_features=num_features,
           num_classes=num_classes)
model=model.to(dev); learning_rate=.01
optimizer=torch.optim.SGD(model.parameters(),lr=learning_rate) 

In [17]:
%train_run 20

Epoch: 001/020 | Batch 000/3750 | Cost: 2.6523
Epoch: 001/020 | Batch 300/3750 | Cost: 0.3701
Epoch: 001/020 | Batch 600/3750 | Cost: 0.5918
Epoch: 001/020 | Batch 900/3750 | Cost: 0.2439
Epoch: 001/020 train accuracy: 88.25%
Epoch: 002/020 | Batch 000/3750 | Cost: 0.6572
Epoch: 002/020 | Batch 300/3750 | Cost: 0.2765
Epoch: 002/020 | Batch 600/3750 | Cost: 0.2195
Epoch: 002/020 | Batch 900/3750 | Cost: 0.3743
Epoch: 002/020 train accuracy: 90.86%
Epoch: 003/020 | Batch 000/3750 | Cost: 0.3815
Epoch: 003/020 | Batch 300/3750 | Cost: 0.2312
Epoch: 003/020 | Batch 600/3750 | Cost: 0.4057
Epoch: 003/020 | Batch 900/3750 | Cost: 0.3104
Epoch: 003/020 train accuracy: 92.54%
Epoch: 004/020 | Batch 000/3750 | Cost: 0.1880
Epoch: 004/020 | Batch 300/3750 | Cost: 0.2484
Epoch: 004/020 | Batch 600/3750 | Cost: 0.1055
Epoch: 004/020 | Batch 900/3750 | Cost: 0.5453
Epoch: 004/020 train accuracy: 93.25%
Epoch: 005/020 | Batch 000/3750 | Cost: 0.2724
Epoch: 005/020 | Batch 300/3750 | Cost: 0.2178
Ep

In [18]:
%print_acc train
%print_acc test

Train accuracy: 97.4367%
Test accuracy: 96.4400%


In [19]:
num_features2=3072; num_classes2=33
hidden1=hidden2=1024; hidden3=hidden4=256
dropout_prob2=.5
class MLPD2(torch.nn.Module):
    def __init__(self,num_features,num_classes):
        super(MLPD2,self).__init__()
        self.linear1=torch.nn.Linear(num_features,hidden1)
        self.linear1.weight.detach().normal_(0.,.1)
        self.linear1.bias.detach().zero_()
        self.linear2=torch.nn.Linear(hidden1,hidden2)
        self.linear2.weight.detach().normal_(0.,.1)
        self.linear2.bias.detach().zero_()
        self.linear3=torch.nn.Linear(hidden2,hidden3)
        self.linear3.weight.detach().normal_(0.,.1)
        self.linear3.bias.detach().zero_()
        self.linear4=torch.nn.Linear(hidden3,hidden4)
        self.linear4.weight.detach().normal_(0.,.1)
        self.linear4.bias.detach().zero_()
        self.linear_out=torch.nn.Linear(hidden4,num_classes)
        self.linear_out.weight.detach().normal_(0.,.1)
        self.linear_out.bias.detach().zero_()        
    def forward(self,x):
        y=self.linear1(x); y=tnnf.relu(y)
        y=self.linear2(y); y=tnnf.relu(y)
        y=tnnf.dropout(y,p=dropout_prob2,
                       training=self.training)
        y=self.linear3(y); y=tnnf.relu(y)
        y=self.linear4(y); y=tnnf.relu(y)
        logits=self.linear_out(y)
        probs=tnnf.log_softmax(logits,dim=1)
        return logits,probs   
torch.manual_seed(random_seed)
model=MLPD2(num_features=num_features2,
            num_classes=num_classes2)
model=model.to(dev); learning_rate=.005
optimizer=torch.optim.SGD(model.parameters(),lr=learning_rate)

In [20]:
%train_run2 30

Epoch: 001/030 | Batch 000/177 | Cost: 60.9865
Epoch: 001/030 train accuracy: 3.49%
Epoch: 002/030 | Batch 000/177 | Cost: 3.6780
Epoch: 002/030 train accuracy: 3.77%
Epoch: 003/030 | Batch 000/177 | Cost: 3.4730
Epoch: 003/030 train accuracy: 4.39%
Epoch: 004/030 | Batch 000/177 | Cost: 3.5862
Epoch: 004/030 train accuracy: 5.02%
Epoch: 005/030 | Batch 000/177 | Cost: 3.4611
Epoch: 005/030 train accuracy: 5.26%
Epoch: 006/030 | Batch 000/177 | Cost: 3.5591
Epoch: 006/030 train accuracy: 5.44%
Epoch: 007/030 | Batch 000/177 | Cost: 3.4554
Epoch: 007/030 train accuracy: 6.25%
Epoch: 008/030 | Batch 000/177 | Cost: 3.4636
Epoch: 008/030 train accuracy: 5.87%
Epoch: 009/030 | Batch 000/177 | Cost: 3.1915
Epoch: 009/030 train accuracy: 7.32%
Epoch: 010/030 | Batch 000/177 | Cost: 3.3853
Epoch: 010/030 train accuracy: 7.56%
Epoch: 011/030 | Batch 000/177 | Cost: 3.3996
Epoch: 011/030 train accuracy: 7.75%
Epoch: 012/030 | Batch 000/177 | Cost: 3.4983
Epoch: 012/030 train accuracy: 8.54%
Epo

In [21]:
%print_acc2 train
%print_acc2 test

Train accuracy: 14.6670%
Test accuracy: 13.2840%
