In [12]:
import torch
import torch.nn as nn
from torch.nn.functional import tanh

class leaky_tanh(nn.Module):
    def __init__(self, alpha, gamma, rho):
        """
        Initialize learnable parameters:
        alpha = controls influence of past information
        gamma = controls amplitude from nonlinear term
        """
        super(leaky_tanh,self).__init__()
        self.a = alpha
        self.g = gamma
    
    def forward(self, x):
        return (1-self.a)*x + self.a*self.g*tanh(x)

In [2]:
import numpy as np
import torchvision
import matplotlib.pyplot as plt
from time import time
from torchvision import datasets, transforms
from torch import optim

In [3]:
transform = transforms.Compose([transforms.ToTensor(),
                              transforms.Normalize((0.5,), (0.5,)),
                              ])

trainset = datasets.MNIST('./Data/mnist', download=True, train=True, transform=transform)
valset = datasets.MNIST('./Data/mnist', download=True, train=False, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
valloader = torch.utils.data.DataLoader(valset, batch_size=64, shuffle=True)

In [4]:
dataiter = iter(trainloader)
images, labels = dataiter.next()

print(images.shape)
print(labels.shape)

torch.Size([64, 1, 28, 28])
torch.Size([64])


In [13]:
input_size = 784
hidden_sizes = [128, 64]
output_size = 10


l1 = 128
l2 = 64

a1 = torch.rand((l1,1),requires_grad=True)
a2 = torch.rand((l2,1),requires_grad=True)

g1 = 2.*torch.rand((l1,1),requires_grad=True)
g2 = 2.*torch.rand((l2,1),requires_grad=True)

r1 = 2.*torch.rand((l1,1),requires_grad=True)
r2 = 2.*torch.rand((l2,1),requires_grad=True)

#Lky1 = leaky_tanh(a1,g1,r2)   #128
#Lky2 = leaky_tanh(a2,g2,r2)   #64                

model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
                      leaky_tanh(a1,g1,r2),
                      nn.Linear(hidden_sizes[0], hidden_sizes[1]),
                      leaky_tanh(a2,g2,r2),
                      nn.Linear(hidden_sizes[1], output_size),
                      nn.LogSoftmax(dim=1)).cuda()
print(model)

Sequential(
  (0): Linear(in_features=784, out_features=128, bias=True)
  (1): leaky_tanh()
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): leaky_tanh()
  (4): Linear(in_features=64, out_features=10, bias=True)
  (5): LogSoftmax(dim=1)
)


In [14]:
criterion = nn.NLLLoss()
images, labels = next(iter(trainloader))
images = images.view(images.shape[0], -1)

#logps = model(images) #log probabilities
#loss = criterion(logps, labels) #calculate the NLL loss

In [15]:
optimizer = optim.SGD(model.parameters(), lr=0.003, momentum=0.9)
time0 = time()
epochs = 15
for e in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        # Flatten MNIST images into a 784 long vector
        images = images.view(images.shape[0], -1)
        labels = labels.cuda()
    
        # Training pass
        optimizer.zero_grad()
        
        output = model(images.cuda())
        loss = criterion(output, labels)
        
        #This is where the model learns by backpropagating
        loss.backward()
        
        #And optimizes its weights here
        optimizer.step()
        
        running_loss += loss.item()
    else:
        print("Epoch {} - Training loss: {}".format(e, running_loss/len(trainloader)))
print("\nTraining Time (in minutes) =",(time()-time0)/60)

RuntimeError: The size of tensor a (128) must match the size of tensor b (64) at non-singleton dimension 0

In [22]:
correct_count, all_count = 0, 0
for images,labels in valloader:
    for i in range(len(labels)):
        img = images[i].view(1, 784)
        img = img.cuda()
        labels = labels.cuda()
        
        with torch.no_grad():
            logps = model(img)


        ps = torch.exp(logps)
        ps = ps.detach().cpu()
        probab = list(ps.numpy()[0])
        pred_label = probab.index(max(probab))
        true_label = labels.detach().cpu().numpy()[i]
        if(true_label == pred_label):
            correct_count += 1
        all_count += 1

print("Number Of Images Tested =", all_count)
print("\nModel Accuracy =", (correct_count/all_count))

Number Of Images Tested = 10000

Model Accuracy = 0.9755
