In [30]:
# Imports
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from tqdm import tqdm

In [19]:
# Hyperparams
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Running on",device)
learning_rate = 0.001
batch_size = 64
num_epochs = 5

# Case specific params
sequence_length = 28
embedding_dim = 28
hidden_size = 256
num_classes = 10
num_layers = 2

Running on cuda


In [8]:
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=transforms.ToTensor()
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=transforms.ToTensor()
)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:02<00:00, 13049227.18it/s]


Extracting data/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 206674.43it/s]


Extracting data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:01<00:00, 3803493.32it/s]


Extracting data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 9752609.30it/s]

Extracting data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw






In [9]:
def check_accuracy(y_pred,y):
    _,predpos=y_pred.max(1)
    num_samples=len(y)
    num_correct=(predpos==y).sum()
    return (num_correct/num_samples)*100

In [11]:
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

In [64]:
class BasicRNN(nn.Module):
    def __init__(self,embedding_dim,num_layers,hidden_size,num_classes):
        super(BasicRNN,self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(embedding_dim,self.hidden_size,self.num_layers,batch_first=True)
        self.fc = nn.Linear(sequence_length*self.hidden_size,num_classes) # Note : The fc layer takes hidden states from all time steps as input
    
    def forward(self,X):
        # X shape = [Batch_Size , 1 , 28 , 28] - as we are using gray scale image
        X = X.squeeze(1)
        # X shape = [Batch_Size , Sequence_Length , Embedding_Dim]
        X,hidden_state = self.rnn(X)
        # X shape = [Batch_Size , Sequence_Length , Hidden_Size] , hidden_state shape = [Num_Layers , Hidden_Size]
        X = X.reshape(X.shape[0],-1)
        # X shape = [Batch_Size , Sequence_Length * Hidden_Size]
        X = F.sigmoid(self.fc(X))
        # X shape = [Batch_Size , Num_Classes]
        return X

In [65]:
model = BasicRNN(embedding_dim,num_layers,hidden_size,num_classes)
print(model)

BasicRNN(
  (rnn): RNN(28, 256, num_layers=2, batch_first=True)
  (fc): Linear(in_features=7168, out_features=10, bias=True)
)


In [66]:
loss_fun = nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters(),learning_rate)

In [67]:
x,y = next(iter(train_dataloader))
print(x.shape,y.shape)

torch.Size([64, 1, 28, 28]) torch.Size([64])


In [68]:
def train_loop(model,dataloader,loss_fun,optimizer):
    size = len(dataloader.dataset)
    model.train()
    model.to(device)
    for epoch in range(num_epochs):
        losses = []
        accuracies = []
        loop = tqdm(enumerate(dataloader), total=len(dataloader), leave=True)
        for batch,(x,y) in loop:
            # put on cuda
            x = x.to(device)
            y = y.to(device)
    
            # forward pass
            y_pred = model(x)
            
            # calculate loss & accuracy
            loss = loss_fun(y_pred,y)
            losses.append(loss.detach().item())
            
            accuracy = check_accuracy(y_pred.detach(),y)
            accuracies.append(accuracy.item())
            
            # zero out prior gradients
            optimizer.zero_grad()
            
            # # backprop
            loss.backward()
            
            # update weights
            optimizer.step()
            
            # Update TQDM progress bar
            loop.set_description(f"Epoch [{epoch}/{num_epochs}] ")
            loop.set_postfix(loss=loss.detach().item(), accuracy=accuracy.item())

In [69]:
train_loop(model,train_dataloader,loss_fun,optim)

Epoch [0/5] : 100%|██████████| 938/938 [00:13<00:00, 72.15it/s, accuracy=68.8, loss=1.61]
Epoch [1/5] : 100%|██████████| 938/938 [00:12<00:00, 73.33it/s, accuracy=75, loss=1.57]  
Epoch [2/5] : 100%|██████████| 938/938 [00:12<00:00, 72.36it/s, accuracy=68.8, loss=1.57]
Epoch [3/5] : 100%|██████████| 938/938 [00:13<00:00, 72.14it/s, accuracy=75, loss=1.57]  
Epoch [4/5] : 100%|██████████| 938/938 [00:12<00:00, 73.07it/s, accuracy=71.9, loss=1.6] 


In [70]:
def test_loop(model,dataloader,loss_fun):
    model.eval()
    model.to(device)
    losses = []
    samples,correct = 0,0
    loop = tqdm(enumerate(dataloader), total=len(dataloader), leave=True)
    with torch.no_grad():
        for batch,(x,y) in loop:
            # put on cuda
            x = x.to(device)
            y = y.to(device)

            # forward pass
            y_pred = model(x)
            
            # caclulate test loss
            loss = loss_fun(y_pred,y)
            losses.append(loss.item())

            # accuracy over entire dataset
            _,predpos=y_pred.max(1)
            samples+=len(y)
            correct+=(predpos==y).sum().item()
            
            # Update TQDM progress bar
            loop.set_postfix(loss=loss.item())

    print("Final Accuracy = ",100 * (correct/samples))

In [71]:
test_loop(model,test_dataloader,loss_fun)

100%|██████████| 157/157 [00:01<00:00, 91.55it/s, loss=1.53]

Final Accuracy =  77.7





In [74]:
class BasicRNNwithGRU(nn.Module):
    def __init__(self,embedding_dim,num_layers,hidden_size,num_classes):
        super(BasicRNNwithGRU,self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(embedding_dim,self.hidden_size,self.num_layers,batch_first=True)
        self.fc = nn.Linear(sequence_length*self.hidden_size,num_classes) # Note : The fc layer takes hidden states from all time steps as input
    
    def forward(self,X):
        # X shape = [Batch_Size , 1 , 28 , 28] - as we are using gray scale image
        X = X.squeeze(1)
        # X shape = [Batch_Size , Sequence_Length , Embedding_Dim]
        X,hidden_state = self.gru(X)
        # X shape = [Batch_Size , Sequence_Length , Hidden_Size] , hidden_state shape = [Num_Layers , Hidden_Size]
        X = X.reshape(X.shape[0],-1)
        # X shape = [Batch_Size , Sequence_Length * Hidden_Size]
        X = F.sigmoid(self.fc(X))
        # X shape = [Batch_Size , Num_Classes]
        return X

In [75]:
model = BasicRNNwithGRU(embedding_dim,num_layers,hidden_size,num_classes)
print(model)

BasicRNNwithGRU(
  (gru): GRU(28, 256, num_layers=2, batch_first=True)
  (fc): Linear(in_features=7168, out_features=10, bias=True)
)


In [78]:
loss_fun = nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters(),learning_rate)

In [79]:
train_loop(model,train_dataloader,loss_fun,optim)

Epoch [0/5] : 100%|██████████| 938/938 [00:15<00:00, 62.47it/s, accuracy=53.1, loss=1.61]
Epoch [1/5] : 100%|██████████| 938/938 [00:14<00:00, 63.19it/s, accuracy=75, loss=1.57]  
Epoch [2/5] : 100%|██████████| 938/938 [00:14<00:00, 62.81it/s, accuracy=84.4, loss=1.55]
Epoch [3/5] : 100%|██████████| 938/938 [00:14<00:00, 63.70it/s, accuracy=75, loss=1.54]  
Epoch [4/5] : 100%|██████████| 938/938 [00:14<00:00, 62.77it/s, accuracy=81.2, loss=1.52]


In [80]:
test_loop(model,test_dataloader,loss_fun)

100%|██████████| 157/157 [00:01<00:00, 87.09it/s, loss=1.51]

Final Accuracy =  83.31





In [82]:
class BasicRNNwithLSTM(nn.Module):
    def __init__(self,embedding_dim,num_layers,hidden_size,num_classes):
        super(BasicRNNwithLSTM,self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(embedding_dim,self.hidden_size,self.num_layers,batch_first=True)
        self.fc = nn.Linear(sequence_length*self.hidden_size,num_classes) # Note : The fc layer takes hidden states from all time steps as input
    
    def forward(self,X):
        # X shape = [Batch_Size , 1 , 28 , 28] - as we are using gray scale image
        X = X.squeeze(1)
        # X shape = [Batch_Size , Sequence_Length , Embedding_Dim]
        X,(hidden_state,cell_state) = self.lstm(X)
        # X shape = [Batch_Size , Sequence_Length , Hidden_Size] , hidden_state shape = cell_state shape [Num_Layers , Hidden_Size]
        X = X.reshape(X.shape[0],-1)
        # X shape = [Batch_Size , Sequence_Length * Hidden_Size]
        X = F.sigmoid(self.fc(X))
        # X shape = [Batch_Size , Num_Classes]
        return X

In [83]:
model = BasicRNNwithLSTM(embedding_dim,num_layers,hidden_size,num_classes)
print(model)

BasicRNNwithLSTM(
  (lstm): LSTM(28, 256, num_layers=2, batch_first=True)
  (fc): Linear(in_features=7168, out_features=10, bias=True)
)


In [84]:
loss_fun = nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters(),learning_rate)

In [85]:
train_loop(model,train_dataloader,loss_fun,optim)

Epoch [0/5] : 100%|██████████| 938/938 [00:15<00:00, 61.60it/s, accuracy=46.9, loss=1.59]
Epoch [1/5] : 100%|██████████| 938/938 [00:14<00:00, 62.68it/s, accuracy=71.9, loss=1.57]
Epoch [2/5] : 100%|██████████| 938/938 [00:14<00:00, 62.60it/s, accuracy=75, loss=1.55]  
Epoch [3/5] : 100%|██████████| 938/938 [00:14<00:00, 63.58it/s, accuracy=81.2, loss=1.53]
Epoch [4/5] : 100%|██████████| 938/938 [00:14<00:00, 62.81it/s, accuracy=87.5, loss=1.52]


In [86]:
test_loop(model,test_dataloader,loss_fun)

100%|██████████| 157/157 [00:01<00:00, 81.96it/s, loss=1.55]

Final Accuracy =  86.06



