In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets as datasets
from torchvision import transforms as transforms
import numpy as np
from tqdm import tqdm

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [3]:
device

'cuda'

In [4]:
#hyperparameters

learning_rate = 0.001
batch_size = 64
num_epochs = 2
num_classes = 10
input_size = 784
sequence_length=28

In [5]:
train_data = datasets.MNIST(root='dataset/', train=True, transform=transforms.ToTensor(), download=True)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_dataset = datasets.MNIST(root='dataset/', train=False, transform=transforms.ToTensor(), download=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to dataset/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting dataset/MNIST/raw/train-images-idx3-ubyte.gz to dataset/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to dataset/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting dataset/MNIST/raw/train-labels-idx1-ubyte.gz to dataset/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting dataset/MNIST/raw/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw
Processing...
Done!





  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [8]:
class ANN(nn.Module):
    def __init__(self, input_size, num_classes):
        super(ANN, self).__init__()
        self.linear1 = nn.Linear(input_size, 1024)
        self.linear2 = nn.Linear(1024, 512)
        self.linear3 = nn.Linear(512, num_classes)
    
    def forward(self,x):
        x = F.relu(self.linear1(x))
        #print(x.shape)
        x = F.relu(self.linear2(x))
        #print(x.shape)
        x = self.linear3(x)
        #print(x.shape)
        return x
    
class RNN(nn.Module):
    def __init__(self,input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size = input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size*sequence_length, num_classes)
    
    def forward(self,x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        
        out, _ = self.rnn(x,h0)
        out = out.reshape(out.shape[0], -1)
        out = self.linear(out)
        return out

In [9]:
model = ANN(input_size, num_classes).to(device)
#model = RNN(input_size = 28, hidden_size=256, num_layers = 2, num_classes=10)

In [None]:
#random_data = torch.rand(32 ,784)

In [None]:
#output = model(random_data)

In [10]:
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [11]:
#training
                
num_samples = 0 
total_corrects = 0
total_loss = 0.0
total_acc = 0

model = ANN(input_size, num_classes).to(device)

training_dict = {'train':train_loader, "valid":test_loader}

for epoch in range(num_epochs):


    for phase in ["train", "valid"]:
        if phase == "train":
            model.train()
        else:
            model.eval()

        num_samples = 0 
        total_corrects = 0
        total_loss =0.0
        total_acc = 0
        
        loop = tqdm(enumerate(training_dict[phase]),total=len(training_dict[phase]))
        

        for btch, (inputs, outputs) in loop:
            
            #print(inputs.size())
            inputs = inputs.to(device)
            outputs = outputs.to(device)
            
            optimizer.zero_grad()
            
            with torch.set_grad_enabled(phase == "train"):
                
                inputs = inputs.reshape(inputs.shape[0], -1)
                pred = model(inputs)
                loss = criterion(pred, outputs)
                _,predictions = pred.max(1)
                #print(pred.max(1))
                
                if phase == "train":
                    loss.backward()
                    optimizer.step()

                #print(predictions.shape, num_samples)
                num_samples += predictions.size(0)

                #print(pred, outputs)
                total_corrects += torch.sum(predictions==outputs)
                #print(predictions, outputs, predictions==outputs)
                total_loss += loss.item() * inputs.size(0)
                #print(num_samples, total_corrects, pred.shape)
        print('Epoch {}/{} , loss:{}'.format(epoch,num_epochs,loss.item()))
        
        #print(total_corrects, num_samples)
        total_acc = (total_corrects.double() * 100) / num_samples
        total_loss = (total_loss * 100) / num_samples
        print('Epoch:{} phase:{} loss:{} Accu:{}'.format(epoch, phase, total_loss, total_acc))

100%|██████████| 938/938 [00:04<00:00, 198.78it/s]


Epoch 0/2 , loss:2.312607526779175


 10%|█         | 16/157 [00:00<00:00, 155.89it/s]

Epoch:0 phase:train loss:230.127674369812 Accu:12.925


100%|██████████| 157/157 [00:00<00:00, 240.25it/s]
  2%|▏         | 22/938 [00:00<00:04, 218.66it/s]

Epoch 0/2 , loss:2.3005924224853516
Epoch:0 phase:valid loss:230.1007992553711 Accu:12.63


100%|██████████| 938/938 [00:04<00:00, 229.83it/s]
 15%|█▌        | 24/157 [00:00<00:00, 234.02it/s]

Epoch 1/2 , loss:2.312126398086548
Epoch:1 phase:train loss:230.12767345428466 Accu:12.925


100%|██████████| 157/157 [00:00<00:00, 242.80it/s]

Epoch 1/2 , loss:2.30816912651062
Epoch:1 phase:valid loss:230.10079692840577 Accu:12.63





In [None]:
#training RNN
model = RNN(input_size = 28, hidden_size=256, num_layers = 2, num_classes=10)

        
                
num_samples = 0 
total_corrects = 0
total_loss = 0.0
total_acc = 0

training_dict = {'train':train_loader, "valid":test_loader}

for epoch in range(num_epochs):

    for phase in ["train", "valid"]:
        if phase == "train":
            model.train()
        else:
            model.eval()
        
                
        num_samples = 0 
        total_corrects = 0
        total_loss = 0.0

    
        loop = tqdm(enumerate(training_dict[phase]),total=len(training_dict[phase]))
        

        for btch, (inputs, outputs) in loop:
            
            #print(inputs.size())
            inputs = inputs.to(device).squeeze(1)
            outputs = outputs.to(device)
            
            optimizer.zero_grad()
                            
            with torch.set_grad_enabled(phase == "train"):
                
                pred = model(inputs)
                loss = criterion(pred, outputs)
                _,pred = pred.max(1)
                #print(pred)
                
                if phase == "train":
                    loss.backward()
                    optimizer.step()
                
                num_samples += pred.size(0)
                #print(pred, outputs)
                total_corrects += (pred==outputs).sum()
                total_loss += loss.item() * inputs.size(0)
                #print(num_samples, total_corrects, pred.shape)
        
        print('Epoch {}/{} , loss:{}'.format(epoch,num_epochs,loss.item()))
        
        #print(total_corrects, num_samples)
        total_acc = (total_corrects.double() * 100) / num_samples
        total_loss = (total_loss * 100) / num_samples
        print('Epoch:{} phase:{} loss:{} Accu:{}'.format(epoch, phase, total_loss, total_acc))