<a href="https://colab.research.google.com/github/TaraRasti/Deep_Learning_F20_Assignments/blob/master/Assignment%208/dropout/mnist_Adam.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torchvision import utils 
import matplotlib.pyplot as plt
import numpy as np
from numpy import *
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
# Read both MNIST CSV exports. The paths look like a Google Drive mount inside
# Colab -- NOTE(review): the mount step is not part of this notebook; confirm it
# has been performed before running this cell.
train_df = pd.read_csv('/content/drive/My Drive/mnist_train.csv')
test_df = pd.read_csv('/content/drive/My Drive/mnist_test.csv')

# Quick sanity check on the load: (rows, columns) of each frame.
print('train set shape : ',train_df.shape)
print('test set shape : ',test_df.shape)

train set shape :  (60000, 785)
test set shape :  (10000, 785)


In [3]:
# Separate the 'label' column (targets) from the remaining columns (features).
# `columns=` is used instead of the positional axis argument `drop([...], 1)`:
# pandas deprecated positional `axis` for DataFrame.drop and newer releases
# reject it outright.
y_train = train_df['label'].values
X_train = train_df.drop(columns=['label']).values
y_test = test_df['label'].values
X_test = test_df.drop(columns=['label']).values

# Report the resulting array shapes.
print('X train shape : ',X_train.shape)
print('y train shape : ',y_train.shape)
print('X test shape : ',X_test.shape)
print('y test shape : ',y_test.shape)

X train shape :  (60000, 784)
y train shape :  (60000,)
X test shape :  (10000, 784)
y test shape :  (10000,)


In [4]:
# Restore the 28x28 image grid. Passing -1 lets NumPy infer the number of samples
# from the data instead of hard-coding 60000 / 10000, so the cell keeps working
# when the CSV files contain a different number of rows.
X_train=X_train.reshape(-1,28,28)
X_test=X_test.reshape(-1,28,28)

print('X train new shape : ',X_train.shape)
print('X test new shape : ',X_test.shape)

# Wrap the NumPy arrays as tensors (the conversion to float happens later, per batch).
X_train_tensor=torch.from_numpy(X_train)
X_test_tensor=torch.from_numpy(X_test)
print('type of X train : ',type(X_train_tensor))
print('type of X test : ',type(X_test_tensor))

# Insert a channel axis at position 1 when the data is a 3-D stack of images,
# giving tensors of shape (N, 1, 28, 28).
if len(X_train.shape)==3:
    X_train_tensor = X_train_tensor.unsqueeze(1)
print('X train size : ',X_train_tensor.shape)
if len(X_test.shape)==3:
    X_test_tensor = X_test_tensor.unsqueeze(1)
print('X test size : ',X_test_tensor.shape)

# Labels are converted as-is.
y_train_tensor=torch.from_numpy(y_train)
y_test_tensor=torch.from_numpy(y_test)

X train new shape :  (60000, 28, 28)
X test new shape :  (10000, 28, 28)
type of X train :  <class 'torch.Tensor'>
type of X test :  <class 'torch.Tensor'>
X train size :  torch.Size([60000, 1, 28, 28])
X test size :  torch.Size([10000, 1, 28, 28])


In [5]:
from torch.utils.data import DataLoader, TensorDataset

# Pair every image tensor with its label.
train_ds = TensorDataset(X_train_tensor, y_train_tensor)
test_ds = TensorDataset(X_test_tensor, y_test_tensor)

# Mini-batches of 8 samples. The training loader reshuffles the data at the start
# of every epoch; without shuffle=True each epoch would replay the exact same
# batches in the exact same order. The evaluation loader keeps the fixed order
# because ordering has no effect on the summed loss / accuracy.
train_dl = DataLoader(train_ds, batch_size=8, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=8)

In [6]:
class Net(nn.Module):
    """Fully connected classifier with layer widths 784 -> 256 -> 128 -> 64 -> 10.

    Every hidden layer is followed by ReLU and dropout; the output layer is
    followed by a log-softmax over dim 1.
    """

    def __init__(self):
        super().__init__()
        self.layer_input = nn.Linear(784, 256)
        self.layer_hidden_one = nn.Linear(256, 128)
        self.layer_hidden_two = nn.Linear(128, 64)
        self.layer_output = nn.Linear(64, 10)

        # One dropout module (drop probability 0.2) shared by all hidden layers.
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Flatten each sample to a vector and return per-class log-probabilities."""
        batch_size = x.shape[0]
        hidden = x.view(batch_size, -1)
        hidden_layers = (self.layer_input, self.layer_hidden_one, self.layer_hidden_two)
        for layer in hidden_layers:
            hidden = self.dropout(F.relu(layer(hidden)))
        logits = self.layer_output(hidden)
        return F.log_softmax(logits, dim=1)

In [7]:
# Build the network and print the device that holds its first parameter.
model=Net()
print(next(model.parameters()).device)

cpu


In [8]:
# Prefer the first CUDA GPU but fall back to the CPU when CUDA is unavailable,
# so the notebook also runs on a runtime without a GPU instead of raising when
# the model is moved.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)
# Confirm where the parameters ended up.
print(next(model.parameters()).device)

cuda:0


In [9]:
# Negative log-likelihood on the model's log-softmax output. reduction="sum" adds up
# the per-sample losses of a batch; loss_epoch divides the accumulated total by the
# dataset size to obtain a per-sample average.
loss_func = nn.NLLLoss(reduction="sum")

In [10]:
# Smoke test: run only the first training batch through the model and print its
# summed loss. The loop exits after one iteration; nothing is back-propagated.
for xb, yb in train_dl:
  xb = xb.to(device=device, dtype=torch.float)  # cast to float32 and move in one call
  yb = yb.to(device)
  out = model(xb)
  loss = loss_func(out, yb)
  print (loss.item())
  break

29.9489688873291


In [11]:
from torch import optim
# Adam optimizer over all parameters of `model`, with learning rate 1e-4.
opt = optim.Adam(model.parameters(), lr=1e-4)

In [12]:
# Clear any gradients before training starts.
# No optimizer step is taken here: no backward pass has run at this point in the
# notebook, so there is nothing to apply, and if this cell were re-run after
# training a stray step could nudge the trained weights using the optimizer's
# stored state.
opt.zero_grad()

In [13]:
def loss_batch(loss_func, xb, yb,yb_h, opt=None):
  """Evaluate one batch and optionally apply one optimizer update.

  Args:
    loss_func: callable invoked as ``loss_func(yb_h, yb)``.
    xb: batch inputs; accepted for interface symmetry but not read here.
    yb: batch targets.
    yb_h: model output for the batch.
    opt: optimizer; when given, the loss is back-propagated, one step is
      taken and the gradients are reset afterwards.

  Returns:
    Tuple ``(loss value as a Python number, result of metrics_batch(yb, yb_h))``.
  """
  batch_loss = loss_func(yb_h, yb)
  batch_metric = metrics_batch(yb, yb_h)

  # Evaluation only: nothing to update.
  if opt is None:
    return batch_loss.item(), batch_metric

  batch_loss.backward()
  opt.step()
  opt.zero_grad()
  return batch_loss.item(), batch_metric

In [14]:
def metrics_batch(target, output):
  """Count the samples whose arg-max along dim 1 of `output` equals `target`.

  Returns the count as a plain Python number (via ``.item()``).
  """
  predicted = output.argmax(dim=1, keepdim=True)
  # Align the targets with the (kept-dim) predictions before comparing.
  matches = predicted == target.view_as(predicted)
  return matches.sum().item()

In [15]:
def loss_epoch(model,loss_func,dataset_dl,opt=None):
  """Run one full pass over `dataset_dl` and return per-sample averages.

  Each batch is cast to float, moved to the module-level `device`, fed through
  `model`, and handed to `loss_batch` (which also updates the weights when
  `opt` is given). Batch losses and batch metrics are summed and finally
  divided by ``len(dataset_dl.dataset)``.

  Returns:
    Tuple ``(average loss, average metric)``.
  """
  total_loss = 0.0
  total_metric = 0.0
  n_samples = len(dataset_dl.dataset)

  for inputs, targets in dataset_dl:
    inputs = inputs.float().to(device)
    targets = targets.to(device)
    batch_loss, batch_metric = loss_batch(loss_func, inputs, targets, model(inputs), opt)
    total_loss += batch_loss
    if batch_metric is not None:
      total_metric += batch_metric

  return total_loss / n_samples, total_metric / n_samples

In [16]:
def train_val(epochs, model, loss_func, opt, train_dl, test_dl):
    """Alternate one training pass and one evaluation pass for `epochs` epochs.

    For every epoch the model is switched to train mode and run over
    `train_dl` with the optimizer, then switched to eval mode and run over
    `test_dl` with gradient tracking disabled. One summary line per epoch is
    printed; nothing is returned.
    """
    for epoch in range(epochs):
        # Training pass (weights are updated inside loss_epoch via `opt`).
        model.train()
        train_loss, _train_metric = loss_epoch(model, loss_func, train_dl, opt)

        # Evaluation pass: no optimizer, no gradient tracking.
        model.eval()
        with torch.no_grad():
            val_loss, val_metric = loss_epoch(model, loss_func, test_dl)

        # The metric is a fraction; report it as a percentage.
        accuracy = 100*val_metric
        print("epoch: %d, train loss: %.6f, val loss: %.6f, accuracy: %.2f" %(epoch, train_loss,val_loss,accuracy))

In [17]:
# Train for 10 epochs; train_val prints one line per epoch with the training loss,
# the loss on test_dl and the accuracy on test_dl (as a percentage).
num_epochs=10
train_val(num_epochs, model, loss_func, opt, train_dl, test_dl)

epoch: 0, train loss: 0.508101, val loss: 0.169718, accuracy: 94.80
epoch: 1, train loss: 0.201595, val loss: 0.127866, accuracy: 96.31
epoch: 2, train loss: 0.153336, val loss: 0.111120, accuracy: 96.77
epoch: 3, train loss: 0.123061, val loss: 0.094572, accuracy: 97.36
epoch: 4, train loss: 0.104485, val loss: 0.095348, accuracy: 97.26
epoch: 5, train loss: 0.090032, val loss: 0.088171, accuracy: 97.61
epoch: 6, train loss: 0.078680, val loss: 0.084056, accuracy: 97.79
epoch: 7, train loss: 0.072495, val loss: 0.084186, accuracy: 98.00
epoch: 8, train loss: 0.067264, val loss: 0.083867, accuracy: 97.85
epoch: 9, train loss: 0.061501, val loss: 0.084694, accuracy: 98.05
