<a href="https://colab.research.google.com/github/HENILCHOPRA/pytorch-learning/blob/main/pytorch_103.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# GPU training

In [None]:
import torch
import numpy as np
import torchvision
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data.dataloader import DataLoader

import torch.nn.functional as F
import torch.nn as nn

In [None]:
# Load MNIST from /content (downloading it when it is not there yet); every
# image is passed through ToTensor() when it is accessed.
dataset = MNIST(root='/content', download=True, transform=ToTensor())

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /content/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting /content/MNIST/raw/train-images-idx3-ubyte.gz to /content/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /content/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting /content/MNIST/raw/train-labels-idx1-ubyte.gz to /content/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /content/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting /content/MNIST/raw/t10k-images-idx3-ubyte.gz to /content/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /content/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting /content/MNIST/raw/t10k-labels-idx1-ubyte.gz to /content/MNIST/raw



In [None]:
def split_indices(n, val_percent):
  """Randomly partition the indices 0..n-1 into training and validation sets.

  Args:
    n: total number of samples.
    val_percent: fraction of the samples reserved for validation (e.g. 0.3).

  Returns:
    A tuple (train_indices, val_indices) of NumPy arrays. The validation
    array holds int(n * val_percent) indices, the training array the rest.
  """
  shuffled = np.random.permutation(n)
  val_count = int(n * val_percent)
  val_part, train_part = shuffled[:val_count], shuffled[val_count:]
  return train_part, val_part

In [None]:
# Hold out 30% of the dataset for validation; the rest is used for training.
train_idx,val_idx = split_indices(len(dataset), 0.3)
# Show the size of each split.
len(train_idx),len(val_idx)

(42000, 18000)

In [None]:
batch_size = 100

# Training loader: batches are drawn in random order from the training indices.
train_sampler = SubsetRandomSampler(train_idx)
train_dl = DataLoader(dataset,
                      batch_size,
                      sampler = train_sampler)

# Validation loader: must sample from the held-out indices (val_sampler),
# otherwise validation metrics are computed on the training data.
val_sampler = SubsetRandomSampler(val_idx)
val_dl = DataLoader(dataset,
                    batch_size,
                    sampler = val_sampler)

In [None]:
class MnistModel(nn.Module):
  """Feed-forward classifier with a single hidden layer and ReLU activation.

  Args:
    in_size: number of input features per sample after flattening.
    hidden_size: width of the hidden layer.
    out_size: number of output scores (one per class).
  """

  def __init__(self, in_size, hidden_size, out_size):
    super().__init__()
    # input -> hidden
    self.linear = nn.Linear(in_size, hidden_size)
    # hidden -> class scores
    self.linear2 = nn.Linear(hidden_size,out_size)

  def forward(self, xb):
    """Flatten each sample in the batch and return the raw class scores."""
    # view() reshapes to (batch, features) without copying the underlying memory
    flat = xb.view(xb.size(0), -1)
    hidden = F.relu(self.linear(flat))
    return self.linear2(hidden)

In [None]:
# Number of input features per sample once the image is flattened by the model.
input_size = 784
# One output score per class.
num_classes = 10

# Small model with a 64-unit hidden layer, used for the CPU sanity checks below.
model = MnistModel(input_size, hidden_size = 64, out_size = num_classes)

In [None]:
# model.parameters() yields the individual parameter tensors (a weight and a
# bias per linear layer), so `layer` here is a tensor rather than a layer.
for layer in model.parameters():
  print(layer.shape)

torch.Size([64, 784])
torch.Size([64])
torch.Size([10, 64])
torch.Size([10])


In [None]:
# Sanity check: push a single training batch through the untrained model and
# print its cross-entropy loss.
for images, labels in train_dl:
  outputs = model(images)
  loss = F.cross_entropy(outputs, labels)
  print(loss.item())
  break

# Convert the scores of the first 10 samples into probabilities. The softmax
# must run over dim 1 (the class dimension); stating it explicitly avoids the
# deprecated implicit-dim behaviour and the warning it emits.
F.softmax(outputs, dim = 1)[:10]

2.297187089920044


  import sys


tensor([[0.0951, 0.1318, 0.0981, 0.1128, 0.0904, 0.0840, 0.0828, 0.0782, 0.0991,
         0.1276],
        [0.0870, 0.1118, 0.1088, 0.1078, 0.0886, 0.0958, 0.0808, 0.0929, 0.1106,
         0.1159],
        [0.0895, 0.1148, 0.1189, 0.1042, 0.0918, 0.0961, 0.0815, 0.0899, 0.1047,
         0.1087],
        [0.0906, 0.1118, 0.1154, 0.1139, 0.0931, 0.0950, 0.0820, 0.0853, 0.1028,
         0.1100],
        [0.0992, 0.1108, 0.1119, 0.1059, 0.0965, 0.0983, 0.0848, 0.0871, 0.1028,
         0.1028],
        [0.0986, 0.1080, 0.1035, 0.1097, 0.0966, 0.0922, 0.0827, 0.0928, 0.1063,
         0.1095],
        [0.0932, 0.1117, 0.1152, 0.1056, 0.0992, 0.0889, 0.0846, 0.0866, 0.1054,
         0.1097],
        [0.0890, 0.0983, 0.1004, 0.1132, 0.0943, 0.0966, 0.0811, 0.0939, 0.1126,
         0.1207],
        [0.0890, 0.1153, 0.1145, 0.0994, 0.0973, 0.0908, 0.0838, 0.0919, 0.1042,
         0.1139],
        [0.0995, 0.1058, 0.1045, 0.1131, 0.0916, 0.0888, 0.0814, 0.0930, 0.1091,
         0.1131]], grad_fn=<

*Use GPU*

In [None]:
# Check whether PyTorch can see a CUDA device in this runtime.
torch.cuda.is_available()

True

In [None]:
torch.device('cuda') # device handle for the GPU; only displayed here, not stored

device(type='cuda')

In [None]:
from numpy.lib.arraysetops import isin
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# cells below also run on a runtime without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def to_device(data, device):
  """Move a tensor, or a (possibly nested) list/tuple of tensors, to `device`.

  Lists and tuples are handled element by element and always come back as a
  list; any other object is moved with its own `.to(device, non_blocking=True)`.
  """
  if not isinstance(data, (list, tuple)):
    return data.to(device, non_blocking = True)
  return [to_device(item, device) for item in data]

class DeviceDataLoader():
  """Wrap a data loader so that every batch is moved to `device` on iteration."""

  def __init__(self, dl, device):
    self.dl, self.device = dl, device

  def __len__(self):
    """Number of batches, as reported by the wrapped loader."""
    return len(self.dl)

  def __iter__(self):
    """Yield each batch of the wrapped loader after moving it to the device."""
    yield from (to_device(batch, self.device) for batch in self.dl)

In [None]:
def loss_batch(model, loss_func, xb, yb, opt = None, metric = None): # train_batch function
  """Compute the loss for one batch and, when an optimizer is given, update the model.

  Args:
    model: callable producing predictions from `xb`.
    loss_func: callable `loss_func(preds, yb)` returning a scalar loss tensor.
    xb: batch of inputs.
    yb: batch of targets.
    opt: optional optimizer; when given, a backward pass and one optimizer
      step are performed and the gradients are reset afterwards.
    metric: optional callable `metric(preds, yb)`.

  Returns:
    (loss value as a Python number, number of samples in the batch,
    metric result or None).
  """
  preds = model(xb)
  loss = loss_func(preds, yb)

  training = opt is not None
  if training:
    loss.backward()   # compute gradients
    opt.step()        # update the parameters
    opt.zero_grad()   # reset gradients for the next batch

  metric_result = metric(preds, yb) if metric is not None else None
  return loss.item(), len(xb), metric_result

def evaluate(model, loss_fn, val_dl, metric = None):
  """Compute the sample-weighted average loss (and metric) over `val_dl`.

  Args:
    model: model to evaluate.
    loss_fn: callable `loss_fn(preds, targets)` returning a scalar loss tensor.
    val_dl: iterable of (inputs, targets) batches.
    metric: optional callable `metric(preds, targets)`.

  Returns:
    (average loss, total number of samples, average metric or None).
  """
  batch_results = []
  # Gradients are not needed while evaluating.
  with torch.no_grad():
    for xb, yb in val_dl:
      batch_results.append(loss_batch(model, loss_fn, xb, yb, metric = metric))

  # Transpose the per-batch triples into three parallel sequences.
  losses, nums, metrics = zip(*batch_results)
  total = np.sum(nums)

  # Weight every batch by its size so a smaller batch does not skew the mean.
  avg_loss = np.sum(np.multiply(losses, nums)) / total
  avg_metric = np.sum(np.multiply(metrics, nums)) / total if metric is not None else None

  return avg_loss, total, avg_metric


In [None]:
from re import L

def fit(epochs, lr, model, loss_fn, train_dl, val_dl, metric = None, opt_fn = None):
  """Train `model` and print the validation loss (and metric) after every epoch.

  Args:
    epochs: number of passes over `train_dl`.
    lr: learning rate handed to the optimizer.
    model: module whose parameters are optimised.
    loss_fn: callable `loss_fn(preds, targets)` returning a scalar loss tensor.
    train_dl: iterable of (inputs, targets) training batches.
    val_dl: iterable of (inputs, targets) validation batches.
    metric: optional callable `metric(preds, targets)` evaluated on `val_dl`.
    opt_fn: optimizer class/factory called as `opt_fn(params, lr=lr)`;
      defaults to `torch.optim.SGD`.

  Returns:
    (losses, metrics): per-epoch validation losses and metric values.
  """
  if opt_fn is None:
    opt_fn = torch.optim.SGD
  # Build the optimizer through opt_fn so a caller-supplied optimizer is
  # actually used instead of always falling back to SGD.
  opt = opt_fn(model.parameters(), lr = lr)

  losses, metrics = [], []
  for epoch in range(epochs):

    # One optimisation step per training batch.
    for xb, yb in train_dl:
      loss_batch(model, loss_fn, xb, yb, opt)

    val_loss, _, val_metric = evaluate(model, loss_fn, val_dl, metric)

    losses.append(val_loss)
    metrics.append(val_metric)
    if metric is None:
      print(epoch, " : ", val_loss)

    else:
      print(epoch, " : ", val_loss," \nmetric: ", val_metric)
  return losses, metrics

In [None]:
def accuracy(outputs, labels):
  """Return the fraction of rows whose highest-scoring column equals the label.

  Args:
    outputs: 2-D tensor of scores, one row per sample.
    labels: tensor with the expected column index for each row.
  """
  preds = torch.max(outputs, dim = 1)[1]
  n_correct = (preds == labels).sum().item()
  return n_correct / len(preds)

In [None]:
# Fresh model with a wider (128-unit) hidden layer for the actual training run.
model = MnistModel(input_size, hidden_size = 128, out_size = num_classes)
# The return value is discarded: nn.Module.to() moves the module's parameters in place.
to_device(model, device)        
# Rebind the loaders to wrappers that move each batch to `device`.
# NOTE: re-running this cell wraps the already-wrapped loaders again.
val_dl = DeviceDataLoader(val_dl, device)
train_dl = DeviceDataLoader(train_dl, device)

In [None]:
# Baseline: validation loss and accuracy of the freshly initialised model.
val_loss, total, val_acc = evaluate(model, F.cross_entropy, val_dl, metric = accuracy)
val_loss, val_acc

In [None]:
# Train for 5 epochs with learning rate 0.1, reporting accuracy on val_dl.
fit(5, 0.1, model, F.cross_entropy, train_dl, val_dl, metric = accuracy)

In [None]:
# NOTE(review): a bare `exit` between the MNIST and CIFAR-10 parts presumably
# ends the session, which would stop "Run all" from reaching the cells below — confirm.
exit

# CNN/ ResNet

In [None]:
import os
import torch
import tarfile
from torchvision.datasets.utils import download_url

from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data.dataloader import DataLoader

import torch.nn.functional as F
import torch.nn as nn

In [None]:
# dataset_url = "http://files.fast.ai/data/cifar10.tgz"
# download_url(dataset_url, '/content/')

In [None]:
# Unpack the CIFAR-10 archive into /content/. The archive must already exist at
# /content/cifar10.tgz because the download cell above is commented out.
# NOTE(review): extractall() is called without any member filtering; only use
# it on an archive from a trusted source.
with tarfile.open("/content/cifar10.tgz") as tar:
  tar.extractall(path = "/content/")

In [None]:
# Root of the extracted dataset.
data_dir = "/content/cifar10"
# The entries of train/ are the class names; os.listdir returns them in
# arbitrary order.
classes = os.listdir(data_dir + '/train')
classes

['deer',
 'horse',
 'truck',
 'airplane',
 'frog',
 'dog',
 'cat',
 'ship',
 'automobile',
 'bird']

In [None]:
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor

In [None]:
# Build the dataset from the train/ directory; every image goes through ToTensor().
# NOTE: this rebinds `dataset`, which held the MNIST data in the first part.
dataset = ImageFolder(data_dir + '/train', transform = ToTensor())

In [None]:
# Each item is an (image tensor, integer class index) pair.
img, label = dataset[0]
print(img.shape, label)

torch.Size([3, 32, 32]) 0


In [None]:
# Class names in label-index order (label i corresponds to dataset.classes[i]).
print(dataset.classes)

['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


In [None]:
import matplotlib.pyplot as plt

def show_example(img, label):
  """Print the class name for `label` and display `img` with matplotlib.

  Args:
    img: 3-D image tensor; its first dimension is moved to the end before
      plotting (presumably channels-first to channels-last — confirm).
    label: integer index into `dataset.classes`.
  """
  class_name = dataset.classes[label]
  print('label: ', class_name)

  channels_last = img.permute([1,2,0])
  plt.imshow(channels_last)

In [None]:
# show_example(*dataset[10000])

In [None]:
import numpy as np

def split_idx(n, val_percent):
  """Shuffle the indices 0..n-1 and split them into training and validation parts.

  Args:
    n: total number of samples.
    val_percent: fraction of samples that go to the validation part.

  Returns:
    (train_indices, val_indices) as NumPy arrays; the first
    int(n * val_percent) shuffled indices form the validation part.
  """
  order = np.random.permutation(n)
  cut = int(n * val_percent)
  return order[cut:], order[:cut]

# Fraction of the images held out for validation.
val_pct = 0.3
train_idx, val_idx = split_idx(len(dataset), val_pct)


In [None]:
batch_size = 100

# Training batches are drawn in random order from the training indices.
train_sampler = SubsetRandomSampler(train_idx)
train_dl = DataLoader(dataset, batch_size = batch_size, sampler = train_sampler)

# Validation batches are drawn from the held-out indices only.
val_sampler = SubsetRandomSampler(val_idx)
val_dl = DataLoader(dataset, batch_size = batch_size, sampler = val_sampler)

In [None]:
# Peek at the first validation batch to check the tensor shapes.
for x, y in val_dl:
  print(x.shape, y.shape)
  break

In [None]:
# Minimal conv + pooling stack used to see how each layer changes the batch shape.
simple_model = nn.Sequential(
    # 3 input channels -> 8 output channels, 3x3 kernel with stride 1 and padding 1
    nn.Conv2d(3,8, kernel_size = 3, stride = 1, padding = 1), 
    #nn.Conv2d(input_channels, output_channels: number of filters)
    # 2x2 max pooling with stride 2
    nn.MaxPool2d(2,2)
)
# Run a single training batch through it and compare input and output shapes.
for images, labels in train_dl:
  print(images.shape)
  out = simple_model(images)
  print(out.shape)
  break

In [None]:
# model = nn.Sequential(
#     nn.Conv2d(3,16, kernel_size = 3, stride = 1, padding = 1),
#     nn.ReLU(),
#     nn.MaxPool2d((2,2)), #output: bs x 16 x 16 x 16

#     nn.Conv2d(16,16, kernel_size = 3, stride = 1, padding = 1),
#     nn.ReLU(),
#     nn.MaxPool2d((2,2)), #output: bs x 16 x 8 x 8

#     nn.Conv2d(16,16, kernel_size = 3, stride = 1, padding = 1),
#     nn.ReLU(),
#     nn.MaxPool2d((2,2)), #output: bs x 16 x 4 x 4

#     nn.Conv2d(16,16, kernel_size = 3, stride = 1, padding = 1),
#     nn.ReLU(),
#     nn.MaxPool2d((2,2)), #output: bs x 16 x 2 x 2

#     nn.Conv2d(16,16, kernel_size = 3, stride = 1, padding = 1),
#     nn.ReLU(),
#     nn.MaxPool2d((2,2)), #output: bs x 16 x 1 x 1

#     nn.Flatten(), #output: bs x 16
#     nn.Linear(16,10)  #output: bs x 10
# )

# CNN classifier: three blocks of (conv, ReLU, conv, ReLU, max-pool) followed by
# a three-layer fully connected head with 10 outputs. Layers are addressed
# by position later on (e.g. model[3]), so their order must stay as is.
model = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 128 x 8 x 8

            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 256 x 4 x 4

            # flatten the 256 x 4 x 4 feature maps into one vector per sample
            nn.Flatten(), 
            nn.Linear(256*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            # one output score per class
            nn.Linear(512, 10))

In [None]:
# Index 3 is not the third layer but the fourth entry of the Sequential:
# the second convolution, Conv2d(32, 64, ...). Show the shape of its weights.
model[3].weight.shape

In [None]:
# Forward a single training batch to confirm the model output shape.
# `out` is reused by the next cell.
for images, labels in train_dl:
  print(images.shape)
  out = model(images)
  print(out.shape)
  break

In [None]:
# Turn the scores in `out` (from the previous cell) into per-row probabilities,
# then take the largest probability and its column index for every row.
probs = F.softmax(out, dim = 1)
max_values, max_indxs = torch.max(probs, dim = 1)

In [None]:
def get_default_device():
  """Return the CUDA device when a GPU is available, otherwise the CPU device."""
  return torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def to_device(data, device):
  """Move `data` to `device`.

  A list or tuple is processed recursively and returned as a list of moved
  elements; any other object is moved via `.to(device, non_blocking=True)`.
  """
  is_sequence = isinstance(data, (list, tuple))
  if is_sequence:
    moved = []
    for element in data:
      moved.append(to_device(element, device))
    return moved

  return data.to(device, non_blocking = True)

class DeviceDataLoader():
  """Data loader wrapper that moves each batch to a target device while iterating."""

  def __init__(self, dl, device):
    # Wrapped loader and the device its batches are sent to.
    self.device = device
    self.dl = dl

  def __len__(self):
    """Number of batches in the wrapped loader."""
    return len(self.dl)

  def __iter__(self):
    """Iterate over the wrapped loader, yielding every batch on the target device."""
    for batch in self.dl:
      yield to_device(batch, self.device)

In [None]:
# Pick the GPU when available, otherwise the CPU, and display the choice.
device = get_default_device()
device

In [None]:
# Move the model to the chosen device and wrap both loaders so their batches
# are moved there as well.
# NOTE: re-running this cell wraps the already-wrapped loaders again.
model = to_device(model, device)
train_dl = DeviceDataLoader(train_dl, device)
val_dl = DeviceDataLoader(val_dl, device)

In [None]:
# Display the layer structure of the model.
model

In [None]:
# def loss_batch(model, loss_fn, xb, yb, opt = None, metric = None):
#   preds = model(xb)
#   loss = loss_fn(preds, yb)

#   if opt is not None:
#     loss.backward()
#     opt.step()
#     opt.zero_grad()

#   metric_result = None
#   if metric is not None:
#     metric_result = metric(preds, yb)

#   return loss.item(), len(xb), metric_result

# def evaluate(model, loss_fn, val_dl, metric = None):
#   with torch.no_grad():
#     results = [loss_batch(model, loss_fn, xb, yb, metric = metric) for xb, yb in val_dl]

#   losses, num, metrics = zip(*results)
#   total = np.sum(num)

#   avg_loss = np.sum(np.multiply(losses, num)/total)
#   avg_metric = None
#   if metric is not None:
#     avg_metric = np.sum(np.multiply(metrics, num)) / total

#   return avg_loss, total, avg_metric


def loss_batch(model, loss_func, xb, yb, opt = None, metric = None): # train_batch function
  """Evaluate one batch and optionally perform a single optimisation step.

  Args:
    model: callable producing predictions from `xb`.
    loss_func: callable `loss_func(preds, yb)` returning a scalar loss tensor.
    xb: batch of inputs.
    yb: batch of targets.
    opt: optional optimizer; when supplied the loss is back-propagated, the
      optimizer takes one step and the gradients are cleared.
    metric: optional callable `metric(preds, yb)`.

  Returns:
    (loss as a Python number, batch size, metric value or None).
  """
  predictions = model(xb)
  batch_loss = loss_func(predictions, yb)

  if opt is not None:
    batch_loss.backward()
    opt.step()
    opt.zero_grad()

  metric_result = None if metric is None else metric(predictions, yb)
  return batch_loss.item(), len(xb), metric_result

def evaluate(model, loss_fn, val_dl, metric = None):
  """Return the sample-weighted mean loss (and metric) of `model` over `val_dl`.

  Args:
    model: model to evaluate.
    loss_fn: callable `loss_fn(preds, targets)` returning a scalar loss tensor.
    val_dl: iterable of (inputs, targets) batches.
    metric: optional callable `metric(preds, targets)`.

  Returns:
    (average loss, total number of samples, average metric or None).
  """
  # No gradients are tracked while the batches are evaluated; the per-batch
  # (loss, size, metric) triples are transposed into three sequences.
  with torch.no_grad():
    losses, nums, metrics = zip(*(
        loss_batch(model, loss_fn, xb, yb, metric = metric) for xb, yb in val_dl))

  total = np.sum(nums)
  # Batches are weighted by their size when averaging.
  avg_loss = np.sum(np.multiply(losses, nums)) / total

  if metric is None:
    return avg_loss, total, None
  return avg_loss, total, np.sum(np.multiply(metrics, nums)) / total

In [None]:
def fit(epochs, model, loss_fn, train_dl, val_dl,
        opt = None, lr = None, metric = None):
  """Train `model` and print training/validation results after every epoch.

  Args:
    epochs: number of passes over `train_dl`.
    model: module whose parameters are optimised.
    loss_fn: callable `loss_fn(preds, targets)` returning a scalar loss tensor.
    train_dl: iterable of (inputs, targets) training batches.
    val_dl: iterable of (inputs, targets) validation batches.
    opt: optimizer class/factory called as `opt(model.parameters(), ...)`;
      defaults to `torch.optim.SGD`.
    lr: learning rate; when None it is not passed on, so the optimizer's own
      default (if it has one) applies.
    metric: optional callable `metric(preds, targets)` evaluated on `val_dl`.

  Returns:
    (train_losses, val_losses, val_metrics), one entry per epoch. The
    training loss recorded for an epoch is the loss of its last batch
    (None when `train_dl` yielded no batches).
  """
  train_losses, val_losses, val_metrics = [], [], []

  if opt is None: opt = torch.optim.SGD
  # Forward lr only when it was given: an explicit lr=None is not a valid
  # learning rate for the optimizer.
  opt_kwargs = {} if lr is None else {'lr': lr}
  opt = opt(model.parameters(), **opt_kwargs)

  # NOTE: model.train()/model.eval() are not toggled here; add them if the
  # model gains layers that behave differently in training and evaluation.
  for epoch in range(epochs):
    train_loss = None  # stays None if train_dl yields no batches
    for xb, yb in train_dl:
      train_loss, _, _ = loss_batch(model, loss_fn, xb, yb, opt)
    val_loss, _, val_metric = evaluate(model, loss_fn, val_dl, metric)

    train_losses.append(train_loss)
    val_losses.append(val_loss)
    val_metrics.append(val_metric)

    if metric is None:
      print(f'Epoch [{epoch}/{epochs}], train_loss: {train_loss}, val_loss: {val_loss}')
    else:
      print(f'Epoch [{epoch}/{epochs}], train_loss: {train_loss}, val_loss: {val_loss}, metric:{val_metric}')

  # Hand the recorded history back to the caller instead of discarding it.
  return train_losses, val_losses, val_metrics

In [None]:
def accuracy(outputs, labels):
  """Share of samples whose top-scoring class index matches the given label.

  Args:
    outputs: 2-D tensor of scores, one row per sample.
    labels: tensor holding the expected class index of every row.
  """
  predicted = torch.max(outputs, dim = 1)[1]
  hits = torch.sum(predicted == labels).item()
  return hits / len(predicted)

In [None]:
# Loss, sample count and accuracy on the validation loader before training.
evaluate(model, F.cross_entropy, val_dl, metric = accuracy)

In [None]:
# Train the CNN for 5 epochs with SGD at learning rate 0.01, tracking accuracy.
fit(5, model, F.cross_entropy, train_dl, val_dl,
        opt = torch.optim.SGD, lr = 0.01, metric = accuracy)