<a href="https://colab.research.google.com/github/Shrey-Viradiya/PyTorch_for_DL/blob/master/PyTorch_Debugging_PyTorch_Models_GPU_computing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PyTorch: Debugging PyTorch Models – GPU Computing

In [1]:
# Show the GPU, driver and CUDA version that this runtime can see.
!nvidia-smi

Sun Oct  4 10:23:48 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.23.05    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   57C    P0    30W /  70W |      0MiB / 15079MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
import torch
import torchvision
from torch import optim
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms,models
import torch.utils.data
from PIL import Image
import numpy as np
import time

In [3]:
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! kaggle datasets download -d lucassj/dogs-vs-cats-train-validadion-and-evaluation
! unzip -n '/content/dogs-vs-cats-train-validadion-and-evaluation.zip'

mkdir: cannot create directory ‘/root/.kaggle’: File exists
dogs-vs-cats-train-validadion-and-evaluation.zip: Skipping, found more recently modified local copy (use --force to force download)
Archive:  /content/dogs-vs-cats-train-validadion-and-evaluation.zip


## Bad Random Process
### Not using the GPU ... more CPU processing
#### Time-consuming

In [4]:
# ResNet-18 created with pretrained weights.
model = models.resnet18(pretrained=True)
# Device string used by the later `model.to(device)` calls (first CUDA GPU).
device = "cuda:0"

In [5]:
class BadRandom(object):
    """Image transform that adds random noise on the CPU through NumPy.

    Each call converts the incoming image to a NumPy array, adds noise drawn
    with ``np.random.random_sample`` and converts the result back to an RGB
    PIL image.
    """

    def __call__(self, img):
        """Return a new RGB PIL image built from ``img`` plus per-element noise.

        The noise array has the same shape as the image array, and the sum is
        cast to ``uint8`` before it is wrapped back into an image.
        """
        pixels = np.array(img)
        noise = np.random.random_sample(pixels.shape)
        noisy_pixels = (pixels + noise).astype('uint8')
        return Image.fromarray(noisy_pixels, 'RGB')

    def __repr__(self):
        """Return the class name as the printable form of the transform."""
        return self.__class__.__name__

In [6]:
# Root folder of the training images (handed to ImageFolder in the next cell).
train_data_path = "./data/train/"
model.to(device)
# Per-image pipeline: resize, add noise on the CPU, convert to a tensor.
image_transforms = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        BadRandom(),
        transforms.ToTensor(),
    ]
)

In [7]:
# Dataset built from the class sub-folders under `train_data_path`.
train_data = torchvision.datasets.ImageFolder(root=train_data_path, transform=image_transforms)
batch_size = 32
# shuffle=True: ImageFolder yields samples class by class, so without shuffling
# every batch would contain a single class and training degenerates.
train_data_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

In [8]:
# Adam over all model parameters.
# NOTE(review): lr=2e-2 looks high for Adam on a pretrained network — confirm it is intended.
optimizer = optim.Adam(model.parameters(), lr=2e-2)
# Cross-entropy loss between the model outputs and the integer class targets.
criterion = nn.CrossEntropyLoss()

In [9]:
def train(model, optimizer, loss_fn,  train_loader, val_loader, epochs=20, device='cuda:0'):
    """Train ``model`` and print its accuracy on ``val_loader`` after every epoch.

    Args:
        model: ``torch.nn.Module`` to optimise; it is moved to ``device`` first.
        optimizer: optimizer whose ``zero_grad``/``step`` are called per batch.
        loss_fn: callable ``loss_fn(output, target)`` returning the loss tensor.
        train_loader: iterable of ``(inputs, target)`` batches used for training.
        val_loader: iterable of ``(inputs, target)`` batches used for accuracy.
        epochs: number of passes over ``train_loader``.
        device: device that the model and every batch are moved to.

    Returns:
        None. The epoch index is printed before each epoch; afterwards the
        accuracy and the seconds elapsed since the start of training are
        printed (the elapsed time covers training and evaluation).
    """
    model.to(device)
    start = time.time()
    for epoch in range(epochs):
        print(f"epoch {epoch}")
        model.train()
        for inputs, target in train_loader:
            optimizer.zero_grad()
            inputs = inputs.to(device)
            target = target.to(device)
            output = model(inputs)
            loss = loss_fn(output, target)
            loss.backward()
            optimizer.step()

        model.eval()
        num_correct = 0
        num_examples = 0
        # Evaluation needs no gradients: skipping autograd avoids building a
        # graph (and holding activations) for every validation batch.
        with torch.no_grad():
            for inputs, target in val_loader:
                inputs = inputs.to(device)
                target = target.to(device)
                output = model(inputs)
                # Index of the largest output per sample is the predicted class.
                correct = torch.eq(torch.max(output, dim=1)[1], target).view(-1)
                num_correct += torch.sum(correct).item()
                num_examples += correct.shape[0]
        # An empty validation loader yields NaN instead of a ZeroDivisionError.
        accuracy = num_correct / num_examples if num_examples else float("nan")
        print("Epoch {}, accuracy = {:.2f}, time = {:.2f}".format(epoch, accuracy, time.time() - start))

In [10]:
# The training loader is also passed as the validation loader, so the
# reported accuracy is measured on the training images.
train(
    model,
    optimizer,
    criterion,
    train_loader=train_data_loader,
    val_loader=train_data_loader,
    epochs=2,
)

epoch 0
Epoch 0, accuracy = 0.50, time = 283.50
epoch 1
Epoch 1, accuracy = 0.50, time = 556.19


## Good Random Process
### Using the GPU ... less CPU processing
#### Less time-consuming

In [11]:
def add_gpu_noise(device, tensor):
    """Return ``tensor`` plus standard-normal noise, computed on ``device``.

    Args:
        device: target device for the computation (e.g. ``"cuda:0"``).
        tensor: input tensor; it is not modified in place.

    Returns:
        A new tensor on ``device`` with the same shape and dtype as ``tensor``.
    """
    # Move the input first so the noise is sampled directly on the target
    # device; sampling on the source device and copying afterwards would do
    # the work in the wrong place and mix devices in the addition below.
    tensor = tensor.to(device)
    noise = torch.randn_like(tensor)
    return tensor + noise

In [12]:
# Root folder of the training images.
train_data_path = "./data/train/"
model.to(device)
# No noise transform here: the noise is added to each batch inside the training loop.
image_transforms = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ]
)

train_data = torchvision.datasets.ImageFolder(root=train_data_path, transform=image_transforms)
batch_size = 32
# shuffle=True: ImageFolder yields samples class by class, so without shuffling
# every batch would contain a single class and training degenerates.
train_data_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

In [13]:
# Fresh Adam optimizer over all model parameters.
# NOTE(review): lr=2e-2 looks high for Adam on a pretrained network — confirm it is intended.
optimizer = optim.Adam(model.parameters(), lr=2e-2)
# Cross-entropy loss between the model outputs and the integer class targets.
criterion = nn.CrossEntropyLoss()

In [14]:
def train(model, optimizer, loss_fn,  train_loader, val_loader, epochs=20, device='cuda:0'):
    """Train ``model`` on noise-augmented batches and print per-epoch accuracy.

    Every training batch is moved to ``device`` and then passed through
    ``add_gpu_noise``; validation batches are used as they are.

    Args:
        model: ``torch.nn.Module`` to optimise; it is moved to ``device`` first.
        optimizer: optimizer whose ``zero_grad``/``step`` are called per batch.
        loss_fn: callable ``loss_fn(output, target)`` returning the loss tensor.
        train_loader: iterable of ``(inputs, target)`` batches used for training.
        val_loader: iterable of ``(inputs, target)`` batches used for accuracy.
        epochs: number of passes over ``train_loader``.
        device: device that the model and every batch are moved to.

    Returns:
        None. The epoch index is printed before each epoch; afterwards the
        accuracy and the seconds elapsed since the start of training are
        printed (the elapsed time covers training and evaluation).
    """
    model.to(device)
    start = time.time()
    for epoch in range(epochs):
        print(f"epoch {epoch}")
        model.train()
        for inputs, target in train_loader:
            optimizer.zero_grad()
            inputs = inputs.to(device)
            # Noise is added after the transfer, so it is generated on `device`.
            inputs = add_gpu_noise(device, inputs)
            target = target.to(device)
            output = model(inputs)
            loss = loss_fn(output, target)
            loss.backward()
            optimizer.step()

        model.eval()
        num_correct = 0
        num_examples = 0
        # Evaluation needs no gradients: skipping autograd avoids building a
        # graph (and holding activations) for every validation batch.
        with torch.no_grad():
            for inputs, target in val_loader:
                inputs = inputs.to(device)
                target = target.to(device)
                output = model(inputs)
                # Index of the largest output per sample is the predicted class.
                correct = torch.eq(torch.max(output, dim=1)[1], target).view(-1)
                num_correct += torch.sum(correct).item()
                num_examples += correct.shape[0]
        # An empty validation loader yields NaN instead of a ZeroDivisionError.
        accuracy = num_correct / num_examples if num_examples else float("nan")
        print("Epoch {}, accuracy = {:.2f}, time = {:.2f}".format(epoch, accuracy, time.time() - start))

In [15]:
# The training loader is also passed as the validation loader, so the
# reported accuracy is measured on the training images.
train(
    model,
    optimizer,
    criterion,
    train_loader=train_data_loader,
    val_loader=train_data_loader,
    epochs=2,
)

epoch 0
Epoch 0, accuracy = 0.50, time = 199.64
epoch 1
Epoch 1, accuracy = 0.50, time = 398.92
