<a href="https://www.kaggle.com/code/aleksandrmorozov123/computer-vision-with-pytorch?scriptVersionId=154897582" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**Begin with foundations - tensors**

In [2]:
import torch
x = torch.tensor ([[1,2,3], [4,5,6]])
y = torch.tensor ([[7,8,9], [10,11,12]])
z = x + y
print (z)

tensor([[ 8, 10, 12],
        [14, 16, 18]])


**Create the tensors**

In [3]:
import numpy 

# created from preexisting arrays
w = torch.tensor ([1,2,3])                 # from a list
w = torch.tensor ([1,2,3])                 # from a tuple
w = torch.tensor (numpy.array ([1,2,3]))   # from a numpy.array

# initialized by size
w = torch.empty (100, 200)                 # uninitialized, element values are not predictable
w = torch.zeros (100, 200)                 # all elements initialized with 0.0
w = torch.ones (100, 200)                  # all elements initialized with 1.0

# initialized by size with random values
w = torch.rand (100, 200)
w = torch.randn (100, 200)
w = torch.randint (5, 10, (100, 200))

# initialized to have the same size, data type and device as another tensor
x = torch.empty_like (w)

# specify the data type at creation using dtype
w = torch.tensor ([1,2,3], dtype = torch.float32)

# use the casting method to cast to a new data type
w.int ()        # w remains a float 32 after the cast
w = w.int ()    # w changes to an int32 after the cast

# use the to() method to cast to a new type
w = w.to (torch.float64)
w = w.to (dtype = torch.float64)

# Python automatucally converts data types during operations
x = torch.tensor ([1,2,3], dtype = torch.int32)
y = torch.tensor ([1,2,3], dtype = torch.float32)
z = x + y
print (z.dtype)

torch.float32


**Indexing, sdlicing, combining and splitting tensors**

In [4]:
x = torch.tensor ([[1, 2], [3, 4], [5, 6], [7, 8]])
print (x)

tensor([[1, 2],
        [3, 4],
        [5, 6],
        [7, 8]])


In [5]:
# indexing, returns a tensor
print (x[1, 1])

tensor(4)


In [6]:
# indexing, returns a value as a Python number
print (x[1,1].item ())

4


In [7]:
# slicing
print (x[:2, 1])

tensor([2, 4])


In [8]:
# Boolean indexing
# only keeps elements less than 4
print (x [x < 4])

tensor([1, 2, 3])


In [9]:
# transpose array
print (x.t())

tensor([[1, 3, 5, 7],
        [2, 4, 6, 8]])


In [10]:
# change shape
print (x.view ((2,4)))

tensor([[1, 2, 3, 4],
        [5, 6, 7, 8]])


In [11]:
# combining tensors 
y = torch.stack ((x, x))
print (y)

tensor([[[1, 2],
         [3, 4],
         [5, 6],
         [7, 8]],

        [[1, 2],
         [3, 4],
         [5, 6],
         [7, 8]]])


In [12]:
# splitting tensors 
a, b = x.unbind (dim = 1)
print (a, b)

tensor([1, 3, 5, 7]) tensor([2, 4, 6, 8])


**Automatic differentiation**

In [13]:
x = torch.tensor ([[1,2,3], [4,5,6]], 
                 dtype = torch.float, requires_grad = True)
print (x)

tensor([[1., 2., 3.],
        [4., 5., 6.]], requires_grad=True)


In [14]:
f = x.pow(2).sum()
print (f)

tensor(91., grad_fn=<SumBackward0>)


In [15]:
f.backward ()
print (x.grad)  # df/dx = 2x

tensor([[ 2.,  4.,  6.],
        [ 8., 10., 12.]])


**Deep learning with PyTorch**

In [16]:
# import dataset CIFAR10
from torchvision.datasets import CIFAR10

# load train data
train_data = CIFAR10 (root = "./train/", train = True, download = True)
print (train_data)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./train/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:06<00:00, 25903156.34it/s]


Extracting ./train/cifar-10-python.tar.gz to ./train/
Dataset CIFAR10
    Number of datapoints: 50000
    Root location: ./train/
    Split: Train


In [17]:
# mapping numeric labels to class names
print (train_data.class_to_idx)

{'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3, 'deer': 4, 'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9}


In [18]:
# load text data
test_data = CIFAR10 (root = "./test/", train = False, download = True)
print (test_data)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./test/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:05<00:00, 29714078.79it/s]


Extracting ./test/cifar-10-python.tar.gz to ./test/
Dataset CIFAR10
    Number of datapoints: 10000
    Root location: ./test/
    Split: Test


In [19]:
# transorm train data
# import library
from torchvision import transforms

train_transforms = transforms.Compose ([
    transforms.RandomCrop (32, padding = 4),
    transforms.RandomHorizontalFlip (),
    transforms.ToTensor (),
    # the mean and standard deviation values here were predetermined
    transforms.Normalize (
    mean = (0.4914, 0.4822, 0.4465),
    std = (0.2023, 0.1994, 0.2010))])

train_data = CIFAR10 (root = "./train/", train = True,
                   download = True, transform = train_transforms)
print (train_data.transforms)

Files already downloaded and verified
StandardTransform
Transform: Compose(
               RandomCrop(size=(32, 32), padding=4)
               RandomHorizontalFlip(p=0.5)
               ToTensor()
               Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.201))
           )


In [20]:
# transform test data
test_transforms = transforms.Compose ([
    transforms.ToTensor (),
    transforms.Normalize (
    (0.4914, 0.4822, 0.4465),
    (0.2023, 0.1994, 0.2010))])

test_data = CIFAR10 (root = "./test/", train = False,
                                         transform = test_transforms)

print (test_data)

Dataset CIFAR10
    Number of datapoints: 10000
    Root location: ./test/
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.201))
           )


In [21]:
# data batching
trainloader = torch.utils.data.DataLoader (train_data,
                                          batch_size = 16, shuffle = True)
data_batch, labels_batch = next (iter (trainloader))
print (data_batch.size ())

torch.Size([16, 3, 32, 32])


In [22]:
# create a dataloader for test data
testloader = torch.utils.data.DataLoader (test_data,
                                         batch_size = 16, shuffle = False)
torch.utils.data.DataLoader (test_data, batch_size = 1,
                            shuffle = False, sampler = None,
                            batch_sampler = None, num_workers = 0,
                            collate_fn = None, pin_memory = False,
                            drop_last = False, timeout = 0,
                            worker_init_fn = None, multiprocessing_context = None,
                            generator = None)

<torch.utils.data.dataloader.DataLoader at 0x7e022cd53790>

In [23]:
# create a simple model with torch.nn
import torch.nn as nn
import torch.nn.functional as F

class SimpleNet (nn.Module):
    
    def __init__(self):
        # create layers
        super (SimpleNet, self).__init__()
        self.fc1 = nn.Linear (2048, 256)
        self.fc2 = nn.Linear (256, 64)
        self.fc3 = nn.Linear(64, 2)
    
    def forward (self, x):
        # define how to model processes data
        x = x.view (-1, 2048)
        x = F.relu (self.fc1 (x))
        x = F.relu (self.fc2 (x))
        x = F.softmax (self.fc3 (x), dim = 1)
        return x
    
# create the model
simplenet = SimpleNet ()
print (simplenet)

SimpleNet(
  (fc1): Linear(in_features=2048, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=2, bias=True)
)


In [24]:
# forward pass
input = torch.rand (2048)
output = simplenet (input)

In [25]:
# create a modernized version of the LeNet5 model
from torch import nn

class LeNet5 (nn.Module):
    def __init__(self):
        super (LeNet5, self).__init__()
        self.conv1 = nn.Conv2d (3, 6, 5)
        self.conv2 = nn.Conv2d (6, 16, 5)
        self.fc1 = nn.Linear (16 * 5 * 5, 120)
        self.fc2 = nn.Linear (120, 84)
        self.fc3 = nn.Linear (84, 10)
        
    def forward (self, x):
        x = F.max_pool2d (F.relu (self.conv1 (x)), (2, 2))
        x = F.max_pool2d (F.relu (self.conv2 (x)), 2)
        x = x.view (-1, int (x.nelement () / x.shape [0]))
        x = F.relu (self.fc1 (x))
        x = F.relu (self.fc2 (x))
        x = self.fc3 (x)
        return x
device = ("cuda" if torch.cuda.is_available() else 'cpu')
model = LeNet5 ().to (device = device)
    
# define the loss function and the optimizer
from torch import optim

criterion = nn.CrossEntropyLoss ()
optimizer = optim.SGD (model.parameters (), lr = 0.001, momentum = 0.9)

# create the fundamental training loop
N_EPOCHS = 10
for epoch in range (N_EPOCHS):
    # outer training loop, loop over 10 epochs
    epoch_loss = 0.0
    for inputs, labels in trainloader:
        inputs = inputs.to(device)
        labels = labels.to (device)
        optimizer.zero_grad ()
        outputs = model (inputs)
        loss = criterion (outputs, labels)
        loss.backward ()
        optimizer.step ()
        
        epoch_loss += loss.item()
    print ("Epoch: {} Loss: {}".format (epoch, epoch_loss / len (trainloader)))


Epoch: 0 Loss: 1.9531448023605347
Epoch: 1 Loss: 1.6247903436470033
Epoch: 2 Loss: 1.4883510054206848
Epoch: 3 Loss: 1.4103709985351562
Epoch: 4 Loss: 1.3501436413955687
Epoch: 5 Loss: 1.3082493627357483
Epoch: 6 Loss: 1.2679695242118836
Epoch: 7 Loss: 1.2338845904159546
Epoch: 8 Loss: 1.197625019454956
Epoch: 9 Loss: 1.176326509809494


In [26]:
# split training dataset into a training dataset and a validation dataset
from torch.utils.data import random_split
train_set, val_set = random_split (train_data, [40000, 10000])
trainloader = torch.utils.data.DataLoader (train_set, batch_size = 16,
                                          shuffle = True)
valloader = torch.utils.data.DataLoader (val_set,
                                        batch_size = 16, shuffle = True)
print (len (trainloader))

2500


In [27]:
# define model, loss function and optimizer
from torch import optim

model = LeNet5 ().to (device)
criterion = nn.CrossEntropyLoss ()
optimizer = optim.SGD (model.parameters (), lr = 0.001, momentum = 0.9)

N_EPOCHS = 10
for epoch in range (N_EPOCHS):
    
    # Training 
    train_loss = 0.0
    model.train ()
    for inputs, labels in trainloader:
        inputs = inputs.to (device)
        labels = labels.to (device)
        
        optimizer.zero_grad ()
        
        outputs = model (inputs)
        loss = criterion (outputs, labels)
        loss.backward ()
        optimizer.step ()
        
        train_loss += loss.item ()
   
    # Validation
    val_loss = 0.0
    model.eval ()
    for inputs, labels in valloader:
        inputs = inputs.to (device)
        labels = labels.to (device)
        outputs = model (inputs)
        loss = criterion (outputs, labels)
        
        val_loss += loss.item ()
    
    print ("Epoch: {} Train loss: {} Val Loss: {}".format (
    epoch, train_loss / len (trainloader),
    val_loss / len (valloader)))

Epoch: 0 Train loss: 2.0599504963874815 Val Loss: 1.8614707792282104
Epoch: 1 Train loss: 1.691152241897583 Val Loss: 1.5756614126205444
Epoch: 2 Train loss: 1.5380944088935853 Val Loss: 1.486184539413452
Epoch: 3 Train loss: 1.4408530799627304 Val Loss: 1.374595050239563
Epoch: 4 Train loss: 1.3662251239061356 Val Loss: 1.3154202425003052
Epoch: 5 Train loss: 1.3141163203001023 Val Loss: 1.2709862680912019
Epoch: 6 Train loss: 1.2736498784780503 Val Loss: 1.2697033602714538
Epoch: 7 Train loss: 1.2357619614005089 Val Loss: 1.2212958835601806
Epoch: 8 Train loss: 1.2091693448066712 Val Loss: 1.2436211812019349
Epoch: 9 Train loss: 1.1807600442767143 Val Loss: 1.153017613697052


In [28]:
# testing
num_correct = 0.0
for x_test_batch, y_test_batch in testloader:
    model.eval()
    y_test_batch = y_test_batch.to(device)
    x_test_batch = x_test_batch.to(device)
    y_pred_batch = model(x_test_batch)
    _, predicted = torch.max(y_pred_batch, 1)
    num_correct += (predicted == y_test_batch).float().sum()
    accuracy = num_correct/(len(testloader) * testloader.batch_size)
    
print(len(testloader), testloader.batch_size)

625 16


In [29]:
# save the model
torch.save(model.state_dict(), "./lenet5_model.pt")
model = LeNet5().to(device)
model.load_state_dict(torch.load("./lenet5_model.pt"))

<All keys matched successfully>

In [30]:
import torch
vgg16 = torch.hub.load('pytorch/vision','vgg16', pretrained=True)
dependencies = ['torch']
from torchvision.models.vgg import vgg16
dependencies = ['torch']
from torchvision.models.vgg import vgg16 as _vgg16
# vgg16 is the name of the entrypoint
def vgg16(weights=False, **kwargs):
    """ # This docstring shows up in hub.help(): VGG16 model
    pretrained (bool): kwargs,
    load pretrained weights into the model
    """
    # Call the model; load pretrained weights
    model = _vgg16(weights=pretrained, **kwargs)
    return model

Downloading: "https://github.com/pytorch/vision/zipball/main" to /root/.cache/torch/hub/main.zip
Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:14<00:00, 37.0MB/s]
