In [1]:
import os
import io
import requests
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms.v2 as v2
from torchsummary import summary
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torchvision.utils import make_grid
from torchvision.datasets import ImageFolder
from torchvision.datasets import MNIST, FashionMNIST, CIFAR10
from torchvision.models import resnet50, ResNet50_Weights
from PIL import Image
from pathlib import Path

In [2]:
class MyDataset():
    def __init__(self, root='./data/dogs-vs-cats', transform=None):
        self.root = root
        self.image_paths = []
        self.__classes = {'Cat':0, 'Dog':1}
        self.labels = []

        for dname in os.listdir(self.root):
            print(type(dname))
            print(os.path.isdir(Path(root, dname)))
            new_path = Path(root, dname)
            if os.path.isdir(new_path):
                for file in os.listdir(new_path):
                    # print(file)
                    self.image_paths.append(str(Path(new_path, file)))
                    self.labels.append(self.__classes[dname])

        self.transform = transform

    def __getitem__(self, idx):
        path = self.image_paths[idx]
        # print(path)
        image = Image.open(path)
        print(self.transform(image).shape)
        return self.transform(image), self.labels[idx]

    def __len__(self):
        return len(self.labels)

In [3]:
transform = v2.Compose([
            v2.RandomResizedCrop(size=(224, 224), antialias=True),
            v2.ToTensor(),
            v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
dataset = MyDataset('./data/dogs-vs-cats', transform)
data, label = dataset[0]
data.shape, label

<class 'str'>
True
<class 'str'>
True
torch.Size([3, 224, 224])




(torch.Size([3, 224, 224]), 0)

In [4]:
data_loader = DataLoader(dataset, batch_size=32, shuffle=True)

In [5]:
for X_train, y_label in data_loader:
    print(X_train.shape, y_label.shape)
    break

torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([32, 3, 224, 224]) torch.Size([32])


In [6]:
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 6, 5, padding='same'),
            nn.ReLU(),
            nn.Dropout(),
            nn.MaxPool2d(2),
            nn.Conv2d(6, 16, 5, padding='same'),
            nn.ReLU(),
            nn.Dropout(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, 5, padding='same'),
            nn.ReLU(),
            nn.Dropout(),
            nn.MaxPool2d(2),
        )
        self.flatten = nn.Flatten()
        self.classifier = nn.Sequential(
            nn.Linear(32 * 28 * 28, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 2),
            nn.Softmax(dim=1)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.flatten(x)
        x = self.classifier(x)
        return x

In [7]:
loss_fn = nn.CrossEntropyLoss()

In [8]:
model = LeNet5()
color_image = torch.rand(32, 3, 224, 224)
model(color_image).shape

torch.Size([32, 2])

In [9]:
for X_train, y_label in data_loader:
    outputs = model(X_train)
    print(outputs.shape)
    break

torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([32, 2])


In [10]:
loss_fn = nn.CrossEntropyLoss()

In [11]:
summary(model, (3, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 32, 28, 28]          --
|    └─Conv2d: 2-1                       [-1, 6, 224, 224]         456
|    └─ReLU: 2-2                         [-1, 6, 224, 224]         --
|    └─Dropout: 2-3                      [-1, 6, 224, 224]         --
|    └─MaxPool2d: 2-4                    [-1, 6, 112, 112]         --
|    └─Conv2d: 2-5                       [-1, 16, 112, 112]        2,416
|    └─ReLU: 2-6                         [-1, 16, 112, 112]        --
|    └─Dropout: 2-7                      [-1, 16, 112, 112]        --
|    └─MaxPool2d: 2-8                    [-1, 16, 56, 56]          --
|    └─Conv2d: 2-9                       [-1, 32, 56, 56]          12,832
|    └─ReLU: 2-10                        [-1, 32, 56, 56]          --
|    └─Dropout: 2-11                     [-1, 32, 56, 56]          --
|    └─MaxPool2d: 2-12                   [-1, 32, 28, 28]          --
├─Flatt

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 32, 28, 28]          --
|    └─Conv2d: 2-1                       [-1, 6, 224, 224]         456
|    └─ReLU: 2-2                         [-1, 6, 224, 224]         --
|    └─Dropout: 2-3                      [-1, 6, 224, 224]         --
|    └─MaxPool2d: 2-4                    [-1, 6, 112, 112]         --
|    └─Conv2d: 2-5                       [-1, 16, 112, 112]        2,416
|    └─ReLU: 2-6                         [-1, 16, 112, 112]        --
|    └─Dropout: 2-7                      [-1, 16, 112, 112]        --
|    └─MaxPool2d: 2-8                    [-1, 16, 56, 56]          --
|    └─Conv2d: 2-9                       [-1, 32, 56, 56]          12,832
|    └─ReLU: 2-10                        [-1, 32, 56, 56]          --
|    └─Dropout: 2-11                     [-1, 32, 56, 56]          --
|    └─MaxPool2d: 2-12                   [-1, 32, 28, 28]          --
├─Flatt

In [12]:
optimizer = optim.Adam(model.parameters(), lr=0.0001)

for X_train, y_label in data_loader:
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = loss_fn(outputs, y_label)
    loss.backward()
    optimizer.step()
    break

torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])


In [13]:
EPOCHS = 1
for n in range(EPOCHS):
    print('EPOCHS {}:'.format(n + 1))
    epoch_loss = 0
    step_loss = 0
    for idx, (X_train, y_label) in enumerate(data_loader):
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = loss_fn(outputs, y_label)
        loss.backward()
        optimizer.step()

        step_loss += loss.item()
        print(' batch {} loss: {}'.format(idx + 1, step_loss))
        step_loss = 0

EPOCHS 1:
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
 batch 1 loss: 0.7144557237625122
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 

In [14]:
train_dataset, test_dataset = random_split(dataset, [0.8, 0.2])
print(len(train_dataset), len(test_dataset))

402 100


In [15]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

for X_train, y_label in train_loader:
    print(X_train.shape, y_label.shape)
    break

torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([32, 3, 224, 224]) torch.Size([32])


In [16]:
EPOCHS = 1

for n in range(EPOCHS):
    print('EPOCHS {}'.format(n + 1))
    epoch_loss = 0
    step_loss = 0
    for idx, (X_train, y_label) in enumerate(data_loader):
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = loss_fn(outputs, y_label)
        loss.backward()
        optimizer.step()

        step_loss += loss.item()
        train_acc = torch.sum(torch.argmax(outputs, dim=1) == y_label)
        print(' batch {} loss: {}'.format(idx + 1, step_loss))
        step_loss = 0

EPOCHS 1
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
 batch 1 loss: 0.6912333369255066
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 2

In [17]:
from torchvision.models import alexnet, AlexNet_Weights

In [18]:
model = alexnet(weights=AlexNet_Weights.IMAGENET1K_V1)
model

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [19]:
for p in model.parameters():
    print(p.names, type(p), p.shape, p.requires_grad)
    p.reqrires_grad =False

(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([64, 3, 11, 11]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([64]) True
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([192, 64, 5, 5]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([192]) True
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([384, 192, 3, 3]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([384]) True
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([256, 384, 3, 3]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([256]) True
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([256, 256, 3, 3]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([256]) True
(None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([4096, 9216]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([4096]) True
(None, Non

In [20]:
summary(model, (3, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 256, 6, 6]           --
|    └─Conv2d: 2-1                       [-1, 64, 55, 55]          23,296
|    └─ReLU: 2-2                         [-1, 64, 55, 55]          --
|    └─MaxPool2d: 2-3                    [-1, 64, 27, 27]          --
|    └─Conv2d: 2-4                       [-1, 192, 27, 27]         307,392
|    └─ReLU: 2-5                         [-1, 192, 27, 27]         --
|    └─MaxPool2d: 2-6                    [-1, 192, 13, 13]         --
|    └─Conv2d: 2-7                       [-1, 384, 13, 13]         663,936
|    └─ReLU: 2-8                         [-1, 384, 13, 13]         --
|    └─Conv2d: 2-9                       [-1, 256, 13, 13]         884,992
|    └─ReLU: 2-10                        [-1, 256, 13, 13]         --
|    └─Conv2d: 2-11                      [-1, 256, 13, 13]         590,080
|    └─ReLU: 2-12                        [-1, 256, 13, 13]   

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 256, 6, 6]           --
|    └─Conv2d: 2-1                       [-1, 64, 55, 55]          23,296
|    └─ReLU: 2-2                         [-1, 64, 55, 55]          --
|    └─MaxPool2d: 2-3                    [-1, 64, 27, 27]          --
|    └─Conv2d: 2-4                       [-1, 192, 27, 27]         307,392
|    └─ReLU: 2-5                         [-1, 192, 27, 27]         --
|    └─MaxPool2d: 2-6                    [-1, 192, 13, 13]         --
|    └─Conv2d: 2-7                       [-1, 384, 13, 13]         663,936
|    └─ReLU: 2-8                         [-1, 384, 13, 13]         --
|    └─Conv2d: 2-9                       [-1, 256, 13, 13]         884,992
|    └─ReLU: 2-10                        [-1, 256, 13, 13]         --
|    └─Conv2d: 2-11                      [-1, 256, 13, 13]         590,080
|    └─ReLU: 2-12                        [-1, 256, 13, 13]   

In [21]:
model.classifier = nn.Sequential(
    nn.Dropout(),
    nn.Linear(9216, 512),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(512, 64),
    nn.ReLU(),
    nn.Linear(64, 2),
    nn.Softmax(dim=1)
)
model

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=512, bias=True)
  

In [22]:
summary(model, (3, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 256, 6, 6]           --
|    └─Conv2d: 2-1                       [-1, 64, 55, 55]          23,296
|    └─ReLU: 2-2                         [-1, 64, 55, 55]          --
|    └─MaxPool2d: 2-3                    [-1, 64, 27, 27]          --
|    └─Conv2d: 2-4                       [-1, 192, 27, 27]         307,392
|    └─ReLU: 2-5                         [-1, 192, 27, 27]         --
|    └─MaxPool2d: 2-6                    [-1, 192, 13, 13]         --
|    └─Conv2d: 2-7                       [-1, 384, 13, 13]         663,936
|    └─ReLU: 2-8                         [-1, 384, 13, 13]         --
|    └─Conv2d: 2-9                       [-1, 256, 13, 13]         884,992
|    └─ReLU: 2-10                        [-1, 256, 13, 13]         --
|    └─Conv2d: 2-11                      [-1, 256, 13, 13]         590,080
|    └─ReLU: 2-12                        [-1, 256, 13, 13]   

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 256, 6, 6]           --
|    └─Conv2d: 2-1                       [-1, 64, 55, 55]          23,296
|    └─ReLU: 2-2                         [-1, 64, 55, 55]          --
|    └─MaxPool2d: 2-3                    [-1, 64, 27, 27]          --
|    └─Conv2d: 2-4                       [-1, 192, 27, 27]         307,392
|    └─ReLU: 2-5                         [-1, 192, 27, 27]         --
|    └─MaxPool2d: 2-6                    [-1, 192, 13, 13]         --
|    └─Conv2d: 2-7                       [-1, 384, 13, 13]         663,936
|    └─ReLU: 2-8                         [-1, 384, 13, 13]         --
|    └─Conv2d: 2-9                       [-1, 256, 13, 13]         884,992
|    └─ReLU: 2-10                        [-1, 256, 13, 13]         --
|    └─Conv2d: 2-11                      [-1, 256, 13, 13]         590,080
|    └─ReLU: 2-12                        [-1, 256, 13, 13]   

In [23]:
image = torch.rand(32, 3, 224, 224)
model(image)

tensor([[0.4565, 0.5435],
        [0.4988, 0.5012],
        [0.5210, 0.4790],
        [0.4783, 0.5217],
        [0.4716, 0.5284],
        [0.4968, 0.5032],
        [0.4322, 0.5678],
        [0.5366, 0.4634],
        [0.5233, 0.4767],
        [0.4824, 0.5176],
        [0.4891, 0.5109],
        [0.5381, 0.4619],
        [0.4803, 0.5197],
        [0.4733, 0.5267],
        [0.5316, 0.4684],
        [0.5244, 0.4756],
        [0.4705, 0.5295],
        [0.5452, 0.4548],
        [0.4666, 0.5334],
        [0.4877, 0.5123],
        [0.4828, 0.5172],
        [0.4917, 0.5083],
        [0.4442, 0.5558],
        [0.4737, 0.5263],
        [0.4894, 0.5106],
        [0.5184, 0.4816],
        [0.4892, 0.5108],
        [0.5113, 0.4887],
        [0.4806, 0.5194],
        [0.5291, 0.4709],
        [0.4461, 0.5539],
        [0.4831, 0.5169]], grad_fn=<SoftmaxBackward0>)

In [24]:
for p in model.parameters():
    print(p.names, type(p), p.shape, p.requires_grad)

(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([64, 3, 11, 11]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([64]) True
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([192, 64, 5, 5]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([192]) True
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([384, 192, 3, 3]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([384]) True
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([256, 384, 3, 3]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([256]) True
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([256, 256, 3, 3]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([256]) True
(None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([512, 9216]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([512]) True
(None, None)

In [25]:
my_transform = v2.Compose([
    v2.Resize(size=(224, 224)),
    v2.ToTensor(),    
])

root = './data/dogs-vs-cats'
dataset = ImageFolder(root, transform=my_transform)

In [26]:
data_loader = DataLoader(dataset, batch_size=32, shuffle=True)

for X_train, y_label in data_loader:
    print(X_train.shape, y_label.shape)
    break

torch.Size([32, 3, 224, 224]) torch.Size([32])


In [27]:
loss_fn = nn.CrossEntropyLoss()

In [28]:
optimizer = optim.Adam(model.parameters())

In [29]:
def fit(model, data_loader, loss_fn, optimizer, epochs):
    for i in range(epochs):
        for X_train, y_label in data_loader:
            optimizer.zero_grad()
            outputs = model(X_train)
            loss = loss_fn(outputs, y_label)
            loss.backward()
            optimizer.step()
    return model

In [30]:
fit(model, data_loader, loss_fn, optimizer, 1)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=512, bias=True)
  

In [31]:
for X_train, y_label in data_loader:
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = loss_fn(outputs, y_label)
    loss.backward()
    optimizer.step()
    break

In [32]:
from torchvision.models import vgg16, VGG16_Weights

model = vgg16(weights=VGG16_Weights.IMAGENET1K_V1)
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [33]:
summary(model, (3, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 512, 7, 7]           --
|    └─Conv2d: 2-1                       [-1, 64, 224, 224]        1,792
|    └─ReLU: 2-2                         [-1, 64, 224, 224]        --
|    └─Conv2d: 2-3                       [-1, 64, 224, 224]        36,928
|    └─ReLU: 2-4                         [-1, 64, 224, 224]        --
|    └─MaxPool2d: 2-5                    [-1, 64, 112, 112]        --
|    └─Conv2d: 2-6                       [-1, 128, 112, 112]       73,856
|    └─ReLU: 2-7                         [-1, 128, 112, 112]       --
|    └─Conv2d: 2-8                       [-1, 128, 112, 112]       147,584
|    └─ReLU: 2-9                         [-1, 128, 112, 112]       --
|    └─MaxPool2d: 2-10                   [-1, 128, 56, 56]         --
|    └─Conv2d: 2-11                      [-1, 256, 56, 56]         295,168
|    └─ReLU: 2-12                        [-1, 256, 56, 56]      

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 512, 7, 7]           --
|    └─Conv2d: 2-1                       [-1, 64, 224, 224]        1,792
|    └─ReLU: 2-2                         [-1, 64, 224, 224]        --
|    └─Conv2d: 2-3                       [-1, 64, 224, 224]        36,928
|    └─ReLU: 2-4                         [-1, 64, 224, 224]        --
|    └─MaxPool2d: 2-5                    [-1, 64, 112, 112]        --
|    └─Conv2d: 2-6                       [-1, 128, 112, 112]       73,856
|    └─ReLU: 2-7                         [-1, 128, 112, 112]       --
|    └─Conv2d: 2-8                       [-1, 128, 112, 112]       147,584
|    └─ReLU: 2-9                         [-1, 128, 112, 112]       --
|    └─MaxPool2d: 2-10                   [-1, 128, 56, 56]         --
|    └─Conv2d: 2-11                      [-1, 256, 56, 56]         295,168
|    └─ReLU: 2-12                        [-1, 256, 56, 56]      

In [34]:
for p in model.parameters():
    print(p.names, type(p), p.shape, p.requires_grad)
    p.reqrires_grad =False

(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([64, 3, 3, 3]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([64]) True
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([64, 64, 3, 3]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([64]) True
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([128, 64, 3, 3]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([128]) True
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([128, 128, 3, 3]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([128]) True
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([256, 128, 3, 3]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([256]) True
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([256, 256, 3, 3]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([256]) True


In [35]:
model.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 1024),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(1024, 2),
)

In [36]:
summary(model, (3, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 512, 7, 7]           --
|    └─Conv2d: 2-1                       [-1, 64, 224, 224]        1,792
|    └─ReLU: 2-2                         [-1, 64, 224, 224]        --
|    └─Conv2d: 2-3                       [-1, 64, 224, 224]        36,928
|    └─ReLU: 2-4                         [-1, 64, 224, 224]        --
|    └─MaxPool2d: 2-5                    [-1, 64, 112, 112]        --
|    └─Conv2d: 2-6                       [-1, 128, 112, 112]       73,856
|    └─ReLU: 2-7                         [-1, 128, 112, 112]       --
|    └─Conv2d: 2-8                       [-1, 128, 112, 112]       147,584
|    └─ReLU: 2-9                         [-1, 128, 112, 112]       --
|    └─MaxPool2d: 2-10                   [-1, 128, 56, 56]         --
|    └─Conv2d: 2-11                      [-1, 256, 56, 56]         295,168
|    └─ReLU: 2-12                        [-1, 256, 56, 56]      

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 512, 7, 7]           --
|    └─Conv2d: 2-1                       [-1, 64, 224, 224]        1,792
|    └─ReLU: 2-2                         [-1, 64, 224, 224]        --
|    └─Conv2d: 2-3                       [-1, 64, 224, 224]        36,928
|    └─ReLU: 2-4                         [-1, 64, 224, 224]        --
|    └─MaxPool2d: 2-5                    [-1, 64, 112, 112]        --
|    └─Conv2d: 2-6                       [-1, 128, 112, 112]       73,856
|    └─ReLU: 2-7                         [-1, 128, 112, 112]       --
|    └─Conv2d: 2-8                       [-1, 128, 112, 112]       147,584
|    └─ReLU: 2-9                         [-1, 128, 112, 112]       --
|    └─MaxPool2d: 2-10                   [-1, 128, 56, 56]         --
|    └─Conv2d: 2-11                      [-1, 256, 56, 56]         295,168
|    └─ReLU: 2-12                        [-1, 256, 56, 56]      

In [37]:
from torchvision.models import resnet50, ResNet50_Weights

model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [48]:
# for p in model.parameters():
#     print(p.names, type(p), p.shape, p.requires_grad)
#     p.reqrires_grad =False

In [49]:
model.fc = nn.Sequential(
    nn.Linear(2048, 512),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(512, 32),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(32, 2)
)
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [50]:
summary(model, (3, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
├─Conv2d: 1-1                            [-1, 64, 112, 112]        9,408
├─BatchNorm2d: 1-2                       [-1, 64, 112, 112]        128
├─ReLU: 1-3                              [-1, 64, 112, 112]        --
├─MaxPool2d: 1-4                         [-1, 64, 56, 56]          --
├─Sequential: 1-5                        [-1, 256, 56, 56]         --
|    └─Bottleneck: 2-1                   [-1, 256, 56, 56]         --
|    |    └─Conv2d: 3-1                  [-1, 64, 56, 56]          4,096
|    |    └─BatchNorm2d: 3-2             [-1, 64, 56, 56]          128
|    |    └─ReLU: 3-3                    [-1, 64, 56, 56]          --
|    |    └─Conv2d: 3-4                  [-1, 64, 56, 56]          36,864
|    |    └─BatchNorm2d: 3-5             [-1, 64, 56, 56]          128
|    |    └─ReLU: 3-6                    [-1, 64, 56, 56]          --
|    |    └─Conv2d: 3-7                  [-1, 256, 56, 56]         16,38

Layer (type:depth-idx)                   Output Shape              Param #
├─Conv2d: 1-1                            [-1, 64, 112, 112]        9,408
├─BatchNorm2d: 1-2                       [-1, 64, 112, 112]        128
├─ReLU: 1-3                              [-1, 64, 112, 112]        --
├─MaxPool2d: 1-4                         [-1, 64, 56, 56]          --
├─Sequential: 1-5                        [-1, 256, 56, 56]         --
|    └─Bottleneck: 2-1                   [-1, 256, 56, 56]         --
|    |    └─Conv2d: 3-1                  [-1, 64, 56, 56]          4,096
|    |    └─BatchNorm2d: 3-2             [-1, 64, 56, 56]          128
|    |    └─ReLU: 3-3                    [-1, 64, 56, 56]          --
|    |    └─Conv2d: 3-4                  [-1, 64, 56, 56]          36,864
|    |    └─BatchNorm2d: 3-5             [-1, 64, 56, 56]          128
|    |    └─ReLU: 3-6                    [-1, 64, 56, 56]          --
|    |    └─Conv2d: 3-7                  [-1, 256, 56, 56]         16,38

In [51]:
model = resnet50(pretrained=True)
image = torch.rand(32, 3, 224, 224)
model(image)



tensor([[ 0.6606,  0.7701,  0.4644,  ..., -0.3869,  1.2991, -0.3736],
        [-1.8694, -1.0012, -0.7484,  ..., -1.1758,  0.6394,  0.4355],
        [ 0.0532, -1.7126, -1.5921,  ...,  0.3957,  0.5392,  0.4867],
        ...,
        [ 2.7959,  0.4902,  1.6259,  ...,  1.3645,  1.6104,  0.4789],
        [-0.6404, -1.0699, -3.6660,  ..., -1.1782,  1.7109,  1.3718],
        [-1.1707,  0.1454, -1.6684,  ..., -0.1361,  1.6032,  2.6673]],
       grad_fn=<AddmmBackward0>)

In [52]:
my_transform = v2.Compose([
               v2.RandomResizedCrop(size=(224, 224), antialias=True),
               v2.ToTensor()
])

root = './data/catanddog'
dataset = ImageFolder(root, transform=my_transform)
image, label = dataset[0]
type(image), type(label)



(torch.Tensor, int)

In [53]:
data_loader = DataLoader(dataset, batch_size=32)

for X_train, y_label in data_loader:
    outputs = model(X_train)
    print(X_train.shape, y_label.shape)
    print(outputs.shape)
    break

torch.Size([32, 3, 224, 224]) torch.Size([32])
torch.Size([32, 1000])


In [54]:
image_paths = []
labels = []

for dname in os.listdir(root):
    print(dname)
    print(type(dname))
    print(os.path.isdir(Path(root, dname)))
    new_path = Path(root, dname)
    if os.path.isdir(new_path):
        for file in os.listdir(new_path):
            print(file)
            image_paths.append(str(Path(new_path, file)))
            labels.append(dname)

print(len(image_paths), len(labels), labels.count('Cat'), labels.count('Dog'))

Cat
<class 'str'>
True
cat.0.jpg
cat.1.jpg
cat.10.jpg
cat.100.jpg
cat.101.jpg
cat.102.jpg
cat.103.jpg
cat.104.jpg
cat.105.jpg
cat.106.jpg
cat.107.jpg
cat.108.jpg
cat.109.jpg
cat.11.jpg
cat.110.jpg
cat.111.jpg
cat.112.jpg
cat.113.jpg
cat.114.jpg
cat.115.jpg
cat.116.jpg
cat.117.jpg
cat.118.jpg
cat.119.jpg
cat.12.jpg
cat.120.jpg
cat.121.jpg
cat.122.jpg
cat.123.jpg
cat.124.jpg
cat.125.jpg
cat.126.jpg
cat.127.jpg
cat.128.jpg
cat.129.jpg
cat.13.jpg
cat.130.jpg
cat.131.jpg
cat.132.jpg
cat.133.jpg
cat.134.jpg
cat.135.jpg
cat.136.jpg
cat.137.jpg
cat.138.jpg
cat.139.jpg
cat.14.jpg
cat.140.jpg
cat.141.jpg
cat.142.jpg
cat.143.jpg
cat.144.jpg
cat.145.jpg
cat.146.jpg
cat.147.jpg
cat.148.jpg
cat.149.jpg
cat.15.jpg
cat.150.jpg
cat.151.jpg
cat.152.jpg
cat.153.jpg
cat.154.jpg
cat.155.jpg
cat.156.jpg
cat.157.jpg
cat.158.jpg
cat.159.jpg
cat.16.jpg
cat.160.jpg
cat.161.jpg
cat.162.jpg
cat.163.jpg
cat.164.jpg
cat.165.jpg
cat.166.jpg
cat.167.jpg
cat.168.jpg
cat.169.jpg
cat.17.jpg
cat.170.jpg
cat.171.jpg
cat.1

In [55]:
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

In [56]:
def train_model(model, data_loader, loss_fn, optimizer, device, num_epochs=13, is_train=True):
    since = time.time()
    acc_history = []
    loss_history = []
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('EPOCH {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in data_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            model.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            _, preds = torch.max(outputs, 1)
            loss.backward()
            optimiser.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / len(data_loader.dataset)
        epoch_acc = running_corrects.double() / len(data_loader.dataset)

        print('Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))

        if epoch_acc > best_acc:
            best_acc = epoch_acc

        acc_history.append(epoch_acc.item())
        loss_history.append(epoch_loss)
        torch.save(model.state_dict(), os.path.join('./data/dogs-vs-cats', '{0.0=2d}.pth'.format(epoch)))
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best Acc: {:4f}'.format(best_acc))
    return acc_history, loss_history

In [57]:
params_to_update = []
for name, param in resnet50.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)
        print("\t", name)

optimizer = optim.Adam(params_to_update)

AttributeError: 'function' object has no attribute 'named_parameters'

In [None]:
'cuda' if