In [1]:
import torch


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from torchvision import transforms


class ImageTransform:
    def __init__(self, resize, mean, std) -> None:
        self.data_transform = {
            "train": transforms.Compose(
                [
                    transforms.RandomResizedCrop(resize, scale=(0.5, 1.0)),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize(mean, std),
                ]
            ),
            "val": transforms.Compose(
                [
                    transforms.Resize(256),
                    transforms.CenterCrop(resize),
                    transforms.ToTensor(),
                    transforms.Normalize(mean, std),
                ]
            ),
        }

    def __call__(self, img, phase):
        return self.data_transform[phase](img)


In [3]:
import os
import random
import cv2

cat_directory = os.path.join("..", "dataset", "chap06", "dogs-vs-cats", "Cat")
dog_directory = os.path.join("..", "dataset", "chap06", "dogs-vs-cats", "Dog")
cat_image_file_paths = sorted(
    [os.path.join(cat_directory, file_name) for file_name in os.listdir(cat_directory)]
)
dog_image_file_paths = sorted(
    [os.path.join(dog_directory, file_name) for file_name in os.listdir(dog_directory)]
)
image_filepaths = [*cat_image_file_paths, *dog_image_file_paths]
correct_images_filepaths = [
    path for path in image_filepaths if cv2.imread(path) is not None
]
random.seed(42)
random.shuffle(correct_images_filepaths)
train_images_filepaths = correct_images_filepaths[:400]
val_images_filepaths = correct_images_filepaths[400:-10]
test_images_filepaths = correct_images_filepaths[-10:]


In [4]:
from torch.utils.data import Dataset
from PIL import Image


class DogVsCatDataset(Dataset):
    def __init__(self, file_list, transform=None, phase="train") -> None:
        super().__init__()
        self.file_list = file_list
        self.transform = transform
        self.phase = phase

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, index):
        img_path = self.file_list[index]
        img = Image.open(img_path)
        img_transformed = self.transform(img, self.phase)
        label = img_path.split("/")[-1].split(".")[0]
        if label == "dog":
            label = 1
        elif label == "cat":
            label = 0
        return img_transformed, label


In [5]:
size = 224
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
batch_size = 32


In [6]:
train_dataset = DogVsCatDataset(
    file_list=train_images_filepaths,
    transform=ImageTransform(size, mean, std),
    phase="train",
)
val_dataset = DogVsCatDataset(
    file_list=val_images_filepaths,
    transform=ImageTransform(size, mean, std),
    phase="val",
)


In [7]:
from torch.utils.data import DataLoader

train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)
dataloader_dict = {"train": train_dataloader, "val": val_dataloader}


In [8]:
from torch import nn


class LeNet(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.cnn1 = nn.Conv2d(
            in_channels=3, out_channels=16, kernel_size=5, stride=1, padding=0
        )
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        
        self.cnn2 = nn.Conv2d(
            in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=0
        )
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        
        self.fc1 = nn.Linear(32 * 53 * 53, 512)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(512, 2)
        self.ouput = nn.Softmax(dim=1)
    
    def forward(self, x):
        x = self.cnn1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)
        x = self.cnn2(x)
        x = self.relu2(x)
        x = self.maxpool2(x)
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.relu3(x)
        x = self.fc2(x)
        x = self.ouput(x)
        return x

In [9]:
model = LeNet()
print(model)

LeNet(
  (cnn1): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1))
  (relu1): ReLU()
  (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (cnn2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
  (relu2): ReLU()
  (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=89888, out_features=512, bias=True)
  (relu3): ReLU()
  (fc2): Linear(in_features=512, out_features=2, bias=True)
  (ouput): Softmax(dim=1)
)


In [10]:
from torchsummary import summary
summary(model, input_size=(3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 220, 220]           1,216
              ReLU-2         [-1, 16, 220, 220]               0
         MaxPool2d-3         [-1, 16, 110, 110]               0
            Conv2d-4         [-1, 32, 106, 106]          12,832
              ReLU-5         [-1, 32, 106, 106]               0
         MaxPool2d-6           [-1, 32, 53, 53]               0
            Linear-7                  [-1, 512]      46,023,168
              ReLU-8                  [-1, 512]               0
            Linear-9                    [-1, 2]           1,026
          Softmax-10                    [-1, 2]               0
Total params: 46,038,242
Trainable params: 46,038,242
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 19.47
Params size (MB): 175.62
Es

In [11]:
from torch import optim

optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()

In [12]:
from time import time
from tqdm import tqdm


def train_model(model, dataloader_dict, criterion, optimizer, num_epoch, device):
    since = time()
    best_acc = 0.0
    for epoch in range(num_epoch):
        print("="* 20)
        print(f"Epoch{epoch+1:3}/{num_epoch:3}")
        for phase in ["train", "val"]:
            if phase == "train":
                model.train()
            elif phase == "val":
                model.eval()
            else:
                raise KeyError

            epoch_loss = 0.0
            epoch_corrects = 0
            for inputs, labels in tqdm(dataloader_dict[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == "train"):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, dim=1)
                    loss = criterion(outputs, labels)
                    if phase == "train":
                        loss.backward()
                        optimizer.step()
                    epoch_loss += loss.item() * inputs.size(0)
                    epoch_corrects += torch.sum(preds == labels.data)
            epoch_loss = epoch_loss / len(dataloader_dict[phase].dataset)
            epoch_acc = epoch_corrects.double() / len(dataloader_dict[phase].dataset)
            print(f"{phase:5} Loss: {epoch_loss :.4f} Acc: {epoch_acc:.4f}")
            if phase == "val" and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_weights = model.state_dict()
    time_elapsed = time() - since
    print(f"Training complete in {time_elapsed // 60}m {time_elapsed % 60:.1f}s")
    print("Best val Acc", best_acc)
    return model, best_model_weights


num_epoch = 10
from torch import device
from torch.cuda import is_available

model, best_model_weights = train_model(
    model,
    dataloader_dict,
    criterion,
    optimizer,
    num_epoch,
    device("cuda" if is_available() else "cpu"),
)


Epoch  1/ 10


100%|██████████| 13/13 [00:09<00:00,  1.34it/s]


train Loss: 0.6942 Acc: 0.4725


100%|██████████| 3/3 [00:02<00:00,  1.48it/s]


val   Loss: 0.6946 Acc: 0.5217
Epoch  2/ 10


100%|██████████| 13/13 [00:13<00:00,  1.04s/it]


train Loss: 0.6893 Acc: 0.5300


100%|██████████| 3/3 [00:02<00:00,  1.27it/s]


val   Loss: 0.6964 Acc: 0.4565
Epoch  3/ 10


100%|██████████| 13/13 [00:14<00:00,  1.15s/it]


train Loss: 0.6861 Acc: 0.5750


100%|██████████| 3/3 [00:01<00:00,  1.60it/s]


val   Loss: 0.6981 Acc: 0.5000
Epoch  4/ 10


100%|██████████| 13/13 [00:15<00:00,  1.17s/it]


train Loss: 0.6815 Acc: 0.5675


100%|██████████| 3/3 [00:02<00:00,  1.39it/s]


val   Loss: 0.7022 Acc: 0.5217
Epoch  5/ 10


100%|██████████| 13/13 [00:12<00:00,  1.02it/s]


train Loss: 0.6788 Acc: 0.6175


100%|██████████| 3/3 [00:01<00:00,  1.55it/s]


val   Loss: 0.7010 Acc: 0.5543
Epoch  6/ 10


100%|██████████| 13/13 [00:15<00:00,  1.19s/it]


train Loss: 0.6695 Acc: 0.6575


100%|██████████| 3/3 [00:02<00:00,  1.40it/s]


val   Loss: 0.7052 Acc: 0.5217
Epoch  7/ 10


100%|██████████| 13/13 [00:16<00:00,  1.28s/it]


train Loss: 0.6710 Acc: 0.6225


100%|██████████| 3/3 [00:01<00:00,  1.52it/s]


val   Loss: 0.7027 Acc: 0.5652
Epoch  8/ 10


100%|██████████| 13/13 [00:14<00:00,  1.09s/it]


train Loss: 0.6625 Acc: 0.6250


100%|██████████| 3/3 [00:01<00:00,  1.62it/s]


val   Loss: 0.6928 Acc: 0.5761
Epoch  9/ 10


100%|██████████| 13/13 [00:15<00:00,  1.18s/it]


train Loss: 0.6614 Acc: 0.6225


100%|██████████| 3/3 [00:01<00:00,  1.51it/s]


val   Loss: 0.6962 Acc: 0.5870
Epoch 10/ 10


100%|██████████| 13/13 [00:13<00:00,  1.05s/it]


train Loss: 0.6630 Acc: 0.5925


100%|██████████| 3/3 [00:01<00:00,  1.70it/s]

val   Loss: 0.6956 Acc: 0.5761
Training complete in 2.0m 41.6s
Best val Acc tensor(0.5870, dtype=torch.float64)



