In [9]:
from torchvision import transforms


class ImageTransform:
    def __init__(self, resize, mean, std) -> None:
        self.data_transform = {
            "train": transforms.Compose(
                [
                    transforms.RandomResizedCrop(resize, scale=(0.5, 1.0)),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize(mean, std),
                ]
            ),
            "val": transforms.Compose(
                [
                    transforms.Resize(256),
                    transforms.CenterCrop(resize),
                    transforms.ToTensor(),
                    transforms.Normalize(mean, std),
                ]
            ),
        }

    def __call__(self, img, phase):
        return self.data_transform[phase](img)


In [10]:
import os
import random
import cv2

cat_directory = os.path.join("..", "dataset", "chap06", "dogs-vs-cats", "Cat")
dog_directory = os.path.join("..", "dataset", "chap06", "dogs-vs-cats", "Dog")
cat_image_file_paths = sorted(
    [os.path.join(cat_directory, file_name) for file_name in os.listdir(cat_directory)]
)
dog_image_file_paths = sorted(
    [os.path.join(dog_directory, file_name) for file_name in os.listdir(dog_directory)]
)
image_filepaths = [*cat_image_file_paths, *dog_image_file_paths]
correct_images_filepaths = [
    path for path in image_filepaths if cv2.imread(path) is not None
]
random.seed(42)
random.shuffle(correct_images_filepaths)
train_images_filepaths = correct_images_filepaths[:400]
val_images_filepaths = correct_images_filepaths[400:-10]
test_images_filepaths = correct_images_filepaths[-10:]


In [40]:
from torch.utils.data import Dataset
from PIL import Image


class DogVsCatDataset(Dataset):
    def __init__(self, file_list, transform=None, phase="train") -> None:
        super().__init__()
        self.file_list = file_list
        self.transform = transform
        self.phase = phase

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, index):
        img_path = self.file_list[index]
        img = Image.open(img_path)
        img_transformed = self.transform(img, self.phase)
        label = img_path.split("/")[-1].split(".")[0]
        if label == "dog":
            label = 1
        elif label == "cat":
            label = 0
        return img_transformed, label


In [41]:
size = 256
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
batch_size = 32


In [42]:
train_dataset = DogVsCatDataset(train_images_filepaths, transform=ImageTransform(size, mean, std), phase='train')
val_dataset = DogVsCatDataset(val_images_filepaths, transform=ImageTransform(size, mean, std), phase='val')
test_dataset = DogVsCatDataset(val_images_filepaths, transform=ImageTransform(size, mean, std), phase='val')

index = 0
print(train_dataset.__getitem__(index)[0].size())
print(train_dataset.__getitem__(index)[1])

torch.Size([3, 256, 256])
0


In [43]:
from torch.utils.data import DataLoader
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
dataloader_dict = {'train': train_dataloader, 'val': val_dataloader}

batch_iterator = iter(train_dataloader)
inputs, label = next(batch_iterator)
print(inputs.size())
print(label)

torch.Size([32, 3, 256, 256])
tensor([0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1,
        0, 0, 0, 0, 1, 0, 1, 0])


In [27]:
from torch import nn
import torch
class AlexNet(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256*6*6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 2),
        )
    
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [28]:
from torchsummary import summary


model = AlexNet()
summary(model, input_size=(3, 256, 256))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 63, 63]          23,296
              ReLU-2           [-1, 64, 63, 63]               0
         MaxPool2d-3           [-1, 64, 31, 31]               0
            Conv2d-4          [-1, 192, 31, 31]         307,392
              ReLU-5          [-1, 192, 31, 31]               0
         MaxPool2d-6          [-1, 192, 15, 15]               0
            Conv2d-7          [-1, 384, 15, 15]         663,936
              ReLU-8          [-1, 384, 15, 15]               0
            Conv2d-9          [-1, 256, 15, 15]         884,992
             ReLU-10          [-1, 256, 15, 15]               0
           Conv2d-11          [-1, 256, 15, 15]         590,080
             ReLU-12          [-1, 256, 15, 15]               0
        MaxPool2d-13            [-1, 256, 7, 7]               0
AdaptiveAvgPool2d-14            [-1, 25

In [29]:
from torch import optim

optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
cirterion = nn.CrossEntropyLoss()

In [30]:
from time import time
from tqdm import tqdm


def train_model(model, dataloader_dict, criterion, optimizer, num_epoch, device):
    since = time()
    best_acc = 0.0
    for epoch in range(num_epoch):
        print("="* 20)
        print(f"Epoch{epoch+1:3}/{num_epoch:3}")
        for phase in ["train", "val"]:
            if phase == "train":
                model.train()
            elif phase == "val":
                model.eval()
            else:
                raise KeyError

            epoch_loss = 0.0
            epoch_corrects = 0
            for inputs, labels in tqdm(dataloader_dict[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == "train"):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, dim=1)
                    loss = criterion(outputs, labels)
                    if phase == "train":
                        loss.backward()
                        optimizer.step()
                    epoch_loss += loss.item() * inputs.size(0)
                    epoch_corrects += torch.sum(preds == labels.data)
            epoch_loss = epoch_loss / len(dataloader_dict[phase].dataset)
            epoch_acc = epoch_corrects.double() / len(dataloader_dict[phase].dataset)
            print(f"{phase:5} Loss: {epoch_loss :.4f} Acc: {epoch_acc:.4f}")
            if phase == "val" and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_weights = model.state_dict()
    time_elapsed = time() - since
    print(f"Training complete in {time_elapsed // 60}m {time_elapsed % 60:.1f}s")
    print("Best val Acc", best_acc)
    return model, best_model_weights


In [33]:
num_epoch = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = train_model(model, dataloader_dict, cirterion, optimizer, num_epoch, device)

Epoch  1/ 10


100%|██████████| 13/13 [00:12<00:00,  1.02it/s]


train Loss: 0.6931 Acc: 0.5025


100%|██████████| 3/3 [00:01<00:00,  2.26it/s]


val   Loss: 0.6929 Acc: 0.5109
Epoch  2/ 10


100%|██████████| 13/13 [00:13<00:00,  1.03s/it]


train Loss: 0.6930 Acc: 0.5025


100%|██████████| 3/3 [00:01<00:00,  1.80it/s]


val   Loss: 0.6930 Acc: 0.5109
Epoch  3/ 10


100%|██████████| 13/13 [00:14<00:00,  1.09s/it]


train Loss: 0.6933 Acc: 0.5000


100%|██████████| 3/3 [00:01<00:00,  1.94it/s]


val   Loss: 0.6930 Acc: 0.5109
Epoch  4/ 10


100%|██████████| 13/13 [00:14<00:00,  1.11s/it]


train Loss: 0.6931 Acc: 0.5025


100%|██████████| 3/3 [00:01<00:00,  2.16it/s]


val   Loss: 0.6929 Acc: 0.5109
Epoch  5/ 10


100%|██████████| 13/13 [00:17<00:00,  1.31s/it]


train Loss: 0.6931 Acc: 0.5025


100%|██████████| 3/3 [00:01<00:00,  1.67it/s]


val   Loss: 0.6929 Acc: 0.5109
Epoch  6/ 10


100%|██████████| 13/13 [00:18<00:00,  1.42s/it]


train Loss: 0.6931 Acc: 0.5025


100%|██████████| 3/3 [00:02<00:00,  1.47it/s]


val   Loss: 0.6929 Acc: 0.5109
Epoch  7/ 10


100%|██████████| 13/13 [00:21<00:00,  1.67s/it]


train Loss: 0.6931 Acc: 0.5025


100%|██████████| 3/3 [00:02<00:00,  1.18it/s]


val   Loss: 0.6929 Acc: 0.5109
Epoch  8/ 10


100%|██████████| 13/13 [00:22<00:00,  1.76s/it]


train Loss: 0.6928 Acc: 0.5025


100%|██████████| 3/3 [00:02<00:00,  1.34it/s]


val   Loss: 0.6929 Acc: 0.5109
Epoch  9/ 10


100%|██████████| 13/13 [00:26<00:00,  2.05s/it]


train Loss: 0.6931 Acc: 0.5025


100%|██████████| 3/3 [00:03<00:00,  1.15s/it]


val   Loss: 0.6929 Acc: 0.5109
Epoch 10/ 10


100%|██████████| 13/13 [00:24<00:00,  1.85s/it]


train Loss: 0.6931 Acc: 0.5025


100%|██████████| 3/3 [00:02<00:00,  1.23it/s]

val   Loss: 0.6929 Acc: 0.5109
Training complete in 3.0m 26.3s
Best val Acc tensor(0.5109, dtype=torch.float64)



