In [1]:
import os
import torch as torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchsummary import summary
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np

In [None]:
# Fix the seed of PyTorch's global random number generator so runs are repeatable.
SEED = 69
torch.manual_seed(SEED)

In [2]:
# Run on the GPU whenever CUDA is usable; otherwise stay on the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

In [3]:
# Preprocessing applied to every image: resize to 227x227, convert to a tensor,
# then normalise each of the three channels with mean 0.5 and std 0.5.
#
# The pipeline is built through a separate module alias. The result is still
# bound to `transforms` because later cells read it under that name, but that
# assignment replaces the `torchvision.transforms` module alias from the import
# cell. Building through `tv_transforms` keeps this cell re-runnable: calling
# `transforms.Compose` a second time would fail, since `transforms` is by then
# the Compose object itself.
from torchvision import transforms as tv_transforms

transforms = tv_transforms.Compose([
    tv_transforms.Resize((227, 227)),
    tv_transforms.ToTensor(),
    tv_transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

In [4]:
# Root folders of the two dataset splits (relative to the notebook location).
train_dir = '../../data/cat_dog/training_set'
test_dir = '../../data/cat_dog/test_set'

# Both splits go through the same preprocessing pipeline.
train_dataset, test_dataset = (
    ImageFolder(root=split_dir, transform=transforms)
    for split_dir in (train_dir, test_dir)
)

In [5]:
# Mini-batch size shared by both loaders.
BATCH_SIZE = 32

# Only the training split is shuffled; the test split keeps a fixed order.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
class AlexNet(nn.Module):
    """AlexNet-style convolutional network with a two-class output head.

    The fully connected head is sized for a ``256 x 6 x 6`` feature map, which
    is what the convolutional stack produces for ``[N, 3, 227, 227]`` inputs.
    ``forward`` returns raw logits of shape ``[N, 2]``; no softmax is applied.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: five convolutions with three max-pooling stages.
        self.conv1 = nn.Conv2d(3, 96, 11, 4)
        self.pool1 = nn.MaxPool2d(3, 2)
        self.conv2 = nn.Conv2d(96, 256, 5, 1, 2)
        self.pool2 = nn.MaxPool2d(3, 2)
        self.conv3 = nn.Conv2d(256, 384, 3, 1, 1)
        self.conv4 = nn.Conv2d(384, 384, 3, 1, 1)
        self.conv5 = nn.Conv2d(384, 256, 3, 1, 1)
        self.pool3 = nn.MaxPool2d(3, 2)
        # Classifier head: two hidden layers followed by the 2-way output.
        self.fc1 = nn.Linear(256 * 6 * 6, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, 2)
        # Shared, stateless helpers reused after several layers.
        self.dropout = nn.Dropout(0.5)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: image batch of shape ``[N, 3, 227, 227]``.

        Returns:
            Tensor of shape ``[N, 2]`` holding the un-normalised class scores.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not lead to a
                ``256 x 6 x 6`` feature map (raised by ``fc1``).
        """
        # x: [N, 3, 227, 227]
        x = self.pool1(self.relu(self.conv1(x)))
        # conv1 -> [N, 96, 55, 55], pool1 -> [N, 96, 27, 27]
        x = self.pool2(self.relu(self.conv2(x)))
        # conv2 -> [N, 256, 27, 27], pool2 -> [N, 256, 13, 13]
        x = self.relu(self.conv3(x))
        # conv3 -> [N, 384, 13, 13]
        x = self.relu(self.conv4(x))
        # conv4 -> [N, 384, 13, 13]
        x = self.pool3(self.relu(self.conv5(x)))
        # conv5 -> [N, 256, 13, 13], pool3 -> [N, 256, 6, 6]

        # Flatten everything except the batch dimension. Reshaping with
        # view(-1, 256 * 6 * 6) would silently fold spatial positions into the
        # batch dimension whenever the feature map is not 6 x 6 but its size is
        # a multiple of 256 * 6 * 6; keeping N fixed makes fc1 fail instead.
        x = x.flatten(start_dim=1)
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.dropout(self.relu(self.fc2(x)))
        x = self.fc3(x)
        return x

In [7]:
# Step size used by the Adam optimizer.
LEARNING_RATE = 0.0001

# Create the network and move its parameters to the selected device.
model = AlexNet()
model = model.to(DEVICE)

# Cross-entropy loss on the raw logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)


In [8]:
# Print a per-layer table of output shapes and parameter counts for one
# 3 x 227 x 227 image at a batch size of 32.
input_shape = (3, 227, 227)
summary(model, input_shape, 32)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [32, 96, 55, 55]          34,944
              ReLU-2           [32, 96, 55, 55]               0
         MaxPool2d-3           [32, 96, 27, 27]               0
            Conv2d-4          [32, 256, 27, 27]         614,656
              ReLU-5          [32, 256, 27, 27]               0
         MaxPool2d-6          [32, 256, 13, 13]               0
            Conv2d-7          [32, 384, 13, 13]         885,120
              ReLU-8          [32, 384, 13, 13]               0
            Conv2d-9          [32, 384, 13, 13]       1,327,488
             ReLU-10          [32, 384, 13, 13]               0
           Conv2d-11          [32, 256, 13, 13]         884,992
             ReLU-12          [32, 256, 13, 13]               0
        MaxPool2d-13            [32, 256, 6, 6]               0
           Linear-14                 [3

In [9]:
def train_one_epoch(model, dataloader, optimizer, criterion):
    """Train ``model`` for one full pass over ``dataloader``.

    Args:
        model: the ``nn.Module`` to optimise; it is switched to training mode.
        dataloader: iterable yielding ``(inputs, labels)`` batches. It does not
            need to support ``len()``.
        optimizer: optimizer that updates the parameters of ``model``.
        criterion: callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.

    Returns:
        float: the mean of the per-batch loss values seen during the epoch.

    Raises:
        ValueError: if ``dataloader`` yields no batches.
    """
    model.train()

    # Move each batch to the device the model lives on, rather than relying on
    # a notebook-level global that may be missing or out of sync with the model.
    # A model without parameters has no device, so batches are left as they are.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    running_loss = 0.0
    num_batches = 0
    for inputs, labels in dataloader:
        if device is not None:
            inputs, labels = inputs.to(device), labels.to(device)
        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        num_batches += 1

    if num_batches == 0:
        raise ValueError("dataloader yielded no batches; cannot compute a mean loss")
    return running_loss / num_batches

In [10]:
def evaluate(model, dataloader, criterion):
    """Evaluate ``model`` on every batch of ``dataloader`` without updating it.

    Args:
        model: the ``nn.Module`` to evaluate; it is switched to eval mode.
        dataloader: iterable yielding ``(inputs, labels)`` batches. It does not
            need to support ``len()``.
        criterion: callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.

    Returns:
        tuple[float, float]: ``(mean_loss, accuracy)`` where ``mean_loss`` is
        the mean of the per-batch loss values and ``accuracy`` is the fraction
        of samples whose highest-scoring class equals the label.

    Raises:
        ValueError: if ``dataloader`` yields no samples.
    """
    model.eval()

    # Move each batch to the device the model lives on, rather than relying on
    # a notebook-level global that may be missing or out of sync with the model.
    # A model without parameters has no device, so batches are left as they are.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    running_loss = 0.0
    num_batches = 0
    correct = 0
    total = 0
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for inputs, labels in dataloader:
            if device is not None:
                inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            running_loss += criterion(outputs, labels).item()
            num_batches += 1
            # Predicted class = index of the largest score along dim 1.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("dataloader yielded no samples; cannot compute metrics")
    return running_loss / num_batches, correct / total

In [11]:
# Total number of passes over the training set.
epoch = 10

# After each training pass, measure loss and accuracy on the test split and
# report all three numbers for that epoch.
for i in range(epoch):
    train_loss = train_one_epoch(
        model=model,
        dataloader=train_dataloader,
        optimizer=optimizer,
        criterion=criterion,
    )
    test_metrics = evaluate(model=model, dataloader=test_dataloader, criterion=criterion)
    test_loss, test_acc = test_metrics
    print(f'epoch: {i}, train_loss: {train_loss}, test_loss: {test_loss}, test_acc: {test_acc}')

epoch: 0, train_loss: 0.6688631000034363, test_loss: 0.692940064240247, test_acc: 0.5570934256055363
epoch: 1, train_loss: 0.6263032778800721, test_loss: 0.5939315976575017, test_acc: 0.6846267918932278
epoch: 2, train_loss: 0.5689054595759191, test_loss: 0.5799907816108316, test_acc: 0.6994562530894711
epoch: 3, train_loss: 0.4860276678406385, test_loss: 0.44768622354604304, test_acc: 0.7938704893722195
epoch: 4, train_loss: 0.41842166070206704, test_loss: 0.4907483458518982, test_acc: 0.7849728126544736
epoch: 5, train_loss: 0.36240037592996166, test_loss: 0.39131883764639497, test_acc: 0.8363816114681166
epoch: 6, train_loss: 0.316205856957996, test_loss: 0.37085045129060745, test_acc: 0.8329214038556599
epoch: 7, train_loss: 0.2723030579755031, test_loss: 0.3677449888782576, test_acc: 0.8269896193771626
epoch: 8, train_loss: 0.21494586589148795, test_loss: 0.4089826731942594, test_acc: 0.8205635195254573
epoch: 9, train_loss: 0.1662838514745829, test_loss: 0.4058612852822989, test_