In [1]:
import os
import torch as torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchsummary import summary
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np

In [2]:
torch.manual_seed(69)

<torch._C.Generator at 0x25d4e11d590>

In [3]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
transforms = transforms.Compose([
    transforms.Resize((227, 227)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [5]:
train_dir = '../../data/cat_dog/training_set'
test_dir = '../../data/cat_dog/test_set'

train_dataset = ImageFolder(train_dir, transform=transforms)
test_dataset = ImageFolder(test_dir, transform=transforms)

In [6]:
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [7]:
class AlexNet(nn.Module):
    def __init__(self):
        super(AlexNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 96, 11, 4)
        self.pool1 = nn.MaxPool2d(3, 2)
        self.conv2 = nn.Conv2d(96, 256, 5, 1, 2)
        self.pool2 = nn.MaxPool2d(3, 2)
        self.conv3 = nn.Conv2d(256, 384, 3, 1, 1)
        self.conv4 = nn.Conv2d(384, 384, 3, 1, 1)
        self.conv5 = nn.Conv2d(384, 256, 3, 1, 1)
        self.pool3 = nn.MaxPool2d(3, 2)
        self.fc1 = nn.Linear(256 * 6 * 6, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, 2)
        self.dropout = nn.Dropout(0.5)
        self.relu = nn.ReLU()

        self._initialize_weights()

    def forward(self, x):
        # [32, 3, 224, 224]
        x = self.pool1(self.relu(self.conv1(x)))
        # [32, 3, 224, 224] -> [32, 96, 55, 55]
        x = self.pool2(self.relu(self.conv2(x)))
        # [32, 96, 55, 55] -> [32, 256, 13, 13]
        x = self.relu(self.conv3(x))
        # [32, 256, 13, 13] -> [32, 384, 13, 13]
        x = self.relu(self.conv4(x))
        # [32, 384, 13, 13] -> [32, 384, 13, 13]
        x = self.pool3(self.relu(self.conv5(x)))
        # [32, 384, 13, 13] -> [32, 256, 6, 6]
        x = x.view(-1, 256 * 6 * 6)
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.dropout(self.relu(self.fc2(x)))
        x = self.fc3(x)
        return x
    
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                nn.init.constant_(m.bias, 0)

In [8]:
model = AlexNet().to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)


In [9]:
summary(model, (3, 227, 227), 32)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [32, 96, 55, 55]          34,944
              ReLU-2           [32, 96, 55, 55]               0
         MaxPool2d-3           [32, 96, 27, 27]               0
            Conv2d-4          [32, 256, 27, 27]         614,656
              ReLU-5          [32, 256, 27, 27]               0
         MaxPool2d-6          [32, 256, 13, 13]               0
            Conv2d-7          [32, 384, 13, 13]         885,120
              ReLU-8          [32, 384, 13, 13]               0
            Conv2d-9          [32, 384, 13, 13]       1,327,488
             ReLU-10          [32, 384, 13, 13]               0
           Conv2d-11          [32, 256, 13, 13]         884,992
             ReLU-12          [32, 256, 13, 13]               0
        MaxPool2d-13            [32, 256, 6, 6]               0
           Linear-14                 [3

In [10]:
def train_one_epoch(model, dataloader, optimizer, criterion):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(dataloader):
        inputs, labels = data
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(dataloader)

In [11]:
def evaluate(model, dataloader, criterion):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for i, data in enumerate(dataloader):
            inputs, labels = data
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return running_loss / len(dataloader), correct / total

In [12]:
epoch = 20
for i in range(epoch):
    train_loss = train_one_epoch(model, train_dataloader, optimizer, criterion)
    test_loss, test_acc = evaluate(model, test_dataloader, criterion)
    print(f'epoch: {i}, train_loss: {train_loss}, test_loss: {test_loss}, test_acc: {test_acc}')

epoch: 0, train_loss: 1.6279557663131046, test_loss: 0.6170331100001931, test_acc: 0.6569451309935739
epoch: 1, train_loss: 0.6401617346531842, test_loss: 0.6692647703457624, test_acc: 0.6129510627780524
epoch: 2, train_loss: 0.6033247893312538, test_loss: 0.6074794288724661, test_acc: 0.6791893227879388
epoch: 3, train_loss: 0.5879450204125438, test_loss: 0.5643278704956174, test_acc: 0.7113198220464656
epoch: 4, train_loss: 0.5280114748325956, test_loss: 0.5167098930105567, test_acc: 0.7523479980227386
epoch: 5, train_loss: 0.5032513791108987, test_loss: 0.5699457908049226, test_acc: 0.708353929807217
epoch: 6, train_loss: 0.4679032469175726, test_loss: 0.48292724695056677, test_acc: 0.782501235788433
epoch: 7, train_loss: 0.4422226379592105, test_loss: 0.4608532334677875, test_acc: 0.7795353435491844
epoch: 8, train_loss: 0.4011144198150749, test_loss: 0.45840863115154207, test_acc: 0.7849728126544736
epoch: 9, train_loss: 0.3733643940779792, test_loss: 0.46402191009838134, test_acc