![](./img/alexnet.png)

In [19]:
from pathlib import Path
from PIL import Image

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision import models
from sklearn.metrics import accuracy_score

In [2]:
# Directory that is searched recursively for the training JPEG files.
train_data_path = Path("data") / "train"

In [20]:
# torchvision's reference AlexNet with randomly initialised weights.
# NOTE(review): no other cell visible here uses it -- presumably kept for
# comparison with the hand-written model.
# The `pretrained` keyword is deprecated in torchvision >= 0.13; leaving it out
# gives the same untrained model on old and new versions without the warning.
alexnet = models.alexnet()

In [3]:
class ImageDataset(Dataset):
    """Dataset of image files whose class is encoded in the file name.

    File names are expected to start with the class name followed by a dot
    (e.g. ``cat.6251.jpg``); the part before the first dot is the label.

    Args:
        files: Sequence of ``pathlib.Path`` objects pointing at image files.
        transform: Callable applied to the loaded PIL image; its return value
            is the first element of every sample.
    """

    def __init__(self, files, transform):
        self.files = files
        self.transform = transform

    def __len__(self):
        """Return the number of image files in the dataset."""
        return len(self.files)

    def __getitem__(self, i):
        """Load the ``i``-th image and return ``(transformed_image, label)``.

        ``label`` is ``1`` when the file name starts with ``cat`` and ``0``
        otherwise.
        """
        file = self.files[i]
        # The context manager closes the underlying file handle promptly.
        # convert("RGB") returns an in-memory copy and guarantees three
        # channels, so grayscale / RGBA / CMYK files cannot produce tensors
        # with a channel count the network's first layer does not accept.
        with Image.open(file) as img:
            rgb_img = img.convert("RGB")
        tensor = self.transform(rgb_img)
        label = file.name.split(".")[0]
        return tensor, int(label == "cat")

In [4]:
# Collect every JPEG below the training directory. The matches are sorted
# because rglob yields them in a filesystem-dependent order, which would make
# anything derived from the list order differ between machines.
file_list = sorted(train_data_path.rglob("*.jpg"))

In [5]:
# Shuffle in place with a fixed seed so the order (and therefore any split
# taken from it) is the same each time this cell runs on the same input list.
shuffle_seed = 42
np.random.default_rng(shuffle_seed).shuffle(file_list)

In [6]:
# Preview the first ten paths after shuffling; the class name is the part of
# the file name before the first dot.
file_list[:10]

[PosixPath('data/train/dog.3751.jpg'),
 PosixPath('data/train/cat.6251.jpg'),
 PosixPath('data/train/dog.1754.jpg'),
 PosixPath('data/train/cat.8025.jpg'),
 PosixPath('data/train/dog.10299.jpg'),
 PosixPath('data/train/dog.9901.jpg'),
 PosixPath('data/train/cat.10744.jpg'),
 PosixPath('data/train/dog.5922.jpg'),
 PosixPath('data/train/dog.10292.jpg'),
 PosixPath('data/train/cat.221.jpg')]

In [7]:
# Share of the files held out for validation; the count is truncated to an int.
valid_fraction = 0.2
valid_size = int(valid_fraction * len(file_list))

In [8]:
# Number of files reserved for validation.
valid_size

5000

In [9]:
# Number of files left for training once the validation files are held out.
len(file_list) - valid_size

20000

In [10]:
# Hold out the last `valid_size` files for validation. The split index is
# computed explicitly because negative slicing misbehaves when valid_size is 0:
# `file_list[:-0]` is empty and `file_list[-0:]` is the whole list.
split_index = len(file_list) - valid_size
train_files = file_list[:split_index]
valid_files = file_list[split_index:]

In [11]:
# Target size passed to Resize and to the crop in both pipelines.
crop_size = 227

# Training pipeline: resize, take a randomly positioned crop, convert to tensor.
train_transform = transforms.Compose(
    [
        transforms.Resize(crop_size),
        transforms.RandomCrop(crop_size),
        transforms.ToTensor(),
    ]
)

# Validation pipeline: same resize, but a centred (deterministic) crop.
valid_transform = transforms.Compose(
    [
        transforms.Resize(crop_size),
        transforms.CenterCrop(crop_size),
        transforms.ToTensor(),
    ]
)

In [12]:
# Each split gets its own file list and its own transform. The validation
# dataset must wrap `valid_files`; building it from `train_files` would report
# accuracy on images the model has already been trained on.
train_dataset = ImageDataset(train_files, train_transform)
valid_dataset = ImageDataset(valid_files, valid_transform)

Each batch of images yielded by the data loaders has shape `(batch_size, n_channels, height, width)`.

In [13]:
# Reshuffle the training samples every epoch so the model does not see the
# same batches in the same order each time. Validation stays sequential:
# the order of its samples does not change the accuracy.
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=4)

In [14]:
class AlexNet(nn.Module):
    """AlexNet-style CNN producing a single raw logit per input image.

    Five convolutions (each followed by ReLU, three of them also by 3x3
    stride-2 max-pooling) feed a three-layer fully connected head. The head's
    first Linear layer expects 9216 flattened features, which is what the
    convolutional stack yields for 3x227x227 inputs (256 channels x 6 x 6).
    No sigmoid is applied to the output.
    """

    # Names of the sub-modules, in the order forward() applies them.
    _LAYER_ORDER = (
        "conv1", "relu1", "max_pool1",
        "conv2", "relu2", "max_pool2",
        "conv3", "relu3",
        "conv4", "relu4",
        "conv5", "relu5", "max_pool3",
        "flatten",
        "fc",
    )

    def __init__(self):
        super().__init__()

        # Stage 1: large-kernel, stride-4 convolution followed by pooling.
        self.conv1 = nn.Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
        self.relu1 = nn.ReLU(inplace=True)
        self.max_pool1 = nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2))

        # Stage 2: 5x5 convolution (padding keeps the spatial size) + pooling.
        self.conv2 = nn.Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
        self.relu2 = nn.ReLU(inplace=True)
        self.max_pool2 = nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2))

        # Stages 3-5: three size-preserving 3x3 convolutions, pooled once at the end.
        self.conv3 = nn.Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.relu3 = nn.ReLU(inplace=True)
        self.conv4 = nn.Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.relu4 = nn.ReLU(inplace=True)
        self.conv5 = nn.Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.relu5 = nn.ReLU(inplace=True)
        self.max_pool3 = nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2))

        self.flatten = nn.Flatten()

        # Classifier head: dropout-regularised MLP ending in one output unit.
        head_layers = [
            nn.Dropout(),
            nn.Linear(9216, 4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, 1),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run ``x`` through every layer in ``_LAYER_ORDER`` and return the logits."""
        for layer_name in self._LAYER_ORDER:
            x = getattr(self, layer_name)(x)
        return x

In [15]:
# Instantiate the hand-written network; it stays on the default (CPU) device.
net = AlexNet()

In [16]:
# Adam over all of the network's parameters.
learning_rate = 1e-3
optimizer = optim.Adam(params=net.parameters(), lr=learning_rate)

In [17]:
# Binary cross-entropy computed directly on raw logits (the sigmoid is part of
# the loss), averaged over the batch.
criteria = nn.BCEWithLogitsLoss(reduction="mean")

In [21]:
# Train for a fixed number of epochs. The loss of the current batch is printed
# every 100 iterations, and the validation accuracy after every epoch.
n_epochs = 10

# Run every batch on whatever device the model's parameters live on, so inputs
# and weights can never end up on different devices (a hard-coded "cuda" fails
# when the model is on the CPU or no GPU is present).
device = next(net.parameters()).device

for epoch in range(n_epochs):
    # The validation pass below switches the model to eval mode; switch back so
    # dropout is active again for every training epoch, not just the first.
    net.train()
    for i, (x, y) in enumerate(train_loader):
        optimizer.zero_grad()

        x = x.to(device)
        # The loss needs float targets with the same (batch, 1) shape as the logits.
        y = y.unsqueeze(1).float().to(device)
        output = net(x)
        loss = criteria(output, y)
        loss.backward()

        optimizer.step()

        if (i + 1) % 100 == 0:
            print(f"Epoch {epoch + 1} / {n_epochs}, iteration {i + 1} / {len(train_loader)}, loss: {loss.item():.3f}")

    y_true = []
    y_pred = []

    net.eval()
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for x, y in valid_loader:
            logits = net(x.to(device))
            # A sigmoid probability above 0.5 is counted as class 1.
            predicted = torch.sigmoid(logits).cpu().numpy().flatten() > 0.5
            y_true.extend(y.numpy().flatten())
            y_pred.extend(predicted)

    score = accuracy_score(y_true, y_pred)
    print(f"Epoch {epoch + 1} / {n_epochs}, accuracy: {score:.3f}")

Epoch 1 / 10, iteration 100 / 5000, loss: 0.616


KeyboardInterrupt: 