## Сверточная нейронная сеть для бинарной классификации

In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import cv2
import sklearn
import PIL
import torch
import torchvision
from matplotlib import cm
import torch

import warnings
warnings.filterwarnings('ignore')

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split
import os
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter
import torch.optim.lr_scheduler as lr_scheduler

### Подгрузка данных

In [3]:
data_dir = '/kaggle/input/dl-hw3'

transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

full_data = ImageFolder(data_dir, transform=transform)

train_size = int(0.8 * len(full_data))
val_size = int(0.1 * len(full_data))
test_size = len(full_data) - train_size - val_size

train_data, val_data, test_data = random_split(full_data, [train_size, val_size, test_size])

train_loader = DataLoader(train_data, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(val_data, batch_size=32, shuffle=False, num_workers=4)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False, num_workers=4)

In [4]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()

        self.model = nn.Sequential(            
            nn.Conv2d(3, 32, kernel_size=5, padding=1), 
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.fc = nn.Sequential(
            nn.Linear(256 * 7 * 7, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 1),
        )

    def forward(self, x):
        x = self.model(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x


In [5]:
model = CNN()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=True)

In [6]:
# Для логирования в Tensorboard
writer = SummaryWriter('logs')

In [7]:
def save_checkpoint(state, filename):
    """Сохранение состояния модели"""
    torch.save(state, filename)


def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs, start_epoch=0):
    best_val_acc = 0.0

    for epoch in tqdm(range(start_epoch, num_epochs)):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device).float().view(-1, 1)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            predicted = (outputs > 0.5).float()
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

        train_loss = running_loss / len(train_loader)
        train_acc = correct / total

        val_loss, val_acc = evaluate_model(model, val_loader, criterion)

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss:.4f}, Accuracy: {train_acc:.4f}, '
              f'Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.4f}')

        writer.add_scalar('Loss/train', train_loss, epoch)
        writer.add_scalar('Accuracy/train', train_acc, epoch)
        writer.add_scalar('Loss/val', val_loss, epoch)
        writer.add_scalar('Accuracy/val', val_acc, epoch)

        scheduler.step(val_loss)

        if val_acc > best_val_acc:
            print(f'Validation accuracy improved from {best_val_acc:.4f} to {val_acc:.4f}. Saving model...')
            best_val_acc = val_acc

            checkpoint = {
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': val_loss,
                'accuracy': val_acc
            }
            save_checkpoint(checkpoint, 'cnn_best_checkpoint.pth')


def evaluate_model(model, loader, criterion):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device).float().view(-1, 1)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            predicted = (outputs > 0.5).float()
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    loss = running_loss / len(loader)
    accuracy = correct / total
    return loss, accuracy


In [8]:
train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=20)

  5%|▌         | 1/20 [16:51<5:20:19, 1011.54s/it]

Epoch [1/20], Loss: 0.3503, Accuracy: 0.8439, Val Loss: 0.2829, Val Accuracy: 0.8837

Validation accuracy improved from 0.0000 to 0.8837. Saving model...


 10%|█         | 2/20 [28:31<4:08:27, 828.21s/it] 

Epoch [2/20], Loss: 0.2796, Accuracy: 0.8818, Val Loss: 0.2500, Val Accuracy: 0.8925

Validation accuracy improved from 0.8837 to 0.8925. Saving model...


 15%|█▌        | 3/20 [40:05<3:37:20, 767.09s/it]

Epoch [3/20], Loss: 0.2504, Accuracy: 0.8963, Val Loss: 0.2239, Val Accuracy: 0.9067

Validation accuracy improved from 0.8925 to 0.9067. Saving model...


 20%|██        | 4/20 [51:41<3:17:03, 739.00s/it]

Epoch [4/20], Loss: 0.2337, Accuracy: 0.9045, Val Loss: 0.2101, Val Accuracy: 0.9131

Validation accuracy improved from 0.9067 to 0.9131. Saving model...


 25%|██▌       | 5/20 [1:03:21<3:01:13, 724.88s/it]

Epoch [5/20], Loss: 0.2202, Accuracy: 0.9105, Val Loss: 0.1974, Val Accuracy: 0.9210

Validation accuracy improved from 0.9131 to 0.9210. Saving model...


 30%|███       | 6/20 [1:14:56<2:46:44, 714.59s/it]

Epoch [6/20], Loss: 0.2110, Accuracy: 0.9147, Val Loss: 0.2027, Val Accuracy: 0.9140

Epoch [7/20], Loss: 0.2033, Accuracy: 0.9181, Val Loss: 0.1922, Val Accuracy: 0.9245

Validation accuracy improved from 0.9210 to 0.9245. Saving model...


 40%|████      | 8/20 [1:38:22<2:21:46, 708.84s/it]

Epoch [8/20], Loss: 0.1950, Accuracy: 0.9215, Val Loss: 0.1920, Val Accuracy: 0.9203


 45%|████▌     | 9/20 [1:50:04<2:09:30, 706.45s/it]

Epoch [9/20], Loss: 0.1900, Accuracy: 0.9242, Val Loss: 0.1892, Val Accuracy: 0.9285

Validation accuracy improved from 0.9245 to 0.9285. Saving model...

Epoch [10/20], Loss: 0.1849, Accuracy: 0.9259, Val Loss: 0.1838, Val Accuracy: 0.9298

Validation accuracy improved from 0.9285 to 0.9298. Saving model...


 55%|█████▌    | 11/20 [2:13:33<1:45:48, 705.39s/it]

Epoch [11/20], Loss: 0.1799, Accuracy: 0.9281, Val Loss: 0.1838, Val Accuracy: 0.9255

Epoch [12/20], Loss: 0.1750, Accuracy: 0.9305, Val Loss: 0.1807, Val Accuracy: 0.9300

Validation accuracy improved from 0.9298 to 0.9300. Saving model...


 65%|██████▌   | 13/20 [2:36:48<1:21:48, 701.22s/it]

Epoch [13/20], Loss: 0.1707, Accuracy: 0.9312, Val Loss: 0.1849, Val Accuracy: 0.9273


 70%|███████   | 14/20 [2:48:23<1:09:55, 699.20s/it]

Epoch [14/20], Loss: 0.1670, Accuracy: 0.9335, Val Loss: 0.1847, Val Accuracy: 0.9262


 75%|███████▌  | 15/20 [3:00:02<58:16, 699.31s/it]  

Epoch [15/20], Loss: 0.1640, Accuracy: 0.9350, Val Loss: 0.1821, Val Accuracy: 0.9286

Epoch [16/20], Loss: 0.1596, Accuracy: 0.9364, Val Loss: 0.1885, Val Accuracy: 0.9319

Validation accuracy improved from 0.9300 to 0.9319. Saving model...


 85%|████████▌ | 17/20 [3:23:19<34:53, 697.88s/it]

Epoch [17/20], Loss: 0.1447, Accuracy: 0.9429, Val Loss: 0.1781, Val Accuracy: 0.9341

Validation accuracy improved from 0.9319 to 0.9341. Saving model...


 90%|█████████ | 18/20 [3:35:11<23:24, 702.17s/it]

Epoch [18/20], Loss: 0.1391, Accuracy: 0.9448, Val Loss: 0.1820, Val Accuracy: 0.9337


 95%|█████████▌| 19/20 [3:46:53<11:41, 701.91s/it]

Epoch [19/20], Loss: 0.1352, Accuracy: 0.9464, Val Loss: 0.1793, Val Accuracy: 0.9310


100%|██████████| 20/20 [3:58:17<00:00, 714.87s/it]

Epoch [20/20], Loss: 0.1335, Accuracy: 0.9469, Val Loss: 0.1771, Val Accuracy: 0.9319





![title](img/tensorboard_accuracy.png)

![title](img/tensorboard_loss.png)

### Инференс

In [12]:
def load_checkpoint(filename):

    checkpoint = torch.load(filename, map_location=torch.device(device))
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    loss = checkpoint['loss']
    accuracy = checkpoint['accuracy']
    print(f"Checkpoint loaded. Epoch: {epoch}, Loss: {loss}, Accuracy: {accuracy}")
    return epoch, loss, accuracy

# Загрузка чекпоинта
model = CNN().to(device)  # Инициализация модели
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Инициализация оптимизатора

# Восстановление модели и оптимизатора из чекпоинта
epoch, loss, accuracy = load_checkpoint('cnn_best_checkpoint.pth')

# Перевод модели в режим инференса
model.eval()


Checkpoint loaded. Epoch: 17, Loss: 0.17806168762126404, Accuracy: 0.93413231998441


CNN(
  (model): Sequential(
    (0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (14)

### Подзадания

In [15]:
params_num = sum([p.numel() for p in model.parameters() if p.requires_grad])
print(f"Кол-во обучаемых параметров сети: {params_num}")

Кол-во обучаемых параметров сети: 6815489


Вывод: Качество модели CNN для классификации indoor/outdoor изображений по метрике accuracy увеличилось благодаря
1) Применению batch normalization после свёрточных слоёв
2) Использованию ядра 5x5 на первом слое
3) Использованию scheduler для "умного" вычисления шага градиентного спуска на каждой итерации

In [10]:
all_labels = []
all_preds = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device).float().view(-1, 1)
        
        outputs = model(inputs)
        sigmoid_outputs = torch.sigmoid(outputs)
        predictions = (sigmoid_outputs > 0.5).float()
        
        
        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(predictions.cpu().numpy())

In [14]:
from sklearn.metrics import f1_score
f1 = f1_score(all_labels, all_preds)
print(f"f1 score: {f1}")

f1 score: 0.5820245424467061


In [31]:
import pandas as pd
from sklearn.metrics import confusion_matrix

conf_matrix = confusion_matrix(all_labels, all_preds)
df_conf_matrix = pd.DataFrame(conf_matrix, 
                               index=['Actual 0', 'Actual 1'], 
                               columns=['Predicted 0', 'Predicted 1'])
print("Confusion Matrix:")
df_conf_matrix

Confusion Matrix:


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,7196,14351
Actual 1,5609,13897
