## Импорт библиотек

In [269]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split
import os
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import Subset
import numpy as np
import matplotlib.pyplot as plt
from tensorboard import notebook

## Считывание и преобразование данных

In [203]:
transform = transforms.Compose([
    transforms.Resize((128, 128)),   
    transforms.ToTensor(),           
    transforms.Normalize(mean=[0.5, 0.5, 0.5], 
                         std=[0.5, 0.5, 0.5])  
])

data = ImageFolder("data/", transform=transform)
"""
class_indices = {i: np.where(np.array(data.targets) == i)[0] for i in range(len(data.classes))}
subset_indices = []

for indices in class_indices.values():
    subset_size = int(0.5 * len(indices))
    selected_indices = np.random.choice(indices, subset_size, replace=False)
    subset_indices.extend(selected_indices)

subset_data = Subset(data, subset_indices)

train_size = int(0.8 * len(subset_data))
val_size = int(0.1 * len(subset_data))
test_size = len(subset_data) - train_size - val_size

train_data, val_data, test_data = random_split(subset_data, [train_size, val_size, test_size])
"""
train_size = int(0.8 * len(data))
val_size = int(0.1 * len(data))
test_size = len(data) - train_size - val_size

train_data, val_data, test_data = random_split(data, [train_size, val_size, test_size])

train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=32, shuffle=False)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

## SummaryWriter для сохранения результататов эксперементов

In [235]:
tensorboardWriter = SummaryWriter('logs/experiment_14')

## Структура модели

In [237]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        
        self.fc_layers = nn.Sequential(
            nn.Linear(128 * 8 * 8, 1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, 1),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)
        x = self.fc_layers(x)
        return x


## Проверка доступности видеокарты

In [239]:
print(torch.cuda.is_available()) 
print(torch.cuda.device_count())   
print(torch.cuda.current_device()) 
print(torch.cuda.get_device_name(0))  

True
1
0
NVIDIA GeForce RTX 3070


## Перевод расчетов модели на видеокарту

In [241]:
model = CNN()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [263]:
def save_checkpoint(state, filename='checkpoint.pth'):
    """Сохранение состояния модели"""
    torch.save(state, filename)


def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10, start_epoch=0):
    best_val_acc = 0.0
    
    for epoch in tqdm(range(start_epoch, start_epoch + num_epochs)):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device).float().view(-1, 1)

            optimizer.zero_grad()
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            predicted = (outputs > 0.5).float()
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
        
        train_loss = running_loss / len(train_loader)
        train_acc = correct / total
        
        val_loss, val_acc = evaluate_model(model, val_loader, criterion)
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss:.4f}, Accuracy: {train_acc:.4f}, '
              f'Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.4f}')
        
        tensorboardWriter.add_scalar('Loss/train', train_loss, epoch)
        tensorboardWriter.add_scalar('Accuracy/train', train_acc, epoch)
        tensorboardWriter.add_scalar('Loss/val', val_loss, epoch)
        tensorboardWriter.add_scalar('Accuracy/val', val_acc, epoch)
        

        if val_acc > best_val_acc:
            print(f'Validation accuracy improved from {best_val_acc:.4f} to {val_acc:.4f}. Saving model...')
            best_val_acc = val_acc
            
            checkpoint = {
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': val_loss,
                'accuracy': val_acc
            }
            save_checkpoint(checkpoint, 'cnn_best_checkpoint.pth')
            
    
def evaluate_model(model, loader, criterion):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device).float().view(-1, 1)

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item()
            predicted = (outputs > 0.5).float()
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
    
    loss = running_loss / len(loader)
    accuracy = correct / total
    return loss, accuracy


## Обучение модели

In [249]:
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=7)

 14%|███████████▌                                                                     | 1/7 [14:49<1:28:57, 889.61s/it]

Epoch [1/7], Loss: 0.2240, Accuracy: 0.9153, Val Loss: 0.2344, Val Accuracy: 0.9128
Validation accuracy improved from 0.0000 to 0.9128. Saving model...


 29%|███████████████████████▏                                                         | 2/7 [29:40<1:14:10, 890.16s/it]

Epoch [2/7], Loss: 0.2193, Accuracy: 0.9165, Val Loss: 0.2206, Val Accuracy: 0.9163
Validation accuracy improved from 0.9128 to 0.9163. Saving model...


 43%|███████████████████████████████████▌                                               | 3/7 [44:26<59:14, 888.55s/it]

Epoch [3/7], Loss: 0.2168, Accuracy: 0.9177, Val Loss: 0.2240, Val Accuracy: 0.9144


 57%|███████████████████████████████████████████████▍                                   | 4/7 [59:12<44:22, 887.44s/it]

Epoch [4/7], Loss: 0.2199, Accuracy: 0.9191, Val Loss: 0.2306, Val Accuracy: 0.9145


 71%|█████████████████████████████████████████████████████████▊                       | 5/7 [1:13:44<29:23, 881.71s/it]

Epoch [5/7], Loss: 0.2120, Accuracy: 0.9200, Val Loss: 0.2190, Val Accuracy: 0.9190
Validation accuracy improved from 0.9163 to 0.9190. Saving model...


 86%|█████████████████████████████████████████████████████████████████████▍           | 6/7 [1:28:25<14:41, 881.47s/it]

Epoch [6/7], Loss: 0.2114, Accuracy: 0.9202, Val Loss: 0.2271, Val Accuracy: 0.9174


100%|█████████████████████████████████████████████████████████████████████████████████| 7/7 [1:43:13<00:00, 884.84s/it]

Epoch [7/7], Loss: 0.2091, Accuracy: 0.9214, Val Loss: 0.2203, Val Accuracy: 0.9178





## Результат точности предсказний на тесте

In [251]:
test_loss, test_accuracy = evaluate_model(model, test_loader, criterion)

print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')

Test Loss: 0.2307, Test Accuracy: 0.9151


In [177]:
# 1 Как в семинаре Test Loss: 1.2679, Test Accuracy: 0.8753
# 2 Как в семинаре, но l_rate = 0.0004 Test Loss: 0.9375, Test Accuracy: 0.8622
# 3 Добавил новый слой в полносвязанную часть (1024, 128) Test Loss: 1.1222, Test Accuracy: 0.8612
# 1, 2, 3 - на 10% данных
# 4 Как в предыдущем случае но на 50% данных (слишком долго)
# 5 20% данных без 3 слоя свертки Test Loss: 0.9078, Test Accuracy: 0.8648
# 6 20% данных 4 слоя свертки 5 этох Test Loss: 0.2779, Test Accuracy: 0.8953
# 7 10% данных 4 слоя свертки 5 этох Test Loss: 0.3122, Test Accuracy: 0.8721
# 8 20% данных первая архитектура 5 этох Test Loss: 0.0008, Test Accuracy: 1.0000
# 9 25% даных 4 свертки 2 полных Test Loss: 0.2804, Test Accuracy: 0.8887
# 10 25% даных 4 свертки c 16 до 128 2 полных слоя 5 эпох Test Loss: 0.2849, Test Accuracy: 0.8918
# 11 25% даных 4 свертки c 16 до 128 2 полных слоя 7 эпох Test Loss: 0.3030, Test Accuracy: 0.8925
# 12 25% даных 4 свертки c 16 до 128 2 полных слоя 5 эпох Test Loss: 0.5584, Test Accuracy: 0.8896
# 13 50% даных 4 свертки c 16 до 128 2 полных слоя 7 эпох Test Loss: 0.2544, Test Accuracy: 0.9005

In [None]:
# 14 100% данных 4 сертки с 16 до 128 2 полных слоя 4 эпохи Test Loss: 0.2399, Test Accuracy: 0.9043

## Загрузка модели и дообучение

In [255]:
def load_checkpoint(filename, model, optimizer):
    checkpoint = torch.load(filename)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    loss = checkpoint['loss'] 
    accuracy = checkpoint['accuracy']
    print(f"Checkpoint loaded. Epoch: {epoch},  Val Loss: {loss}, Val Accuracy: {accuracy}")
    return model, optimizer, epoch, loss, accuracy

In [265]:
model = CNN().to(device)  
optimizer = optim.Adam(model.parameters(), lr=0.001)

model, optimizer, start_epoch, val_loss, best_val_acc = load_checkpoint('cnn_best_checkpoint.pth', model, optimizer)

train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=5, start_epoch=7)

  checkpoint = torch.load(filename)


Checkpoint loaded. Epoch: 5,  Val Loss: 0.2189508285562182, Val Accuracy: 0.9190051641820131


 20%|████████████████▌                                                                  | 1/5 [14:34<58:19, 874.90s/it]

Epoch [8/5], Loss: 0.2103, Accuracy: 0.9206, Val Loss: 0.2265, Val Accuracy: 0.9159
Validation accuracy improved from 0.0000 to 0.9159. Saving model...


 40%|█████████████████████████████████▏                                                 | 2/5 [29:21<44:04, 881.57s/it]

Epoch [9/5], Loss: 0.2097, Accuracy: 0.9206, Val Loss: 0.2643, Val Accuracy: 0.8966


 60%|█████████████████████████████████████████████████▊                                 | 3/5 [44:03<29:23, 881.87s/it]

Epoch [10/5], Loss: 0.2079, Accuracy: 0.9213, Val Loss: 0.2292, Val Accuracy: 0.9167
Validation accuracy improved from 0.9159 to 0.9167. Saving model...


 80%|██████████████████████████████████████████████████████████████████▍                | 4/5 [58:51<14:44, 884.40s/it]

Epoch [11/5], Loss: 0.2125, Accuracy: 0.9205, Val Loss: 0.2419, Val Accuracy: 0.9141


100%|████████████████████████████████████████████████████████████████████████████████| 5/5 [1:35:50<00:00, 1150.16s/it]

Epoch [12/5], Loss: 0.2051, Accuracy: 0.9222, Val Loss: 0.2549, Val Accuracy: 0.9106





In [267]:
test_loss, test_accuracy = evaluate_model(model, test_loader, criterion)

print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')

Test Loss: 0.2626, Test Accuracy: 0.9047


## Визуализация логов эксперементов 

In [271]:
log_dir = 'logs/'
notebook.start("--logdir " + log_dir)

## Наилучший результат (Accuracy ~ 0.916)

In [273]:
model = CNN().to(device)  
optimizer = optim.Adam(model.parameters(), lr=0.001)

model, optimizer, start_epoch, val_loss, best_val_acc = load_checkpoint('cnn_best_checkpoint.pth', model, optimizer)

Checkpoint loaded. Epoch: 10,  Val Loss: 0.2292349541810975, Val Accuracy: 0.9167397447140212


  checkpoint = torch.load(filename)


In [275]:
test_loss, test_accuracy = evaluate_model(model, test_loader, criterion)

print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')

Test Loss: 0.2417, Test Accuracy: 0.9137
