# Загрузка и обработка файлов

## Скачивание данных, распаковка и запись

In [23]:
'''
import os
from urllib.request import urlretrieve
import zipfile

# Создание папки data, если не существует
os.makedirs('./data', exist_ok=True)

# Пути к файлам
train_zip_path = './data/train.zip'
valid_zip_path = './data/valid.zip'

# Скачивание архивов
urlretrieve('https://www.dropbox.com/scl/fi/bel6gt6vsb3onahlxvyjc/train_fix.zip?rlkey=q2wscp6wv9j2hbk07y1mbcm54&dl=1', train_zip_path)
urlretrieve('https://www.dropbox.com/scl/fi/cwwblwhvqgwubb8a4xg90/valid.zip?rlkey=mow899lvyawq4wku2m8lfvrh3&dl=1', valid_zip_path)

# Распаковка
with zipfile.ZipFile(train_zip_path, 'r') as zip_ref:
    zip_ref.extractall('./data/train')

with zipfile.ZipFile(valid_zip_path, 'r') as zip_ref:
    zip_ref.extractall('./data/valid')

print("Готово! Архивы скачаны и распакованы в папку ./data")
'''


'\nimport os\nfrom urllib.request import urlretrieve\nimport zipfile\n\n# Создание папки data, если не существует\nos.makedirs(\'./data\', exist_ok=True)\n\n# Пути к файлам\ntrain_zip_path = \'./data/train.zip\'\nvalid_zip_path = \'./data/valid.zip\'\n\n# Скачивание архивов\nurlretrieve(\'https://www.dropbox.com/scl/fi/bel6gt6vsb3onahlxvyjc/train_fix.zip?rlkey=q2wscp6wv9j2hbk07y1mbcm54&dl=1\', train_zip_path)\nurlretrieve(\'https://www.dropbox.com/scl/fi/cwwblwhvqgwubb8a4xg90/valid.zip?rlkey=mow899lvyawq4wku2m8lfvrh3&dl=1\', valid_zip_path)\n\n# Распаковка\nwith zipfile.ZipFile(train_zip_path, \'r\') as zip_ref:\n    zip_ref.extractall(\'./data/train\')\n\nwith zipfile.ZipFile(valid_zip_path, \'r\') as zip_ref:\n    zip_ref.extractall(\'./data/valid\')\n\nprint("Готово! Архивы скачаны и распакованы в папку ./data")\n'

## Разделим виды бабочек по классам в разные папки

In [24]:
'''
import os
import shutil
from pathlib import Path

def restructure_dataset(source_dir):
    for file_name in os.listdir(source_dir):
        if not file_name.endswith(('.jpg', '.jpeg', '.png')):
            continue

        # Извлекаем имя класса из имени файла (до первой скобки)
        class_name = file_name.split(' (')[0]
        class_dir = os.path.join(source_dir, class_name)

        # Создаем папку, если нужно
        os.makedirs(class_dir, exist_ok=True)

        # Перемещаем файл в папку
        src_path = os.path.join(source_dir, file_name)
        dst_path = os.path.join(class_dir, file_name)
        shutil.move(src_path, dst_path)

# Применяем к train и valid
restructure_dataset('./data/train')
restructure_dataset('./data/valid')

print("✅ Структура папок исправлена")
'''

'\nimport os\nimport shutil\nfrom pathlib import Path\n\ndef restructure_dataset(source_dir):\n    for file_name in os.listdir(source_dir):\n        if not file_name.endswith((\'.jpg\', \'.jpeg\', \'.png\')):\n            continue\n\n        # Извлекаем имя класса из имени файла (до первой скобки)\n        class_name = file_name.split(\' (\')[0]\n        class_dir = os.path.join(source_dir, class_name)\n\n        # Создаем папку, если нужно\n        os.makedirs(class_dir, exist_ok=True)\n\n        # Перемещаем файл в папку\n        src_path = os.path.join(source_dir, file_name)\n        dst_path = os.path.join(class_dir, file_name)\n        shutil.move(src_path, dst_path)\n\n# Применяем к train и valid\nrestructure_dataset(\'./data/train\')\nrestructure_dataset(\'./data/valid\')\n\nprint("✅ Структура папок исправлена")\n'

# Подготовка данных

In [25]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import torch.nn as nn
import torch.nn.functional as F

In [26]:
import os

torch.set_num_threads(os.cpu_count())  

In [27]:
# Аугментации и нормализация
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
])

transform_valid = transforms.Compose([
    transforms.ToTensor()
])

In [28]:
# Загрузка датасета
train_data_full = datasets.ImageFolder(root='./data/train', transform=transform_train)
valid_data = datasets.ImageFolder(root='./data/valid', transform=transform_valid)

# Разделение тренировочной на обучающую и внутреннюю валидацию
train_size = int(0.8 * len(train_data_full))
val_size = len(train_data_full) - train_size
train_data, train_val_data = random_split(train_data_full, [train_size, val_size])

In [29]:
# DataLoader'ы
train_loader = DataLoader(train_data, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(train_val_data, batch_size=32)
valid_loader = DataLoader(valid_data, batch_size=32)

In [30]:
# Число классов
num_classes = len(train_data_full.classes)
print(f"Всего классов: {num_classes}")

Всего классов: 75


# Работа с моделью

## Создание модели

In [31]:
class ButterflyCNNImproved(nn.Module):
    def __init__(self, num_classes):
        super(ButterflyCNNImproved, self).__init__()
        self.conv_block = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(32, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(128, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 14 * 14, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        x = self.conv_block(x)
        x = self.fc(x)
        return x



## Обучение модели

In [33]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ButterflyCNN(num_classes=num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Тренировка
epochs = 7
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    acc = 100 * correct / total
    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss:.3f}, Accuracy: {acc:.2f}%")


Epoch 1/7, Loss: 851.550, Accuracy: 11.35%
Epoch 2/7, Loss: 610.435, Accuracy: 31.11%
Epoch 3/7, Loss: 487.324, Accuracy: 43.59%
Epoch 4/7, Loss: 405.291, Accuracy: 52.21%
Epoch 5/7, Loss: 350.717, Accuracy: 58.56%
Epoch 6/7, Loss: 310.785, Accuracy: 61.81%
Epoch 7/7, Loss: 269.968, Accuracy: 66.80%


In [36]:
additional_epochs = 3
for epoch in range(epochs, epochs + additional_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    acc = 100 * correct / total
    print(f"[DO-обучение] Epoch {epoch+1}, Loss: {running_loss:.3f}, Accuracy: {acc:.2f}%")


[DO-обучение] Epoch 8, Loss: 83.824, Accuracy: 88.99%
[DO-обучение] Epoch 9, Loss: 78.072, Accuracy: 89.55%
[DO-обучение] Epoch 10, Loss: 78.554, Accuracy: 89.45%


## Сохранение и загрузка модели

In [37]:
torch.save(model.state_dict(), './model/butterfly_model_new.pth')
print("Модель сохранена")

Модель сохранена


# Оценка модели

In [39]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in valid_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"🎯 Accuracy на валидационном наборе: {accuracy:.2f}%")


🎯 Accuracy на валидационном наборе: 68.40%
