In [None]:
!pwd
!unzip /content/drive/MyDrive/one-piece-classification-2025.zip -d /content/data

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
import os
import copy

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset, Subset
from torchvision import datasets, transforms
from torchvision.models import efficientnet_b3, EfficientNet_B3_Weights
from PIL import Image
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

**Основная идея:**

1) Взять готовую модель EfficientNet
2) Обучить классификатор на своих данных
3) Произвести полный fine-tuning

**Гипотеза:**

Классфикатор обучится быстро и стабильно, что не сломает сложные слои. Малый размер датасета может привести к переобучению и оверфиттингу.

В первом случае берем больший lr, тк шанс все поломать низкий

In [8]:
batch_size = 32
train_dir = "/content/data/splitted/train"
test_dir = "/content/data/splitted/test"


In [4]:
full_dataset = datasets.ImageFolder(root=train_dir)
num_classes = len(full_dataset.classes)

image_paths = [path for path, label in full_dataset.samples]
labels = [label for path, label in full_dataset.samples]

train_paths, val_paths, train_labels, val_labels = train_test_split(
    image_paths, labels, test_size=0.1, stratify=labels, random_state=42)

print(f"Train: {len(train_paths)}, Val: {len(val_paths)}")


Train: 2623, Val: 292


In [17]:
weights = EfficientNet_B3_Weights.IMAGENET1K_V1
model = efficientnet_b3(weights=weights)

In [46]:
# Кастомные трансформации
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.05),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

val_transforms = transforms.Compose([
    transforms.Resize(256),       # небольшое изменение размера для последующего кропа
    transforms.CenterCrop(224),   # центрируем до 224x224
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

class ImageDataset(torch.utils.data.Dataset):
    def __init__(self, file_paths, labels, transform=None):
        self.paths = file_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        image = Image.open(self.paths[idx]).convert("RGB")
        if self.transform:
            image = self.transform(image)
        label = self.labels[idx]
        return image, label

# Создаем объекты датасетов
train_dataset = ImageDataset(train_paths, train_labels, transform=train_transforms)
val_dataset   = ImageDataset(val_paths,   val_labels,   transform=val_transforms)


In [47]:

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader   = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.manual_seed(42)
if device.type == 'cuda':
    torch.cuda.manual_seed(42)
    torch.backends.cudnn.benchmark = True  # оптимизация CUDA для повторяющихся размеров
print(device)


cuda


In [48]:
# Заменяем последний классификатор
in_features = model.classifier[1].in_features  # число входов последнего слоя
model.classifier[1] = nn.Linear(in_features, num_classes)

model = model.to(device)
print(model.classifier)

# Замораживаем все слои кроме последнего
for param in model.features.parameters():
    param.requires_grad = False

# обучаем только параметры классификатора
optimizer_stage1 = torch.optim.AdamW(model.classifier[1].parameters(), lr=1e-3, weight_decay=1e-4)
criterion = nn.CrossEntropyLoss()


Sequential(
  (0): Dropout(p=0.3, inplace=True)
  (1): Linear(in_features=1536, out_features=18, bias=True)
)


In [49]:
def train_one_epoch(model, dataloader, optimizer, criterion, device, scaler=None):
    model.train()
    running_loss = 0.0
    # Если используется смешанная точность, scaler должен быть передан
    for inputs, targets in dataloader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        # Автоматическая смешанная точность
        with torch.cuda.amp.autocast(enabled=(scaler is not None)):
            outputs = model(inputs)
            loss = criterion(outputs, targets)
        # Обратное распространение с GradScaler, если он задан
        if scaler:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()
        running_loss += loss.item() * inputs.size(0)
    epoch_loss = running_loss / len(dataloader.dataset)
    return epoch_loss

def evaluate(model, dataloader, criterion, device):
    model.eval()
    running_loss = 0.0
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            running_loss += loss.item() * inputs.size(0)
            # Предсказанные классы:
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(targets.cpu().numpy())
    epoch_loss = running_loss / len(dataloader.dataset)

    epoch_f1 = f1_score(all_labels, all_preds, average='macro')
    return epoch_loss, epoch_f1


In [None]:
# обучение только последнего слоя
epochs_stage1 = 3
scaler = torch.amp.GradScaler("cuda") if device.type == "cuda" else None

best_f1 = 0.0
for epoch in range(1, epochs_stage1+1):
    train_loss = train_one_epoch(model, train_loader, optimizer_stage1, criterion, device, scaler)
    val_loss, val_f1 = evaluate(model, val_loader, criterion, device)
    print(f"Epoch {epoch}/{epochs_stage1} - "f"Train loss: {train_loss:.4f}, Val loss: {val_loss:.4f}, Val F1: {val_f1:.4f}")

    if val_f1 > best_f1:
      best_f1 = val_f1
      best_state = copy.deepcopy(model.state_dict())
      best_epoch = epoch
print(best_f1)


In [None]:
# размораживаем все слои и настраиваем оптимизатор для fine-tuning всей модели
for param in model.features.parameters():
    param.requires_grad = True

optimizer_stage2 = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
scaler = torch.cuda.amp.GradScaler() if device.type == 'cuda' else None

patience = 5
wait = 0
best_f1_stage2 = best_f1
best_state_stage2 = best_state.copy()
best_epoch_stage2 = 0

for epoch in range(1, 51):
    train_loss = train_one_epoch(model, train_loader, optimizer_stage2, criterion, device, scaler)
    val_loss, val_f1 = evaluate(model, val_loader, criterion, device)
    print(f"Epoch {epoch} - Train loss: {train_loss:.4f}, Val loss: {val_loss:.4f}, Val F1: {val_f1:.4f}")
    if val_f1 > best_f1_stage2:
        # Улучшение на валидации - сохраняем модель
        best_f1_stage2 = val_f1
        best_state_stage2 = model.state_dict().copy()
        best_epoch_stage2 = epoch
        wait = 0
    else:
        wait += 1
        if wait >= patience:
            print("Выход на плато")
            break


In [52]:
# лучшие веса модели
model.load_state_dict(best_state_stage2)
print(f"Лучшая эпоха{best_epoch_stage2}")

# финальная оценка на валидации с лучшими весами
val_loss, val_f1 = evaluate(model, val_loader, criterion, device)
print(f" {val_f1:.4f}")


Лучшая эпоха17
 0.9171


In [None]:
class TestDataset(Dataset):
    def __init__(self, image_ids, image_dir, transform=None):
        self.image_ids = image_ids
        self.image_dir = image_dir
        self.transform = transform
        self.extensions = ["", ".png", ".jpg", ".jpeg", ".JPG", ".JPEG", ".PNG"]

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        img_id = str(self.image_ids[idx])

        # доработка для расширений
        image_path = None
        for ext in self.extensions:
            trial = os.path.join(self.image_dir, img_id + ext)
            if os.path.exists(trial):
                image_path = trial
                break

        image = Image.open(image_path).convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, img_id


def make_submission_csv(
    model,
    best_state,
    test_dir,
    sample_submission_path="sample_submission.csv",
    output_path="submission.csv",
    transform=None,
    batch_size=32,
):
    index_to_class = {
        0: "Ace",
        1: "Akainu",
        2: "Brook",
        3: "Chopper",
        4: "Crocodile",
        5: "Franky",
        6: "Jinbei",
        7: "Kurohige",
        8: "Law",
        9: "Luffy",
        10: "Mihawk",
        11: "Nami",
        12: "Rayleigh",
        13: "Robin",
        14: "Sanji",
        15: "Shanks",
        16: "Usopp",
        17: "Zoro"
    }

    model.load_state_dict(best_state)
    model.to(device)
    model.eval()

    # шаблон submission
    sub_df = pd.read_csv(sample_submission_path)
    id_col = sub_df.columns[0]
    label_col = sub_df.columns[1]

    image_ids = sub_df[id_col].tolist()

    test_dataset = TestDataset(image_ids=image_ids, image_dir=test_dir, transform=transform)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    predictions = []

    with torch.no_grad():
        for images, ids in test_loader:
            images = images.to(device)

            outputs = model(images)
            preds_idx = outputs.argmax(dim=1).cpu().numpy()

            for p in preds_idx:
                predictions.append(p)

    # сохраняем предсказания
    sub_df[label_col] = predictions
    sub_df.to_csv(output_path, index=False)

    print(f"submission файл создан: {output_path}")


In [None]:
make_submission_csv(
    model=model,
    best_state=best_state_stage2,
    test_dir=test_dir,
    sample_submission_path="data/submission.csv",
    output_path="submission_1.csv",
    transform=val_transforms,
    batch_size=32
)




submission файл создан: submission_1.csv


In [None]:
/content/data/splitted/test/88f6f8c2-d752-4876-b3c0-a5407ecd30f3.png