# Загрузка данных

In [None]:
!pip install opendatasets --quiet

In [None]:
# Connect to Kaggle through its API and download the competition dataset.
# In Colab, upload a kaggle.json file holding the account's username and API key first
# (without it, od.download prompts for them interactively).
# After the download the dataset folder shows up in the Colab file browser.
import opendatasets as od
import pandas as pd

# SECURITY: never paste the contents of kaggle.json (username / API key) into the notebook;
# a key that was ever stored here must be treated as leaked and regenerated on Kaggle.
od.download( "https://www.kaggle.com/competitions/lamoda-images-classification")

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: adele1997
Your Kaggle Key: ··········
Downloading lamoda-images-classification.zip to ./lamoda-images-classification


100%|██████████| 265M/265M [00:00<00:00, 791MB/s]



Extracting archive ./lamoda-images-classification/lamoda-images-classification.zip to ./lamoda-images-classification


In [None]:
import os
import pandas as pd

# Count the entries in the train and test image folders of the downloaded dataset.
print(len(os.listdir('/content/lamoda-images-classification/images/train')))
print(len(os.listdir('/content/lamoda-images-classification/images/test')))

13476
3369


In [None]:
import os
import pandas as pd

def make_dataframe(img_dir):
    """Build a DataFrame of labelled image paths from the file names in ``img_dir``.

    Only files ending in .jpg / .jpeg / .png (case-insensitive) are considered.
    The label is derived from the file name: names containing 'bluzy' get 0,
    names containing 'bryuki' get 1, and any other file is skipped.

    :param img_dir: directory that holds the image files
    :return: pd.DataFrame with columns 'filename' (``img_dir`` joined with the
             file name) and 'label' (0 or 1); empty, but with both columns,
             when no file matches
    """
    records = []

    # os.listdir returns entries in arbitrary order; sorting keeps the row order
    # (and therefore any seeded split made from this frame) reproducible.
    for filename in sorted(os.listdir(img_dir)):
        lowered = filename.lower()
        if not lowered.endswith(('.jpg', '.jpeg', '.png')):
            continue

        if 'bluzy' in lowered:
            label = 0
        elif 'bryuki' in lowered:
            label = 1
        else:
            continue  # unknown label - skip the file

        records.append({'filename': os.path.join(img_dir, filename), 'label': label})

    # Explicit columns so that an empty directory still yields the expected schema.
    return pd.DataFrame(records, columns=['filename', 'label'])

In [None]:
# Build the labelled frame from the training images and preview its first rows.
df = make_dataframe("/content/lamoda-images-classification/images/train")

df.head()

Unnamed: 0,filename,label
0,/content/lamoda-images-classification/images/t...,1
1,/content/lamoda-images-classification/images/t...,1
2,/content/lamoda-images-classification/images/t...,1
3,/content/lamoda-images-classification/images/t...,1
4,/content/lamoda-images-classification/images/t...,0


In [None]:
import os

# Keep only the file name (drop the directory part) in the 'filename' column;
# ClothingDataset joins it with its own image_dir when loading an image.
df['filename'] = df['filename'].apply(os.path.basename)
df

Unnamed: 0,filename,label
0,10389_bryuki.jpg,1
1,15664_bryuki.jpg,1
2,15135_bryuki.jpg,1
3,12214_bryuki.jpg,1
4,0144_bluzy.jpg,0
...,...,...
13471,5826_bluzy.jpg,0
13472,15680_bryuki.jpg,1
13473,13302_bryuki.jpg,1
13474,7692_bluzy.jpg,0


In [None]:
# Number of rows and columns in the labelled frame.
df.shape

(13476, 2)

In [None]:
# Class balance: number of images per label (0 = 'bluzy', 1 = 'bryuki').
df['label'].value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
0,7074
1,6402


# Подготовка данных

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled images for validation; stratifying on 'label' keeps the
# class proportions equal in both parts, and random_state fixes the shuffle seed.
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=42)

print(train_df.shape, val_df.shape)

(10780, 2) (2696, 2)


В `PyTorch` есть класс `Dataset` — это как шаблон (или "заготовка") для того, чтобы научить `PyTorch` понимать наши собственные данные.

Он говорит: "Вот данные, вот как их читать, вот метки — теперь можешь тренировать модель".

Когда мы создаём свой класс, мы говорим `PyTorch`'у:

«У меня свои данные, и я объясню тебе, как с ними работать».

Базовый класс — `torch.utils.data.Dataset`. Это абстрактный класс, то есть заготовка, от которой мы наследуемся и в которой переопределяем нужные методы.

In [None]:
from torch.utils.data import Dataset

class MyDataset(Dataset):
    """Skeleton of a custom dataset showing the three methods to override.

    It is a runnable template: as written it describes an empty dataset.
    """

    def __init__(self, *args, **kwargs):
        # Load and prepare the data here (accept whatever arguments the data needs).
        pass

    def __len__(self):
        # Return the number of items in the dataset.
        return 0

    def __getitem__(self, idx):
        # Return a single item by index.
        # The template holds no items, so every index is out of range; raising
        # IndexError also lets plain iteration over the dataset terminate.
        raise IndexError(idx)

In [None]:
import torch
from torch.utils.data import Dataset
from PIL import Image
import os

class ClothingDataset(Dataset):
    """Image dataset driven by a DataFrame of file names and, for train/val, labels."""

    def __init__(self, dataframe, image_dir, transform=None, mode='train'):
        """
        :param dataframe: pd.DataFrame with a 'filename' column and (optionally) a 'label' column
        :param image_dir: directory that contains the image files
        :param transform: callable applied to the loaded PIL image (e.g. torchvision.transforms)
        :param mode: 'train' / 'val' / 'test' - decides what __getitem__ returns
        :raises ValueError: if mode is 'train' or 'val' and there is no 'label' column
        """
        self.dataframe = dataframe.reset_index(drop=True)
        self.image_dir = image_dir
        self.transform = transform
        self.mode = mode
        # Used only when a label is stored as a string rather than as an index.
        self.label_to_idx = {'bryuki': 1, 'bluzy': 0}

        needs_labels = self.mode in ('train', 'val')
        if needs_labels and 'label' not in self.dataframe.columns:
            raise ValueError("Dataframe must contain 'label' column in train/val mode.")

    def __len__(self):
        """Number of rows in the backing DataFrame."""
        return self.dataframe.shape[0]

    def __getitem__(self, idx):
        """Load the idx-th image as RGB and return it with its label (train/val) or file name."""
        record = self.dataframe.iloc[idx]
        image = Image.open(os.path.join(self.image_dir, record['filename'])).convert('RGB')

        if self.transform:
            image = self.transform(image)

        # Any mode other than train/val has no labels: hand back the file name instead.
        if self.mode not in ['train', 'val']:
            return image, record['filename']

        target = record['label']
        if isinstance(target, str):
            target = self.label_to_idx[target]
        return image, target

`DataLoader` — это обёртка над `Dataset`, которая:
- разбивает данные на батчи (`batch_size`),
- может перемешивать (`shuffle`) данные,
- может загружать данные параллельно (`num_workers`).

In [None]:
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

# Resize every image to 128x128 and convert it to a tensor.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor()
])

train_path = '/content/lamoda-images-classification/images/train'
# Training split
train_dataset = ClothingDataset(train_df, image_dir=train_path, transform=transform, mode='train')

# Validation split: it was carved out of the train folder, so it shares train_path
val_dataset = ClothingDataset(val_df, image_dir=train_path, transform=transform, mode='val')

# Only the training batches need shuffling; validation metrics do not depend on order,
# so the validation loader keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

In [None]:
# Sanity check: pull a single batch from the train loader and inspect it.
for images, labels in train_loader:
    print("Batch shape:", images.shape)      # [batch_size, 3, H, W]
    print("Labels shape:", labels.shape)     # [batch_size]
    print("Пример меток:", labels[:5])
    break

Batch shape: torch.Size([16, 3, 128, 128])
Labels shape: torch.Size([16])
Пример меток: tensor([0, 0, 1, 0, 0])


In [None]:
# val_dataset runs in 'val' mode, so each batch is (images, labels) - not file names.
for images, labels in val_loader:
    print("Batch shape:", images.shape)      # [batch_size, 3, H, W]
    print("Labels shape:", labels.shape)     # [batch_size]
    print("Пример меток:", labels[:5])
    break

Batch shape: torch.Size([16, 3, 128, 128])
Файлы: ('14998_bryuki.jpg', '11582_bryuki.jpg', '1030_bluzy.jpg', '13033_bryuki.jpg', '15415_bryuki.jpg')


# Простая нейросеть на PyTorch

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class SimpleCNN(nn.Module):
    """Small convolutional classifier: two conv + max-pool stages and two linear layers.

    :param num_classes: number of output logits
    :param input_size: side length S of the square input images; it fixes the size
                       of the first linear layer (default 128, matching the
                       128x128 resize used in this notebook)
    """

    def __init__(self, num_classes=2, input_size=128):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)  # -> [B, 16, S, S]
        self.pool1 = nn.MaxPool2d(2, 2)                          # -> [B, 16, S/2, S/2]
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1) # -> [B, 32, S/2, S/2]
        self.pool2 = nn.MaxPool2d(2, 2)                          # -> [B, 32, S/4, S/4]

        # The 3x3 convolutions with padding=1 keep the spatial size; each 2x2 pool
        # halves it (rounding down), so the flattened feature map has 32 * (S // 4)^2 values.
        pooled = input_size // 4
        self.fc1 = nn.Linear(32 * pooled * pooled, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Map a batch of images [B, 3, S, S] to raw class logits [B, num_classes]."""
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = x.flatten(1)  # keep the batch dimension, flatten the rest
        x = F.relu(self.fc1(x))
        x = self.fc2(x)  # no softmax here: the raw logits are returned
        return x

# Обучение нейронной сети

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# Device: use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device:", device)

Using device: cuda


In [None]:
# Model, loss function and optimizer
model = SimpleCNN(num_classes=2).to(device)
# The loss is applied directly to the model's raw outputs in the training loop.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [None]:
# Training loop
num_epochs = 5

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The criterion yields a per-batch mean; weight it by the batch size so a
        # smaller final batch does not skew the epoch average.
        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size
        _, predicted = torch.max(outputs, 1)
        total += batch_size
        correct += (predicted == labels).sum().item()

    # Both metrics are per-sample averages over the whole epoch.
    acc = correct / total
    avg_loss = running_loss / total
    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {avg_loss:.4f} - Accuracy: {acc:.4f}")

Epoch 1/5 - Loss: 0.1467 - Accuracy: 0.9469
Epoch 2/5 - Loss: 0.0424 - Accuracy: 0.9859
Epoch 3/5 - Loss: 0.0327 - Accuracy: 0.9883
Epoch 4/5 - Loss: 0.0253 - Accuracy: 0.9921
Epoch 5/5 - Loss: 0.0202 - Accuracy: 0.9932


In [None]:
test_path = '/content/lamoda-images-classification/images/test'

# os.listdir returns names in arbitrary order; sort them so the frame (and the
# submission built from it) has the same row order on every run.
test_df = pd.DataFrame(sorted(os.listdir(test_path)), columns = ['filename'])
test_df.head()

Unnamed: 0,filename
0,0896.jpg
1,0232.jpg
2,0013.jpg
3,0721.jpg
4,3036.jpg


In [None]:
# 'test' mode: the dataset yields (image, filename) pairs because there are no labels.
test_dataset = ClothingDataset(test_df, image_dir=test_path, transform=transform, mode='test')

# Inference gains nothing from shuffling; keep the loader in the frame's order.
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [None]:
# Sanity check: pull a single batch from the test loader; its second element holds file names.
for images, filenames in test_loader:
    print("Batch shape:", images.shape)
    print("Файлы:", filenames[:5])
    break

Batch shape: torch.Size([16, 3, 128, 128])
Файлы: ('2692.jpg', '1964.jpg', '1588.jpg', '2347.jpg', '0645.jpg')


In [None]:
# Switch the model to evaluation mode and collect one predicted class index per test file.
model.eval()
predictions, filenames = [], []

with torch.no_grad():
    for images, names in test_loader:
        images = images.to(device)
        outputs = model(images)
        # Index of the largest logit in each row is the predicted class.
        preds = outputs.argmax(dim=1)
        predictions += list(preds.cpu().numpy())
        filenames += list(names)

In [None]:
# Map predicted class indices back to the label names.
idx_to_label = {1: 'bryuki', 0: 'bluzy'}

all_labels = [idx_to_label[p] for p in predictions]

# One row per test image: file name in 'index', predicted label name in 'label'.
submission_df = pd.DataFrame({
    'index': filenames,
    'label': all_labels
})

submission_df

Unnamed: 0,index,label
0,1686.jpg,bluzy
1,2215.jpg,bryuki
2,3351.jpg,bryuki
3,0720.jpg,bluzy
4,1921.jpg,bryuki
...,...,...
3364,0541.jpg,bluzy
3365,3225.jpg,bryuki
3366,3209.jpg,bryuki
3367,2115.jpg,bryuki


In [None]:
# Write the predictions to submission.csv without the DataFrame's row index.
submission_df.to_csv('submission.csv', index=False)

# Как использовать готовые предобученные модели в PyTorch

## ResNet модели

1. Откуда брать готовые модели?

В `PyTorch` есть встроенный пакет `torchvision.models`, который содержит много популярных архитектур с предобученными весами на `ImageNet`, например:
- ResNet (resnet18, resnet50, ...)
- VGG (vgg16, vgg19, ...)
- DenseNet
- MobileNet
- EfficientNet
- AlexNet
- и другие

2. Почему использовать предобученные модели?

Быстрая сходимость и хорошая точность даже на небольших датасетах (т.к. модель уже "видела" много картинок и научилась выделять базовые признаки).

Можно использовать как `feature extractor` (заморозить все слои и обучать только классификатор).

Можно дообучать всю модель (`fine-tuning`).

In [None]:
import torchvision.models as models

# Load ResNet18 with pretrained weights
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour of `weights=` -
# confirm against the installed version before changing the call.
model = models.resnet18(pretrained=True)

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models

# Use the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class MyResNet18Classifier(nn.Module):
    """ResNet18 whose final fully connected layer is replaced by a ``num_classes``-way layer.

    :param num_classes: number of output logits of the new final layer
    :param pretrained: forwarded to ``models.resnet18`` as its ``pretrained`` argument
    """

    def __init__(self, num_classes=2, pretrained=True):
        super(MyResNet18Classifier, self).__init__()
        backbone = models.resnet18(pretrained=pretrained)
        # Swap the original final layer for one sized to our task,
        # reusing its input width.
        backbone.fc = nn.Linear(backbone.fc.in_features, num_classes)
        self.model = backbone

    def forward(self, x):
        """Run the batch ``x`` through the wrapped network and return its output."""
        logits = self.model(x)
        return logits

# Create the classifier with a 2-class head and move its parameters to the selected device.
model = MyResNet18Classifier(num_classes=2, pretrained=True)
model.to(device)

In [None]:
# NOTE(review): this cell repeats the previous one - it re-creates the model from scratch,
# and `device` becomes a plain string here instead of a torch.device.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = MyResNet18Classifier(num_classes=2, pretrained=True)
model.to(device)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 76.0MB/s]


MyResNet18Classifier(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True

In [None]:
# How to freeze layers

# First switch off gradients for every parameter of the wrapped ResNet ...
for param in model.model.parameters():
    param.requires_grad = False

# ... then switch them back on for the final fully connected layer only.
for param in model.model.fc.parameters():
    param.requires_grad = True

In [None]:
# Rebuild the loaders: only the training data is shuffled; validation and test keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [None]:
# Sanity check: inspect one batch from the train loader.
for images, labels in train_loader:
    print("Batch shape:", images.shape)      # [batch_size, 3, H, W]
    print("Labels shape:", labels.shape)     # [batch_size]
    print("Пример меток:", labels[:5])
    break

Batch shape: torch.Size([16, 3, 128, 128])
Labels shape: torch.Size([16])
Пример меток: tensor([0, 0, 0, 1, 1])


In [None]:
# Sanity check: inspect one batch from the validation loader.
for images, labels in val_loader:
    print("Batch shape:", images.shape)      # [batch_size, 3, H, W]
    print("Labels shape:", labels.shape)     # [batch_size]
    print("Пример меток:", labels[:5])
    break

Batch shape: torch.Size([16, 3, 128, 128])
Labels shape: torch.Size([16])
Пример меток: tensor([1, 1, 0, 1, 0])


In [None]:
# Sanity check: inspect one batch from the test loader (images plus their file names).
for images, filenames in test_loader:
    print("Batch shape:", images.shape)      # [batch_size, 3, H, W]
    print("Пример названий:", filenames[:5])
    break

Batch shape: torch.Size([16, 3, 128, 128])
Пример названий: ('0896.jpg', '0232.jpg', '0013.jpg', '0721.jpg', '3036.jpg')


In [None]:
# Loss and optimizer for the ResNet classifier.
criterion = nn.CrossEntropyLoss()
# All parameters are handed to Adam, including those whose requires_grad was set to False above.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [None]:
def train_epoch(model, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``dataloader`` and report its metrics.

    :param model: network to train; it is switched to training mode
    :param dataloader: iterable of (images, labels) batches
    :param criterion: loss callable taking (outputs, labels)
    :param optimizer: optimizer that updates the model's parameters
    :param device: device the batches are moved to
    :return: (loss averaged over samples, accuracy) for the pass
    """
    model.train()
    loss_sum, hits, seen = 0.0, 0, 0

    for images, labels in dataloader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits = model(images)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        # Scale the batch loss by the batch size so the final division by the sample
        # count gives a per-sample average (assumes the criterion returns a batch mean).
        loss_sum += batch_loss.item() * images.size(0)
        hits += (logits.argmax(dim=1) == labels).sum().item()
        seen += labels.size(0)

    return loss_sum / seen, hits / seen

In [None]:
def validate_epoch(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without updating its parameters.

    :param model: network to evaluate; it is switched to evaluation mode
    :param dataloader: iterable of (images, labels) batches
    :param criterion: loss callable taking (outputs, labels)
    :param device: device the batches are moved to
    :return: (loss averaged over samples, accuracy) for the pass
    """
    model.eval()
    loss_sum, hits, seen = 0.0, 0, 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)

            logits = model(images)
            batch_loss = criterion(logits, labels)

            # Same per-sample weighting as in training
            # (assumes the criterion returns a batch mean).
            loss_sum += batch_loss.item() * images.size(0)
            hits += (logits.argmax(dim=1) == labels).sum().item()
            seen += labels.size(0)

    return loss_sum / seen, hits / seen

In [None]:
# Train for num_epochs epochs: one training pass and one validation pass per epoch,
# printing both sets of metrics after each.
num_epochs = 10

for epoch in range(num_epochs):
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = validate_epoch(model, val_loader, criterion, device)
    print(f"Epoch {epoch+1}/{num_epochs} — "
          f"Train loss: {train_loss:.4f}, Train acc: {train_acc:.4f} — "
          f"Val loss: {val_loss:.4f}, Val acc: {val_acc:.4f}")

Epoch 1/10 — Train loss: 0.0498, Train acc: 0.9831 — Val loss: 0.0377, Val acc: 0.9881
Epoch 2/10 — Train loss: 0.0313, Train acc: 0.9905 — Val loss: 0.0321, Val acc: 0.9896
Epoch 3/10 — Train loss: 0.0244, Train acc: 0.9927 — Val loss: 0.0445, Val acc: 0.9826
Epoch 4/10 — Train loss: 0.0182, Train acc: 0.9951 — Val loss: 0.0231, Val acc: 0.9926
Epoch 5/10 — Train loss: 0.0139, Train acc: 0.9960 — Val loss: 0.0226, Val acc: 0.9918
Epoch 6/10 — Train loss: 0.0127, Train acc: 0.9968 — Val loss: 0.0244, Val acc: 0.9911
Epoch 7/10 — Train loss: 0.0109, Train acc: 0.9968 — Val loss: 0.0095, Val acc: 0.9967
Epoch 8/10 — Train loss: 0.0087, Train acc: 0.9977 — Val loss: 0.0083, Val acc: 0.9981
Epoch 9/10 — Train loss: 0.0061, Train acc: 0.9986 — Val loss: 0.0110, Val acc: 0.9963
Epoch 10/10 — Train loss: 0.0068, Train acc: 0.9975 — Val loss: 0.0097, Val acc: 0.9978


In [None]:
# Predict on the test set with the current model and write the submission file.
model.eval()
idx_to_label = {1: 'bryuki', 0: 'bluzy'}

all_preds = []
all_filenames = []

with torch.no_grad():
    for images, filenames in test_loader:   # test_loader must yield file names as the second element
        images = images.to(device)
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_filenames.extend(filenames)

# Translate class indices into label names.
all_labels = [idx_to_label[i] for i in all_preds]

import pandas as pd
submission_df = pd.DataFrame({'index': all_filenames, 'label': all_labels})
# This writes to the same 'submission.csv' path as the earlier SimpleCNN submission.
submission_df.to_csv('submission.csv', index=False)

## EfficientNet

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models

# Use the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class MyEfficientNetClassifier(nn.Module):
    """EfficientNet-B0 whose ``classifier[1]`` layer is replaced by a ``num_classes``-way layer.

    :param num_classes: number of output logits of the new layer
    :param pretrained: forwarded to ``models.efficientnet_b0`` as its ``pretrained`` argument
    """

    def __init__(self, num_classes=2, pretrained=True):
        super(MyEfficientNetClassifier, self).__init__()
        backbone = models.efficientnet_b0(pretrained=pretrained)

        # Replace the classifier: keep the input width of classifier[1], change its output size
        head_inputs = backbone.classifier[1].in_features
        backbone.classifier[1] = nn.Linear(head_inputs, num_classes)
        self.model = backbone

    def forward(self, x):
        """Run the batch ``x`` through the wrapped network and return its output."""
        logits = self.model(x)
        return logits

# Create the EfficientNet classifier with a 2-class head and move it to the selected device.
model = MyEfficientNetClassifier(num_classes=2, pretrained=True)
model.to(device)