In [1]:
import os
from typing import Callable

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import random_split
from torchvision import transforms
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader, Subset

from PIL import Image
import matplotlib.pyplot as plt

from tqdm import trange
from tqdm.auto import tqdm

from IPython.display import clear_output

In [2]:
from sklearn.utils.class_weight import compute_class_weight
from torch.utils.data import WeightedRandomSampler

In [3]:
from sklearn.model_selection import KFold

In [None]:
MODEL_NAME = "efficientnet_b4_tuned"

In [5]:
TRAIN_LABELS_PATH = "/kaggle/input/dl-5-image-classification/train-labels.csv"
IMG_TRAIN_DIR = "/kaggle/input/dl-5-image-classification/train"

In [None]:
MODEL_OUTPUT_PATH = f"/kaggle/working/{MODEL_NAME}.pth"
LOGS_PATH = f"/kaggle/working/{MODEL_NAME}-logs.npy"

In [None]:
BATCH_SIZE = 64
NUM_EPOCHS = 100
PATIENCE = 10
NUM_CLASSES = 20
LEARNING_RATE = 1e-4

In [6]:
df_train = pd.read_csv(TRAIN_LABELS_PATH)
df_train = df_train[df_train["image"] != "39401.jpg"]
df_train

Unnamed: 0,image,label
0,21310.jpg,Sunglasses
1,8993.jpg,Sports Shoes
2,33363.jpg,Wallets
3,27744.jpg,Handbags
4,11603.jpg,Kurtas
...,...,...
16571,57536.jpg,Belts
16572,56337.jpg,Watches
16573,29762.jpg,Watches
16574,11515.jpg,Heels


In [7]:
labels = df_train.label.unique()
labels

array(['Sunglasses', 'Sports Shoes', 'Wallets', 'Handbags', 'Kurtas',
       'Casual Shoes', 'Shirts', 'Perfume and Body Mist', 'Tshirts',
       'Tops', 'Flip Flops', 'Backpacks', 'Watches', 'Belts',
       'Formal Shoes', 'Socks', 'Jeans', 'Heels', 'Sandals', 'Briefs'],
      dtype=object)

In [8]:
map_label_to_int = {}
map_int_to_label = {}

for i, label in enumerate(labels):
    map_label_to_int[label] = i
    map_int_to_label[i] = label

In [9]:
df_train["label_int"] = df_train["label"].map(map_label_to_int)

In [10]:
df_train

Unnamed: 0,image,label,label_int
0,21310.jpg,Sunglasses,0
1,8993.jpg,Sports Shoes,1
2,33363.jpg,Wallets,2
3,27744.jpg,Handbags,3
4,11603.jpg,Kurtas,4
...,...,...,...
16571,57536.jpg,Belts,13
16572,56337.jpg,Watches,12
16573,29762.jpg,Watches,12
16574,11515.jpg,Heels,17


## Класс для работы с кастомным датасетом

In [11]:
class FashionDataset(Dataset):
    def __init__(self, df_meta: pd.DataFrame, img_dir: str, transform: Callable=None):
        super().__init__()
        self.img_labels = df_meta
        self.img_dir = img_dir
        self.transform = transform
                                    
    def __getitem__(self, idx: int) -> (torch.tensor, torch.tensor):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = Image.open(img_path)
        label = self.img_labels.iloc[idx, 2]

        if self.transform:
            image = self.transform(image)

        return image, label
        

    def __len__(self) -> int:
        return len(self.img_labels)

## Трансформации данных

In [None]:
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406], 
        std=[0.229, 0.224, 0.225]
    )
])

In [None]:
full_dataset = FashionDataset(
    df_meta = df_train,
    img_dir = IMG_TRAIN_DIR,
    transform = None
)

In [None]:
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_subset, val_subset = random_split(full_dataset, [train_size, val_size])

In [None]:
train_subset.dataset.transform = train_transform
val_subset.dataset.transform = val_transform

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Training on: {device}")

In [None]:
labels = [label for _, label in train_subset]

In [None]:
class_weights = compute_class_weight('balanced', classes=np.unique(labels), y=labels)
class_weights = torch.tensor(class_weights, dtype=torch.float).to(device)

In [None]:
class_counts = np.bincount(labels)

sample_weights = 1.0 / class_counts[labels]
sample_weights = torch.tensor(sample_weights, dtype=torch.float)

sampler = WeightedRandomSampler(sample_weights, len(sample_weights))

In [None]:
train_dataloader = DataLoader(
    train_subset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
    sampler=sampler
)

val_dataloader = DataLoader(
    val_subset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4
)

In [None]:
# model = models.efficientnet_b4(pretrained=True)
# model.classifier[1] = nn.Linear(model.classifier[1].in_features, NUM_CLASSES)

model = models.resnet50(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)

In [None]:
torch.cuda.device_count()

In [None]:
model = model.to(device)

if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)
model.to(device)

In [None]:
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5, factor=0.1)

In [None]:
import time

In [None]:
best_val_loss = float("inf")
patience = 10
training_time_start = time.time()

val_loss_logs = []
train_loss_logs = []
for epoch in range(NUM_EPOCHS):
    model.train()
    running_loss = 0.0
    for images, labels in train_dataloader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step() 
        running_loss += loss.item()

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in val_dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

    train_loss = running_loss / len(train_dataloader)
    val_loss = val_loss / len(val_dataloader)

    scheduler.step(val_loss)
    print(f"Epoch [{epoch + 1}/{NUM_EPOCHS}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, LR: {optimizer.param_groups[0]['lr']}")

    train_loss_logs.append(train_loss)
    val_loss_logs.append(val_loss)
    
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'val_loss': val_loss,
        }, MODEL_OUTPUT_PATH)
        patience = 10
    else:
        patience -= 1
        if patience == 0:
            print("Early stopping")
            break

training_time_end = time.time()
training_time = training_time_end - training_time_start

logs = {
    "val_loss": val_loss_logs,
    "train_loss": train_loss_logs,
    "learning_time": training_time
}
np.save(LOGS_PATH, logs)

print(f"Training complete! Training time: {training_time}")

# Обучение с K-Fold Cross Validation

In [18]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Гиперпараметры

In [13]:
BATCH_SIZE = 64
NUM_EPOCHS = 100
PATIENCE = 10
NUM_CLASSES = 20
LEARNING_RATE = 1e-4

### Функция, возвращающая предобученную модель

In [14]:
def create_model():
    model = models.efficientnet_b4(
        weights=models.EfficientNet_B4_Weights.IMAGENET1K_V1
    )
    model.classifier[1] = nn.Linear(model.classifier[1].in_features, NUM_CLASSES)

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model.to(device)

    return model

### Инициализация переменной для работы с кастомным датасетом

In [15]:
dataset = FashionDataset(
    df_meta = df_train,
    img_dir = IMG_TRAIN_DIR,
    transform = None
)

### Трансформации для тренировочного и валидационного набора данных

In [16]:
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406], 
        std=[0.229, 0.224, 0.225]
    ),
])

In [17]:
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406], 
        std=[0.229, 0.224, 0.225]
    )
])

In [20]:
import time

### Цикл обучения

In [21]:
def train(model, train_dataloader, val_dataloader, criterion, optimizer, scheduler, fold):
    best_val_loss = float("inf")
    patience = 10
    training_time_start = time.time()

    val_loss_logs = []
    train_loss_logs = []
    for epoch in range(NUM_EPOCHS):
        model.train()
        running_loss = 0.0
        for images, labels in train_dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step() 
            running_loss += loss.item()

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for images, labels in val_dataloader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

        train_loss = running_loss / len(train_dataloader)
        val_loss = val_loss / len(val_dataloader)

        scheduler.step(val_loss)
        print(f"Epoch [{epoch + 1}/{NUM_EPOCHS}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, LR: {optimizer.param_groups[0]['lr']}")

        train_loss_logs.append(train_loss)
        val_loss_logs.append(val_loss)
    
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), f"/kaggle/working/efficientnet_b4-fold-{fold+1}.pth")
            patience = 10
        else:
            patience -= 1
            if patience == 0:
                print("Early stopping")
                break

    training_time_end = time.time()
    training_time = training_time_end - training_time_start

    logs = {
        "val_loss": val_loss_logs,
        "train_loss": train_loss_logs,
        "learning_time": training_time
    }
    np.save(f"/kaggle/working/logs-fold-{fold}", logs)

    return best_val_loss


## Главный цикл K-Fold Cross-Validation

In [None]:
%%time

from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import KFold

k = 5
kfold = KFold(n_splits=k, shuffle=True, random_state=42)

fold_results = []
for fold, (train_ids, val_ids) in enumerate(kfold.split(dataset)):
    print(f"Fold {fold+1}")
    
    train_subset = Subset(dataset, train_ids)
    val_subset = Subset(dataset, val_ids)

    train_subset.dataset.transform = train_transform
    val_subset.dataset.transform = val_transform

    train_labels = [label for _, label in train_subset]
    class_counts = torch.bincount(torch.tensor(train_labels))
    class_weights = 1. / class_counts
    class_weights = class_weights / class_weights.sum()        
    sample_weights = class_weights[train_labels]
    sampler = WeightedRandomSampler(sample_weights, num_samples=len(sample_weights), replacement=True)

    model = create_model()
    train_dataloader = DataLoader(
        train_subset, 
        batch_size=BATCH_SIZE, 
        sampler=sampler
    )
    val_dataloader = DataLoader(
        val_subset, 
        batch_size=BATCH_SIZE, 
        shuffle=False
    )
    criterion = nn.CrossEntropyLoss(weight=class_weights.to(device))
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True)

    val_loss = train(model, train_dataloader, val_dataloader, criterion, optimizer, scheduler, fold)
    fold_results.append(val_loss)

print(f"Fold results: {fold_results}")
print(f"Average validation loss: {sum(fold_results)/len(fold_results)}")

Fold 1
Epoch [1/100], Train Loss: 1.2001, Val Loss: 0.3365, LR: 0.0001
Epoch [2/100], Train Loss: 0.2123, Val Loss: 0.2300, LR: 0.0001
Epoch [3/100], Train Loss: 0.1380, Val Loss: 0.2039, LR: 0.0001
Epoch [4/100], Train Loss: 0.1003, Val Loss: 0.1858, LR: 0.0001
Epoch [5/100], Train Loss: 0.0782, Val Loss: 0.1867, LR: 0.0001
Epoch [6/100], Train Loss: 0.0602, Val Loss: 0.1779, LR: 0.0001
Epoch [7/100], Train Loss: 0.0567, Val Loss: 0.1742, LR: 0.0001
Epoch [8/100], Train Loss: 0.0469, Val Loss: 0.1851, LR: 0.0001
Epoch [9/100], Train Loss: 0.0374, Val Loss: 0.2077, LR: 0.0001
Epoch [10/100], Train Loss: 0.0345, Val Loss: 0.1866, LR: 0.0001
Epoch [11/100], Train Loss: 0.0320, Val Loss: 0.1920, LR: 0.0001
Epoch [12/100], Train Loss: 0.0291, Val Loss: 0.2010, LR: 0.0001
Epoch [13/100], Train Loss: 0.0261, Val Loss: 0.1999, LR: 1e-05
Epoch [14/100], Train Loss: 0.0216, Val Loss: 0.1922, LR: 1e-05
Epoch [15/100], Train Loss: 0.0224, Val Loss: 0.1896, LR: 1e-05
Epoch [16/100], Train Loss: 0.

In [26]:
class TestFashionDataset(Dataset):
    def __init__(self, df_images: pd.DataFrame, img_dir: str, transform: Callable=None):
        super().__init__()
        self.img_names = df_images
        self.img_dir = img_dir
        self.transform = transform
                                    
    def __getitem__(self, idx: int) -> (torch.tensor, torch.tensor):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_path = os.path.join(self.img_dir, self.img_names.iloc[idx, 0])
        image = Image.open(img_path)

        if self.transform:
            image = self.transform(image)

        return image
        
    def __len__(self) -> int:
        return len(self.img_names)

In [27]:
df_images = pd.read_csv("/kaggle/input/dl-5-image-classification/sample_submission.csv")
df_images

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


Unnamed: 0,image,label
0,4117.jpg,
1,48551.jpg,
2,18248.jpg,
3,45905.jpg,
4,44184.jpg,
...,...,...
16569,41512.jpg,
16570,52106.jpg,
16571,27085.jpg,
16572,51680.jpg,


In [1]:
test_dataset = TestFashionDataset(
    df_images = df_images,
    img_dir = "/kaggle/input/dl-5-image-classification/test/test",
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
)

In [2]:
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=4
)

In [31]:
%%time

for data in test_dataloader:
    continue

CPU times: user 288 ms, sys: 2.08 s, total: 2.37 s
Wall time: 3min 20s


In [3]:
label_to_index = {
        "Sunglasses": 0,
        "Sports Shoes": 1,
        "Wallets": 2,
        "Handbags": 3,
        "Kurtas": 4,
        "Casual Shoes": 5,
        "Shirts": 6,
        "Perfume and Body Mist": 7,
        "Tshirts": 8,
        "Tops": 9,
        "Flip Flops": 10,
        "Backpacks": 11,
        "Watches": 12,
        "Belts": 13,
        "Formal Shoes": 14,
        "Socks": 15,
        "Jeans": 16,
        "Heels": 17,
        "Sandals": 18,
        "Briefs": 19
    }

In [4]:
index_to_label = {v: k for k, v in label_to_index.items()}

In [13]:
models_to_test = []
for i in range(k):
    model = create_model()
    checkpoint = torch.load(f"/kaggle/working/efficientnet_b4-fold-{i+1}.pth")   
    model.load_state_dict(checkpoint)
    model.eval()
    models_to_test.append(model)

  checkpoint = torch.load(f"/kaggle/working/efficientnet_b4-fold-{i+1}.pth")


## Предсказание с использованием всех обученных моделей и усреднением их логитов

In [None]:
predictions = []
with torch.no_grad():
    for images in test_dataloader:
        images = images.to(device)
        outputs0 = models_to_test[0](images)
        outputs1 = models_to_test[1](images)
        outputs2 = models_to_test[2](images)
        outputs3 = models_to_test[3](images)
        outputs4 = models_to_test[4](images)

        avg_outputs = (outputs0 + outputs1 + outputs2 + outputs3 + outputs4) / k
        _, predicted = torch.max(avg_outputs, 1)
        predictions.extend(predicted.cpu().numpy())

In [8]:
text_predictions = [index_to_label[pred] for pred in predictions]

In [9]:
df = pd.read_csv('/kaggle/input/dl-5-image-classification/sample_submission.csv')

In [10]:
df['label'] = text_predictions

In [11]:
df.to_csv("/kaggle/working/sub-1.csv", index=False)