In [1]:
# Для установки gdown убрать комментарий
# !pip install gdown

### Загрузка данных:

In [2]:
import gdown
import shutil
import os

# Data loading

data_zip_url = "https://drive.google.com/file/d/1TG9P5B2k3eTbC4XDxDmEc07dyAORPC16/view?usp=sharing" # обучение
test_zip_url = "https://drive.google.com/file/d/12QrDrLT1F-X7UycvOoApXFqxTw3Zx93K/view?usp=sharing" # тест

# все выполняется в колабе, при необходимости замените пути до файлов

data_zip_path = "/content/data.zip"
test_zip_path = "/content/test.zip"

gdown.download(data_zip_url, data_zip_path, fuzzy=True)
gdown.download(test_zip_url, test_zip_path, fuzzy=True)

shutil.unpack_archive(data_zip_path, '.', 'zip')
shutil.unpack_archive(test_zip_path, '.', 'zip')

os.remove(data_zip_path)
os.remove(test_zip_path)

Downloading...
From (original): https://drive.google.com/uc?id=1TG9P5B2k3eTbC4XDxDmEc07dyAORPC16
From (redirected): https://drive.google.com/uc?id=1TG9P5B2k3eTbC4XDxDmEc07dyAORPC16&confirm=t&uuid=794af176-9ed6-4214-9394-7a71239de2b0
To: /content/data.zip
100%|██████████| 2.28G/2.28G [00:14<00:00, 154MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=12QrDrLT1F-X7UycvOoApXFqxTw3Zx93K
From (redirected): https://drive.google.com/uc?id=12QrDrLT1F-X7UycvOoApXFqxTw3Zx93K&confirm=t&uuid=bf7de75c-4d97-4c53-b7f0-7c7c0e2b8e3e
To: /content/test.zip
100%|██████████| 222M/222M [00:05<00:00, 38.3MB/s]


Эмоции:
- neutral - нейтральная эмоция
- anger - гнев, злость
- contempt - презрение
- disgust - отвращение
- fear - страх
- happy - веселый
- sad - грусть
- surprise - удивленность
- uncertain - неуверенность

### Подготовка данных и обучение модели:

## Импорты

In [3]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from torchvision import models
from PIL import Image
from tqdm import tqdm

Разметка эмоций:

In [4]:
emotion_labels = {
    'anger': 0, 'contempt': 1, 'disgust': 2, 'fear': 3, 'happy': 4,
    'neutral': 5, 'sad': 6, 'surprise': 7, 'uncertain': 8
}

Класс для обработки данных:

In [5]:
class EmotionDataset(Dataset):
    def __init__(self, file_dir, transform=None):
        self.file_dir = file_dir
        self.transform = transform
        self.image_files = []
        self.labels = []

        for emotion, label in emotion_labels.items():
            emotion_dir = os.path.join(file_dir, emotion)
            for img_file in os.listdir(emotion_dir):
                if img_file.endswith('.jpg'):
                    self.image_files.append(os.path.join(emotion_dir, img_file))
                    self.labels.append(label)

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_path = self.image_files[idx]
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        label = self.labels[idx]
        return image, label

Трансформация изображений:

In [6]:
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

Создание датасета и загрузчика:

In [7]:
# заменить адрес ниже на актуальный для локального выполнения
train_dataset = EmotionDataset(file_dir="/content/train", transform=transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4, pin_memory=True)



Создание модели:

In [8]:
class EmotionClassifier(nn.Module):
    def __init__(self):
        super(EmotionClassifier, self).__init__()
        self.model = models.mobilenet_v2(pretrained=True)
        self.model.classifier[1] = nn.Linear(self.model.last_channel, 9)

    def forward(self, x):
        return self.model(x)

In [9]:
model = EmotionClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)



In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

EmotionClassifier(
  (model): MobileNetV2(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
      )
      (1): InvertedResidual(
        (conv): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): ReLU6(inplace=True)
          )
          (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
      (2): InvertedResidual(
        (conv): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(16, 96, kernel_siz

Обучение и предсказание на test выборке:

In [11]:
num_epochs = 3
for epoch in tqdm(range(num_epochs)):
    model.train()
    for images, labels in train_loader:
        images, labels = images.to(device, non_blocking=True), labels.to(device, non_blocking=True)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


class TestDataset(Dataset):
    def __init__(self, file_dir, transform=None):
        self.file_dir = file_dir
        self.transform = transform
        self.image_files = [f for f in os.listdir(file_dir) if f.endswith('.jpg')]


    def __len__(self):
        return len(self.image_files)


    def __getitem__(self, idx):
        img_path = os.path.join(self.file_dir, self.image_files[idx])
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, self.image_files[idx]



100%|██████████| 3/3 [13:39<00:00, 273.11s/it]


Создание датасета для тестирования на Kaggle и prediction:

In [12]:
# заменить адрес ниже на актуальный для локального выполнения
test_dataset = TestDataset(file_dir="/content/test_kaggle", transform=transform)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4, pin_memory=True)

In [13]:
model.eval()
predictions = []
with torch.no_grad():
    for images, image_files in test_loader:
        images = images.to(device, non_blocking=True)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(zip(image_files, predicted.cpu().numpy()))

In [14]:
reverse_emotion_labels = {v: k for k, v in emotion_labels.items()}
predictions = [(img, reverse_emotion_labels[pred]) for img, pred in predictions]

Сохранение модели:

In [15]:
submission_df = pd.DataFrame(predictions, columns=['image_path', 'emotion'])
submission_df.to_csv("submission.csv", index=False)

In [16]:
torch.save(model.state_dict(), "emotion_classifier.pth")

In [19]:
print(submission_df.head(20))

   image_path   emotion
0     514.jpg   neutral
1    4785.jpg       sad
2    3971.jpg     happy
3    4087.jpg       sad
4     601.jpg  surprise
5    3999.jpg   neutral
6     860.jpg   disgust
7     835.jpg   neutral
8     329.jpg  surprise
9    3139.jpg   neutral
10   2560.jpg   neutral
11   2258.jpg       sad
12   2286.jpg       sad
13   2146.jpg      fear
14   1198.jpg       sad
15   3197.jpg     anger
16   1879.jpg  surprise
17    752.jpg   neutral
18    816.jpg     happy
19   4799.jpg   neutral
