# Лабораторная работа 3
### Выполнил: Бейлин Давид Михайлович

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from pathlib import Path
from PIL import Image
import os
import torchvision.transforms as transforms
import numpy as np
import torch.nn.functional as F
from tqdm import tqdm


import random

# Seed every random number generator the notebook can touch so that runs are
# repeatable: the augmentation step draws its files with `random.sample`, and
# torch drives weight initialisation and DataLoader shuffling.
seed_value = 42
random.seed(seed_value)
np.random.seed(seed_value)
torch.manual_seed(seed_value)
# Ask cuDNN for deterministic kernels and disable its autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Use the first GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

### Настройка Google Colab

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# import json
# import os

# def download_contest3_data():
#   # !mkdir ~/.kaggle # закоментить если ругается
#   # !touch ~/.kaggle/kaggle.json # закоментить если ругается

#   # токен надо сгенерировать в личном кабинете на kaggle (https://www.kaggle.com/settings/account)
#   api_token = {"username":"fokuspokus","key":"abrakadabra"}
#   with open('/root/.kaggle/kaggle.json', 'w') as file:
#       json.dump(api_token, file)

#   !chmod 600 ~/.kaggle/kaggle.json

#   !kaggle competitions download -p /content/drive/MyDrive/ -c ml-mipt-2023-contest-3

#   if not os.path.isdir("/content/drive/MyDrive/contest3"):
#     !mkdir /content/drive/MyDrive/contest3

#   !unzip /content/drive/MyDrive/ml-mipt-2023-contest-3.zip -d /content/drive/MyDrive/contest3

# download_contest3_data()

### EDA (исследовательский анализ данных)

In [None]:
from matplotlib import patches
import matplotlib.pyplot as plt


def draw(image, points):
    """Show `image` with nine keypoints overlaid as red dots.

    `points` is read as a flat sequence of interleaved coordinates
    (x0, y0, x1, y1, ...); only its first 18 entries are used.
    """
    figure, axes = plt.subplots()
    axes.imshow(image)

    # Walk over the nine (x, y) pairs stored back to back in `points`.
    for pair_idx in range(9):
        center = (points[2 * pair_idx], points[2 * pair_idx + 1])
        axes.add_patch(patches.Circle(center, radius=5, color='red'))

    plt.show()

In [None]:
# Load the training labels, discard the rows labelled 140 and 448 in the
# default integer index (presumably bad annotations — TODO confirm), and key
# the table by image file name.
keypoints = (
    pd.read_csv("data/train_labels.csv")
    .drop(index=[140, 448])
    .set_index('file_name')
)

### Preprocessing (подготовка данных)

In [None]:
class CatsDataset(Dataset):
    """Training images paired with their 18 keypoint coordinates.

    Every image is resized to 256x256 and normalised with mean 0.5 / std 0.5
    per channel; the keypoints are rescaled by the same factors so they stay
    aligned with the resized image.

    Args:
        folder: Directory that holds the training images.
        keypoint_data: DataFrame indexed by image file name whose columns are
            the interleaved coordinates (x0, y0, x1, y1, ...).
    """

    IMAGE_SIZE = 256

    def __init__(self, folder, keypoint_data):
        self.folder = folder
        self.keypoint_data = keypoint_data.copy()
        labelled = self.keypoint_data.index
        # Keep only files that have a label row (rows may have been dropped
        # from the table while the image is still on disk), and sort so that
        # index-based train/validation splits are reproducible.
        self.image_paths = [
            os.path.join(folder, filename)
            for filename in sorted(os.listdir(folder))
            if filename in labelled
        ]
        # Resolved once instead of on every __getitem__ call.
        self._device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return `(image, points)` as float32 tensors on the selected device."""
        image_path = self.image_paths[idx]
        # convert() copies the pixels, so the file handle can be closed at once.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')

        scale_x = self.IMAGE_SIZE / image.size[0]
        scale_y = self.IMAGE_SIZE / image.size[1]
        image = image.resize((self.IMAGE_SIZE, self.IMAGE_SIZE))
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        image = transform(image).to(self._device)

        file_name = os.path.basename(image_path)
        # Cast to float before scaling: an in-place multiply on an integer
        # array would silently truncate the rescaled coordinates.
        points = self.keypoint_data.loc[file_name].to_numpy(dtype=np.float64, copy=True)
        points[0::2] *= scale_x  # x coordinates sit at even positions
        points[1::2] *= scale_y  # y coordinates sit at odd positions
        points = torch.tensor(points, device=self._device)
        return image.float(), points.float()



class TestDataset(Dataset):
    """Unlabelled images prepared the same way as the training images.

    Each item is `(image, file_name, scale_x, scale_y)`: the image resized to
    256x256 and normalised with mean 0.5 / std 0.5 per channel, plus the two
    resize factors needed to map predictions back to the original size.

    Args:
        folder: Directory that holds the test images.
    """

    IMAGE_SIZE = 256

    def __init__(self, folder):
        self.folder = folder
        # Sorted so the order of predictions does not depend on the
        # unspecified order of os.listdir().
        self.image_paths = [
            os.path.join(folder, filename) for filename in sorted(os.listdir(folder))
        ]
        # Resolved here rather than read from a notebook-level global, so the
        # class works on its own.
        self._device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        # convert() copies the pixels, so the file handle can be closed at once.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')

        scale_x = self.IMAGE_SIZE / image.size[0]
        scale_y = self.IMAGE_SIZE / image.size[1]
        image = image.resize((self.IMAGE_SIZE, self.IMAGE_SIZE))
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        image = transform(image).to(self._device)
        file_name = os.path.basename(image_path)
        return image.float(), file_name, scale_x, scale_y

Аугментация, чтобы модель лучше обучалась на перевёрнутых котах.
Экспериментировал с количеством новых картинок и остановился на оптимальном значении — чуть меньше 50% от датасета.

In [None]:
from PIL import Image
import random
import os

# Position i of a flipped label row takes its value from position
# _VERTICAL_FLIP_ORDER[i]: the eyes swap, the mouth stays, and each left-ear
# point swaps with the matching right-ear point.
_VERTICAL_FLIP_ORDER = [2, 3, 0, 1, 4, 5, 12, 13, 14, 15, 16, 17, 6, 7, 8, 9, 10, 11]


def flip_keypoints_vertically(coords, image_height):
    """Return the keypoints of an image after a top-to-bottom flip.

    Args:
        coords: 18 interleaved values (x0, y0, ..., x8, y8) ordered as left
            eye, right eye, mouth, three left-ear points, three right-ear
            points.
        image_height: Height in pixels of the image being flipped.

    Returns:
        A new NumPy array: every y becomes `image_height - y`, x values are
        unchanged, and the left/right eyes and ears trade places.
    """
    flipped = np.array(coords)  # np.array copies, so `coords` is not modified
    flipped[1::2] = image_height - flipped[1::2]
    return flipped[_VERTICAL_FLIP_ORDER]


def augmentation(count_image=4000,
                 source_folder="/content/drive/MyDrive/contest3/images/images/train/",
                 output_folder=None):
    """Add vertically flipped copies of randomly chosen training images.

    Each flipped copy is saved as `new_y_<original name>` and its label row is
    appended to the notebook-level `keypoints` table.

    Args:
        count_image: Maximum number of images to flip; capped at the number
            of eligible images.
        source_folder: Directory with the original training images.
        output_folder: Where to write the flipped copies; defaults to
            `source_folder`.
    """
    if output_folder is None:
        output_folder = source_folder

    # Only labelled originals are eligible: files without a label row would
    # raise KeyError below, and re-flipping earlier `new_y_` outputs would
    # just recreate the originals under a new name.
    candidates = [
        filename
        for filename in sorted(os.listdir(source_folder))
        if filename in keypoints.index and not filename.startswith('new_y_')
    ]
    # random.sample raises ValueError when asked for more items than exist.
    selected_files = random.sample(candidates, min(count_image, len(candidates)))

    for filename in selected_files:
        new_name = f'new_y_{filename}'
        # The context manager closes the source file even if saving fails.
        with Image.open(os.path.join(source_folder, filename)) as image:
            image_height = image.size[1]
            flipped_image = image.transpose(Image.FLIP_TOP_BOTTOM)
            flipped_image.save(os.path.join(output_folder, new_name))

        keypoints.loc[new_name] = flip_keypoints_vertically(
            keypoints.loc[filename], image_height
        )

In [None]:
from sklearn.model_selection import train_test_split

# Datasets read straight from the mounted Drive folders.
train_dataset = CatsDataset("/content/drive/MyDrive/contest3/images/images/train/", keypoints)
test_dataset = TestDataset("/content/drive/MyDrive/contest3/images/images/test/")

test_loader = DataLoader(test_dataset, batch_size=32)

# Hold out 10% of the training samples for validation. The split is made on
# sample indices, and the loaders are built over the resulting subsets only.
indices = np.arange(len(train_dataset))
train_indices, val_indices = train_test_split(indices, test_size=0.1, random_state=42)

train_loader = DataLoader(torch.utils.data.Subset(train_dataset, train_indices), batch_size=32, shuffle=True)
val_loader = DataLoader(torch.utils.data.Subset(train_dataset, val_indices), batch_size=32, shuffle=False)

### Training/evaluation loop

В этом разделе напишите функцию, принимающую модель, оптимизатор, кол-во эпох, и т.д, которая осуществляет обучение с заданными параметрами. Подумайте, что функция будет возвращать. 

Смысл этого раздела в том, чтобы не дублировать код обучения для каждого эксперимента. А еще на такую функцию легко накинуть перебор гиперпараметров... 

In [None]:
def train(num_epochs, train_load, model, optimizer, criterion, val_load=None):
    """Train `model` and evaluate it on a validation loader after every epoch.

    Args:
        num_epochs: Number of passes over `train_load`.
        train_load: Iterable of `(inputs, targets)` training batches.
        model: Network to optimise; it is moved to the notebook-level `device`.
        optimizer: Optimizer already bound to `model`'s parameters.
        criterion: Loss function called as `criterion(outputs, targets)`.
        val_load: Iterable of `(inputs, targets)` validation batches. When
            omitted, the notebook-level `val_loader` is used.

    Returns:
        `(train_losses, val_losses)`: `train_losses` holds one value per
        training batch, `val_losses` one batch-averaged value per epoch.
    """
    if val_load is None:
        # Existing callers pass no validation loader; keep them working.
        val_load = val_loader

    model.to(device)

    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        model.train()
        epoch_losses = []
        for x_batch, y_batch in tqdm(train_load):
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            # Clear gradients first so nothing left over from earlier
            # backward passes leaks into this step.
            optimizer.zero_grad()
            loss = criterion(model(x_batch), y_batch)
            loss.backward()
            optimizer.step()
            epoch_losses.append(loss.item())
        train_losses.extend(epoch_losses)

        model.eval()
        val_total = 0.0
        val_batches = 0
        with torch.no_grad():
            for x_val, y_val in val_load:
                x_val, y_val = x_val.to(device), y_val.to(device)
                val_total += criterion(model(x_val), y_val).item()
                val_batches += 1

        # Guard against empty loaders instead of dividing by zero.
        val_loss = val_total / val_batches if val_batches else float("nan")
        val_losses.append(val_loss)
        mean_train_loss = (
            sum(epoch_losses) / len(epoch_losses) if epoch_losses else float("nan")
        )

        # Report the epoch average rather than only the last batch's loss.
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {mean_train_loss:.4f}, Val loss: {val_loss:.4f}")
    return train_losses, val_losses

### Prediction function

Реализуйте функцию, которая бы делала предсказания. Функция принимает датасет/даталоадер и модель (мб еще что-то). Эта функция нужна вам, чтобы было удобнее считать метрику (по сути она будет склеивать предсказания из батчей в один массив). 

In [None]:
def prediction(model, test_loader):
    """Predict keypoints for every batch in `test_loader` and save them to CSV.

    Args:
        model: Trained network producing 18 values per image.
        test_loader: Iterable of `(images, file_names, scale_x, scale_y)`
            batches, where the scales are the resize factors applied to each
            image.

    Returns:
        `(predictions, filenames)`: an `(N, 18)` NumPy array of keypoints in
        original-image coordinates and the matching list of file names. The
        same data is written to `test_preds.csv`.
    """
    model.eval()

    # Run on whatever device the model lives on instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    filenames = []
    predictions = []

    with torch.no_grad():
        for images, image_paths, scale_x, scale_y in tqdm(test_loader):
            outputs = model(images.to(model_device, dtype=torch.float32))
            predicted_keypoints = outputs.cpu().numpy()

            # Undo the resize: x values sit at even columns, y at odd ones.
            # The scales become (batch, 1) NumPy columns so the division
            # broadcasts per image without mixing NumPy and torch operands.
            x_scale = torch.as_tensor(scale_x, dtype=torch.float64).cpu().numpy().reshape(-1, 1)
            y_scale = torch.as_tensor(scale_y, dtype=torch.float64).cpu().numpy().reshape(-1, 1)
            predicted_keypoints[:, 0::2] /= x_scale
            predicted_keypoints[:, 1::2] /= y_scale

            predictions.append(predicted_keypoints)
            filenames.extend(image_paths)

    csv_columns = ["file_names", "left_eye_x", "left_eye_y", "right_eye_x", "right_eye_y", "mouth_x", "mouth_y", "left_ear_1_x", "left_ear_1_y", "left_ear_2_x", "left_ear_2_y", "left_ear_3_x", "left_ear_3_y", "right_ear_1_x", "right_ear_1_y", "right_ear_2_x", "right_ear_2_y", "right_ear_3_x", "right_ear_3_y"]
    coordinate_columns = csv_columns[1:]

    # np.vstack rejects an empty list, so handle an empty loader explicitly.
    if predictions:
        predictions = np.vstack(predictions)
    else:
        predictions = np.empty((0, len(coordinate_columns)), dtype=np.float32)

    df = pd.DataFrame(predictions, columns=coordinate_columns)
    # NOTE(review): 'file_names' ends up as the last CSV column although it is
    # listed first in csv_columns — confirm which order the consumer expects.
    df['file_names'] = filenames

    df.to_csv('test_preds.csv', index=False)

    return predictions, filenames

In [None]:
def validation(model, val_load, num_examples=5):
    """Measure the mean absolute keypoint error in original-image pixels.

    Every validation sample is pushed through the model, the prediction is
    mapped from the resized input back to the original image size, and it is
    compared with the label row stored in the dataset. The first
    `num_examples` predictions are also drawn on their images.

    Args:
        model: Trained network producing 18 values per image.
        val_load: DataLoader whose `.dataset` is a `CatsDataset` or a
            `torch.utils.data.Subset` of one (its `image_paths` and
            `keypoint_data` attributes are read directly).
        num_examples: How many predictions to visualise.

    Returns:
        The average per-image MAE, which is also printed as "CMAE".
    """
    # Recover the underlying dataset and the sample indices being validated;
    # the loader's batches alone carry neither file names nor image sizes.
    subset = val_load.dataset
    if isinstance(subset, torch.utils.data.Subset):
        base_dataset = subset.dataset
        sample_indices = list(subset.indices)
    else:
        base_dataset = subset
        sample_indices = list(range(len(subset)))

    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    total_metrics = 0.0
    examples = []
    with torch.no_grad():
        for sample_idx in sample_indices:
            image_path = base_dataset.image_paths[sample_idx]
            file_name = os.path.basename(image_path)

            image, _ = base_dataset[sample_idx]
            output = model(image.unsqueeze(0).to(model_device))[0]
            preds = output.cpu().numpy().astype(np.float64)

            # Map predictions from the resized input back to the original
            # resolution (x at even positions, y at odd positions).
            with Image.open(image_path) as original:
                width, height = original.size
            preds[0::2] *= width / image.shape[-1]
            preds[1::2] *= height / image.shape[-2]

            targets = base_dataset.keypoint_data.loc[file_name].to_numpy(dtype=np.float64)
            total_metrics += float(np.mean(np.abs(preds - targets)))

            if len(examples) < num_examples:
                examples.append((image_path, preds))

    for image_path, preds in examples:
        with Image.open(image_path) as image:
            draw(image, preds)

    # Avoid dividing by zero when the validation set is empty.
    average_metrics = total_metrics / len(sample_indices) if sample_indices else float("nan")
    print("CMAE:", average_metrics)
    return average_metrics

### Experiments

#### Эксперимент 1

первая попытка наугад

In [None]:
class KeypointCNN(nn.Module):
    """Two conv blocks and two dense layers regressing 18 keypoint coordinates.

    Sized for 3x256x256 inputs: the two 2x2 poolings leave a 64-channel 64x64
    feature map, which is what `fc1` expects.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=5, padding=2)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 64 * 64, 512)
        self.fc2 = nn.Linear(512, 18)

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        # Flatten per sample. A `view(-1, n)` would silently fold a
        # wrong-sized input into extra batch rows; this way `fc1` reports
        # the shape mismatch instead.
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)


# Experiment 1 hyperparameters.
num_epochs = 10
learning_rate = 0.03
model = KeypointCNN()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.L1Loss()

train_losses, val_losses = train(num_epochs, train_loader, model, optimizer, criterion)

# train() records one training loss per batch but one validation loss per
# epoch. Average the batch losses within each epoch so that both curves share
# the epoch axis the plot is labelled with.
epoch_train_losses = np.asarray(train_losses).reshape(num_epochs, -1).mean(axis=1)
epochs = np.arange(1, num_epochs + 1)

plt.plot(epochs, epoch_train_losses, label='Training Losses', color='blue')
plt.plot(epochs, val_losses, label='Validation Losses', color='red')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Losses')
plt.legend()
plt.show()

validation(model, val_loader)

#### Эксперимент 2

Понял, что нужно больше эпох и другой learning rate.

In [None]:
class KeypointCNN(nn.Module):
    """Two conv blocks and two dense layers regressing 18 keypoint coordinates.

    Sized for 3x256x256 inputs: the two 2x2 poolings leave a 64-channel 64x64
    feature map, which is what `fc1` expects.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=5, padding=2)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 64 * 64, 512)
        self.fc2 = nn.Linear(512, 18)

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        # Flatten per sample. A `view(-1, n)` would silently fold a
        # wrong-sized input into extra batch rows; this way `fc1` reports
        # the shape mismatch instead.
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)


# Experiment 2 hyperparameters: more epochs and a smaller learning rate.
num_epochs = 25
learning_rate = 0.001
model = KeypointCNN()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.L1Loss()

train_losses, val_losses = train(num_epochs, train_loader, model, optimizer, criterion)

# train() records one training loss per batch but one validation loss per
# epoch. Average the batch losses within each epoch so that both curves share
# the epoch axis the plot is labelled with.
epoch_train_losses = np.asarray(train_losses).reshape(num_epochs, -1).mean(axis=1)
epochs = np.arange(1, num_epochs + 1)

plt.plot(epochs, epoch_train_losses, label='Training Losses', color='blue')
plt.plot(epochs, val_losses, label='Validation Losses', color='red')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Losses')
plt.legend()
plt.show()

validation(model, val_loader)

#### Эксперимент 3

добавил больше слоев и новый оптимизатор

In [None]:
class KeypointCNN(nn.Module):
    """Four conv blocks and three dense layers regressing 18 keypoint coordinates.

    Sized for 3x256x256 inputs: the four 2x2 poolings leave a 256-channel
    16x16 feature map, which is what `fc1` expects.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=5, padding=2)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(256 * 16 * 16, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 18)

    def forward(self, x):
        # The activation differs per block on purpose (ReLU, ReLU, LeakyReLU,
        # ReLU6); the functional forms avoid building a module on every call.
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.leaky_relu(self.bn3(self.conv3(x))))
        x = self.pool(F.relu6(self.bn4(self.conv4(x))))
        # Flatten per sample. A `view(-1, n)` would silently fold a
        # wrong-sized input into extra batch rows; this way `fc1` reports
        # the shape mismatch instead.
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.leaky_relu(self.fc2(x), negative_slope=0.01)
        return self.fc3(x)


# Experiment 3 hyperparameters: deeper network trained with AdamW.
num_epochs = 25
learning_rate = 0.001
model = KeypointCNN()
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
criterion = nn.L1Loss()

train_losses, val_losses = train(num_epochs, train_loader, model, optimizer, criterion)

# train() records one training loss per batch but one validation loss per
# epoch. Average the batch losses within each epoch so that both curves share
# the epoch axis the plot is labelled with.
epoch_train_losses = np.asarray(train_losses).reshape(num_epochs, -1).mean(axis=1)
epochs = np.arange(1, num_epochs + 1)

plt.plot(epochs, epoch_train_losses, label='Training Losses', color='blue')
plt.plot(epochs, val_losses, label='Validation Losses', color='red')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Losses')
plt.legend()
plt.show()

validation(model, val_loader)

#### Эксперимент 4

добавил больше слоев, эпох и аугментацию

In [None]:
class KeypointCNN(nn.Module):
    """Six conv blocks and four dense layers regressing 18 keypoint coordinates.

    Sized for 3x256x256 inputs: the six 2x2 poolings leave a 1024-channel 4x4
    feature map, which is what `fc1` expects.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=5, padding=2)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.conv5 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.bn5 = nn.BatchNorm2d(512)
        self.conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=1)
        self.bn6 = nn.BatchNorm2d(1024)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(1024 * 4 * 4, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, 18)

    def forward(self, x):
        # Each block is conv -> batch norm -> ReLU -> 2x2 max pool.
        conv_blocks = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
            (self.conv4, self.bn4),
            (self.conv5, self.bn5),
            (self.conv6, self.bn6),
        )
        for conv, bn in conv_blocks:
            x = self.pool(F.relu(bn(conv(x))))

        # Flatten per sample. A `view(-1, n)` would silently fold a
        # wrong-sized input into extra batch rows; this way `fc1` reports
        # the shape mismatch instead.
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        return self.fc4(x)


# Experiment 4 hyperparameters: deepest network, more epochs, augmented data.
num_epochs = 30
learning_rate = 0.001
model = KeypointCNN()
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
criterion = nn.L1Loss()

# Add the flipped copies, then rebuild the datasets so they include them.
augmentation()
train_dataset = CatsDataset("/content/drive/MyDrive/contest3/images/images/train/", keypoints)
test_dataset = TestDataset("/content/drive/MyDrive/contest3/images/images/test/")
test_loader = DataLoader(test_dataset, batch_size=32)

# Split by source image rather than by sample: a flipped copy ('new_y_' +
# original name) must stay on the same side as its original, otherwise the
# validation set contains near-duplicates of training images.
file_names = [os.path.basename(path) for path in train_dataset.image_paths]
source_names = np.array([
    name[len('new_y_'):] if name.startswith('new_y_') else name
    for name in file_names
])
train_sources, val_sources = train_test_split(
    np.unique(source_names), test_size=0.1, random_state=42
)
indices = np.arange(len(train_dataset))
in_validation = np.isin(source_names, val_sources)
train_indices, val_indices = indices[~in_validation], indices[in_validation]

train_loader = DataLoader(torch.utils.data.Subset(train_dataset, train_indices), batch_size=32, shuffle=True)
val_loader = DataLoader(torch.utils.data.Subset(train_dataset, val_indices), batch_size=32, shuffle=False)

train_losses, val_losses = train(num_epochs, train_loader, model, optimizer, criterion)

# train() records one training loss per batch but one validation loss per
# epoch. Average the batch losses within each epoch so that both curves share
# the epoch axis the plot is labelled with.
epoch_train_losses = np.asarray(train_losses).reshape(num_epochs, -1).mean(axis=1)
epochs = np.arange(1, num_epochs + 1)

plt.plot(epochs, epoch_train_losses, label='Training Losses', color='blue')
plt.plot(epochs, val_losses, label='Validation Losses', color='red')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Losses')
plt.legend()
plt.show()

validation(model, val_loader)

Ставил эксперименты с размером батча: чем больше, тем лучше. В финальной попытке использовал 512, но ресурс закончился.
Также в первых попытках не делал ресайз.

### Evaluation (оценка качества модели)

In [None]:
# Report the validation metric for the current `model` on `val_loader`.
validation(model, val_loader)

In [None]:
# Predict keypoints for the test set; prediction() also writes them to
# test_preds.csv.
prediction(model, test_loader)

### Conclusion (Выводы)

Не успел подключить Optuna, чтобы разобраться с гиперпараметрами.
Мало поработал с аугментацией.
Не понял, как лучше комбинировать слои.
Получилось справиться с проблемой наклонённых и перевёрнутых котов благодаря аугментации.
Получил маленькую ошибку на валидации.