# Загружаем данные

In [None]:
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1XBOW8M8zHboa7g3W28o_TRcFGNWmWFhb' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1XBOW8M8zHboa7g3W28o_TRcFGNWmWFhb" -O made_cv_hw1_data.zip && rm -rf /tmp/cookies.txt
#!unzip ./made_cv_hw1_data.zip

import zipfile
with zipfile.ZipFile('made_cv_hw1_data.zip', 'r') as zip_ref:
    zip_ref.extractall()

# Скопировал нейобходимые куски кода из hack_train.py и hack_utils.py и далее использовал их за основу

# Загружаем библиотеки

In [1]:
import os
import pickle
import sys
from argparse import ArgumentParser

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import tqdm
from torch.nn import functional as fnn
from torch.utils import data
from torchvision import transforms

# Оптимизация обучения, если граф вычеслений остается неизменным

In [2]:
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Функции и классы для обработки изображений

In [3]:
import os
import tqdm
import cv2
import numpy as np
import pandas as pd
import torch
from torch.utils import data

np.random.seed(1234)
torch.manual_seed(1234)

TRAIN_SIZE = 0.8
NUM_PTS = 971
CROP_SIZE = 128
SUBMISSION_HEADER = "file_name,Point_M0_X,Point_M0_Y,Point_M1_X,Point_M1_Y,Point_M2_X,Point_M2_Y,Point_M3_X,Point_M3_Y,Point_M4_X,Point_M4_Y,Point_M5_X,Point_M5_Y,Point_M6_X,Point_M6_Y,Point_M7_X,Point_M7_Y,Point_M8_X,Point_M8_Y,Point_M9_X,Point_M9_Y,Point_M10_X,Point_M10_Y,Point_M11_X,Point_M11_Y,Point_M12_X,Point_M12_Y,Point_M13_X,Point_M13_Y,Point_M14_X,Point_M14_Y,Point_M15_X,Point_M15_Y,Point_M16_X,Point_M16_Y,Point_M17_X,Point_M17_Y,Point_M18_X,Point_M18_Y,Point_M19_X,Point_M19_Y,Point_M20_X,Point_M20_Y,Point_M21_X,Point_M21_Y,Point_M22_X,Point_M22_Y,Point_M23_X,Point_M23_Y,Point_M24_X,Point_M24_Y,Point_M25_X,Point_M25_Y,Point_M26_X,Point_M26_Y,Point_M27_X,Point_M27_Y,Point_M28_X,Point_M28_Y,Point_M29_X,Point_M29_Y\n"


class ScaleMinSideToSize(object):
    def __init__(self, size=(CROP_SIZE, CROP_SIZE), elem_name='image'):
        self.size = torch.tensor(size, dtype=torch.float)
        self.elem_name = elem_name

    def __call__(self, sample):
        h, w, _ = sample[self.elem_name].shape
        if h > w:
            f = self.size[0] / w
        else:
            f = self.size[1] / h

        sample[self.elem_name] = cv2.resize(sample[self.elem_name], None, fx=f, fy=f, interpolation=cv2.INTER_AREA)
        sample["scale_coef"] = f

        if 'landmarks' in sample:
            landmarks = sample['landmarks'].reshape(-1, 2).float()
            landmarks = landmarks * f
            sample['landmarks'] = landmarks.reshape(-1)

        return sample


class CropCenter(object):
    def __init__(self, size=128, elem_name='image'):
        self.size = size
        self.elem_name = elem_name

    def __call__(self, sample):
        img = sample[self.elem_name]
        h, w, _ = img.shape
        margin_h = (h - self.size) // 2
        margin_w = (w - self.size) // 2
        sample[self.elem_name] = img[margin_h:margin_h + self.size, margin_w:margin_w + self.size]
        sample["crop_margin_x"] = margin_w
        sample["crop_margin_y"] = margin_h

        if 'landmarks' in sample:
            landmarks = sample['landmarks'].reshape(-1, 2)
            landmarks -= torch.tensor((margin_w, margin_h), dtype=landmarks.dtype)[None, :]
            sample['landmarks'] = landmarks.reshape(-1)

        return sample


class TransformByKeys(object):
    def __init__(self, transform, names):
        self.transform = transform
        self.names = set(names)

    def __call__(self, sample):
        for name in self.names:
            if name in sample:
                sample[name] = self.transform(sample[name])

        return sample


class ThousandLandmarksDataset(data.Dataset):
    def __init__(self, root, transforms, split="train"):
        super(ThousandLandmarksDataset, self).__init__()
        self.root = root
        landmark_file_name = os.path.join(root, 'landmarks.csv') if split is not "test" \
            else os.path.join(root, "test_points.csv")
        images_root = os.path.join(root, "images")

        self.image_names = []
        self.landmarks = []

        with open(landmark_file_name, "rt") as fp:
            num_lines = sum(1 for line in fp)
        num_lines -= 1  # header

        with open(landmark_file_name, "rt") as fp:
            i_range=tqdm.tqdm(enumerate(fp))
            for i, line in i_range:
                if i == 0:
                    continue  # skip header
                if split == "train" and i == int(TRAIN_SIZE * num_lines):
                    break  # reached end of train part of data
                elif split == "val" and i < int(TRAIN_SIZE * num_lines):
                    continue  # has not reached start of val part of data
                elements = line.strip().split("\t")
                image_name = os.path.join(images_root, elements[0])
                self.image_names.append(image_name)

                if split in ("train", "val"):
                    landmarks = list(map(np.int16, elements[1:]))
                    landmarks = np.array(landmarks, dtype=np.int16).reshape((len(landmarks) // 2, 2))
                    self.landmarks.append(landmarks)
                i_range.refresh()
        i_range.close()
        if split in ("train", "val"):
            self.landmarks = torch.as_tensor(self.landmarks)
        else:
            self.landmarks = None

        self.transforms = transforms

    def __getitem__(self, idx):
        sample = {}
        if self.landmarks is not None:
            landmarks = self.landmarks[idx]
            sample["landmarks"] = landmarks

        image = cv2.imread(self.image_names[idx])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        sample["image"] = image

        if self.transforms is not None:
            sample = self.transforms(sample)

        return sample

    def __len__(self):
        return len(self.image_names)


def restore_landmarks(landmarks, f, margins):
    dx, dy = margins
    landmarks[:, 0] += dx
    landmarks[:, 1] += dy
    landmarks /= f
    return landmarks


def restore_landmarks_batch(landmarks, fs, margins_x, margins_y):
    landmarks[:, :, 0] += margins_x[:, None]
    landmarks[:, :, 1] += margins_y[:, None]
    landmarks /= fs[:, None, None]
    return landmarks


def create_submission(path_to_data, test_predictions, path_to_submission_file):
    test_dir = os.path.join(path_to_data, "test")

    output_file = path_to_submission_file
    wf = open(output_file, 'w')
    wf.write(SUBMISSION_HEADER)

    mapping_path = os.path.join(test_dir, 'test_points.csv')
    mapping = pd.read_csv(mapping_path, delimiter='\t')

    for i, row in mapping.iterrows():
        file_name = row[0]
        point_index_list = np.array(eval(row[1]))
        points_for_image = test_predictions[i]
        needed_points = points_for_image[point_index_list].astype(np.int)
        wf.write(file_name + ',' + ','.join(map(str, needed_points.reshape(2 * len(point_index_list)))) + '\n')

# Функции для обучения, валидации и предсказания

In [4]:
def train(model, loader, loss_fn, optimizer, device):
    model.train()
    train_loss = []
    i_range=tqdm.tqdm(loader, total=len(loader), desc="training...")
    for batch in i_range:
        images = batch["image"].to(device)  # B x 3 x CROP_SIZE x CROP_SIZE
        landmarks = batch["landmarks"]  # B x (2 * NUM_PTS)

        pred_landmarks = model(images).cpu()  # B x (2 * NUM_PTS)
        loss = loss_fn(pred_landmarks, landmarks) #, reduction="mean"
        train_loss.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        #scheduler.step()
        i_range.refresh()
    i_range.close()
    return np.mean(train_loss)


def validate(model, loader, loss_fn, device):
    model.eval()
    val_loss = []
    i_range=tqdm.tqdm(loader, total=len(loader), desc="validation...")
    for batch in i_range:
        images = batch["image"].to(device)
        landmarks = batch["landmarks"]

        with torch.no_grad():
            pred_landmarks = model(images).cpu()
        loss = loss_fn(pred_landmarks, landmarks) #, reduction="mean"
        val_loss.append(loss.item())
        i_range.refresh()
    i_range.close()
    return np.mean(val_loss)


def predict(model, loader, device):
    model.eval()
    predictions = np.zeros((len(loader.dataset), NUM_PTS, 2))
    i_range=tqdm.tqdm(loader, total=len(loader), desc="test prediction...")
    for i, batch in enumerate(i_range):
        images = batch["image"].to(device)

        with torch.no_grad():
            pred_landmarks = model(images).cpu()
        pred_landmarks = pred_landmarks.numpy().reshape((len(pred_landmarks), NUM_PTS, 2))  # B x NUM_PTS x 2

        fs = batch["scale_coef"].numpy()  # B
        margins_x = batch["crop_margin_x"].numpy()  # B
        margins_y = batch["crop_margin_y"].numpy()  # B
        prediction = restore_landmarks_batch(pred_landmarks, fs, margins_x, margins_y)  # B x NUM_PTS x 2
        predictions[i * loader.batch_size: (i + 1) * loader.batch_size] = prediction
        i_range.refresh()
    i_range.close()
    return predictions

# Читаем и трансформируем данные для обучения. Размер батча - 256

In [5]:
# 1. prepare data & models
train_transforms = transforms.Compose([
    ScaleMinSideToSize((CROP_SIZE, CROP_SIZE)),
    CropCenter(CROP_SIZE),
    TransformByKeys(transforms.ToPILImage(), ("image",)),
    TransformByKeys(transforms.ToTensor(), ("image",)),
    TransformByKeys(transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), ("image",)),
])

In [6]:
print("Reading data...")
train_dataset = ThousandLandmarksDataset(os.path.join('./data/', 'train'), train_transforms, split="train")
train_dataloader = data.DataLoader(train_dataset, batch_size=256, num_workers=16, pin_memory=True,
                                    shuffle=True, drop_last=True)

Reading data...


315115it [10:55, 480.48it/s]


# Читаем и трансформируем данные для валидации. Размер батча - 256

In [7]:
val_dataset = ThousandLandmarksDataset(os.path.join('./data/', 'train'), train_transforms, split="val")
val_dataloader = data.DataLoader(val_dataset, batch_size=256, num_workers=16, pin_memory=True,
                                  shuffle=False, drop_last=False)

393931it [02:42, 2418.51it/s]  


# Генерируем название модели

In [8]:
name_try = '10_epoch_resnet50_l1'

# Пробовал WingLoss в качестве функции потерь

In [None]:
import torch
from torch import nn
import math

class WingLoss(nn.Module):
    def __init__(self, omega=10, epsilon=2):
        super(WingLoss, self).__init__()
        self.omega = omega
        self.epsilon = epsilon

    def forward(self, pred, target):
        y = target
        y_hat = pred
        delta_y = (y - y_hat).abs()
        delta_y1 = delta_y[delta_y < self.omega]
        delta_y2 = delta_y[delta_y >= self.omega]
        loss1 = self.omega * torch.log(1 + delta_y1 / self.epsilon)
        C = self.omega - self.omega * math.log(1 + self.omega / self.epsilon)
        loss2 = delta_y2 - C
        return (loss1.sum() + loss2.sum()) / (len(loss1) + len(loss2))

# Определяем модель, функцию потерь, метод оптимизации. В качестве функции потерь лучше всего себя показала smooth_l1_loss. Изменение learning rate у меня не зашло

In [9]:
print("Creating model...")
# device = torch.device("cuda: 0") if args.gpu else torch.device("cpu")
device = torch.device("cuda: 0")
model = models.resnet50(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 2 * NUM_PTS, bias=True)
model.to(device)

optimizer = optim.Adam(model.parameters(), lr=1e-3, amsgrad=True)
#loss_fn = fnn.mse_loss
loss_fn = fnn.smooth_l1_loss
#loss_fn = WingLoss()
#lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.2)

Creating model...


# Обучение модели

In [10]:
# 2. train & validate
print("Ready for training...")
best_val_loss = np.inf
for epoch in range(10):
    train_loss = train(model, train_dataloader, loss_fn, optimizer, device=device)
    #lr_scheduler.step()
    val_loss = validate(model, val_dataloader, loss_fn, device=device)
    print("Epoch #{:2}:\ttrain loss: {:5.2}\tval loss: {:5.2}".format(epoch, train_loss, val_loss))
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        with open(f"{name_try}_best.pth", "wb") as fp:
            torch.save(model.state_dict(), fp)

training...:   0%|          | 0/1231 [00:00<?, ?it/s]

Ready for training...


training...: 100%|██████████| 1231/1231 [20:44<00:00,  1.01s/it]
validation...: 100%|██████████| 308/308 [01:44<00:00,  2.94it/s]
training...:   0%|          | 0/1231 [00:00<?, ?it/s]

Epoch # 0:	train loss:   2.6	val loss:  0.89


training...: 100%|██████████| 1231/1231 [21:04<00:00,  1.03s/it]
validation...: 100%|██████████| 308/308 [01:46<00:00,  2.91it/s]


Epoch # 1:	train loss:  0.83	val loss:  0.84


training...: 100%|██████████| 1231/1231 [21:04<00:00,  1.03s/it]
validation...: 100%|██████████| 308/308 [01:46<00:00,  2.89it/s]


Epoch # 2:	train loss:  0.73	val loss:  0.63


training...: 100%|██████████| 1231/1231 [21:04<00:00,  1.03s/it]
validation...: 100%|██████████| 308/308 [01:48<00:00,  2.85it/s]
training...:   0%|          | 0/1231 [00:00<?, ?it/s]

Epoch # 3:	train loss:  0.68	val loss:   0.8


training...: 100%|██████████| 1231/1231 [21:04<00:00,  1.03s/it]
validation...: 100%|██████████| 308/308 [01:46<00:00,  2.88it/s]


Epoch # 4:	train loss:  0.64	val loss:  0.55


training...: 100%|██████████| 1231/1231 [21:03<00:00,  1.03s/it]
validation...: 100%|██████████| 308/308 [01:44<00:00,  2.95it/s]
training...:   0%|          | 0/1231 [00:00<?, ?it/s]

Epoch # 5:	train loss:   0.6	val loss:  0.58


training...: 100%|██████████| 1231/1231 [21:03<00:00,  1.03s/it]
validation...: 100%|██████████| 308/308 [01:46<00:00,  2.89it/s]
training...:   0%|          | 0/1231 [00:00<?, ?it/s]

Epoch # 6:	train loss:  0.58	val loss:   0.6


training...: 100%|██████████| 1231/1231 [20:47<00:00,  1.01s/it]
validation...: 100%|██████████| 308/308 [01:50<00:00,  2.79it/s]
training...:   0%|          | 0/1231 [00:00<?, ?it/s]

Epoch # 7:	train loss:  0.55	val loss:   0.6


training...: 100%|██████████| 1231/1231 [21:02<00:00,  1.03s/it]
validation...: 100%|██████████| 308/308 [01:46<00:00,  2.89it/s]


Epoch # 8:	train loss:  0.52	val loss:  0.55


training...: 100%|██████████| 1231/1231 [21:03<00:00,  1.03s/it]
validation...: 100%|██████████| 308/308 [01:45<00:00,  2.92it/s]


Epoch # 9:	train loss:   0.5	val loss:  0.49


# Читаем и трансформируем данные для предсказания.

In [11]:
test_dataset = ThousandLandmarksDataset(os.path.join('./data/', 'test'), train_transforms, split="test")
test_dataloader = data.DataLoader(test_dataset, batch_size=256, num_workers=4, pin_memory=True,
                                  shuffle=False, drop_last=False)

2892it [00:01, 2026.64it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

7594it [00:03, 2036.20it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

12550it [00:06, 2059.24it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=100

# Генерируем предсказание для загрузки на kaggle

In [12]:
with open(f"{name_try}_best.pth", "rb") as fp:
    best_state_dict = torch.load(fp, map_location="cpu")
    model.load_state_dict(best_state_dict)

test_predictions = predict(model, test_dataloader, device)
with open(f"{name_try}_test_predictions.pkl", "wb") as fp:
    pickle.dump({"image_names": test_dataset.image_names,
                  "landmarks": test_predictions}, fp)

create_submission('./data/', test_predictions, f"{name_try}_submit.csv")

test prediction...: 100%|██████████| 390/390 [04:27<00:00,  1.46it/s]
