#### Подготовка данных
Решил посмотреть, как выглядят картинки, на которых мы ошибаемся больше всего, и увидел, что `train` содержит изображения со сдвинутой разметкой.

Для их обнаружения обучается такая же модель, но на всех картинках, включая *испорченные*. После этого перебираются все изображения, и для каждого делается предсказание. Если MSE между предсказанием и разметкой больше 100 (порог — просто константа), считаем, что у этой картинки неадекватная разметка.
 

In [None]:
import os
import pickle
import sys

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import tqdm.notebook as tqdm

from torch.nn import functional as fnn
from torch.utils import data
from torchvision import transforms
import matplotlib.pyplot as plt
import cv2
import pandas as pd

from utils import NUM_PTS, CROP_SIZE
from utils import train,validate,predict
from utils import ScaleMinSideToSize,CropCenter,TransformByKeys
from utils import ThousandLandmarksDataset
from utils import restore_landmarks_batch, create_submission,restore_landmarks_batch_ex,draw_landmarks


# Passed as `size` to the dataset constructors below; None presumably means
# "use everything" (depends on ThousandLandmarksDataset - TODO confirm).
data_size = None
# Selects the CUDA device when True, the CPU otherwise.
use_gpu = True

# Reproducibility: disable the cuDNN autotuner and request deterministic kernels.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [None]:
# --- Experiment configuration -------------------------------------------
prj_name = "test"             # prefix of the checkpoint file names
data_dir = "C:/_Data/full/"   # dataset root; its 'train' subfolder is read below

# Optimisation hyper-parameters.
epochs = 30
batch_size = 192
learning_rate = 1e-3

# Optional subsampling for quick experiments (left disabled):
# data_size = 40000
# data_size = 300

In [None]:
# Metric that takes the rescaling of the images into account.
class MseW(torch.nn.Module):
    """Weighted mean-squared-error metric.

    The squared error is averaged over axis 1 of the inputs, multiplied by
    the weight stored via :meth:`setWeight`, averaged over axis 0 and finally
    doubled.  Until :meth:`setWeight` is called the weight is the neutral
    value ``1.0``, i.e. the metric is a plain (doubled) MSE.
    """

    def __init__(self):
        super().__init__()
        # Neutral default so that forward() does not fail with AttributeError
        # when it is called before setWeight().
        self.w = 1.0

    def setWeight(self, weight):
        """Store the weight applied to the per-row errors in ``forward``.

        Presumably one scale-dependent weight per sample of the batch -
        TODO confirm against the caller that sets it.
        """
        self.w = weight

    def forward(self, outputs, labels):
        """Return the weighted, doubled MSE between ``outputs`` and ``labels``."""
        diff = outputs - labels
        mse = torch.mul(diff, diff).mean(axis=1)
        mse = torch.mul(mse, self.w).mean(axis=0)
        # Kept from the original: a second reduction over axis 0 (a no-op when
        # the previous step already produced a scalar).
        mse = mse.mean(axis=0)
        return 2 * mse

In [None]:
class CropRandom(object):
    """Cut a random square window out of the image and shift the landmarks.

    The window side is drawn between the larger extent of the landmark
    bounding box and the shorter image side when such a range exists;
    otherwise it falls back to ``min(w, h) - 1``.  The window offset is drawn
    so that the bounding box stays inside the window when that is possible.
    The chosen offsets are stored in ``sample['dx']`` / ``sample['dy']``.

    Raises:
        RuntimeError: if the sample has no ``'landmarks'`` entry.
    """

    def __init__(self, size=CROP_SIZE, elem_name='image'):
        # NOTE: `size` is stored but never read by __call__.
        self.size = torch.tensor(size, dtype=torch.float)
        self.elem_name = elem_name

    def __call__(self, sample):
        # Guard clause: the crop window is derived from the landmarks, so a
        # sample without them cannot be processed.
        if 'landmarks' not in sample:
            raise RuntimeError("CropRandom needs sample['landmarks'] to choose the crop window")

        img = sample[self.elem_name]
        landmarks = sample['landmarks'].reshape(-1, 2)
        # Landmark bounding box as (x_min, y_min, x_max, y_max).
        bound = landmarks[:,0].min(),landmarks[:,1].min(),landmarks[:,0].max(),landmarks[:,1].max()
        h,w,_ = img.shape

        # Side of the square window.
        min_sq = max(bound[3]-bound[1],bound[2]-bound[0])
        max_sq = min(w,h)
        if min_sq+1<max_sq-1:
            sq = np.random.randint(min_sq+1,max_sq-1)
        else:
            sq = max_sq-1

        # Horizontal offset of the window.
        min_dx = max(bound[2]-sq,0)
        max_dx = min(w-sq,bound[0])
        if min_dx<max_dx:
            dx = np.random.randint(min_dx,max_dx)
        else:
            dx = int(min_dx)

        # Vertical offset of the window.
        min_dy = max(bound[3]-sq,0)
        max_dy = min(h-sq,bound[1])
        if min_dy<max_dy:
            dy = np.random.randint(min_dy,max_dy)
        else:
            dy = int(min_dy)

        # Express the landmarks in the coordinate frame of the cropped image.
        landmarks -= torch.tensor((dx, dy), dtype=landmarks.dtype)[None, :]
        sample['landmarks'] = landmarks.reshape(-1)
        sample[self.elem_name] = img[dy:dy+sq, dx:dx+sq]
        sample['dx'] = torch.tensor(dx,dtype=torch.short)
        sample['dy'] = torch.tensor(dy,dtype=torch.short)
        return sample

In [None]:
# Validation is scored with the weighted MseW metric,
# while training optimises the plain MSE loss.
valid_loss_fn = MseW()
train_loss_fn = fnn.mse_loss

In [None]:
# "cuda:0" must be written without a space: newer PyTorch releases reject
# "cuda: 0" as an invalid device string.
device = torch.device("cuda:0" if use_gpu else "cpu")

In [None]:
def _deterministic_steps():
    """Return fresh instances of the steps shared by both pipelines.

    Rescale and centre-crop with CROP_SIZE, then apply ToPILImage, ToTensor
    and Normalize to the "image" entry of the sample.
    """
    image_only = ("image",)
    return [
        ScaleMinSideToSize((CROP_SIZE, CROP_SIZE)),
        CropCenter(CROP_SIZE),
        TransformByKeys(transforms.ToPILImage(), image_only),
        TransformByKeys(transforms.ToTensor(), image_only),
        TransformByKeys(transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), image_only),
    ]


# Training pipeline: a random crop followed by the shared steps
# (the RandomFlipV() augmentation is currently disabled).
train_transforms = transforms.Compose([CropRandom()] + _deterministic_steps())

# Validation / prediction pipeline: the shared steps only, no randomness.
val_transforms = transforms.Compose(_deterministic_steps())

In [None]:
%%time
print("Reading data...")
train_dataset = ThousandLandmarksDataset(os.path.join(data_dir, 'train'), train_transforms, split="train",size = data_size) 
train_dataloader = data.DataLoader(train_dataset, batch_size=batch_size, num_workers=0, pin_memory=True,drop_last=True,
                                   shuffle=True)
print(len(train_dataset))

In [None]:
# Validation split, read from the same 'train' folder; order is fixed and
# every sample is kept.
val_dataset = ThousandLandmarksDataset(
    os.path.join(data_dir, 'train'),
    val_transforms,
    split="val",
    size=data_size,
)
val_dataloader = data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    drop_last=False,
    num_workers=0,
    pin_memory=True,
)
print(len(val_dataset))

In [None]:
print("Creating model...")
# "cuda:0" must be written without a space: newer PyTorch releases reject
# "cuda: 0" as an invalid device string.
device = torch.device("cuda:0" if use_gpu else "cpu")
model = models.resnet50(pretrained=True)
# Swap the final layer for a regression head with two outputs per landmark.
model.fc = nn.Linear(model.fc.in_features, 2 * NUM_PTS, bias=True)
model.to(device)
optimizer = optim.AdamW(model.parameters(), lr=learning_rate, amsgrad=True)

In [None]:
# 2. train & validate
print("Ready for training...")
best_val_loss = np.inf
for epoch in range(epochs):
    train_loss = train(model, train_dataloader, train_loss_fn, optimizer, device=device)
    val_loss = validate(model, val_dataloader, valid_loss_fn, device=device)
    print("Epoch #{:2}:\ttrain loss: {:5.4f}\tval loss: {:5.4f}".format(epoch, train_loss, val_loss))

    # Every epoch gets its own checkpoint; when the validation loss improves,
    # the weights are additionally written (first) to the "_best" file.
    checkpoint_names = [f"{prj_name}_{epoch}.pth"]
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        checkpoint_names.insert(0, f"{prj_name}_best.pth")

    for checkpoint_name in checkpoint_names:
        with open(checkpoint_name, "wb") as fp:
            torch.save(model.state_dict(), fp)


Предскажем значения для валидационного множества

In [None]:
def dataset_landmark_to_pred(ds):
    """Convert the landmarks of a dataset sample to the prediction format.

    The flat landmark vector (x0, y0, x1, y1, ...) is shifted by the stored
    crop margins and divided by the stored scale coefficient, which undoes
    the crop and the rescaling recorded in the sample.

    Args:
        ds: a sample with the entries ``'landmarks'`` and ``'scale_coef'``
            (both must provide ``.numpy()``), ``'crop_margin_x'`` and
            ``'crop_margin_y'``.

    Returns:
        A NumPy array reshaped to ``(-1, 2)``, one ``(x, y)`` row per landmark.
        The landmarks stored in ``ds`` are not modified.
    """
    # Work on a copy so that the tensor held by the sample stays untouched.
    lm = ds['landmarks'].numpy().copy()
    # Even positions receive the x margin, odd positions the y margin; strided
    # slices replace the original per-element Python loops.
    lm[0::2] += ds['crop_margin_x']
    lm[1::2] += ds['crop_margin_y']
    lm = lm / ds['scale_coef'].numpy()
    return lm.reshape(-1, 2)

def calc_err(idx,landmarks,val_dataset,loss_fn):
    """Return ``loss_fn`` between prediction ``idx`` and the sample's own markup.

    ``landmarks[idx]`` is the prediction; the reference is sample ``idx`` of
    ``val_dataset`` converted with ``dataset_landmark_to_pred``.  The result
    of ``loss_fn`` is returned as a NumPy value.
    """
    predicted = torch.tensor(landmarks[idx])
    annotated = torch.tensor(dataset_landmark_to_pred(val_dataset[idx]))
    return loss_fn(predicted, annotated).numpy()

def show_dataset_image(fn,ds):
    """Plot the image file ``fn`` with the landmarks stored in sample ``ds``.

    The sample's landmarks are first converted with
    ``dataset_landmark_to_pred``; reading, colour conversion and drawing are
    delegated to ``show_predict_image`` so both plots share one code path.
    """
    show_predict_image(fn, dataset_landmark_to_pred(ds))

def show_predict_image(fn,lm):
    """Plot the image file ``fn`` with the landmarks ``lm`` drawn on top."""
    bgr = cv2.imread(fn)
    # OpenCV reads images as BGR, matplotlib expects RGB.
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    plt.imshow(draw_landmarks(rgb, lm))

In [None]:
# Rebuild the architecture without fetching the ImageNet weights: every tensor
# is overwritten by the checkpoint loaded below (load_state_dict is strict).
model = models.resnet50(pretrained=False)
model.fc = nn.Linear(model.fc.in_features, 2 * NUM_PTS, bias=True)
# map_location lets a checkpoint saved on one device be restored on `device`
# (e.g. a GPU checkpoint on a CPU-only run).
model.load_state_dict(torch.load(f"{prj_name}_best.pth", map_location=device))

model.to(device)
model.eval()

In [None]:
%%time
print("Reading data...")
# поменяли трансформацию на валидационную, чтобы убрать рандом
train_dataset = ThousandLandmarksDataset(os.path.join(data_dir, 'train'), val_transforms, split="train",size = data_size) 
train_dataloader = data.DataLoader(train_dataset, batch_size=batch_size, num_workers=0, pin_memory=True,drop_last=False,shuffle=False)# уберем перемешивание
print(len(train_dataset))
train_predictions = predict(model, train_dataloader, device)

In [None]:
# Predictions for the validation split, reusing the unshuffled val_dataloader
# (val_transforms, drop_last=False) built earlier.
val_predictions = predict(model, val_dataloader, device)

Хочется сравнить разметку и результат работы модели

In [None]:
%%time
dataset = train_dataset
landmarks = train_predictions
max_len = len(dataset)
print(max_len)
max_err_idxs = []
for i in range(0,max_len):
    err = calc_err(i,landmarks,dataset,train_loss_fn)
    if err>100:
        max_err_idxs.append(i)
        print(i,err)
print(max_err_idxs)

In [None]:
%%time
dataset = val_dataset
landmarks = val_predictions
max_len = len(dataset)
print(max_len)
max_err_idxs_val = []
for i in range(0,max_len):
    err = calc_err(i,landmarks,dataset,train_loss_fn)
    if err>100:
        max_err_idxs_val.append(i)
        print(i,err)
print(max_err_idxs_val)

In [None]:
# File names (without directories) of every image flagged in either split.
flagged_paths = [train_dataset.image_names[idx] for idx in max_err_idxs]
flagged_paths += [val_dataset.image_names[idx] for idx in max_err_idxs_val]
ignore_image = {os.path.basename(path) for path in flagged_paths}
print(len(ignore_image))
with open("ignore_images_.lst", "wt") as fp:
    # Sorted so that the file content is reproducible between runs; iterating
    # the set directly yields an arbitrary order.
    for s in sorted(ignore_image):
        print(s, file=fp)

In [None]:
NUM_IMAGES_TO_SHOW = 16
NUM_COLS = 4
# Ceiling division: enough rows to hold NUM_IMAGES_TO_SHOW images.
NUM_ROWS = (NUM_IMAGES_TO_SHOW + NUM_COLS - 1) // NUM_COLS

# Predicted landmarks for the first flagged validation images.
plt.figure(figsize=(25, NUM_ROWS * 8))
# Slice by the constant (not a literal 16) so the grid and the number of
# plotted images cannot get out of sync.
for i, idx in enumerate(max_err_idxs_val[:NUM_IMAGES_TO_SHOW], 1):
    plt.subplot(NUM_ROWS, NUM_COLS, i)
    show_predict_image(val_dataset.image_names[idx],val_predictions[idx])
plt.tight_layout()
plt.show()

In [None]:
# Dataset markup for the same flagged validation images.
plt.figure(figsize=(25, NUM_ROWS * 8))
# Slice by NUM_IMAGES_TO_SHOW (not a literal 16) so that the slice always
# matches the NUM_ROWS x NUM_COLS grid.
for i, idx in enumerate(max_err_idxs_val[:NUM_IMAGES_TO_SHOW], 1):
    plt.subplot(NUM_ROWS, NUM_COLS, i)
    show_dataset_image(val_dataset.image_names[idx],val_dataset[idx])
plt.tight_layout()
plt.show()

In [None]:
# Predicted landmarks for the first flagged training images.
plt.figure(figsize=(25, NUM_ROWS * 8))
# Slice by NUM_IMAGES_TO_SHOW (not a literal 16) so that the slice always
# matches the NUM_ROWS x NUM_COLS grid.
for i, idx in enumerate(max_err_idxs[:NUM_IMAGES_TO_SHOW], 1):
    plt.subplot(NUM_ROWS, NUM_COLS, i)
    show_predict_image(train_dataset.image_names[idx],train_predictions[idx])
plt.tight_layout()
plt.show()

In [None]:
# Dataset markup for the same flagged training images.
plt.figure(figsize=(25, NUM_ROWS * 8))
# Slice by NUM_IMAGES_TO_SHOW (not a literal 16) so that the slice always
# matches the NUM_ROWS x NUM_COLS grid.
for i, idx in enumerate(max_err_idxs[:NUM_IMAGES_TO_SHOW], 1):
    plt.subplot(NUM_ROWS, NUM_COLS, i)
    show_dataset_image(train_dataset.image_names[idx],train_dataset[idx])
plt.tight_layout()
plt.show()