Let's replace resnet34 with resnet50.  
Previous model: model_11

In [1]:
import os
import pickle

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import tqdm
from torch.nn import functional as fnn
from torch.utils import data
from torchvision import transforms

from hack_utils import ScaleMinSideToSize, CropCenter, TransformByKeys
from hack_utils import ThousandLandmarksDataset
from hack_utils import restore_landmarks_batch, create_submission
from hack_utils import stqdm

import cv2
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Output directory: the best checkpoint, the pickled test predictions and the
# submission CSV are all written under this path.
DATA_DIR = '../data/'
# Root of the dataset copy; its 'train' and 'test' subdirectories are read below.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
LOCAL_DATA_DIR = '/work/local_data/made_cv/contest1'

In [3]:
# CUDA device indices passed to nn.DataParallel; BATCH_SIZE is scaled by how many are listed.
GPUs = [0, 1, 2]

In [4]:
# Number of landmarks per image; the network head outputs 2 * NUM_PTS values (x, y per point).
NUM_PTS = 971
# Side length used by the scale + centre-crop transforms.
CROP_SIZE = 128
# Global batch size: 256 samples per listed GPU.
BATCH_SIZE = 256 * len(GPUs)
# Initial learning rate for the optimizer.
LR = 1e-3
# Worker count for every DataLoader in this notebook.
NUM_WORKERS = 24
# Prefix for all artefacts written to DATA_DIR (checkpoint, predictions, submission).
MODEL_NAME = 'model_12'

In [5]:
# Reproducibility: the cuDNN flags below only pin kernel selection. Without seeding
# the RNGs, the new head's initialisation, batch shuffling and the random sample
# picked for visualisation still differ from run to run.
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)
# NOTE(review): Python's `random` module is not imported by this notebook and is not
# seeded here; if the installed torchvision draws augmentation parameters from it,
# seed it as well.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [6]:
def train(model, loader, loss_fn, optimizer, device):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: network mapping an image batch to flattened landmark predictions.
        loader: sized iterable of dict batches with "image" and "landmarks" entries.
        loss_fn: callable invoked as ``loss_fn(pred, target, reduction="mean")``.
        optimizer: optimizer that updates the parameters of ``model``.
        device: device the input images are moved to before the forward pass.

    Returns:
        ``np.mean`` over the scalar loss of every batch in the epoch.
    """
    model.train()
    train_loss = []
    for batch in stqdm(loader, total=len(loader), desc="training...", leave=True):
        images = batch["image"].to(device, non_blocking=True)  # B x 3 x CROP_SIZE x CROP_SIZE

        pred_landmarks = model(images)  # B x (2 * NUM_PTS)
        # Move the targets to wherever the predictions live instead of copying the
        # predictions to the CPU: the loss and its backward pass then stay on the
        # model's device and no per-step device-to-host transfer is needed.
        landmarks = batch["landmarks"].to(pred_landmarks.device, non_blocking=True)  # B x (2 * NUM_PTS)
        loss = loss_fn(pred_landmarks, landmarks, reduction="mean")

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() is the only point where the scalar is synchronised back to the host.
        train_loss.append(loss.item())

    return np.mean(train_loss)

In [7]:
def validate(model, loader, loss_fn, device):
    """Evaluate ``model`` on ``loader`` and return the mean per-batch loss.

    Args:
        model: network mapping an image batch to flattened landmark predictions.
        loader: sized iterable of dict batches with "image" and "landmarks" entries.
        loss_fn: callable invoked as ``loss_fn(pred, target, reduction="mean")``.
        device: device the input images are moved to before the forward pass.

    Returns:
        ``np.mean`` over the scalar loss of every batch.
    """
    model.eval()
    val_loss = []
    for batch in stqdm(loader, total=len(loader), desc="validation...", leave=True):
        images = batch["image"].to(device, non_blocking=True)

        # Forward pass and loss both run without autograd bookkeeping, and the loss
        # is computed on the predictions' device rather than after a copy to the CPU.
        with torch.no_grad():
            pred_landmarks = model(images)
            landmarks = batch["landmarks"].to(pred_landmarks.device, non_blocking=True)
            loss = loss_fn(pred_landmarks, landmarks, reduction="mean")
        val_loss.append(loss.item())

    return np.mean(val_loss)

In [8]:
def predict(model, loader, device):
    """Predict landmarks for every sample in ``loader`` in original-image coordinates.

    Args:
        model: network mapping an image batch to flattened landmark predictions.
        loader: sized iterable with a ``dataset`` attribute, yielding dict batches with
            "image", "scale_coef", "crop_margin_x" and "crop_margin_y" entries.
        device: device the input images are moved to before the forward pass.

    Returns:
        Array of shape ``(len(loader.dataset), NUM_PTS, 2)`` filled in iteration order.
    """
    model.eval()
    predictions = np.zeros((len(loader.dataset), NUM_PTS, 2))
    # Running write position. Tracking it explicitly (instead of i * loader.batch_size)
    # keeps the output correct when batch_size is None (batch_sampler in use) or when
    # a batch other than the last one is short.
    offset = 0
    for batch in stqdm(loader, total=len(loader), desc="test prediction...", leave=True):
        images = batch["image"].to(device)

        with torch.no_grad():
            pred_landmarks = model(images).cpu()
        pred_landmarks = pred_landmarks.numpy().reshape((len(pred_landmarks), NUM_PTS, 2))  # B x NUM_PTS x 2

        fs = batch["scale_coef"].numpy()  # B
        margins_x = batch["crop_margin_x"].numpy()  # B
        margins_y = batch["crop_margin_y"].numpy()  # B
        # Undo the scale/crop preprocessing using the per-sample values carried in the batch.
        prediction = restore_landmarks_batch(pred_landmarks, fs, margins_x, margins_y)  # B x NUM_PTS x 2

        batch_len = len(prediction)
        predictions[offset: offset + batch_len] = prediction
        offset += batch_len

    return predictions

In [None]:
# Training-time preprocessing and augmentation, applied to each sample in the listed order.
train_transforms = transforms.Compose([
    # Project helpers from hack_utils: rescale, then centre-crop to CROP_SIZE.
    # NOTE(review): presumably these also adjust the landmarks and record the
    # scale/margins used later by predict() — confirm in hack_utils.
    ScaleMinSideToSize((CROP_SIZE, CROP_SIZE)),
    CropCenter(CROP_SIZE),
    # Everything below is applied to the "image" entry only.
    TransformByKeys(transforms.ToPILImage(), ("image",)),
    # Photometric augmentation.
    TransformByKeys(transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2), ("image",)),
    TransformByKeys(transforms.ToTensor(), ("image",)),
    # NOTE(review): mean/std look like dataset-specific channel statistics — confirm their origin.
    TransformByKeys(transforms.Normalize(mean=[0.4782, 0.3758, 0.3277], std=[0.3077, 0.2652, 0.2521]), ("image",)),
    # Random rectangular erasing on the normalised tensor, applied with probability 0.5.
    TransformByKeys(transforms.RandomErasing(p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3)), ("image",)),
])

In [None]:
# Evaluation-time preprocessing: the same resize/crop/normalise steps as training,
# without ColorJitter and RandomErasing.
test_transforms = transforms.Compose([
    ScaleMinSideToSize((CROP_SIZE, CROP_SIZE)),
    CropCenter(CROP_SIZE),
    # Everything below is applied to the "image" entry only.
    TransformByKeys(transforms.ToPILImage(), ("image",)),
    TransformByKeys(transforms.ToTensor(), ("image",)),
    # Must use the same mean/std as train_transforms.
    TransformByKeys(transforms.Normalize(mean=[0.4782, 0.3758, 0.3277], std=[0.3077, 0.2652, 0.2521]), ("image",)),
])

In [None]:
# Both splits are read from the same 'train' directory; the `split` argument selects the
# subset (the loader reports 315144 train and 78786 val lines out of 393930).
# The validation split uses the augmentation-free test_transforms.
train_dataset = ThousandLandmarksDataset(os.path.join(LOCAL_DATA_DIR, 'train'), train_transforms, split="train")
val_dataset = ThousandLandmarksDataset(os.path.join(LOCAL_DATA_DIR, 'train'), test_transforms, split="val")

Total lines (without header): 393930
Loading train dataset (315144 lines)


HBox(children=(IntProgress(value=0, max=315144), HTML(value='')))


Total lines (without header): 393930
Loading val dataset (78786 lines)


HBox(children=(IntProgress(value=0, max=78786), HTML(value='')))




In [None]:
# Training batches are shuffled and the trailing incomplete batch is dropped;
# validation keeps dataset order and every sample.
train_dataloader = data.DataLoader(train_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, pin_memory=True,
                                   shuffle=True, drop_last=True)
val_dataloader = data.DataLoader(val_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, pin_memory=True,
                                 shuffle=False, drop_last=False)

In [None]:
# Primary device. nn.DataParallel requires the wrapped module's parameters to live on
# device_ids[0], so derive the index from GPUs instead of hard-coding 0; fall back to
# the CPU when CUDA is unavailable or no GPU is listed.
device = torch.device(f"cuda:{GPUs[0]}") if torch.cuda.is_available() and GPUs else torch.device("cpu")
device

device(type='cuda', index=0)

In [None]:
# ResNet-50 backbone initialised from torchvision's pretrained weights.
model = models.resnet50(pretrained=True)
# Replace the classification head with a regression head: 2 outputs per landmark.
model.fc = nn.Linear(model.fc.in_features, 2 * NUM_PTS, bias=True)

# Wrap for multi-GPU data parallelism over the devices listed in GPUs.
# Note: state_dict() of the wrapper is what gets checkpointed and reloaded below.
model = nn.DataParallel(model, device_ids=GPUs)

model.to(device)

DataParallel(
  (module): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
      

In [None]:
# AdamW (AMSGrad variant) over all model parameters.
optimizer = optim.AdamW(model.parameters(), lr=LR, weight_decay=0.01, amsgrad=True)
# Halve the learning rate when the monitored value (the validation loss passed to
# lr_scheduler.step in the training loop) stops decreasing, with a patience of 1 epoch.
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=1)
# Functional MSE; train()/validate() call it with reduction="mean".
loss_fn = fnn.mse_loss

In [None]:
# Main training loop: one train pass and one validation pass per epoch.
N_EPOCHS = 50
best_val_loss = np.inf
for epoch in range(N_EPOCHS):
    train_loss = train(model, train_dataloader, loss_fn, optimizer, device=device)
    val_loss = validate(model, val_dataloader, loss_fn, device=device)
    # The plateau scheduler is driven by the validation loss.
    lr_scheduler.step(val_loss)
    print("Epoch #{:2}:\ttrain loss: {:.5f}\tval loss: {:.5f}".format(epoch, train_loss, val_loss))
    # Keep only the checkpoint with the lowest validation loss seen so far
    # (the file is overwritten on every improvement).
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        with open(os.path.join(DATA_DIR, f"{MODEL_NAME}_best.pth"), "wb") as fp:
            torch.save(model.state_dict(), fp)

HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch # 0:	train loss: 199.02148	val loss: 14.87672


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch # 1:	train loss: 7.54943	val loss: 4.93664


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch # 2:	train loss: 4.86735	val loss: 3.65977


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch # 3:	train loss: 4.09229	val loss: 3.17053


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch # 4:	train loss: 3.65468	val loss: 2.95561


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch # 5:	train loss: 3.28813	val loss: 2.92250


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch # 6:	train loss: 3.06327	val loss: 2.46810


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch # 7:	train loss: 2.90490	val loss: 2.47427


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch # 8:	train loss: 2.80344	val loss: 2.31413


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch # 9:	train loss: 2.69297	val loss: 2.35746


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch #10:	train loss: 2.61243	val loss: 2.18132


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch #11:	train loss: 2.53749	val loss: 2.11509


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch #12:	train loss: 2.46126	val loss: 2.12230


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch #13:	train loss: 2.40371	val loss: 2.08205


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch #14:	train loss: 2.34544	val loss: 2.07477


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch #15:	train loss: 2.31871	val loss: 2.06650


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch #16:	train loss: 2.25978	val loss: 2.06174


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch #17:	train loss: 2.25479	val loss: 1.97144


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch #18:	train loss: 2.23378	val loss: 1.99810


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch #19:	train loss: 2.14566	val loss: 2.05527


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch #20:	train loss: 1.98272	val loss: 1.90754


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch #21:	train loss: 1.92421	val loss: 1.88604


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch #22:	train loss: 1.88477	val loss: 1.97408


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch #23:	train loss: 1.86439	val loss: 1.86356


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch #24:	train loss: 1.84017	val loss: 1.90063


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch #25:	train loss: 1.80891	val loss: 1.83851


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch #26:	train loss: 1.78866	val loss: 1.84557


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch #27:	train loss: 1.76101	val loss: 1.83282


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch #28:	train loss: 1.73969	val loss: 1.87238


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch #29:	train loss: 1.72311	val loss: 1.82877


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='validation...', max=103, style=ProgressStyle(description_widt…


Epoch #30:	train loss: 1.71165	val loss: 1.84644


HBox(children=(IntProgress(value=0, description='training...', max=410, style=ProgressStyle(description_width=…

In [26]:
# Display the lowest validation loss recorded by the training loop.
f'Best validation loss: {best_val_loss}'

'Best validation loss: 1.7659767854560926'

In [None]:
# Restore the best checkpoint written by the training loop into the current
# (DataParallel-wrapped) model; tensors are first loaded onto the CPU.
with open(os.path.join(DATA_DIR, f"{MODEL_NAME}_best.pth"), "rb") as fp:
    best_state_dict = torch.load(fp, map_location="cpu")
    model.load_state_dict(best_state_dict)

In [None]:
# Test set uses the augmentation-free transforms; order is preserved and no sample is
# dropped so predictions line up with test_dataset.image_names.
test_dataset = ThousandLandmarksDataset(os.path.join(LOCAL_DATA_DIR, 'test'), test_transforms, split="test")
test_dataloader = data.DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, pin_memory=True,
                                  shuffle=False, drop_last=False)

In [None]:
# Predict landmarks for the whole test set and persist them, together with the image
# names, so the visualisation cells can reload them without re-running inference.
test_predictions = predict(model, test_dataloader, device)
with open(os.path.join(DATA_DIR, f"{MODEL_NAME}_test_predictions.pkl"), "wb") as fp:
    pickle.dump({"image_names": test_dataset.image_names,
                 "landmarks": test_predictions}, fp)

In [None]:
# Write the submission CSV; the best validation loss is embedded in the file name.
# NOTE(review): create_submission comes from hack_utils — what it reads from DATA_DIR
# is not visible here.
create_submission(DATA_DIR, test_predictions,
                 os.path.join(DATA_DIR, f"{MODEL_NAME}_submit_val_loss_{best_val_loss:.5f}.csv"))

In [None]:
# Visualisation settings.
# Same pickle path the prediction cell writes to.
TEST_PREDICTIONS_FILENAME = os.path.join(DATA_DIR, f"{MODEL_NAME}_test_predictions.pkl")
NUM_IMAGES_TO_SHOW = 16
NUM_COLS = 4
# Ceiling division: add one extra row when the images do not fill the last row.
NUM_ROWS = NUM_IMAGES_TO_SHOW // NUM_COLS + int(NUM_IMAGES_TO_SHOW % NUM_COLS != 0)

In [None]:
def draw_landmarks(image, landmarks):
    """Draw every landmark as a small circle on ``image`` and return it.

    Args:
        image: array handed to ``cv2.circle``; it is drawn on in place.
        landmarks: iterable of 2-element numeric NumPy arrays ``(x, y)``.

    Returns:
        The same ``image`` object that was passed in.
    """
    for point in landmarks:
        # `np.int` was removed in NumPy 1.24; the builtin `int` is the dtype it aliased.
        # .tolist() yields plain Python ints, which cv2 accepts on every version
        # (NumPy scalar coordinates are rejected by some OpenCV builds).
        x, y = point.astype(int).tolist()
        # Positional args: radius=1, colour, thickness=1, lineType=-1.
        # NOTE(review): if a filled dot was intended, -1 belongs in the thickness slot — confirm.
        cv2.circle(image, (x, y), 1, (128, 0, 128), 1, -1)
    return image

In [None]:
# Reload the predictions pickled by the prediction cell (a file produced by this
# notebook; do not point this at pickles from untrusted sources).
with open(TEST_PREDICTIONS_FILENAME, "rb") as fp:
    results = pickle.load(fp)
image_names = results["image_names"]
landmarks = results["landmarks"]

print("Images:", len(image_names))
print("Landmarks shape:", landmarks.shape)

# Fail early if there are fewer images than the grid is configured to show.
if len(image_names) < NUM_IMAGES_TO_SHOW:
    raise RuntimeError(f"Choose less images to show, you have only {len(image_names)}")

In [None]:
# Show a random sample of test images with their predicted landmarks in a
# NUM_ROWS x NUM_COLS grid.
random_idxs = np.random.choice(len(image_names), size=min(NUM_IMAGES_TO_SHOW, len(image_names)), replace=False)

plt.figure(figsize=(25, NUM_ROWS * 8))
for i, idx in enumerate(random_idxs, 1):
    image = cv2.imread(image_names[idx])
    if image is None:
        # cv2.imread returns None for a missing/unreadable file instead of raising;
        # fail here with the offending path rather than with an opaque cvtColor error.
        raise FileNotFoundError(f"Could not read image: {image_names[idx]}")
    # OpenCV loads BGR; matplotlib expects RGB.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = draw_landmarks(image, landmarks[idx])

    plt.subplot(NUM_ROWS, NUM_COLS, i)
    plt.imshow(image)

plt.tight_layout()
plt.show()