In [2]:
# Uncomment to install albumentations
#!pip install -U git+https://github.com/albu/albumentations --no-cache-dir

In [3]:
import os
import pickle
import random
import sys
from argparse import ArgumentParser

import albumentations as albu
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import tqdm
from torch.nn import functional as fnn
from torch.utils import data
from torchvision import transforms

from hack_utils import NUM_PTS, CROP_SIZE
from hack_utils import ScaleMinSideToSize, CropCenter, TransformByKeys
from hack_utils import ThousandLandmarksDataset
from hack_utils import restore_landmarks_batch, create_submission

# Seed every RNG used in this notebook so that "Restart & Run All" repeats the same run.
# The cuDNN flags below only make kernel selection deterministic; without seeding,
# weight initialisation, shuffling and augmentation sampling still differ between runs.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Request deterministic cuDNN kernels and turn off the autotuner, which may otherwise
# pick different algorithms from run to run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [4]:
def train(model, loader, loss_fn, optimizer, device):
    """Run one optimisation pass over ``loader`` and return the mean batch loss.

    Args:
        model: network mapping an image batch to flattened landmark predictions.
        loader: iterable of dict batches with ``"image"`` and ``"landmarks"`` entries.
        loss_fn: callable invoked as ``loss_fn(pred, target, reduction="mean")``.
        optimizer: optimizer whose parameters are updated after every batch.
        device: device the images are moved to before the forward pass.

    Returns:
        ``np.mean`` of the per-batch loss values.
    """
    model.train()
    batch_losses = []
    progress = tqdm.tqdm(loader, total=len(loader), desc="training...")
    for sample in progress:
        inputs = sample["image"].to(device)  # B x 3 x CROP_SIZE x CROP_SIZE
        targets = sample["landmarks"]  # B x (2 * NUM_PTS), stays on the CPU

        # Predictions are brought back to the CPU so the loss is computed next to the targets.
        outputs = model(inputs).cpu()  # B x (2 * NUM_PTS)
        batch_loss = loss_fn(outputs, targets, reduction="mean")
        batch_losses.append(batch_loss.item())

        # Standard update step: clear stale gradients, backpropagate, apply.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

    return np.mean(batch_losses)

In [5]:
def validate(model, loader, loss_fn, device):
    """Evaluate ``model`` on ``loader`` without updating it; return the mean batch loss.

    Args:
        model: network mapping an image batch to flattened landmark predictions.
        loader: iterable of dict batches with ``"image"`` and ``"landmarks"`` entries.
        loss_fn: callable invoked as ``loss_fn(pred, target, reduction="mean")``.
        device: device the images are moved to before the forward pass.

    Returns:
        ``np.mean`` of the per-batch loss values.
    """
    model.eval()
    batch_losses = []
    progress = tqdm.tqdm(loader, total=len(loader), desc="validation...")
    for sample in progress:
        inputs = sample["image"].to(device)
        targets = sample["landmarks"]

        # Only the forward pass runs under no_grad; the loss is evaluated on CPU tensors.
        with torch.no_grad():
            outputs = model(inputs).cpu()
        batch_loss = loss_fn(outputs, targets, reduction="mean")
        batch_losses.append(batch_loss.item())

    return np.mean(batch_losses)

In [6]:
def predict(model, loader, device):
    """Predict landmarks for every sample in ``loader`` in original-image coordinates.

    Args:
        model: network mapping an image batch to flattened landmark predictions.
        loader: iterable of dict batches with ``"image"``, ``"scale_coef"``,
            ``"crop_margin_x"`` and ``"crop_margin_y"`` entries; ``loader.dataset``
            must support ``len``.
        device: device the images are moved to before the forward pass.

    Returns:
        ``np.ndarray`` of shape ``(len(loader.dataset), NUM_PTS, 2)``. Rows for samples
        the loader never yields (e.g. with ``drop_last=True``) stay zero.
    """
    model.eval()
    predictions = np.zeros((len(loader.dataset), NUM_PTS, 2))
    # Track the write position from the actual batch lengths instead of
    # ``i * loader.batch_size``: ``batch_size`` is None when the loader is built with a
    # ``batch_sampler``, and batches are not guaranteed to all have the same length.
    offset = 0
    for batch in tqdm.tqdm(loader, total=len(loader), desc="test prediction..."):
        images = batch["image"].to(device)

        with torch.no_grad():
            pred_landmarks = model(images).cpu()
        pred_landmarks = pred_landmarks.numpy().reshape((len(pred_landmarks), NUM_PTS, 2))  # B x NUM_PTS x 2

        fs = batch["scale_coef"].numpy()  # B
        margins_x = batch["crop_margin_x"].numpy()  # B
        margins_y = batch["crop_margin_y"].numpy()  # B
        # Undo the scaling and cropping applied by the preprocessing transforms.
        prediction = restore_landmarks_batch(pred_landmarks, fs, margins_x, margins_y)  # B x NUM_PTS x 2

        batch_len = len(pred_landmarks)
        predictions[offset: offset + batch_len] = prediction
        offset += batch_len

    return predictions

In [7]:
# Run configuration: everything a reader might want to tune lives in this cell.
DATA = "data"          # root directory containing the 'train' and 'test' splits
BATCH_SIZE = 848
EPOCHS = 24
LEARNING_RATE = 1e-3
GPU = True             # set to False to force CPU

# Prefix for checkpoint / prediction / submission files. Derived from the settings above
# so the file names cannot drift out of sync with the values actually used.
NAME = f"resneXt50_bs{BATCH_SIZE}_epochs{EPOCHS}"

In [6]:
# 1. prepare data & models
def _build_image_pipeline():
    """Return a fresh preprocessing pipeline shared by the train and validation splits."""
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    steps = [
        ScaleMinSideToSize((CROP_SIZE, CROP_SIZE)),
        CropCenter(CROP_SIZE),
        # The remaining steps only touch the "image" entry of each sample.
        TransformByKeys(transforms.ToPILImage(), ("image",)),
        TransformByKeys(transforms.ToTensor(), ("image",)),
        TransformByKeys(normalize, ("image",)),
    ]
    return transforms.Compose(steps)


# Each split gets its own pipeline instance.
train_transforms = _build_image_pipeline()
val_transforms = _build_image_pipeline()

# Augmentations handed to the training dataset only.
# Currently disabled candidates: ShiftScaleRotate, IAAPerspective, RandomBrightnessContrast,
# GaussNoise(p=0.3), GaussianBlur(p=0.1), CLAHE.
albumentations = [
    albu.HueSaturationValue(p=0.3),
    albu.Blur(p=0.1),
    albu.ChannelShuffle(p=0.2),
]

print("Reading data...")
train_root = os.path.join(DATA, 'train')  # both splits are read from the same directory

train_dataset = ThousandLandmarksDataset(train_root, train_transforms, albumentations, split="train")
train_dataloader = data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
    num_workers=4,
    pin_memory=True,
)

val_dataset = ThousandLandmarksDataset(train_root, val_transforms, split="val")
val_dataloader = data.DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    drop_last=False,
    num_workers=4,
    pin_memory=True,
)

Reading data...
Cook train data from csv...
Chunk... 0... 1... 2... 3... 4... 5... 6... 7... Finish
Convert to tensor... Implementing transforms...
Applying augmentations...
Finish
Cook val data from csv...
Chunk... 0... 1... 2... 3... 4... 5... 6... 7... Finish
Convert to tensor... Implementing transforms...
Finish


In [8]:
# Release unused cached GPU memory held by PyTorch's caching allocator before building the model.
torch.cuda.empty_cache()

In [9]:
print("Creating model...")
# Use the canonical device string "cuda:0": the previous "cuda: 0" (with a space) is
# rejected as an invalid device string by some PyTorch versions. The GPU config flag
# is honoured so that CPU execution can be forced from the configuration cell.
use_cuda = GPU and torch.cuda.is_available()
device = torch.device("cuda:0") if use_cuda else torch.device("cpu")
print(device)

# Pretrained ResNeXt-50 backbone; the classification head is replaced by a linear
# regressor that outputs 2 * NUM_PTS values per image.
model = models.resnext50_32x4d(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 2 * NUM_PTS, bias=True)
# To resume from an earlier checkpoint, load it here (before any DataParallel wrapping):
#   model.load_state_dict(torch.load('resneXt50_bs848_epochs6_best.pth', map_location='cpu'))

if torch.cuda.device_count() > 1:
    print(f'Using {torch.cuda.device_count()} gpus')
    model = nn.DataParallel(model)

model.to(device)

optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, amsgrad=True)
# Alternatives left for reference:
#   optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, amsgrad=False)
#   optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9, nesterov=True) #, weight_decay=0.0005)
# The scheduler is stepped with the validation loss in the training cells.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=2, factor=0.1)
loss_fn = fnn.mse_loss

Creating model...
cpu


Downloading: "https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth" to C:\Users\svizo/.cache\torch\checkpoints\resnext50_32x4d-7cdf4587.pth


HBox(children=(IntProgress(value=0, max=100441675), HTML(value='')))




In [10]:
# Freeze some layers
# When several GPUs are present the model is wrapped in nn.DataParallel, which exposes
# the real network as `.module`; accessing `model.conv1` on the wrapper would raise
# AttributeError, so unwrap first.
backbone = model.module if isinstance(model, nn.DataParallel) else model

# Stem, the whole of layer1 and the first two blocks of layer2 stay fixed.
# (The stem's relu and maxpool have no parameters, so there is nothing to freeze there.)
frozen_modules = [
    backbone.conv1,
    backbone.bn1,
    backbone.layer1,
    backbone.layer2[0],
    backbone.layer2[1],
]

for frozen_module in frozen_modules:
    for param in frozen_module.parameters():
        param.requires_grad = False

In [11]:
# Check number of parameters to train
# Only parameters that still require gradients are counted.
model_total_params = 0
for p in model.parameters():
    if p.requires_grad:
        model_total_params += p.numel()
model_total_params

26111894

In [17]:
# 2. train & validate
print("Ready for training...")
best_val_loss = np.inf

for epoch in range(EPOCHS):
    train_loss = train(model, train_dataloader, loss_fn, optimizer, device=device)
    val_loss = validate(model, val_dataloader, loss_fn, device=device)

    # The plateau scheduler reacts to the validation loss of the epoch just finished.
    scheduler.step(val_loss)

    print("Epoch #{:2}:\ttrain loss: {:5.3}\tval loss: {:5.3}".format(epoch, train_loss, val_loss))
    current_lr = optimizer.param_groups[0]["lr"]
    print(f'lr = {current_lr}')

    # Checkpoint only when the validation loss strictly improves.
    if not val_loss < best_val_loss:
        continue
    best_val_loss = val_loss
    with open(f"{NAME}_best.pth", "wb") as checkpoint_file:
        torch.save(model.state_dict(), checkpoint_file)


training...:   0%|          | 0/371 [00:00<?, ?it/s][A

Ready for training...



training...:   0%|          | 1/371 [00:12<1:19:04, 12.82s/it][A
training...:   1%|          | 2/371 [00:16<1:02:02, 10.09s/it][A
training...:   1%|          | 3/371 [00:21<51:35,  8.41s/it]  [A
training...:   1%|          | 4/371 [00:25<44:27,  7.27s/it][A
training...:   1%|▏         | 5/371 [00:30<39:17,  6.44s/it][A
training...:   2%|▏         | 6/371 [00:34<35:41,  5.87s/it][A
training...:   2%|▏         | 7/371 [00:39<33:09,  5.47s/it][A
training...:   2%|▏         | 8/371 [00:43<31:23,  5.19s/it][A

KeyboardInterrupt: ignored

In [18]:
# 2.5. More train & validate
print("Once again ready for training...")
# Dedicated names for the fine-tuning stage: reassigning EPOCHS / LEARNING_RATE here
# would silently change what the main training cell does if it is re-run afterwards.
FINETUNE_EPOCHS = 3
FINETUNE_LEARNING_RATE = 1e-4

# Fresh optimizer and scheduler: lower learning rate and a gentler decay factor.
optimizer = optim.AdamW(model.parameters(), lr=FINETUNE_LEARNING_RATE, amsgrad=True)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=2, factor=0.5)

# best_val_loss carries over from the main training cell, so the checkpoint is only
# overwritten when fine-tuning beats the best result so far.
for epoch in range(FINETUNE_EPOCHS):
    train_loss = train(model, train_dataloader, loss_fn, optimizer, device=device)
    val_loss = validate(model, val_dataloader, loss_fn, device=device)
    scheduler.step(val_loss)
    print("Epoch #{:2}:\ttrain loss: {:5.3}\tval loss: {:5.3}".format(epoch, train_loss, val_loss))
    print(f'lr = {optimizer.param_groups[0]["lr"]}')
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        with open(f"{NAME}_best.pth", "wb") as fp:
            torch.save(model.state_dict(), fp)



training...:   0%|          | 0/371 [00:00<?, ?it/s][A[A

Once again ready for training...


KeyboardInterrupt: ignored

In [0]:
# Display the lowest validation loss recorded by the training cells above.
best_val_loss

1.7404832643212624

In [19]:
    # 3. predict
    # The test split reuses the validation preprocessing; order is preserved (no shuffling,
    # no dropped samples) so predictions line up with test_dataset.image_names.
    test_dataset = ThousandLandmarksDataset(os.path.join(DATA, 'test'), val_transforms, split="test")
    test_dataloader = data.DataLoader(
        test_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        drop_last=False,
        num_workers=4,
        pin_memory=True,
    )

    # Restore the best checkpoint written by the training cells.
    with open(f"{NAME}_best.pth", "rb") as checkpoint_file:
        best_state_dict = torch.load(checkpoint_file, map_location="cpu")
    model.load_state_dict(best_state_dict)

    test_predictions = predict(model, test_dataloader, device)

    # Keep the raw predictions next to the image names for later inspection.
    prediction_dump = {
        "image_names": test_dataset.image_names,
        "landmarks": test_predictions,
    }
    with open(f"{NAME}_test_predictions.pkl", "wb") as dump_file:
        pickle.dump(prediction_dump, dump_file)

    create_submission(DATA, test_predictions, f"{NAME}_submit.csv")

Cook test data from csv...
Chunk... 0... 1... Finish
Convert to tensor... Implementing transforms...
Finish


FileNotFoundError: ignored