In [1]:
import sys
import glob
import torch
import torch.nn.functional as F
import collections
import numpy as np
import pandas as ps
from tqdm import tqdm
from torch.utils.data import DataLoader
from sklearn import metrics

import albumentations as alb
import albumentations.pytorch


sys.path.append("..")
device = torch.device("cuda:1")

In [2]:
from src.models import MulticlassEfficientNet
from src.datasets import ImagesDataset

normalizations = alb.Compose([alb.Normalize(), alb.pytorch.ToTensorV2()])
# normalizations = alb.Compose([alb.ToFloat(max_value=255), alb.pytorch.ToTensorV2()])
# normalizations = alb.Compose([alb.Normalize(max_pixel_value=1), alb.pytorch.ToTensorV2()])  # IT IS WRONG

ttas = [
    # original
    alb.Compose([
        alb.Resize(512, 512),
        normalizations,
    ]),
    # horizontal flipped
    alb.Compose([
        alb.Resize(512, 512),
        alb.HorizontalFlip(p=1),
        normalizations,
    ]),
    # vertical flipped
    alb.Compose([
        alb.Resize(512, 512),
        alb.VerticalFlip(p=1),
        normalizations,
    ]),
    # horizontal & vertical flipped
    alb.Compose([
        alb.Resize(512, 512),
        alb.HorizontalFlip(p=1),
        alb.VerticalFlip(p=1),
        normalizations,
    ]),
]

len(ttas)

  from pandas import Panel

Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working


numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject



4

In [3]:
def average_checkpoints(inputs: list):
    """Loads checkpoints from inputs and returns a model with averaged weights.
    
    Args:
        inputs (List[str]): An iterable of string paths of checkpoints to load from.
    
    Returns:
        A dict of string keys mapping to various values. The 'model' key
        from the returned dict should correspond to an OrderedDict mapping
        string parameter names to torch Tensors.
    """
    params_dict = collections.OrderedDict()
    params_keys = None
    new_state = None
    num_models = len(inputs)

    for f in inputs:
        state = torch.load(
            f,
            map_location=(
                lambda s, _: torch.serialization.default_restore_location(s, 'cpu')
            ),
        )
        # Copies over the settings from the first checkpoint
        if new_state is None:
            new_state = state

        model_params = state['model_state_dict']

        model_params_keys = list(model_params.keys())
        if params_keys is None:
            params_keys = model_params_keys
        elif params_keys != model_params_keys:
            raise KeyError(
                'For checkpoint {}, expected list of params: {}, '
                'but found: {}'.format(f, params_keys, model_params_keys)
            )

        for k in params_keys:
            p = model_params[k]
            if isinstance(p, torch.HalfTensor):
                p = p.float()
            if k not in params_dict:
                params_dict[k] = p.clone()
                # NOTE: clone() is needed in case of p is a shared parameter
            else:
                params_dict[k] += p

    averaged_params = collections.OrderedDict()
    for k, v in params_dict.items():
        averaged_params[k] = v
        averaged_params[k].div_(num_models)
    new_state['model_state_dict'] = averaged_params
    return new_state

In [4]:
swa_state = average_checkpoints([
#     "../logs/efficientnet-b3-c4-continue4/checkpoints/stage1.4.pth",
#     "../logs/efficientnet-b3-c4-continue4/checkpoints/stage1.2.pth",
#     "../logs/efficientnet-b3-c4-continue4/checkpoints/stage1.3.pth",
#     "../logs/efficientnet-b3-c4-continue4/checkpoints/best.pth",
#     "../logs/efficientnet-b3-c4-continue5/checkpoints/best.pth",
    
#     "../logs/efficientnet-b3-c4-grouped4/checkpoints/stage1.10.pth",
#     "../logs/efficientnet-b3-c4-grouped4/checkpoints/stage1.9.pth",
#     "../logs/efficientnet-b3-c4-grouped4/checkpoints/stage1.6.pth",

    "../logs/efficientnet-b3-c4-grouped5/checkpoints/stage1.6.pth",
    "../logs/efficientnet-b3-c4-grouped5/checkpoints/stage1.5.pth",
    "../logs/efficientnet-b3-c4-grouped5/checkpoints/stage1.4.pth",
])

In [5]:
model = MulticlassEfficientNet("efficientnet-b3", 4)
model.load_state_dict(swa_state["model_state_dict"])
model = model.to(device)
model = model.eval()

Loaded pretrained weights for efficientnet-b3


In [6]:
test_images = glob.glob("../data/resized_data/Test/*.jpg")

len(test_images), test_images[:2]

(5000,
 ['../data/resized_data/Test/1787.jpg', '../data/resized_data/Test/0312.jpg'])

In [7]:
tta_preds = []
for transform_idx, transform in enumerate(ttas, 1):

    dataset = ImagesDataset(test_images, transforms=transform)
    loader = DataLoader(dataset, batch_size=64, num_workers=16)

    preds = []
    with torch.no_grad():
        for batch in tqdm(loader, desc=f"TTA index - {transform_idx}"):
            batch = batch.to(device)
            out = 1 - F.softmax(model(batch), dim=1).data.cpu().numpy()[:,0]
#             out = torch.sigmoid().detach().cpu().numpy().flatten()
            preds.append(out)

    preds = np.concatenate(preds)
    tta_preds.append(preds)

TTA index - 1: 100%|██████████| 79/79 [01:10<00:00,  1.12it/s]
TTA index - 2: 100%|██████████| 79/79 [01:10<00:00,  1.12it/s]
TTA index - 3: 100%|██████████| 79/79 [01:10<00:00,  1.12it/s]
TTA index - 4: 100%|██████████| 79/79 [01:10<00:00,  1.12it/s]


In [8]:
avg_pred = np.mean(np.stack(tta_preds), 0)
avg_pred.shape, avg_pred

((5000,),
 array([0.52554893, 0.9713402 , 0.5211836 , ..., 0.6499492 , 0.97256297,
        0.61747885], dtype=float32))

In [9]:
submission = ps.DataFrame.from_dict({
    "Id": [file.rsplit("/", 1)[1] for file in test_images],
    "Label": avg_pred
}).sort_values(by="Id").reset_index(drop=True)

print(submission.shape)
submission.head()

(5000, 2)


Unnamed: 0,Id,Label
0,0001.jpg,0.05234
1,0002.jpg,0.625522
2,0003.jpg,0.516279
3,0004.jpg,0.638676
4,0005.jpg,0.965906


In [10]:
submission.to_csv("../submissions/c4_en_b3_hv+flip_swa_group2.csv", index=False)