# overview
- https://www.guruguru.science/competitions/17/discussions/a39d588e-aff2-4728-8323-b07f15563552/

In [1]:
# default package
import logging
import sys
import os 
import pathlib
import math

In [2]:
# third party package
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torchvision 
import numpy as np
import lightly

In [3]:
# my package
sys.path.append(os.path.join(pathlib.Path().resolve(),"../"))

In [4]:
# reload settings
%load_ext autoreload
%autoreload 2

In [5]:
# logger
logger=logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

In [6]:
# graph settings
sns.set()

In [7]:
def cd_project_root_()->None:
    current=pathlib.Path().cwd()
    if current.stem=="notebooks":
        os.chdir(current.parent)
    logger.info(f"current directory: {pathlib.Path().cwd()}")

cd_project_root_()

INFO:__main__:current directory: /workspaces/load_to_goal/GitHub/kaggle-past-comp/atma-11


## lightly

In [8]:
# config
num_workers = 2
batch_size = 32
seed = 1
epochs = 10
input_size = 224

# dimension of the embeddings
num_ftrs = 512
# dimension of the output of the prediction and projection heads
out_dim = proj_hidden_dim = 512
# the prediction head uses a bottleneck architecture
pred_hidden_dim = 128
# use 2 layers in the projection head
num_mlp_layers = 2

input_dir="data/raw/photos"

In [9]:
# define the augmentations for self-supervised learning
collate_fn = lightly.data.ImageCollateFunction(
    input_size=input_size,
    # require invariance to flips and rotations
    hf_prob=0.5,
    vf_prob=0.5,
    rr_prob=0.5,
    # satellite images are all taken from the same height
    # so we use only slight random cropping
    min_scale=0.5,
    # use a weak color jitter for invariance w.r.t small color changes
    cj_prob=0.2,
    cj_bright=0.1,
    cj_contrast=0.1,
    cj_hue=0.1,
    cj_sat=0.1,
)

# create a lightly dataset for training, since the augmentations are handled
# by the collate function, there is no need to apply additional ones here
dataset_train_simsiam = lightly.data.LightlyDataset(
    input_dir=input_dir
)

# create a dataloader for training
dataloader_train_simsiam = torch.utils.data.DataLoader(
    dataset_train_simsiam,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn,
    drop_last=True,
    num_workers=num_workers
)

# create a torchvision transformation for embedding the dataset after training
# here, we resize the images to match the input size during training and apply
# a normalization of the color channel based on statistics from imagenet
test_transforms = torchvision.transforms.Compose([
    torchvision.transforms.Resize((input_size, input_size)),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(
        mean=lightly.data.collate.imagenet_normalize['mean'],
        std=lightly.data.collate.imagenet_normalize['std'],
    )
])



# create a lightly dataset for embedding
dataset_test = lightly.data.LightlyDataset(
    input_dir=input_dir,
    transform=test_transforms
)



# create a dataloader for embedding
dataloader_test = torch.utils.data.DataLoader(
    dataset_test,
    batch_size=batch_size,
    shuffle=False,
    drop_last=False,
    num_workers=num_workers
)

In [10]:
resnet = torchvision.models.resnet18(pretrained=False)
backbone = nn.Sequential(*list(resnet.children())[:-1])

# create the SimSiam model using the backbone from above
model = lightly.models.SimSiam(
    backbone,
    num_ftrs=num_ftrs,
    proj_hidden_dim=pred_hidden_dim,
    pred_hidden_dim=pred_hidden_dim,
    out_dim=out_dim,
    #num_mlp_layers=num_mlp_layers
)



In [11]:
criterion = lightly.loss.SymNegCosineSimilarityLoss()

# scale the learning rate
lr = 0.05 * batch_size / 256
# use SGD with momentum and weight decay
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=lr,
    momentum=0.9,
    weight_decay=5e-4
)

In [12]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)

avg_loss = 0.
avg_output_std = 0.
for e in range(epochs):

    for (x0, x1), _, _ in dataloader_train_simsiam:

        # move images to the gpu
        x0 = x0.to(device)
        x1 = x1.to(device)

        # run the model on both transforms of the images
        # the output of the simsiam model is a y containing the predictions
        # and projections for each input x
        y0, y1 = model(x0, x1)

        # backpropagation
        loss = criterion(y0, y1)
        loss.backward()

        optimizer.step()
        optimizer.zero_grad()

        # calculate the per-dimension standard deviation of the outputs
        # we can use this later to check whether the embeddings are collapsing
        output, _ = y0
        output = output.detach()
        output = torch.nn.functional.normalize(output, dim=1)

        output_std = torch.std(output, 0)
        output_std = output_std.mean()

        # use moving averages to track the loss and standard deviation
        w = 0.9
        avg_loss = w * avg_loss + (1 - w) * loss.item()
        avg_output_std = w * avg_output_std + (1 - w) * output_std.item()

    # the level of collapse is large if the standard deviation of the l2
    # normalized output is much smaller than 1 / sqrt(dim)
    collapse_level = max(0., 1 - math.sqrt(out_dim) * avg_output_std)
    # print intermediate results
    print(f'[Epoch {e:3d}] '
        f'Loss = {avg_loss:.2f} | '
        f'Collapse Level: {collapse_level:.2f} / 1.00')

[Epoch   0] Loss = -0.84 | Collapse Level: 0.09 / 1.00
[Epoch   1] Loss = -0.89 | Collapse Level: 0.10 / 1.00
[Epoch   2] Loss = -0.91 | Collapse Level: 0.11 / 1.00
[Epoch   3] Loss = -0.91 | Collapse Level: 0.09 / 1.00
[Epoch   4] Loss = -0.90 | Collapse Level: 0.10 / 1.00
[Epoch   5] Loss = -0.91 | Collapse Level: 0.09 / 1.00
[Epoch   6] Loss = -0.91 | Collapse Level: 0.10 / 1.00
[Epoch   7] Loss = -0.91 | Collapse Level: 0.08 / 1.00
[Epoch   8] Loss = -0.91 | Collapse Level: 0.07 / 1.00
[Epoch   9] Loss = -0.91 | Collapse Level: 0.07 / 1.00


In [14]:
torch.save(model.backbone.state_dict(),"data/processed/ssl/211113_simsiam.pth")

## check

In [23]:
output.shape

torch.Size([32, 512])

In [36]:
torch.std(output, 0).shape

torch.Size([512])

In [26]:
output.max(dim=1)

torch.return_types.max(
values=tensor([0.1136, 0.1041, 0.0994, 0.1229, 0.1057, 0.1083, 0.1237, 0.1041, 0.1255,
        0.1390, 0.1141, 0.1241, 0.1267, 0.1095, 0.1613, 0.1691, 0.1140, 0.1211,
        0.1041, 0.1018, 0.1017, 0.1752, 0.1172, 0.1056, 0.0994, 0.0969, 0.1471,
        0.1115, 0.1263, 0.1370, 0.1080, 0.0592], device='cuda:0'),
indices=tensor([433, 480, 314, 452,  35, 375, 364, 247, 364, 108, 386, 364, 368, 386,
        108, 224, 364, 224, 314, 483,  35, 393, 108,  35, 112, 323, 157, 314,
        157, 157, 364, 252], device='cuda:0'))

In [17]:
output_std

tensor(0.0399, device='cuda:0')

In [19]:
loss.item()

-0.8644692897796631

In [29]:
tensor=torch.randn(2,3)

In [34]:
torch.nn.functional.normalize(tensor, dim=1)

tensor([[ 0.9647,  0.1292, -0.2295],
        [-0.8550,  0.4210,  0.3028]])

In [32]:
tensor

tensor([[ 1.5962,  0.2138, -0.3797],
        [-1.7328,  0.8532,  0.6137]])

In [39]:
y0[0].shape

torch.Size([32, 512])

In [40]:
y0[1].shape

torch.Size([32, 512])

In [41]:
model

SimSiam(
  (backbone): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True