**This Kaggle notebook was used to train our custom SimCLR model.**

In [1]:
!pip install lightning lightly

Collecting lightning
  Downloading lightning-2.0.5-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m20.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting lightly
  Downloading lightly-1.4.12-py3-none-any.whl (632 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m632.6/632.6 kB[0m [31m32.1 MB/s[0m eta [36m0:00:00[0m
Collecting croniter<1.5.0,>=1.3.0 (from lightning)
  Downloading croniter-1.4.1-py2.py3-none-any.whl (19 kB)
Collecting dateutils<2.0 (from lightning)
  Downloading dateutils-0.6.12-py2.py3-none-any.whl (5.7 kB)
Collecting deepdiff<8.0,>=5.7.0 (from lightning)
  Downloading deepdiff-6.3.1-py3-none-any.whl (70 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.7/70.7 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
Collecting inquirer<5.0,>=2.10.0 (from lightning)
  Downloading inquirer-3.1.3-py3-none-any.whl (18 kB)
Collecting lightning-cloud>=0.5.37 (from light

In [2]:
import tarfile

# Location of the compressed dataset archive attached to the notebook.
file_path = "/kaggle/input/two4two-small/two4two_small.tar.xz"
# Directory the archive is unpacked into (passed to extract() below).
output_path = "/kaggle/working/"
def extract(tar_file, path):
    """Extract the training-data archive into a directory.

    Args:
        tar_file: Path to the tar archive.
        path: Directory the archive contents are extracted into.

    Prints a confirmation naming ``path`` on success. If ``tar_file`` is not
    a readable tar archive, prints a notice and extracts nothing.
    """
    # Validate before opening: tarfile.open() raises on a non-tar file, which
    # would make the "not a tar file" message unreachable.
    if not tarfile.is_tarfile(tar_file):
        print("The tar file you entered is not a tar file")
        return

    # The context manager closes the archive even if extraction fails.
    with tarfile.open(tar_file) as opened_tar:
        opened_tar.extractall(path)
    # Report the directory that was actually used, not a module-level global.
    print(f"Extracted to {path}")
        
# Unpack the dataset archive into the working directory.
extract(file_path, output_path)

Extracted to /kaggle/working/


In [2]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torchvision
from PIL import Image
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import normalize

from lightly.data import LightlyDataset
from lightly.transforms import SimCLRTransform, utils

caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


In [3]:
# Working directory; note the trailing slash, later cells concatenate onto it.
output_path = "/kaggle/working/"

# Per-split directories of the extracted dataset. These three names are not
# referenced by the other cells shown in this notebook; the data module is
# given the dataset root instead.
train_path = "/kaggle/working/two4two_small/train"
test_path = "/kaggle/working/two4two_small/test"
val_path = "/kaggle/working/two4two_small/validation"

In [27]:
# Run configuration.
# NOTE(review): num_workers is not passed to Two4TwoDataModule in the cells
# shown here, so it does not affect the data loaders — confirm intent.
num_workers = 8
# Batch size handed to the data module.
batch_size = 256
# Seed passed to pl.seed_everything.
seed = 1
# Number of training epochs; also the horizon of the cosine LR schedule.
max_epochs = 500
# Side length (pixels) used by both image transforms.
input_size = 128
# NOTE(review): not referenced by any other cell shown here.
num_ftrs = 32

In [5]:
# Seed the random number generators through Lightning so runs are repeatable.
pl.seed_everything(seed)

1

In [6]:
# Training-time augmentation from lightly. The training step unpacks two views
# (x0, x1) per sample, which is what this transform is expected to produce.
# NOTE(review): presumably vf_prob / rr_prob are the vertical-flip and
# random-rotation probabilities, and cj_prob=0.0 / random_gray_scale=0.0
# switch off colour jitter and grayscale conversion — confirm against the
# lightly SimCLRTransform documentation.
transform = SimCLRTransform(
    input_size=input_size, vf_prob=0.5, rr_prob=0.5, cj_prob=0.0, random_gray_scale=0.0
)

# We create a torchvision transformation for embedding the dataset after
# training: a deterministic resize to input_size x input_size, tensor
# conversion, and normalization with the ImageNet statistics that lightly
# ships. It is not used by the cells shown in this notebook.
test_transform = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize((input_size, input_size)),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(
            mean=utils.IMAGENET_NORMALIZE["mean"],
            std=utils.IMAGENET_NORMALIZE["std"],
        ),
    ]
)

In [7]:
import os
import os
import tarfile
from pathlib import Path

from torchvision.io import read_image, ImageReadMode
from PIL import Image


import pytorch_lightning as L
import pandas as pd
import torch
from torch.utils.data import random_split, DataLoader
from torchvision import transforms


class Two4TwoDataset(torch.utils.data.Dataset):
    """Map-style dataset yielding ``(image, mask, label)`` triples.

    The samples of a split are listed in
    ``<data_input_dir>/<mode>/parameters.jsonl``. For a row with id ``X`` the
    image is read from ``X.png`` and its mask from ``X_mask.png`` in the same
    directory.
    """

    def __init__(self,
                 data_input_dir,
                 mode='train',
                 transform=None,
                 target_transform=None):
        """Index one split of the dataset.

        Args:
            data_input_dir: Root directory containing one sub-directory per split.
            mode: Name of the split sub-directory to read.
            transform: Callable applied to the RGB PIL image; falls back to
                ``transforms.ToTensor()`` when ``None``.
            target_transform: Stored on the instance; ``__getitem__`` does not
                apply it.
        """
        self.transform = transforms.ToTensor() if transform is None else transform
        self.target_transform = target_transform

        self.root_dir = os.path.join(data_input_dir, mode)
        self.parameters_file = os.path.join(self.root_dir, 'parameters.jsonl')

        # Cache the column positions once so item access can use iloc.
        self.parameters = self.create_df(mode)
        columns = self.parameters.columns
        self.id_col_idx = columns.get_loc("id")
        self.label_col_idx = columns.get_loc("label")

    def __len__(self):
        """Return the number of rows in the parameters table."""
        return len(self.parameters)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            A tuple ``(image, mask, label)``: the transformed image, the mask
            converted to RGB and then to a tensor, and the binary label.
        """
        row = idx.tolist() if torch.is_tensor(idx) else idx
        sample_id = self.parameters.iloc[row, self.id_col_idx]

        image_file = os.path.join(self.root_dir, sample_id + '.png')
        image = self.transform(Image.open(image_file).convert('RGB'))

        # The mask always goes through a plain ToTensor, never self.transform.
        mask_file = os.path.join(self.root_dir, sample_id + '_mask.png')
        to_tensor = transforms.ToTensor()
        mask = to_tensor(Image.open(mask_file).convert('RGB'))

        label = self.parameters.iloc[row, self.label_col_idx]
        return image, mask, label

    def create_df(self, mode):
        """Read ``parameters.jsonl`` and add a binary ``label`` column.

        ``label`` is 0 where ``obj_name`` equals ``'sticky'`` and 1 otherwise.
        The ``mode`` argument is accepted but not used.
        """
        frame = pd.read_json(self.parameters_file, lines=True)
        frame['label'] = frame['obj_name'].apply(
            lambda name: 1 if name != 'sticky' else 0)
        return frame


class Two4TwoDataModule(L.LightningDataModule):
    """Lightning data module serving the Two4Two splits.

    Wraps ``Two4TwoDataset`` for the ``train``, ``validation`` and ``test``
    sub-directories of ``data_dir`` and builds the matching data loaders.
    """

    def __init__(self, data_dir: str = "./", working_path: str = None, batch_size: int = 32, transform=None, mask_transform=None, num_workers: int = 0):
        """Store the configuration; no data is touched here.

        Args:
            data_dir: Dataset root directory, or a tar archive to be unpacked
                by ``prepare_data``.
            working_path: Extraction target for an archive. When ``None`` the
                archive is unpacked next to itself (see ``prepare_data``).
            batch_size: Batch size for every data loader.
            transform: Image transform handed to each dataset. When ``None``
                a ``ToTensor`` + ``Normalize`` pipeline is used.
            mask_transform: Stored on the instance; not used by this class.
            num_workers: Worker processes per data loader (default 0, i.e.
                loading happens in the main process).
        """
        super().__init__()
        self.two2two__predict = None
        self.two2two_val = None
        self.two2two_test = None
        self.two2two_train = None
        self.data_dir = data_dir
        self.working_path = working_path
        self.batch_size = batch_size
        self.num_workers = num_workers
        if transform is not None:
            self.transform = transform
        else:
            # ToTensor has to run first: the dataset hands the transform a PIL
            # image, and Normalize only accepts tensors.
            self.transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307,), (0.3081,)),
            ])
        # Always define the attribute so reading it can never raise
        # AttributeError when no mask transform was supplied.
        self.mask_transform = mask_transform

    def prepare_data(self):
        """Unpack ``data_dir`` if it is a tar archive; no-op for a directory.

        Raises:
            ValueError: If ``data_dir`` is neither a directory nor a tar file.
        """
        file = Path(self.data_dir)
        if file.is_dir():
            return
        if not tarfile.is_tarfile(file):
            raise ValueError("Data directory is not a tarfile or directory")

        # The context manager closes the archive even if extraction fails.
        with tarfile.open(self.data_dir, "r") as tar:
            if self.working_path is None:
                # Strip the last file extension and unpack into that path.
                self.data_dir = os.path.splitext(self.data_dir)[0]
                tar.extractall(path=self.data_dir)
            else:
                # NOTE(review): data_dir keeps pointing at the archive in this
                # branch, while setup() joins split names onto data_dir —
                # confirm callers pass the extracted directory afterwards.
                tar.extractall(path=self.working_path)

    def setup(self, stage: str):
        """Create the datasets required for ``stage``.

        ``"fit"`` builds the train and validation sets, ``"validate"`` the
        validation set only, ``"test"`` the test set, and ``"predict"`` a
        prediction set that reads the ``test`` split.
        """
        if stage == "fit":
            self.two2two_train = Two4TwoDataset(self.data_dir, mode='train', transform=self.transform)

        # A stand-alone validation run needs the validation set as well.
        if stage in ("fit", "validate"):
            self.two2two_val = Two4TwoDataset(self.data_dir, mode='validation', transform=self.transform)

        if stage == "test":
            self.two2two_test = Two4TwoDataset(self.data_dir, mode='test', transform=self.transform)

        if stage == "predict":
            self.two2two__predict = Two4TwoDataset(self.data_dir, mode='test', transform=self.transform)

    def train_dataloader(self):
        """Shuffled training loader; the last incomplete batch is dropped."""
        return DataLoader(self.two2two_train, batch_size=self.batch_size, num_workers=self.num_workers, shuffle=True, drop_last=True,)

    def val_dataloader(self):
        """Loader over the validation split, in stored order."""
        return DataLoader(self.two2two_val, batch_size=self.batch_size, num_workers=self.num_workers)

    def test_dataloader(self):
        """Loader over the test split, in stored order."""
        return DataLoader(self.two2two_test, batch_size=self.batch_size, num_workers=self.num_workers)

    def predict_dataloader(self):
        """Loader over the dataset that ``setup("predict")`` created."""
        # Use the predict dataset: two2two_test is only populated by the
        # "test" stage and would still be None here.
        return DataLoader(self.two2two__predict, batch_size=self.batch_size, num_workers=self.num_workers)

In [29]:
from lightly.loss import NTXentLoss
from lightly.models.modules.heads import SimCLRProjectionHead


class SimCLRModel(pl.LightningModule):
    """SimCLR model: ResNet-18 backbone, projection head and NT-Xent loss."""

    def __init__(self, max_epochs=None):
        """Build the network.

        Args:
            max_epochs: Horizon (in epochs) of the cosine learning-rate
                schedule. When ``None`` the attached trainer's ``max_epochs``
                is used, so the model no longer depends on a notebook-level
                global. Pass it explicitly if the trainer runs without a
                fixed epoch limit.
        """
        super().__init__()
        self.max_epochs = max_epochs

        # create a ResNet backbone and remove the classification head
        resnet = torchvision.models.resnet18()
        self.backbone = nn.Sequential(*list(resnet.children())[:-1])

        # The head maps backbone features (width of the removed fc layer's
        # input) to a 128-dimensional projection.
        hidden_dim = resnet.fc.in_features
        self.projection_head = SimCLRProjectionHead(hidden_dim, hidden_dim, 128)

        self.criterion = NTXentLoss()

    def forward(self, x):
        """Return the projection of a batch of images."""
        h = self.backbone(x).flatten(start_dim=1)
        z = self.projection_head(h)
        return z

    def training_step(self, batch, batch_idx):
        """Compute and log the contrastive loss between the two views.

        ``batch`` is unpacked as ``((x0, x1), _, _)``: two augmented views of
        each image followed by two entries that are ignored here.
        """
        (x0, x1), _, _ = batch
        z0 = self.forward(x0)
        z1 = self.forward(x1)
        loss = self.criterion(z0, z1)
        self.log("train_loss_ssl", loss, prog_bar=True, on_step=True, on_epoch=True)
        return loss

    def configure_optimizers(self):
        """Return SGD with momentum plus a cosine-annealing LR schedule."""
        optim = torch.optim.SGD(
            self.parameters(), lr=7e-2, momentum=0.9, weight_decay=5e-4
        )
        # Prefer the explicit horizon; otherwise follow the trainer's length.
        if self.max_epochs is not None:
            t_max = self.max_epochs
        else:
            t_max = self.trainer.max_epochs
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, t_max)
        return [optim], [scheduler]

In [15]:
# Build the data module on the extracted dataset root, using the SimCLR
# augmentation as the image transform.
data_module = Two4TwoDataModule(data_dir="/kaggle/working/two4two_small",
                                batch_size=batch_size, 
                                transform=transform)

**We had to save checkpoints and rerun multiple times with different accounts because our daily allowance of computing resources on Kaggle ran out; hence all the different checkpoints and commented-out lines.**

In [17]:
# Show the checkpoint files currently present in the working directory.
os.listdir('/kaggle/working/checkpoints')

['epoch=40-run-4.ckpt',
 'epoch=3-run-6-train_loss_ssl=4.299095153808594.ckpt',
 'epoch=0-run-4.ckpt',
 'epoch=16-train_loss=0.00.ckpt',
 'epoch=2-run-7-train_loss_ssl=4.302505016326904.ckpt',
 'epoch=1-train_loss=0.00.ckpt',
 'epoch=0-train_loss=0.00.ckpt',
 'epoch=0-val_loss=0.00.ckpt',
 'epoch=7-train_loss=0.00.ckpt',
 'epoch=43-run-5-train_loss_ssl=4.282283782958984.ckpt',
 'epoch=14-train_loss=0.00.ckpt',
 'epoch=51-run-7-train_loss_ssl=4.299927711486816.ckpt',
 'epoch=8-train_loss=0.00.ckpt']

In [30]:
# Restore the model from the run-7 checkpoint. output_path already ends with
# "/", so the concatenated path contains a double slash.
model = SimCLRModel.load_from_checkpoint(output_path + '/checkpoints/epoch=51-run-7-train_loss_ssl=4.299927711486816.ckpt')
# Disabled: pickle the whole restored model object to the working directory.
#torch.save(model, output_path + "full_model4.pth")

In [33]:
# Resume training from the most recent checkpoint. The commented-out lines
# record which checkpoint each earlier run started from, with the running
# epoch total noted at the end of the line.
# NOTE(review): the weights are restored with load_from_checkpoint rather than
# by passing ckpt_path to trainer.fit, so presumably the optimizer and the LR
# schedule start fresh on every run — confirm this is intended.
#model = SimCLRModel()
#model = torch.load(output_path + "full_model.pth") #load the 20 epoch-trained model by jonas
#model = SimCLRModel.load_from_checkpoint(output_path + '/checkpoints/epoch=16-train_loss=0.00.ckpt')
#model = SimCLRModel.load_from_checkpoint(output_path + '/checkpoints/epoch=14-train_loss=0.00.ckpt') 
#model = SimCLRModel.load_from_checkpoint(output_path + '/checkpoints/epoch=8-train_loss=0.00.ckpt') # currently at 20+16+14+8 = 68
#model = SimCLRModel.load_from_checkpoint(output_path + '/checkpoints/epoch=40-run-4.ckpt') # currently at 20+16+14+8+40 = 108
#model = SimCLRModel.load_from_checkpoint(output_path + '/checkpoints/epoch=43-run-5-train_loss_ssl=4.282283782958984.ckpt') #108+43 = 151
#model = SimCLRModel.load_from_checkpoint(output_path + '/checkpoints/epoch=3-run-6-train_loss_ssl=4.299095153808594.ckpt') #151+3 = 154
model = SimCLRModel.load_from_checkpoint(output_path + '/checkpoints/epoch=51-run-7-train_loss_ssl=4.299927711486816.ckpt') #154+51 = 205
# Checkpoints are written to the shared checkpoints directory; the run index
# in the filename keeps files from different runs apart.
checkpoint_callback = pl.callbacks.ModelCheckpoint(dirpath='/kaggle/working/checkpoints/',filename='{epoch}-run-8-{train_loss_ssl}') # increase the run index at each run!
# Train on a single GPU for up to max_epochs epochs.
trainer = pl.Trainer(max_epochs=max_epochs, devices=1, accelerator="gpu", callbacks=[checkpoint_callback])
trainer.fit(model, data_module)

  rank_zero_warn("You passed in a `val_dataloader` but have no `validation_step`. Skipping val loop.")
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


Training: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [None]:
# Save the entire model object (not just its state_dict) to the working
# directory.
torch.save(model, output_path + "full_model3.pth")