In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#    for filename in filenames:
#        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# !ls

In [None]:
# %%bash
# set -m
# git clone https://github.com/sachdevkartik/DeepLense.git
# cd DeepLense && git checkout kartik_contribution
# cd ..
# mv DeepLense/Transformers_Classification_DeepLense_Kartik_Sachdev/* .
# rm -rf DeepLense

In [None]:
# %%bash
# pip3 install --upgrade lightly einops vit-pytorch>=0.27.0 seaborn>=0.11.0 ipython thop gdown split-folders  protobuf==3.20.* e2cnn==0.1.9


# scikit-learn 
# opencv-python>=4.1.1 Pillow>=8.2.0 PyYAML>=5.3.1

In [None]:
# ray[tune] \
# ray[air] \


In [None]:
# !pip uninstall -y numpy

In [None]:
# !pip install numpy

In [None]:
from __future__ import print_function
import os
import time
import copy
import json
import yaml

import torch.nn as nn
import torch.optim as optim
from torchinfo import summary
import torchvision
from typing import *

In [None]:
from utils.util import *
from config.data_config import DATASET
from utils.dataset import DefaultDatasetSetupSSL
from self_supervised.losses.contrastive_loss import (
    ContrastiveLossEuclidean,
    ContrastiveLossEmbedding,
    SimCLR_Loss,
    NegativeCosineSimilarity,
)
from self_supervised.losses.sym_neg_cos_sim_loss import SymNegCosineSimilarityLoss

from models.modules.head import BYOLProjectionHead, BYOLPredictionHead
from utils.scheduler import cosine_schedule
from torch.utils.data import DataLoader, random_split
from einops.layers.torch import Rearrange
from config.cvt_config import CvT_CONFIG
from models.transformer_zoo import TransformerModels
import math
from transformers import get_cosine_schedule_with_warmup
from utils.inference import InferenceSSL
from utils.trainer.finetune import finetune

In [None]:
class ArgParser(object):
    """Container for the notebook's run settings.

    Stands in for a command-line parser: every option is a keyword argument
    whose default is the value previously hard-coded here, so ``ArgParser()``
    yields exactly the same configuration as before while individual settings
    can now be overridden (e.g. ``ArgParser(batch_size=32, ci=True)``).

    Args:
        dataset_name: key used to look up the dataset entry (e.g. "Model_II").
        save: directory passed on as the dataset destination directory.
        batch_size: batch size for all data loaders.
        epochs_pretrain: number of self-supervised pre-training epochs.
        epochs_finetune: number of fine-tuning epochs.
        train_config: name of the training configuration.
        cuda: whether a CUDA device is requested.
        num_workers: worker count handed to the data loaders.
        ci: when True, training loops stop after the first batch (smoke test).
    """

    def __init__(
        self,
        dataset_name: str = "Model_II",
        save: str = "data",
        batch_size: int = 64,
        epochs_pretrain: int = 15,
        epochs_finetune: int = 20,
        train_config: str = "CvT",
        cuda: bool = True,
        num_workers: int = 20,
        ci: bool = False,
    ) -> None:
        self.dataset_name = dataset_name
        self.save = save
        self.batch_size = batch_size
        self.epochs_pretrain = epochs_pretrain
        self.epochs_finetune = epochs_finetune
        self.train_config = train_config
        self.cuda = cuda
        self.num_workers = num_workers
        self.ci = ci

In [None]:
# Unpack the run settings into the module-level names used by the later cells.
args = ArgParser()

# data
dataset_name, dataset_dir = args.dataset_name, args.save
batch_size, num_workers = args.batch_size, args.num_workers

# training schedule
epochs_pretrain, epochs_finetune = args.epochs_pretrain, args.epochs_finetune
train_config_name = args.train_config

# runtime switches
use_cuda, ci = args.cuda, args.ci

In [None]:
# Model / optimizer settings come from the CvT configuration dictionary.
train_config = CvT_CONFIG
optimizer_config = train_config["optimizer_config"]
learning_rate = optimizer_config["lr"]
num_channels = train_config["channels"]
network_type = train_config["network_type"]
image_size = train_config["image_size"]

# Locations used for logs and for the input data.
log_dir_base = "logger"
extracted_dir = "/kaggle/input/gsocml4scimodel2/Model_II"

# Class labels of the selected dataset.
classes = DATASET[f"{dataset_name}"]["classes"]
num_classes = len(classes)

# Environment setup: output directory, RNG seeding, compute device.
make_directories([dataset_dir])
seed_everything(seed=42)
device = get_device(use_cuda=use_cuda, cuda_idx=0)

In [None]:
# logging: each run gets its own timestamped directory under `log_dir_base`
now = time.localtime()
current_time = time.strftime("%Y-%m-%d-%H-%M-%S", now)
log_dir = f"{log_dir_base}/{current_time}"
init_logging_handler(log_dir_base, current_time)

# paths: both checkpoints live in the run's `checkpoint` sub-directory
checkpoint_dir = f"{log_dir}/checkpoint"
pretrained_filename = f"{network_type}_pretrained_{dataset_name}_{current_time}.pt"
finetuned_filename = f"{network_type}_finetune_{dataset_name}_{current_time}.pt"

model_path_pretrained = os.path.join(checkpoint_dir, pretrained_filename)
finetuned_model_path = os.path.join(checkpoint_dir, finetuned_filename)

In [None]:
import albumentations as A
from albumentations.pytorch import ToTensorV2


class DINOTransform:
    """Builds the albumentations pipelines for the DINO global and local views.

    ``get_transforms`` returns three pipelines: two "global" views rendered at
    ``global_crop_size`` and one "local" view rendered at ``local_crop_size``.
    """

    def __init__(self):
        pass

    @staticmethod
    def _build_view(crop_size: int, crop_scale: Tuple[float, float], with_flips: bool):
        """Compose one view: optional flips, resize, random resized crop, rotation, tensor conversion."""
        steps = []
        if with_flips:
            steps.extend([A.HorizontalFlip(p=0.25), A.VerticalFlip(p=0.25)])
        steps.extend(
            [
                A.Resize(crop_size, crop_size, p=1.0),
                # `scale` was previously omitted, so the crop-scale arguments of
                # get_transforms were silently ignored in favour of the library default.
                A.RandomResizedCrop(height=crop_size, width=crop_size, scale=crop_scale),
                A.Rotate(p=0.5),
                ToTensorV2(),
            ]
        )
        return A.Compose(steps)

    def get_transforms(self,
                       global_crop_size: int = 224,
                       global_crop_scale: Tuple[float, float] = (0.4, 1.0),
                       local_crop_size: int = 96,
                       local_crop_scale: Tuple[float, float] = (0.05, 0.4),):
        """Create the view pipelines.

        Args:
            global_crop_size: output side length (pixels) of the two global views.
            global_crop_scale: (min, max) area fraction sampled by the random
                resized crop of the global views.
            local_crop_size: output side length (pixels) of the local view.
            local_crop_scale: (min, max) area fraction sampled by the random
                resized crop of the local view.

        Returns:
            ``[global_transform_0, global_transform_1, local_transform]``.
        """
        # first global crop (with random flips)
        global_transform_0 = self._build_view(
            global_crop_size, global_crop_scale, with_flips=True
        )

        # second global crop (no flips, as in the original pipeline)
        global_transform_1 = self._build_view(
            global_crop_size, global_crop_scale, with_flips=False
        )

        # transformation for the local small crops
        local_transform = self._build_view(
            local_crop_size, local_crop_scale, with_flips=True
        )
        return [global_transform_0, global_transform_1, local_transform]
    

In [None]:
from utils.dataset import visualize_samples_ssl, DeepLenseDatasetSSL
# trainset: built with the list of view pipelines returned by DINOTransform
# (presumably one output view per pipeline — confirm in DeepLenseDatasetSSL)
dino_transform = DINOTransform()
train_transforms = dino_transform.get_transforms()
train_dataset = DeepLenseDatasetSSL(
    destination_dir=dataset_dir,
    transforms=train_transforms,
    mode="train",
    dataset_name=dataset_name,
    download=True,
    channels=1,
)
logging.debug(f"train data: {len(train_dataset)}")

# quick visual sanity check of the generated views
visualize_samples_ssl(
    train_dataset,
    labels_map=classes,
    num_rows_inner=1,
    num_cols_inner=3,
)

In [None]:
# split in train and valid set
# NOTE: `train_dataset` is rebound to the training subset here, so re-running
# this cell without re-creating the dataset splits the already-split subset.
split_ratio = 0.25  # 0.25
valid_len = int(split_ratio * len(train_dataset))
train_len = len(train_dataset) - valid_len

train_dataset, val_set = random_split(train_dataset, [train_len, valid_len])

train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)

val_loader = DataLoader(
    dataset=val_set, batch_size=batch_size, shuffle=True, num_workers=num_workers
)

# Load test dataset
# testset = default_dataset_setup.get_dataset(mode="val")
# test_loader = DataLoader(dataset=testset, batch_size=batch_size, shuffle=True)

# size check
sample = next(iter(train_loader))
# logging uses %-style lazy formatting: an extra argument needs a placeholder,
# otherwise the record fails to format and the value is never logged.
logging.debug("num of classes: %s", num_classes)
logging.debug(sample[0].shape)

In [None]:
from lightly.loss import DINOLoss
from lightly.models.modules import DINOProjectionHead
from lightly.models.utils import deactivate_requires_grad, update_momentum
from lightly.transforms.dino_transform import DINOTransform
from lightly.utils.scheduler import cosine_schedule

In [None]:
class DINO(nn.Module):
    """Student/teacher wrapper around a convolutional backbone for DINO pre-training.

    The first layer of ``backbone`` is replaced in place by a fresh 7x7 stride-2
    convolution taking ``in_channels`` input channels and producing 64 feature
    maps. The student uses the backbone directly; the teacher is a deep copy of
    it whose parameters (and those of its head) have gradients deactivated.

    Args:
        backbone: indexable module container (e.g. ``nn.Sequential``) whose
            element 0 is the input convolution. It is modified in place.
        input_dim: size of the flattened backbone output fed to the heads.
        in_channels: number of image channels (default 1, the previous
            hard-coded value).
        hidden_dim: hidden size of both projection heads (default 512).
        bottleneck_dim: bottleneck size of both projection heads (default 64).
        output_dim: output size of both projection heads (default 2048); the
            loss and any downstream head must be configured with the same size.
    """

    def __init__(
        self,
        backbone,
        input_dim,
        in_channels=1,
        hidden_dim=512,
        bottleneck_dim=64,
        output_dim=2048,
    ):
        super().__init__()

        self.backbone = backbone
        # Swap the stem so the network accepts `in_channels`-channel images.
        self.backbone[0] = nn.Conv2d(
            in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False
        )

        # `student_backbone` is an alias of `backbone` (same module object).
        self.student_backbone = self.backbone
        self.student_head = DINOProjectionHead(
            input_dim, hidden_dim, bottleneck_dim, output_dim, freeze_last_layer=1
        )
        # The teacher starts as an independent copy of the (modified) backbone.
        self.teacher_backbone = copy.deepcopy(self.backbone)
        self.teacher_head = DINOProjectionHead(
            input_dim, hidden_dim, bottleneck_dim, output_dim
        )
        deactivate_requires_grad(self.teacher_backbone)
        deactivate_requires_grad(self.teacher_head)

    def forward(self, x):
        """Return the student head output for the batch ``x``."""
        y = self.student_backbone(x).flatten(start_dim=1)
        z = self.student_head(y)
        return z

    def forward_teacher(self, x):
        """Return the teacher head output for the batch ``x``."""
        y = self.teacher_backbone(x).flatten(start_dim=1)
        z = self.teacher_head(y)
        return z

In [None]:
class DINOTransformer(nn.Module):
    """DINO student/teacher pair built around a backbone that is used unmodified.

    Both projection heads are created with sizes (input_dim, 512, 64, 512).
    The teacher backbone is a deep copy of the student backbone, and gradients
    are deactivated for the teacher backbone and the teacher head.
    """

    def __init__(self, backbone, input_dim):
        super().__init__()
        head_dims = (input_dim, 512, 64, 512)

        # student branch (`student_backbone` aliases `backbone`)
        self.backbone = backbone
        self.student_backbone = self.backbone
        self.student_head = DINOProjectionHead(*head_dims, freeze_last_layer=1)

        # teacher branch
        self.teacher_backbone = copy.deepcopy(self.backbone)
        self.teacher_head = DINOProjectionHead(*head_dims)
        for teacher_part in (self.teacher_backbone, self.teacher_head):
            deactivate_requires_grad(teacher_part)

    def forward(self, x):
        """Student pass: backbone features flattened per sample, then the student head."""
        features = self.student_backbone(x).flatten(start_dim=1)
        return self.student_head(features)

    def forward_teacher(self, x):
        """Teacher pass: backbone features flattened per sample, then the teacher head."""
        features = self.teacher_backbone(x).flatten(start_dim=1)
        return self.teacher_head(features)

In [None]:
# Create the ResNet pretrain model: a ResNet-34 with its final child module
# (the classification layer) dropped serves as the DINO backbone.
resnet = torchvision.models.resnet34()
backbone = nn.Sequential(*list(resnet.children())[:-1])

# Feature width produced by each supported ResNet backbone.
num_ftrs_dict = {
    "resnet18": 512,
    "resnet34": 512,
    "resnet50": 2048,

}

model =  DINO(backbone, input_dim=num_ftrs_dict["resnet34"])
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Run the summary on the device actually selected above; a hard-coded "cuda"
# fails on CPU-only sessions.
summary(model, input_size=(2, 1, 224, 224), device=device)

In [None]:
# Hyper-parameters of the two-stage CvT network (stage 2 mirrors stage 1).
network_config = dict(
    # stage 1
    s1_emb_dim=64,  # dimension
    s1_emb_kernel=7,  # conv kernel size
    s1_emb_stride=4,  # conv stride
    s1_proj_kernel=3,  # attention ds-conv kernel size
    s1_kv_proj_stride=2,  # attention key / value projection stride
    s1_heads=2,  # heads
    s1_depth=2,  # depth
    s1_mlp_mult=3,  # feedforward expansion factor
    # stage 2 (same meaning as above)
    s2_emb_dim=128,
    s2_emb_kernel=3,
    s2_emb_stride=2,
    s2_proj_kernel=3,
    s2_kv_proj_stride=2,
    s2_heads=2,
    s2_depth=2,
    s2_mlp_mult=3,
    # final MLP / regularisation
    mlp_last=256,
    dropout=0.1,
)

In [None]:
# in_features = 128
# # Transformer model
# backbone = TransformerModels(
#     transformer_type=train_config["network_type"],
#     num_channels=train_config["channels"],
#     num_classes=in_features,
#     img_size=image_size,
#     **network_config, # **train_config["network_config"]
# )


# model = DINOTransformer(backbone, input_dim=in_features)
# summary(model, input_size=(2, 1, 224, 224), device="cuda")

In [None]:
# Number of DINO pre-training epochs used by the scheduler and training loop below.
# NOTE(review): this re-assigns the value already unpacked from
# `args.epochs_pretrain` (also 15); keep the two in sync or drop one of them.
epochs_pretrain = 15

In [None]:
# optimizer settings are read from the training configuration
lr = optimizer_config["lr"]
weight_decay = optimizer_config["weight_decay"]
warmup_epochs = optimizer_config["warmup_epoch"]

optimizer_pretrain = optim.AdamW(
    model.parameters(),
    lr=lr,
    weight_decay=weight_decay,
)

# The scheduler is stepped once per batch, so its horizon is counted in batches.
num_train_steps = len(train_loader)
num_warmup_steps = num_train_steps * warmup_epochs
num_training_steps = int(num_train_steps * epochs_pretrain)

# learning rate scheduler: warmup followed by cosine decay
cosine_scheduler = get_cosine_schedule_with_warmup(
    optimizer_pretrain,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=num_training_steps,
)

In [None]:
# DINO loss; `output_dim` has to match the output size of the projection heads.
# The loss is moved to the training device because it also contains parameters.
criterion_pretrain = DINOLoss(output_dim=2048, warmup_teacher_temp_epochs=5).to(device)

In [None]:
import logging

In [None]:
print("Starting Training")

# Best (lowest) summed epoch loss seen so far. It must live outside the epoch
# loop: resetting it every epoch made the "best" check always true, so the
# checkpoint was overwritten after every epoch regardless of quality.
best_loss = float("inf")

for epoch in range(epochs_pretrain):
    total_loss = 0

    # EMA momentum for the teacher update, scheduled from 0.996 towards 1.
    momentum_val = cosine_schedule(epoch, epochs_pretrain, 0.996, 1)
    for batch_idx, batch in enumerate(train_loader):
        # The first three entries of a batch are the augmented views.
        views = batch[:3]

        # Move the teacher towards the student before the forward passes.
        update_momentum(model.student_backbone, model.teacher_backbone, m=momentum_val)
        update_momentum(model.student_head, model.teacher_head, m=momentum_val)
        views = [view.to(device) for view in views]
        # The teacher only sees the first two (global) views; the student sees all.
        global_views = views[:2]
        teacher_out = [model.forward_teacher(view) for view in global_views]
        student_out = [model.forward(view) for view in views]
        loss = criterion_pretrain(teacher_out, student_out, epoch=epoch)
        total_loss += loss.detach()
        loss.backward()

        # We only cancel gradients of student head.
        model.student_head.cancel_last_layer_gradients(current_epoch=epoch)
        optimizer_pretrain.step()
        cosine_scheduler.step()
        optimizer_pretrain.zero_grad()
        if ci:
            # CI smoke test: a single batch is enough.
            break

        if batch_idx % 100 == 0:
            logging.debug(
                f"Epoch [{epoch}/{epochs_pretrain}], Batch [{batch_idx}/{len(train_loader)}], Loss: {loss.item()}"
            )

    if ci:
        break

    if total_loss < best_loss:
        best_loss = total_loss

        # Make sure the checkpoint directory exists before writing into it.
        checkpoint_dir = os.path.dirname(model_path_pretrained)
        if checkpoint_dir:
            os.makedirs(checkpoint_dir, exist_ok=True)
        torch.save(model.state_dict(), model_path_pretrained)

    avg_loss = total_loss / len(train_loader)

    logging.debug(f"epoch: {epoch:>02}, loss: {avg_loss:.5f}")

In [None]:
# class FinetuneClassifier(nn.Module):
#     def __init__(self, backbone: nn.Module, head: nn.Module):
#         super().__init__()

#         # Combine the pretrained model and the projection head
#         self.model = nn.Sequential(*list(backbone.children())[:-1], head)

#     def forward(self, x):
#         x = self.model(x)
#         return x


In [None]:
# class FinetuneClassifier(nn.Module):
#     def __init__(self, backbone, head):
#         super(FinetuneClassifier, self).__init__()
#         self.backbone = backbone
#         self.pool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
#         self.rearrange = Rearrange("... () () -> ...")
#         self.head = head

#     def forward(self, x):
#         z = self.backbone(x)
#         z = self.pool(z)
#         z = self.rearrange(z)
#         z = self.head(z)
#         return z

In [None]:
class FinetuneClassifier(nn.Module):
    """Classifier made of a two-part pretrained backbone and a classification head.

    ``backbone`` must be indexable: element 0 is the feature extractor, whose
    output is flattened per sample, and element 1 is applied to the flattened
    features before ``head`` produces the final output.
    """

    def __init__(self, backbone, head):
        super().__init__()
        self.backbone = backbone
        self.head = head

    def forward(self, x):
        """Run extractor -> flatten -> second backbone stage -> head."""
        extractor, projector = self.backbone[0], self.backbone[1]
        flat_features = extractor(x).flatten(start_dim=1)
        return self.head(projector(flat_features))

In [None]:
# del model
# import gc
# gc.collect()
# torch.cuda.empty_cache()
# print(torch.cuda.memory_allocated())

In [None]:
# Fine-tuning backbone: the pretrained student backbone followed by the student
# projection head (the classifier indexes these two parts as [0] and [1]).
# Alternative kept for reference: nn.Sequential(*list(model.backbone.children())[:-1])
backbone = nn.Sequential(model.student_backbone, model.student_head)

In [None]:
# Classification head; its 2048 input features match the output size of the
# student projection head that ends the fine-tuning backbone.
classification_head = nn.Sequential(
    nn.Linear(2048, 512), # num_ftrs_dict["resnet34"]
    nn.ReLU(),
    nn.BatchNorm1d(512),
    nn.Linear(512, num_classes),)
model = FinetuneClassifier(backbone, classification_head)
model.to(device)
# Summarise on the device the model actually lives on; a hard-coded "cuda"
# fails on CPU-only sessions.
summary(model, input_size=(2, 1, 224, 224), device=device)

In [None]:
import math
learning_rate = 0.001 # 3e-4
weight_decay =  0.01

# optimizer
# A single AdamW instance drives fine-tuning. Previously two optimizers were
# built: an unused one with `weight_decay`, and the one actually used, which
# omitted it and matched only because 0.01 is AdamW's default.
optimizer_finetune = optim.AdamW(
    model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
)
# Backward-compatible alias for the old, unused name.
finetune_optimizer = optimizer_finetune

# The scheduler is stepped per batch, so warmup/total lengths are in batches.
warmup_epochs = 3
num_train_steps = len(train_loader)
num_warmup_steps = num_train_steps * warmup_epochs
num_training_steps = int(num_train_steps * epochs_finetune)

# learning rate scheduler
cosine_scheduler = get_cosine_schedule_with_warmup(
    optimizer_finetune,
    num_warmup_steps=num_warmup_steps,
    num_training_steps=num_training_steps,
)

In [None]:
# Cross-entropy loss used as the criterion for the supervised fine-tuning stage.
finetune_criterion = nn.CrossEntropyLoss()

In [None]:
# Finetune
# Calls the project's `finetune` routine (imported from utils.trainer.finetune)
# with the FinetuneClassifier, the training/validation loaders, the
# cross-entropy criterion, the fine-tuning optimizer and its cosine scheduler.
# NOTE(review): presumably the trained weights are written to
# `finetuned_model_path` — confirm against the `finetune` implementation.
finetune(
    epochs_finetune,
    model,
    device,
    train_loader,
    finetune_criterion,
    optimizer_finetune,
    finetuned_model_path,
    valid_loader=val_loader,
    scheduler=cosine_scheduler,
    # forwards the smoke-test flag; presumably shortens training like the
    # pre-training loop does — confirm in `finetune`
    ci=ci,
)

In [None]:
# Test split of the same dataset, stored in the same directory as the training data.
test_dataset_dir = dataset_dir
# NOTE(review): the test set is built with the random *training* view
# transforms, so evaluation inputs are augmented — confirm this is intended.
test_dataset = DeepLenseDatasetSSL(
    destination_dir=test_dataset_dir,
    transforms=train_transforms,
    mode="test",
    dataset_name=dataset_name,
    download=True,
    channels=1,
)
# Evaluation does not benefit from shuffling; a fixed order keeps the sample
# order stable between runs.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
)

In [None]:
channels = 1

# Keyword settings for the inference helper, gathered in one place.
inference_options = dict(
    labels_map=classes,  # classes
    image_size=image_size,
    channels=channels,
    destination_dir="data",
    log_dir=log_dir,  # log_dir
)

infer_obj = InferenceSSL(
    model,
    test_loader,
    device,
    num_classes,
    test_dataset,
    dataset_name,
    **inference_options,
)

# Evaluate on the test set: ROC plot first, then the confusion matrix.
infer_obj.infer_plot_roc()
infer_obj.generate_plot_confusion_matrix()