# SimCLR implementation #

Implementation following: https://theaisummer.com/simclr/

In [1]:
# !pip install ssl_remote_sensing@git+https://github.com/AlexanderLontke/ssl-remote-sensing.git@feature/pipeline

In [4]:
# Log in to your W&B account
import wandb

wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33malontke[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [5]:
import os

import torch
from torchvision.datasets import EuroSAT
from torch.utils.data import DataLoader
from torch.multiprocessing import cpu_count
from pytorch_lightning.callbacks import GradientAccumulationScheduler, ModelCheckpoint
from pytorch_lightning import Trainer

from ssl_remote_sensing.pretext_tasks.simclr.utils import reproducibility
from ssl_remote_sensing.pretext_tasks.simclr.training import SimCLRTraining
from ssl_remote_sensing.pretext_tasks.simclr.augmentation import Augment
from ssl_remote_sensing.pretext_tasks.simclr.config import get_simclr_config
from ssl_remote_sensing.data.get_eurosat import get_eurosat_normalizer, get_eurosat_dataloader

In [6]:
means = [87.81586935763889, 96.97416420717593, 103.98142336697049]
stds = [51.67849701591506, 34.908630837585186, 29.465280593587384]

In [7]:
# Machine setup
available_gpus = torch.cuda.device_count()
save_model_path = os.path.join(os.getcwd(), "saved_models/")
print("available_gpus:", available_gpus)

# Model Setup
train_config = get_simclr_config()

# Run setup
filename = "SimCLR_ResNet18_adam"
save_name = filename + ".ckpt"
resume_from_checkpoint = False
wandb.init(project="ssl-remote-sensing-simclr", config=train_config.__dict__)


reproducibility(train_config)

model = SimCLRTraining(
    config=train_config,
    feat_dim=512,
)

# Setup data loading and augments
eurosat_normalizer = get_eurosat_normalizer()
transform = Augment(train_config.img_size, normalizer=eurosat_normalizer)
data_loader = get_eurosat_dataloader(
    root="./",
    transform=transform,
    batchsize=train_config.batch_size,
    numworkers=os.cpu_count(),
    split=False,
)


# Needed to get simulate a large batch size
accumulator = GradientAccumulationScheduler(scheduling={0: 1})

checkpoint_callback = ModelCheckpoint(
    filename=filename,
    dirpath=save_model_path,
    every_n_epochs=2,
    save_last=True,
    save_top_k=2,
    monitor="InfoNCE loss_epoch",
    mode="min",
)

if resume_from_checkpoint:
    trainer = Trainer(
        callbacks=[accumulator, checkpoint_callback],
        gpus=available_gpus,
        max_epochs=train_config.epochs,
        resume_from_checkpoint=train_config.checkpoint_path,
    )
else:
    trainer = Trainer(
        callbacks=[accumulator, checkpoint_callback],
        gpus=available_gpus,
        max_epochs=train_config.epochs,
    )

trainer.fit(model, data_loader)
trainer.save_checkpoint(save_name)
wandb.save(checkpoint_callback.best_model_path)
print(f"Best model is stored under {checkpoint_callback.best_model_path}")

available_gpus: 0


  rank_zero_deprecation(
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: /Users/alexanderlontke/Documents/Uni/St. Gallen/HS_22_23/deep_learning/ssl-remote-sensing/notebooks/lightning_logs

  | Name  | Type          | Params
----------------------------------------
0 | model | AddProjection | 11.5 M
1 | loss  | InfoNceLoss   | 0     
----------------------------------------
11.5 M    Trainable params
0         Non-trainable params
11.5 M    Total params
46.024    Total estimated model params size (MB)


Optimizer Adam, Learning Rate 0.001, Effective batch size 64


Training: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


Best model is stored under 
