# Pseudocode

In [None]:
# Notebook tooling: lab_black is the Black code-formatting extension for JupyterLab cells.
# autoreload in mode 2 re-imports edited project modules before each cell execution.
%load_ext lab_black
%load_ext autoreload
%autoreload 2

In [None]:
import sys

# Put the parent directory on the import path (once) so that the project
# packages located one level above this notebook can be imported.
if ".." not in sys.path:
    sys.path.append("..")

In [None]:
import torch

from lit_modules.litdatamodules.lit_lamem import LitLaMemDataModule
from datasets.LaMem.LaMemDataset import LaMem

In [None]:
from time import time

# Time how long it takes to build the data module and fetch one batch.
start = time()
root = "/home/soroush1/projects/def-kohitij/soroush1/pretrain-imagenet/data/LaMem/lamem_images/lamem"

data = LitLaMemDataModule(
    root=root,
    num_workers=1,
    batch_size=128,
    dev_mode=False,
    desired_image_size=224,
)
data.setup("train")

# Pull a single (images, scores) batch from the training loader.
train_dl = data.train_dataloader()
x, y = next(iter(train_dl))

# Elapsed wall-clock seconds, followed by the shape of the image batch.
print(f"{abs(time() - start)}")
print(f"{x.size() = }")

In [None]:
import gc

# Release the data module and its loader only. The sample batch (`x`, `y`)
# is deliberately kept alive: the following cell draws the images it plots
# from that batch, and deleting it here made a top-to-bottom run of the
# notebook fail with a NameError.
del data, train_dl

gc.collect()

In [None]:
from torchvision.transforms import ToPILImage
import matplotlib.pyplot as plt
import numpy as np

# Draw 20 distinct random positions from the batch and gather the matching
# images and memorability scores.
indices = torch.randperm(len(x))[:20]
selected_images, selected_scores = x[indices], y[indices]

# Converter applied to each image tensor right before it is displayed.
to_pil = ToPILImage()

# 4 x 5 grid of panels, flattened so that panels can be addressed by position.
fig, axes = plt.subplots(4, 5, figsize=(20, 16))
axes = axes.ravel()

for i, (img, score) in enumerate(zip(selected_images, selected_scores)):
    panel = axes[i]
    img = to_pil(img)
    panel.imshow(np.asarray(img))
    panel.set_title(f"Memorability Score = {score:.2f}")
    panel.axis("off")  # no ticks or frame around the picture

plt.tight_layout()
plt.show()

In [None]:
from lit_modules.litmodels.lit_vgg import VGGRegression

from lit_modules.litmodels.lit_resnet import LitResNet50

import torch.nn.functional as F

In [None]:
# Smoke test: build the ResNet-50 regressor and push one random batch through it.
model = LitResNet50(learning_rate=1e-4)

# Random input laid out as (batch size, channels, height, width).
batch_shape = (8, 3, 224, 224)
input_tensor = torch.randn(*batch_shape)

# Forward pass; print the regression output for inspection.
output = model(input_tensor)
print(output)

In [None]:
from tqdm.notebook import tqdm

In [None]:
import gc

# Forward-only sanity check: run VGGRegression over the dev-mode training
# loader and print the MSE loss of every batch. No optimisation step is taken.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

root = "/home/soroush1/projects/def-kohitij/soroush1/pretrain-imagenet/data/LaMem/lamem_images/lamem/"

data = LitLaMemDataModule(root=root, num_workers=1, batch_size=32, dev_mode=True)
data.setup("train")

# The loader is consumed directly by the loop below; no separate first-batch
# fetch is needed because the loop variables would overwrite it immediately.
train_dl = data.train_dataloader()

model = VGGRegression(output_dim=1)
model.to(device)

for x, y in tqdm(train_dl):
    x = x.to(device)
    y = y.to(device)

    output = model(x)

    # Drop only the trailing axis. A bare squeeze() would also remove the
    # batch axis when a batch holds a single sample, leaving a 0-d prediction
    # compared against a 1-element target.
    # NOTE(review): assumes the model returns (batch, 1) and y is (batch,) — confirm.
    loss = F.mse_loss(output.squeeze(-1), y)

    print(f"{loss = }")

# Drop every reference that can keep device memory alive. `output` and `loss`
# are included because they are tensors produced on `device`, and `loss` still
# carries its autograd graph.
del x, y, output, loss, data, train_dl
del model

# Clear GPU cache
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# Collect garbage
gc.collect()

# Lightning

In [4]:
# Notebook tooling: lab_black is the Black code-formatting extension for JupyterLab cells.
# autoreload in mode 2 re-imports edited project modules before each cell execution.
%load_ext lab_black
%load_ext autoreload
%autoreload 2

In [2]:
import sys

# Put the parent directory on the import path (once) so that the project
# packages located one level above this notebook can be imported.
if ".." not in sys.path:
    sys.path.append("..")

In [3]:
import torch

from lit_modules.litdatamodules.lit_lamem import LitLaMemDataModule
from lit_modules.litmodels.lit_resnet import LitResNet50

from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import DeviceStatsMonitor, StochasticWeightAveraging

from lightning.pytorch.tuner import Tuner
from lightning.pytorch.loggers import TensorBoardLogger

from lightning import Trainer

from tqdm.notebook import tqdm

### Experiments 

- Version0
- Version1
- Version2
    - Experiment:
        - First Run on LaMem Dataset
        - Finding the Batch size
        - Finding the Learning Rate
    - Result:
        - Batch size = 96
        - LR = 1e-4

- Version7
    - Experiment:
        - First Run on LaMem Dataset

In [None]:
# Data module, TensorBoard logger and model for the LaMem training run.
data_path = "../data/LaMem/lamem_images/lamem/"
datamodule = LitLaMemDataModule(
    root=data_path,
    batch_size=128,
    num_workers=47,
    dev_mode=False,
)
tb_logger = TensorBoardLogger("./resnet_lalem")

# Same value as the LR-finder suggestion displayed in the last cell of this notebook.
model = LitResNet50(learning_rate=1.9054607179632464e-05)

# Single-node GPU run that stops early once `val_loss` has not improved for
# 10 validation checks.
# Options left disabled: overfit_batches=0.1, check_val_every_n_epoch=1,
# log_every_n_steps=1.
trainer = Trainer(
    accelerator="gpu",
    devices="auto",
    num_nodes=1,
    strategy="auto",
    max_epochs=500,
    fast_dev_run=False,
    gradient_clip_val=0.5,
    callbacks=[EarlyStopping(monitor="val_loss", mode="min", patience=10)],
    logger=tb_logger,
    profiler="simple",
)

trainer.fit(model=model, datamodule=datamodule)

Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
/lustre06/project/6067616/soroush1/pretrain-imagenet/.venv/lib/python3.11/site-packages/lightning/fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /lustre06/project/6067616/soroush1/pretrain-imagenet ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]

  | Name      | Type               | Params | In sizes          | Out sizes
-------------------------------------------------------------------

Epoch 0: 100%|████████████████████████████████████████████████████████████| 352/352 [01:51<00:00,  3.17it/s, v_num=5, training_loss_step=0.0141]
Validation: |                                                                                                             | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|██▋                                                                               | 1/30 [00:00<00:00, 127.08it/s][A
Validation DataLoader 0:   7%|█████▌                                                                             | 2/30 [00:00<00:00, 34.62it/s][A
Validation DataLoader 0:  10%|████████▎                                                                          | 

In [None]:
# Learning-rate search: same data and logger as the training run, with a
# trainer that uses stochastic weight averaging and overfits a single batch.
data_path = "../data/LaMem/lamem_images/lamem/"
datamodule = LitLaMemDataModule(
    root=data_path,
    batch_size=128,
    num_workers=47,
    dev_mode=False,
)
tb_logger = TensorBoardLogger("./resnet_lalem")

model = LitResNet50(learning_rate=0.01)

# Options left disabled: the EarlyStopping(monitor="val_loss", mode="min",
# patience=10) callback, check_val_every_n_epoch=1, log_every_n_steps=1.
trainer = Trainer(
    accelerator="gpu",
    devices="auto",
    num_nodes=1,
    strategy="auto",
    max_epochs=200,
    fast_dev_run=False,
    overfit_batches=1,
    gradient_clip_val=0.5,
    callbacks=[StochasticWeightAveraging(swa_lrs=1e-2)],
    logger=tb_logger,
)

# No trainer.fit(...) here; this cell only drives the tuner.
# Batch-size scaling (tuner.scale_batch_size(model, datamodule=datamodule,
# mode="power")) is likewise left disabled.
tuner = Tuner(trainer)

# Run the learning-rate finder.
lr_finder = tuner.lr_find(model, datamodule=datamodule)

# Raw results recorded by the finder.
print(lr_finder.results)

# Suggested learning rate; it is not written back to model.hparams.learning_rate here.
new_lr = lr_finder.suggestion()

# Finder plot with the suggested point marked.
fig = lr_finder.plot(suggest=True)

In [None]:
# Re-read the suggested learning rate and redraw the finder plot with the
# suggestion marked on it.
new_lr = lr_finder.suggestion()
fig = lr_finder.plot(suggest=True)

In [8]:
# Display the learning rate returned by lr_finder.suggestion().
new_lr

1.9054607179632464e-05