In [1]:
import torch
import numpy as np
from torchvision import transforms as T
from lightly.transforms import SimCLRTransform, DINOTransform, MAETransform, MoCoV2Transform, utils
from datasets import create_dataset, create_bootstrap_dataloader, create_dataset_targets_provided, \
    create_stratified_bootstrap_dataloader
from models import *
from tqdm import tqdm
import pytorch_lightning as pl
import os
import copy
import gc
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader





# Możliwość reimportowania bez restartowania kernela

In [2]:
# Enable IPython's autoreload extension in mode 2 so that imported modules are
# re-imported before each cell runs; edits to the project's .py files are then
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Zapisywanie checkpointów

In [3]:
from pytorch_lightning.callbacks import Callback

class SaveAtEpochsCallback(Callback):
    """Lightning callback that writes a checkpoint after selected training epochs.

    Args:
        save_epochs: Iterable of 1-based epoch numbers after which a checkpoint
            should be written (e.g. ``[2, 3, 5]``).
        dirpath: Directory that receives the checkpoint files. It is created
            when the callback is constructed if it does not exist yet.
    """

    def __init__(self, save_epochs, dirpath="checkpoints"):
        super().__init__()
        self.dirpath = dirpath
        self.save_epochs = set(save_epochs)
        os.makedirs(dirpath, exist_ok=True)

    def on_train_epoch_end(self, trainer, pl_module):
        """Save ``model_epoch_<n>.ckpt`` when the epoch that just ended is selected."""
        # trainer.current_epoch is 0-based, while save_epochs holds 1-based numbers.
        current_epoch = trainer.current_epoch + 1
        if current_epoch not in self.save_epochs:
            return

        path = os.path.join(self.dirpath, f"model_epoch_{current_epoch}.ckpt")
        trainer.save_checkpoint(path)
        print(f"Zapisano model po epoce {current_epoch}: {path}")

# Losowość

In [4]:
SEED = 42


def seed_everything(seed: int = 42):
    """Seed the random number generators and put cuDNN into deterministic mode.

    Args:
        seed: Value used for every generator and for ``PYTHONHASHSEED``.
    """
    # pytorch_lightning seeds `random`, `numpy` and `torch` in a single call,
    # so those generators are not seeded individually here.
    pl.seed_everything(seed)

    # Disable cuDNN autotuning and request deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    os.environ['PYTHONHASHSEED'] = str(seed)

    # Explicit CUDA seeding; possibly redundant with pl.seed_everything
    # (not verified here), kept to be safe.
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


seed_everything(SEED)



Seed set to 42


# Transformacje

In [5]:
# ImageNet channel statistics used by every pipeline in this cell.
_imagenet_norm = {
    "mean": utils.IMAGENET_NORMALIZE["mean"],
    "std": utils.IMAGENET_NORMALIZE["std"],
}

# Deterministic evaluation pipeline: fixed resize, tensor conversion, normalization.
test_transform = T.v2.Compose([
    T.Resize((224, 224)),
    T.v2.ToImage(),
    T.v2.ToDtype(torch.float32, scale=True),
    T.Normalize(**_imagenet_norm),
])

# Augmented pipeline for the classification train split: random crop + horizontal flip.
scratch_transform = T.v2.Compose([
    T.RandomResizedCrop((224, 224)),
    T.RandomHorizontalFlip(),
    T.v2.ToImage(),
    T.v2.ToDtype(torch.float32, scale=True),
    T.Normalize(**_imagenet_norm),
])

# Self-supervised pipelines from `lightly`. BYOL and SimCLR are configured
# identically (both use SimCLRTransform with the same arguments).
# Each transform receives its own copy of the normalization dict.
byol_transform = SimCLRTransform(
    input_size=(224, 224),
    vf_prob=0.5,
    rr_prob=0.5,
    normalize=dict(_imagenet_norm),
)
simclr_transform = SimCLRTransform(
    input_size=(224, 224),
    vf_prob=0.5,
    rr_prob=0.5,
    normalize=dict(_imagenet_norm),
)
mae_transform = MAETransform(
    input_size=(224, 224),
    normalize=dict(_imagenet_norm),
)



# MAE

## Datasets

### CIFAR10

In [6]:
# CIFAR10 splits for the MAE experiments.
# NOTE(review): the trailing positional arguments ("data", False) are defined by
# datasets.create_dataset — presumably the data root and a flag; confirm there.
(
    train_full_cifar10_mae,
    train_ssl_cifar10_mae,
    train_cifar10_mae,
    test_cifar10_mae,
    targets_cifar10,
) = create_dataset(
    "CIFAR10",
    0.9,  # per the printed sizes, 90% of the train set becomes the SSL split
    mae_transform,
    scratch_transform,
    test_transform,
    "data",
    False,
)

Length of entire train dataset:  50000
Length of SSL train dataset:  45000
Length of classification train dataset:  5000
Length of test dataset:  10000


### CIFAR100

In [6]:
# CIFAR100 splits for the MAE experiments.
# NOTE(review): the trailing positional arguments ("data", False) are defined by
# datasets.create_dataset — presumably the data root and a flag; confirm there.
(
    train_full_cifar100_mae,
    train_ssl_cifar100_mae,
    train_cifar100_mae,
    test_cifar100_mae,
    targets_cifar100,
) = create_dataset(
    "CIFAR100",
    0.9,  # per the printed sizes, 90% of the train set becomes the SSL split
    mae_transform,
    scratch_transform,
    test_transform,
    "data",
    False,
)

Length of entire train dataset:  50000
Length of SSL train dataset:  45000
Length of classification train dataset:  5000
Length of test dataset:  10000


### IMAGENET

In [9]:
# ImageNet-1K splits for the MAE experiments.
# NOTE(review): the trailing positional arguments ("data", False) are defined by
# datasets.create_dataset — presumably the data root and a flag; confirm there.
(
    train_full_imagenet_mae,
    train_ssl_imagenet_mae,
    train_imagenet_mae,
    test_imagenet_mae,
    targets_imagenet,
) = create_dataset(
    "ImageNet1K",
    0.9,  # per the printed sizes, ~90% of the train set becomes the SSL split
    mae_transform,
    scratch_transform,
    test_transform,
    "data",
    False,
)

Length of entire train dataset:  1281167
Length of SSL train dataset:  1153050
Length of classification train dataset:  128117
Length of test dataset:  50000


## Models

In [25]:
import pickle

# Cache the ImageNet MAE dataset objects on disk so that they can be restored
# later without calling create_dataset again. Each entry is
# (output file, object to pickle, progress message printed after the dump).
_imagenet_mae_dumps = (
    ("train_full_imagenet_mae.pkl", train_full_imagenet_mae, "raz"),
    ("train_ssl_imagenet_mae.pkl", train_ssl_imagenet_mae, "dwa"),
    ("train_imagenet_mae.pkl", train_imagenet_mae, "trzy"),
    ("test_imagenet_mae.pkl", test_imagenet_mae, "cztery"),
)

for _pkl_path, _split, _progress in _imagenet_mae_dumps:
    with open(_pkl_path, "wb") as f:
        pickle.dump(_split, f)
    print(_progress)

# The targets are stored as integer CSV; later sections read them back with np.loadtxt.
np.savetxt("targets_imagenet.csv", targets_imagenet, delimiter=",", fmt="%d")

raz
dwa
trzy
cztery


In [6]:
import pickle


def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # pickle.load can execute arbitrary code; only use it on cache files
    # that this notebook wrote itself.
    with open(path, "rb") as f:
        return pickle.load(f)


# Restore the ImageNet MAE dataset objects from the on-disk cache.
train_full_imagenet_mae = _load_pickle("train_full_imagenet_mae.pkl")
train_ssl_imagenet_mae = _load_pickle("train_ssl_imagenet_mae.pkl")
train_imagenet_mae = _load_pickle("train_imagenet_mae.pkl")
test_imagenet_mae = _load_pickle("test_imagenet_mae.pkl")

In [13]:
# Hyper-parameters for the MAE experiments.
BATCH_SIZE = 128
# NOTE(review): NUM_EPOCHS is not referenced by the cells visible below
# (the trainers hard-code max_epochs=5) — confirm whether it is still needed.
NUM_EPOCHS = 20
LEARNING_RATE = 0.001
NUM_WORKERS = 8
MASK_RATIO = 0.75
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
# Free cached GPU memory and collect garbage before building models; per the
# original author's note, VRAM runs out without this step.
# `device` is either "cuda" or "cpu", so the check must compare against "cuda"
# (the previous comparison with "gpu" could never be true).
if device == "cuda":
    torch.cuda.empty_cache()
    gc.collect()



Using device: cuda


In [14]:
def _build_mae_model(lr, mask_ratio, backbone_type):
    """Create one MAEModel; only lr, mask ratio and backbone vary between runs."""
    return MAEModel(
        lr=lr,
        weight_decay=1e-4,
        # NOTE(review): the trainers below stop after 5 epochs while the model is
        # told max_epochs=50 — presumably this drives an LR schedule; confirm.
        max_epochs=50,
        backbone_type=backbone_type,
        input_dim=3 * 224 * 224,
        mask_ratio=mask_ratio,
    )


# (learning rate, mask ratio) combinations of the sweep, in the order model1..model8
# are created. The values are spelled out here instead of reassigning the
# LEARNING_RATE / MASK_RATIO constants in the middle of the cell: with the
# reassignments, re-running this cell on its own silently built model1/model2
# with lr=0.01 and mask_ratio=0.9.
_MAE_GRID = (
    (0.001, 0.75),
    (0.01, 0.75),
    (0.001, 0.9),
    (0.01, 0.9),
)
_MAE_BACKBONES = ("pretrained_resnet18", "random_resnet18")

# Odd-numbered models use the pretrained backbone, even-numbered ones the
# randomly initialised backbone; construction order matches the original cell.
(
    model1, model2,
    model3, model4,
    model5, model6,
    model7, model8,
) = [
    _build_mae_model(lr, mask_ratio, backbone_type)
    for lr, mask_ratio in _MAE_GRID
    for backbone_type in _MAE_BACKBONES
]








## Dataloaders

In [15]:
# Training loader over the SSL split: shuffled, incomplete last batch dropped.
train_dl = DataLoader(
    train_ssl_imagenet_mae,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    persistent_workers=True,
    pin_memory=True,
    drop_last=True,
)
# Loader over the test split (passed to Trainer.fit as the validation loader): fixed order.
test_dl = DataLoader(
    test_imagenet_mae,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    persistent_workers=True,
)

## Trainers

In [16]:
def _make_mae_trainer(model_index):
    """Build the checkpoint callback and Trainer for MAE run number ``model_index``.

    Returns:
        Tuple ``(checkpoint_callback, trainer)``. Checkpoints are written to
        ``checkpoints/mae/model<model_index>`` after epochs 2, 3 and 5.
    """
    checkpoint_callback = SaveAtEpochsCallback(
        save_epochs=[2, 3, 5],
        dirpath=f"checkpoints/mae/model{model_index}",
    )
    trainer = pl.Trainer(
        max_epochs=5,
        callbacks=[checkpoint_callback],
        accelerator='auto',
        devices=1,
        precision="16-mixed",
        log_every_n_steps=10,
    )
    return checkpoint_callback, trainer


# One independent trainer per model; the settings are identical apart from the
# checkpoint directory. The names are kept because the training cell uses them.
checkpoint_callback_1, trainer1 = _make_mae_trainer(1)
checkpoint_callback_2, trainer2 = _make_mae_trainer(2)
checkpoint_callback_3, trainer3 = _make_mae_trainer(3)
checkpoint_callback_4, trainer4 = _make_mae_trainer(4)
checkpoint_callback_5, trainer5 = _make_mae_trainer(5)
checkpoint_callback_6, trainer6 = _make_mae_trainer(6)
checkpoint_callback_7, trainer7 = _make_mae_trainer(7)
checkpoint_callback_8, trainer8 = _make_mae_trainer(8)

Using 16bit Automatic Mixed Precision (AMP)
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Using 16bit Automatic Mixed Precision (AMP)
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Using 16bit Automatic Mixed Precision (AMP)
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Using 16bit Automatic Mixed Precision (AMP)
Us

In [17]:
# Train the eight MAE configurations one after another, each with its own trainer.
# Models and trainers are paired explicitly instead of being looked up by name
# through globals(), so a missing or renamed variable fails loudly at this line.
_mae_runs = zip(
    (model1, model2, model3, model4, model5, model6, model7, model8),
    (trainer1, trainer2, trainer3, trainer4, trainer5, trainer6, trainer7, trainer8),
)
for model_cur, trainer_cur in _mae_runs:
    trainer_cur.fit(model_cur, train_dl, test_dl)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | encoder | MAEEncoder | 11.2 M | train
1 | decoder | MAEDecoder | 787 K  | train
-----------------------------------------------
12.0 M    Trainable params
0         Non-trainable params
12.0 M    Total params
47.858    Total estimated model params size (MB)
75        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/mae/model1/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/mae/model1/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints/mae/model1/model_epoch_5.ckpt


`Trainer.fit` stopped: `max_epochs=5` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | encoder | MAEEncoder | 11.2 M | train
1 | decoder | MAEDecoder | 787 K  | train
-----------------------------------------------
12.0 M    Trainable params
0         Non-trainable params
12.0 M    Total params
47.858    Total estimated model params size (MB)
75        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/mae/model2/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/mae/model2/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints/mae/model2/model_epoch_5.ckpt


`Trainer.fit` stopped: `max_epochs=5` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | encoder | MAEEncoder | 11.2 M | train
1 | decoder | MAEDecoder | 787 K  | train
-----------------------------------------------
12.0 M    Trainable params
0         Non-trainable params
12.0 M    Total params
47.858    Total estimated model params size (MB)
75        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/mae/model3/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/mae/model3/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints/mae/model3/model_epoch_5.ckpt


`Trainer.fit` stopped: `max_epochs=5` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | encoder | MAEEncoder | 11.2 M | train
1 | decoder | MAEDecoder | 787 K  | train
-----------------------------------------------
12.0 M    Trainable params
0         Non-trainable params
12.0 M    Total params
47.858    Total estimated model params size (MB)
75        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/mae/model4/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/mae/model4/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints/mae/model4/model_epoch_5.ckpt


`Trainer.fit` stopped: `max_epochs=5` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | encoder | MAEEncoder | 11.2 M | train
1 | decoder | MAEDecoder | 787 K  | train
-----------------------------------------------
12.0 M    Trainable params
0         Non-trainable params
12.0 M    Total params
47.858    Total estimated model params size (MB)
75        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/mae/model5/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/mae/model5/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints/mae/model5/model_epoch_5.ckpt


`Trainer.fit` stopped: `max_epochs=5` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | encoder | MAEEncoder | 11.2 M | train
1 | decoder | MAEDecoder | 787 K  | train
-----------------------------------------------
12.0 M    Trainable params
0         Non-trainable params
12.0 M    Total params
47.858    Total estimated model params size (MB)
75        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/mae/model6/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/mae/model6/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints/mae/model6/model_epoch_5.ckpt


`Trainer.fit` stopped: `max_epochs=5` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | encoder | MAEEncoder | 11.2 M | train
1 | decoder | MAEDecoder | 787 K  | train
-----------------------------------------------
12.0 M    Trainable params
0         Non-trainable params
12.0 M    Total params
47.858    Total estimated model params size (MB)
75        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/mae/model7/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/mae/model7/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints/mae/model7/model_epoch_5.ckpt


`Trainer.fit` stopped: `max_epochs=5` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | encoder | MAEEncoder | 11.2 M | train
1 | decoder | MAEDecoder | 787 K  | train
-----------------------------------------------
12.0 M    Trainable params
0         Non-trainable params
12.0 M    Total params
47.858    Total estimated model params size (MB)
75        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/mae/model8/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/mae/model8/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints/mae/model8/model_epoch_5.ckpt


`Trainer.fit` stopped: `max_epochs=5` reached.


# SimCLR

## Datasety

In [6]:
# Reload the ImageNet targets written by the MAE section and build the SimCLR
# datasets from them.
# NOTE(review): the meaning of the positional arguments ("data", SEED, False) is
# defined by datasets.create_dataset_targets_provided — confirm there.
targets_imagenet = np.loadtxt("targets_imagenet.csv", delimiter=",", dtype=int)
(
    train_full_imagenet_simclr,
    train_ssl_imagenet_simclr,
    train_imagenet_simclr,
    test_imagenet_simclr,
) = create_dataset_targets_provided(
    "ImageNet1K",
    0.9,  # per the printed sizes, ~90% of the train set becomes the SSL split
    simclr_transform,
    scratch_transform,
    test_transform,
    "data",
    targets_imagenet,
    SEED,
    False,
)

Length of entire train dataset:  1281167
Length of SSL train dataset:  1153050
Length of classification train dataset:  128117
Length of test dataset:  50000


## Modele

In [7]:
# Hyper-parameters for the SimCLR experiments.
BATCH_SIZE = 128
# NOTE(review): NUM_EPOCHS is not referenced by the cells visible below
# (the trainers hard-code max_epochs=5) — confirm whether it is still needed.
NUM_EPOCHS = 20
LEARNING_RATE = 0.001
NUM_WORKERS = 8
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
# Free cached GPU memory and collect garbage before building models; per the
# original author's note, VRAM runs out without this step.
# `device` is either "cuda" or "cpu", so the check must compare against "cuda"
# (the previous comparison with "gpu" could never be true).
if device == "cuda":
    torch.cuda.empty_cache()
    gc.collect()

Using device: cuda


In [8]:
def _build_simclr_model(lr, backbone_type):
    """Create one SimCLRModel; only the learning rate and backbone vary between runs."""
    return SimCLRModel(
        lr=lr,
        weight_decay=1e-4,
        # NOTE(review): the trainers below stop after 5 epochs while the model is
        # told max_epochs=50 — presumably this drives an LR schedule; confirm.
        max_epochs=50,
        backbone_type=backbone_type,
        input_dim=3 * 224 * 224,
    )


# Learning rates of the sweep, in the order model1..model4 are created. They are
# spelled out here instead of reassigning LEARNING_RATE in the middle of the cell:
# with the reassignment, re-running this cell on its own silently built
# model1/model2 with lr=0.01.
_SIMCLR_LEARNING_RATES = (0.001, 0.01)
_SIMCLR_BACKBONES = ("pretrained_resnet18", "random_resnet18")

# Odd-numbered models use the pretrained backbone, even-numbered ones the
# randomly initialised backbone; construction order matches the original cell.
model1, model2, model3, model4 = [
    _build_simclr_model(lr, backbone_type)
    for lr in _SIMCLR_LEARNING_RATES
    for backbone_type in _SIMCLR_BACKBONES
]



## Dataloadery

In [9]:
# Training loader over the SSL split: shuffled, incomplete last batch dropped.
train_dl = DataLoader(
    train_ssl_imagenet_simclr,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    persistent_workers=True,
    pin_memory=True,
    drop_last=True,
)
# Loader over the test split (passed to Trainer.fit as the validation loader): fixed order.
test_dl = DataLoader(
    test_imagenet_simclr,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    persistent_workers=True,
)

## Trainery

In [10]:
def _make_simclr_trainer(model_index):
    """Build the checkpoint callback and Trainer for SimCLR run number ``model_index``.

    Returns:
        Tuple ``(checkpoint_callback, trainer)``. Checkpoints are written to
        ``checkpoints/simclr/model<model_index>`` after epochs 2, 3 and 5.
    """
    checkpoint_callback = SaveAtEpochsCallback(
        save_epochs=[2, 3, 5],
        dirpath=f"checkpoints/simclr/model{model_index}",
    )
    trainer = pl.Trainer(
        max_epochs=5,
        callbacks=[checkpoint_callback],
        accelerator='auto',
        devices=1,
        precision="16-mixed",
        log_every_n_steps=10,
    )
    return checkpoint_callback, trainer


# One independent trainer per model; the settings are identical apart from the
# checkpoint directory. The names are kept because the training cell uses them.
checkpoint_callback_1, trainer1 = _make_simclr_trainer(1)
checkpoint_callback_2, trainer2 = _make_simclr_trainer(2)
checkpoint_callback_3, trainer3 = _make_simclr_trainer(3)
checkpoint_callback_4, trainer4 = _make_simclr_trainer(4)

Using 16bit Automatic Mixed Precision (AMP)
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/hussein/pytoniec/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
Using 16bit Automatic Mixed Precision (AMP)
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint

In [11]:
# Train the four SimCLR configurations one after another, each with its own trainer.
# Models and trainers are paired explicitly instead of being looked up by name
# through globals(), so a missing or renamed variable fails loudly at this line.
_simclr_runs = zip(
    (model1, model2, model3, model4),
    (trainer1, trainer2, trainer3, trainer4),
)
for model_cur, trainer_cur in _simclr_runs:
    trainer_cur.fit(model_cur, train_dl, test_dl)

You are using a CUDA device ('NVIDIA GeForce RTX 4070 SUPER') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type                 | Params | Mode 
-----------------------------------------------------------------
0 | backbone        | Sequential           | 11.2 M | train
1 | projection_head | SimCLRProjectionHead | 328 K  | train
2 | criterion       | NTXentLoss           | 0      | train
-----------------------------------------------------------------
11.5 M    Trainable params
0         Non-trainable params
11.5 M    Total params
46.022    Total estimated model params size (MB)
77        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/simclr/model1/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/simclr/model1/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints/simclr/model1/model_epoch_5.ckpt


`Trainer.fit` stopped: `max_epochs=5` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type                 | Params | Mode 
-----------------------------------------------------------------
0 | backbone        | Sequential           | 11.2 M | train
1 | projection_head | SimCLRProjectionHead | 328 K  | train
2 | criterion       | NTXentLoss           | 0      | train
-----------------------------------------------------------------
11.5 M    Trainable params
0         Non-trainable params
11.5 M    Total params
46.022    Total estimated model params size (MB)
77        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/simclr/model2/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/simclr/model2/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints/simclr/model2/model_epoch_5.ckpt


`Trainer.fit` stopped: `max_epochs=5` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type                 | Params | Mode 
-----------------------------------------------------------------
0 | backbone        | Sequential           | 11.2 M | train
1 | projection_head | SimCLRProjectionHead | 328 K  | train
2 | criterion       | NTXentLoss           | 0      | train
-----------------------------------------------------------------
11.5 M    Trainable params
0         Non-trainable params
11.5 M    Total params
46.022    Total estimated model params size (MB)
77        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/simclr/model3/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/simclr/model3/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints/simclr/model3/model_epoch_5.ckpt


`Trainer.fit` stopped: `max_epochs=5` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type                 | Params | Mode 
-----------------------------------------------------------------
0 | backbone        | Sequential           | 11.2 M | train
1 | projection_head | SimCLRProjectionHead | 328 K  | train
2 | criterion       | NTXentLoss           | 0      | train
-----------------------------------------------------------------
11.5 M    Trainable params
0         Non-trainable params
11.5 M    Total params
46.022    Total estimated model params size (MB)
77        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/simclr/model4/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/simclr/model4/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints/simclr/model4/model_epoch_5.ckpt


`Trainer.fit` stopped: `max_epochs=5` reached.


In [30]:
# NOTE(review): stand-alone run of the fourth SimCLR configuration, which the
# training loop over model1..model4 already covers. Its execution count (30) is
# out of sequence and the saved output ends in a KeyboardInterrupt — consider
# deleting this cell so that Restart & Run All does not fit model4 again.
trainer4.fit(model4, train_dl, test_dl)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type                 | Params | Mode 
-----------------------------------------------------------------
0 | backbone        | Sequential           | 11.2 M | train
1 | projection_head | SimCLRProjectionHead | 328 K  | train
2 | criterion       | NTXentLoss           | 0      | train
-----------------------------------------------------------------
11.5 M    Trainable params
0         Non-trainable params
11.5 M    Total params
46.022    Total estimated model params size (MB)
77        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]


Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined

# BYOL

## Datasety

In [12]:
# Reload the ImageNet targets written by the MAE section and build the BYOL
# datasets from them.
# NOTE(review): the meaning of the positional arguments ("data", SEED, False) is
# defined by datasets.create_dataset_targets_provided — confirm there.
targets_imagenet = np.loadtxt("targets_imagenet.csv", delimiter=",", dtype=int)
(
    train_full_imagenet_byol,
    train_ssl_imagenet_byol,
    train_imagenet_byol,
    test_imagenet_byol,
) = create_dataset_targets_provided(
    "ImageNet1K",
    0.9,  # per the printed sizes, ~90% of the train set becomes the SSL split
    byol_transform,
    scratch_transform,
    test_transform,
    "data",
    targets_imagenet,
    SEED,
    False,
)

Length of entire train dataset:  1281167
Length of SSL train dataset:  1153050
Length of classification train dataset:  128117
Length of test dataset:  50000


## Modele

In [13]:
# Ask PyTorch's CUDA caching allocator to use expandable segments.
# NOTE(review): this setting is presumably only read when the allocator is first
# initialised; if CUDA was already used earlier in this kernel session the
# assignment may have no effect — confirm, or set it before the first CUDA use.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

# Hyper-parameters for the BYOL experiments.
BATCH_SIZE, NUM_WORKERS = 128, 8
NUM_EPOCHS = 20
LEARNING_RATE = 0.001
TAU = 0.98  # passed to BYOLModel as `tau`

if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Free cached GPU memory and collect garbage before building models; per the
# original author's note, VRAM runs out without this step.
if device == "cuda":
    torch.cuda.empty_cache()
    gc.collect()

Using device: cuda


In [14]:
def _byol(backbone_type):
    """Build one BYOLModel for the given backbone.

    The learning rate and tau are read from the globals LEARNING_RATE and TAU
    at call time; weight decay, max_epochs and input_dim are identical for all
    models created in this cell.
    """
    return BYOLModel(
        lr=LEARNING_RATE,
        weight_decay=1e-4,
        max_epochs=50,
        backbone_type=backbone_type,
        input_dim=3 * 224 * 224,
        tau=TAU,
    )


# NOTE(review): this cell reassigns LEARNING_RATE and TAU, so running it a second
# time without re-running the config cell builds model1/model2 with lr=0.01 and
# tau=0.996 instead of the config-cell values.

# Models 1-2: LEARNING_RATE and TAU as left by the config cell.
model1 = _byol("pretrained_resnet18")
model2 = _byol("random_resnet18")

# Models 3-4: higher learning rate, same tau.
LEARNING_RATE = 0.01
model3 = _byol("pretrained_resnet18")
model4 = _byol("random_resnet18")

# Models 5-6: lower learning rate again, tau changed to 0.996.
LEARNING_RATE = 0.001
TAU = 0.996
model5 = _byol("pretrained_resnet18")
model6 = _byol("random_resnet18")

# Models 7-8: higher learning rate with tau = 0.996.
LEARNING_RATE = 0.01
model7 = _byol("pretrained_resnet18")
model8 = _byol("random_resnet18")



## Dataloadery

In [15]:
# Labels handed to the stratified bootstrap loader: the first 90% of the targets.
# NOTE(review): this presumes the SSL subset corresponds to the first 90% of
# targets_imagenet in the same order - confirm against create_dataset_targets_provided.
ssl_label_count = int(0.9 * len(targets_imagenet))

train_dl = create_stratified_bootstrap_dataloader(
    train_ssl_imagenet_byol,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    random_state=SEED,
    labels=targets_imagenet[:ssl_label_count],
    pin_memory=True,
    drop_last=True,
)

# Validation loader: fixed order, worker processes kept alive between epochs.
test_dl = DataLoader(
    test_imagenet_byol,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    persistent_workers=True,
)

## Trainery

In [16]:
def _byol_trainer(dirpath):
    """Create the checkpoint callback and the Trainer for one BYOL run.

    The callback saves a checkpoint into `dirpath` after epochs 2, 3 and 5;
    the trainer runs 5 epochs on a single device with 16-bit mixed precision.

    Returns:
        (callback, trainer) tuple.
    """
    save_cb = SaveAtEpochsCallback(save_epochs=[2, 3, 5], dirpath=dirpath)
    trainer = pl.Trainer(
        max_epochs=5,
        callbacks=[save_cb],
        accelerator='auto',
        devices=1,
        precision="16-mixed",
        log_every_n_steps=10,
    )
    return save_cb, trainer


# One callback/trainer pair per model, each with its own checkpoint directory.
checkpoint_callback_1, trainer1 = _byol_trainer("checkpoints/byol/model1")
checkpoint_callback_2, trainer2 = _byol_trainer("checkpoints/byol/model2")
checkpoint_callback_3, trainer3 = _byol_trainer("checkpoints/byol/model3")
checkpoint_callback_4, trainer4 = _byol_trainer("checkpoints/byol/model4")
checkpoint_callback_5, trainer5 = _byol_trainer("checkpoints/byol/model5")
checkpoint_callback_6, trainer6 = _byol_trainer("checkpoints/byol/model6")
checkpoint_callback_7, trainer7 = _byol_trainer("checkpoints/byol/model7")
checkpoint_callback_8, trainer8 = _byol_trainer("checkpoints/byol/model8")

Using 16bit Automatic Mixed Precision (AMP)
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Using 16bit Automatic Mixed Precision (AMP)
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Using 16bit Automatic Mixed Precision (AMP)
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Using 16bit Automatic Mixed Precision (AMP)
Us

In [17]:
# Train model1..model8 sequentially, each with its matching trainer1..trainer8
# (both looked up by name in the notebook's global namespace).
for i in range(8):
    model_str = f"model{i+1}"
    trainer_str = f"trainer{i+1}"
    model_cur = globals()[model_str]
    trainer_cur = globals()[trainer_str]
    trainer_cur.fit(model_cur, train_dl, test_dl)
    if device == "cuda":
        # Release GPU memory before the next run. Order matters:
        # 1) drop every reference to the finished model/trainer,
        # 2) run the garbage collector so objects held only by reference
        #    cycles are really destroyed,
        # 3) only then return the now-unused cached blocks to the driver.
        # Cleaning up before the `del` (as done previously) frees nothing,
        # because the model is still referenced at that point.
        del globals()[model_str], model_cur
        del globals()[trainer_str], trainer_cur
        gc.collect()
        torch.cuda.empty_cache()

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type                 | Params | Mode 
-------------------------------------------------------------------
0 | online_backbone   | Sequential           | 11.2 M | train
1 | online_projection | SimCLRProjectionHead | 328 K  | train
2 | online_predictor  | Sequential           | 16.7 K | train
3 | target_backbone   | Sequential           | 11.2 M | train
4 | target_projection | SimCLRProjectionHead | 328 K  | train
-------------------------------------------------------------------
11.5 M    Trainable params
11.5 M    Non-trainable params
23.0 M    Total params
92.111    Total estimated model params size (MB)
153       Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/byol/model1/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/byol/model1/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints/byol/model1/model_epoch_5.ckpt


`Trainer.fit` stopped: `max_epochs=5` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type                 | Params | Mode 
-------------------------------------------------------------------
0 | online_backbone   | Sequential           | 11.2 M | train
1 | online_projection | SimCLRProjectionHead | 328 K  | train
2 | online_predictor  | Sequential           | 16.7 K | train
3 | target_backbone   | Sequential           | 11.2 M | train
4 | target_projection | SimCLRProjectionHead | 328 K  | train
-------------------------------------------------------------------
11.5 M    Trainable params
11.5 M    Non-trainable params
23.0 M    Total params
92.111    Total estimated model params size (MB)
153       Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/byol/model2/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/byol/model2/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints/byol/model2/model_epoch_5.ckpt


`Trainer.fit` stopped: `max_epochs=5` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type                 | Params | Mode 
-------------------------------------------------------------------
0 | online_backbone   | Sequential           | 11.2 M | train
1 | online_projection | SimCLRProjectionHead | 328 K  | train
2 | online_predictor  | Sequential           | 16.7 K | train
3 | target_backbone   | Sequential           | 11.2 M | train
4 | target_projection | SimCLRProjectionHead | 328 K  | train
-------------------------------------------------------------------
11.5 M    Trainable params
11.5 M    Non-trainable params
23.0 M    Total params
92.111    Total estimated model params size (MB)
153       Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/byol/model3/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/byol/model3/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints/byol/model3/model_epoch_5.ckpt


`Trainer.fit` stopped: `max_epochs=5` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type                 | Params | Mode 
-------------------------------------------------------------------
0 | online_backbone   | Sequential           | 11.2 M | train
1 | online_projection | SimCLRProjectionHead | 328 K  | train
2 | online_predictor  | Sequential           | 16.7 K | train
3 | target_backbone   | Sequential           | 11.2 M | train
4 | target_projection | SimCLRProjectionHead | 328 K  | train
-------------------------------------------------------------------
11.5 M    Trainable params
11.5 M    Non-trainable params
23.0 M    Total params
92.111    Total estimated model params size (MB)
153       Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/byol/model4/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/byol/model4/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints/byol/model4/model_epoch_5.ckpt


`Trainer.fit` stopped: `max_epochs=5` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type                 | Params | Mode 
-------------------------------------------------------------------
0 | online_backbone   | Sequential           | 11.2 M | train
1 | online_projection | SimCLRProjectionHead | 328 K  | train
2 | online_predictor  | Sequential           | 16.7 K | train
3 | target_backbone   | Sequential           | 11.2 M | train
4 | target_projection | SimCLRProjectionHead | 328 K  | train
-------------------------------------------------------------------
11.5 M    Trainable params
11.5 M    Non-trainable params
23.0 M    Total params
92.111    Total estimated model params size (MB)
153       Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/byol/model5/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/byol/model5/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints/byol/model5/model_epoch_5.ckpt


`Trainer.fit` stopped: `max_epochs=5` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type                 | Params | Mode 
-------------------------------------------------------------------
0 | online_backbone   | Sequential           | 11.2 M | train
1 | online_projection | SimCLRProjectionHead | 328 K  | train
2 | online_predictor  | Sequential           | 16.7 K | train
3 | target_backbone   | Sequential           | 11.2 M | train
4 | target_projection | SimCLRProjectionHead | 328 K  | train
-------------------------------------------------------------------
11.5 M    Trainable params
11.5 M    Non-trainable params
23.0 M    Total params
92.111    Total estimated model params size (MB)
153       Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/byol/model6/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/byol/model6/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints/byol/model6/model_epoch_5.ckpt


`Trainer.fit` stopped: `max_epochs=5` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type                 | Params | Mode 
-------------------------------------------------------------------
0 | online_backbone   | Sequential           | 11.2 M | train
1 | online_projection | SimCLRProjectionHead | 328 K  | train
2 | online_predictor  | Sequential           | 16.7 K | train
3 | target_backbone   | Sequential           | 11.2 M | train
4 | target_projection | SimCLRProjectionHead | 328 K  | train
-------------------------------------------------------------------
11.5 M    Trainable params
11.5 M    Non-trainable params
23.0 M    Total params
92.111    Total estimated model params size (MB)
153       Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/byol/model7/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/byol/model7/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints/byol/model7/model_epoch_5.ckpt


`Trainer.fit` stopped: `max_epochs=5` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type                 | Params | Mode 
-------------------------------------------------------------------
0 | online_backbone   | Sequential           | 11.2 M | train
1 | online_projection | SimCLRProjectionHead | 328 K  | train
2 | online_predictor  | Sequential           | 16.7 K | train
3 | target_backbone   | Sequential           | 11.2 M | train
4 | target_projection | SimCLRProjectionHead | 328 K  | train
-------------------------------------------------------------------
11.5 M    Trainable params
11.5 M    Non-trainable params
23.0 M    Total params
92.111    Total estimated model params size (MB)
153       Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/byol/model8/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/byol/model8/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints/byol/model8/model_epoch_5.ckpt


`Trainer.fit` stopped: `max_epochs=5` reached.


# Scratch model(s)

## Datasety

In [13]:
# The scratch (supervised) runs reuse the train/test datasets built for MAE.
# NOTE(review): train_imagenet_mae / test_imagenet_mae must already exist in the
# kernel, so this cell depends on the MAE dataset cell having been run first.
train_imagenet_scratch, test_imagenet_scratch = train_imagenet_mae, test_imagenet_mae

## Modele

In [11]:
# Ask PyTorch's CUDA caching allocator to use expandable segments.
# NOTE(review): this variable is presumably only read when the CUDA allocator is
# first initialised; if CUDA was already used in this kernel it may have no effect - confirm.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

# Hyperparameters shared by the cells of this section.
BATCH_SIZE = 128
NUM_EPOCHS = 20
LEARNING_RATE = 0.001
NUM_WORKERS = 8
TAU = 0.98

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# The garbage collector is really useful here - without it we run out of VRAM.
# Collect Python garbage FIRST so tensors that are only kept alive by reference
# cycles are actually freed; only afterwards can empty_cache() return their
# cached blocks to the GPU driver.
if device == "cuda":
    gc.collect()
    torch.cuda.empty_cache()

Using device: cuda


In [12]:
def _imagenet_classifier(backbone_type):
    """Build a 1000-class ClassifierModel for the given backbone.

    The learning rate is read from the global LEARNING_RATE at call time; the
    remaining arguments are the same for every model created in this cell.
    """
    return ClassifierModel(
        lr=LEARNING_RATE,
        weight_decay=1e-4,
        max_epochs=50,
        backbone_type=backbone_type,
        input_dim=3 * 224 * 224,
        num_classes=1000,
    )


# NOTE(review): LEARNING_RATE is reassigned below, so re-running this cell without
# the config cell builds model1/model2 with lr=0.01 as well.

# Models 1-2: learning rate as left by the config cell.
model1 = _imagenet_classifier("pretrained_resnet18")
model2 = _imagenet_classifier("random_resnet18")

# Models 3-4: higher learning rate.
LEARNING_RATE = 0.01
model3 = _imagenet_classifier("pretrained_resnet18")
model4 = _imagenet_classifier("random_resnet18")



## Dataloadery

In [17]:
# Settings common to the training and the validation loader.
_scratch_loader_kwargs = dict(
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)

# Training loader: reshuffled every epoch, incomplete last batch dropped.
train_dl = DataLoader(
    train_imagenet_scratch,
    shuffle=True,
    drop_last=True,
    **_scratch_loader_kwargs,
)

# Validation loader: fixed order, worker processes kept alive between epochs.
test_dl = DataLoader(
    test_imagenet_scratch,
    shuffle=False,
    persistent_workers=True,
    **_scratch_loader_kwargs,
)

## Trainery

In [15]:
def _scratch_trainer(dirpath):
    """Create the checkpoint callback and the Trainer for one scratch run.

    The callback saves a checkpoint into `dirpath` after epochs 2, 3 and 5;
    the trainer runs 5 epochs on a single device with 16-bit mixed precision.

    Returns:
        (callback, trainer) tuple.
    """
    save_cb = SaveAtEpochsCallback(save_epochs=[2, 3, 5], dirpath=dirpath)
    trainer = pl.Trainer(
        max_epochs=5,
        callbacks=[save_cb],
        accelerator='auto',
        devices=1,
        precision="16-mixed",
        log_every_n_steps=10,
    )
    return save_cb, trainer


# One callback/trainer pair per model, each with its own checkpoint directory.
checkpoint_callback_1, trainer1 = _scratch_trainer("checkpoints/scratch/model1")
checkpoint_callback_2, trainer2 = _scratch_trainer("checkpoints/scratch/model2")
checkpoint_callback_3, trainer3 = _scratch_trainer("checkpoints/scratch/model3")
checkpoint_callback_4, trainer4 = _scratch_trainer("checkpoints/scratch/model4")

Using 16bit Automatic Mixed Precision (AMP)
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/hussein/pytoniec/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
Using 16bit Automatic Mixed Precision (AMP)
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint

In [18]:
# Train model1..model4 sequentially, each with its matching trainer1..trainer4
# (both looked up by name in the notebook's global namespace).
for i in range(4):
    model_str = f"model{i+1}"
    trainer_str = f"trainer{i+1}"
    model_cur = globals()[model_str]
    trainer_cur = globals()[trainer_str]
    trainer_cur.fit(model_cur, train_dl, test_dl)
    if device == "cuda":
        # Release GPU memory before the next run. Order matters:
        # 1) drop every reference to the finished model/trainer,
        # 2) run the garbage collector so objects held only by reference
        #    cycles are really destroyed,
        # 3) only then return the now-unused cached blocks to the driver.
        # Cleaning up before the `del` (as done previously) frees nothing,
        # because the model is still referenced at that point.
        del globals()[model_str], model_cur
        del globals()[trainer_str], trainer_cur
        gc.collect()
        torch.cuda.empty_cache()

You are using a CUDA device ('NVIDIA GeForce RTX 4070 SUPER') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type               | Params | Mode 
------------------------------------------------------------
0 | model        | ResNet             | 11.7 M | train
1 | criterion    | CrossEntropyLoss   | 0      | train
2 | train_metric | MulticlassAccuracy | 0      | train
3 | val_metric   | MulticlassAccuracy | 0      | train
4 | test_metric  | MulticlassAccuracy | 0      | train
------------------------------------------------------------
11.7 M    Trainable params
0         Non-trainable params
11.7 M    Total params
46.758    Total estimated model params size (MB)
72   

Sanity Checking: |          | 0/? [00:00<?, ?it/s]



Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/scratch/model1/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/scratch/model1/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.


Zapisano model po epoce 5: checkpoints/scratch/model1/model_epoch_5.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type               | Params | Mode 
------------------------------------------------------------
0 | model        | ResNet             | 11.7 M | train
1 | criterion    | CrossEntropyLoss   | 0      | train
2 | train_metric | MulticlassAccuracy | 0      | train
3 | val_metric   | MulticlassAccuracy | 0      | train
4 | test_metric  | MulticlassAccuracy | 0      | train
------------------------------------------------------------
11.7 M    Trainable params
0         Non-trainable params
11.7 M    Total params
46.758    Total estimated model params size (MB)
72        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/scratch/model2/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/scratch/model2/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints/scratch/model2/model_epoch_5.ckpt


`Trainer.fit` stopped: `max_epochs=5` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type               | Params | Mode 
------------------------------------------------------------
0 | model        | ResNet             | 11.7 M | train
1 | criterion    | CrossEntropyLoss   | 0      | train
2 | train_metric | MulticlassAccuracy | 0      | train
3 | val_metric   | MulticlassAccuracy | 0      | train
4 | test_metric  | MulticlassAccuracy | 0      | train
------------------------------------------------------------
11.7 M    Trainable params
0         Non-trainable params
11.7 M    Total params
46.758    Total estimated model params size (MB)
72        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/scratch/model3/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/scratch/model3/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.


Zapisano model po epoce 5: checkpoints/scratch/model3/model_epoch_5.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type               | Params | Mode 
------------------------------------------------------------
0 | model        | ResNet             | 11.7 M | train
1 | criterion    | CrossEntropyLoss   | 0      | train
2 | train_metric | MulticlassAccuracy | 0      | train
3 | val_metric   | MulticlassAccuracy | 0      | train
4 | test_metric  | MulticlassAccuracy | 0      | train
------------------------------------------------------------
11.7 M    Trainable params
0         Non-trainable params
11.7 M    Total params
46.758    Total estimated model params size (MB)
72        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 2: checkpoints/scratch/model4/model_epoch_2.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 3: checkpoints/scratch/model4/model_epoch_3.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints/scratch/model4/model_epoch_5.ckpt


`Trainer.fit` stopped: `max_epochs=5` reached.


# CIFAR10 SCRATCH

## Datasety

In [19]:
# Build the CIFAR10 datasets for the supervised baseline. The printed output of
# this cell reports the sizes of the full, SSL, classification and test sets;
# the fifth returned value is stored as the CIFAR10 targets.
(
    train_full_cifar10_scratch,
    train_ssl_cifar10_scratch,
    train_cifar10_scratch,
    test_cifar10_scratch,
    targets_cifar10,
) = create_dataset(
    "CIFAR10",
    0.9,
    scratch_transform,
    scratch_transform,
    test_transform,
    "data",
    False,
)

Length of entire train dataset:  50000
Length of SSL train dataset:  45000
Length of classification train dataset:  5000
Length of test dataset:  10000


## Dataloadery

In [21]:
# Settings common to the training and the validation loader.
_cifar10_loader_kwargs = dict(
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    pin_memory=True,
)

# Training loader: reshuffled every epoch, incomplete last batch dropped.
train_dl = DataLoader(
    train_cifar10_scratch,
    shuffle=True,
    drop_last=True,
    **_cifar10_loader_kwargs,
)

# Validation loader: fixed order, worker processes kept alive between epochs.
test_dl = DataLoader(
    test_cifar10_scratch,
    shuffle=False,
    persistent_workers=True,
    **_cifar10_loader_kwargs,
)

## Modele

In [22]:
# Ask PyTorch's CUDA caching allocator to use expandable segments.
# NOTE(review): this variable is presumably only read when the CUDA allocator is
# first initialised; if CUDA was already used in this kernel it may have no effect - confirm.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

# Hyperparameters shared by the cells of this section.
BATCH_SIZE = 128
NUM_EPOCHS = 20
LEARNING_RATE = 0.001
NUM_WORKERS = 8
TAU = 0.98

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# The garbage collector is really useful here - without it we run out of VRAM.
# Collect Python garbage FIRST so tensors that are only kept alive by reference
# cycles are actually freed; only afterwards can empty_cache() return their
# cached blocks to the GPU driver.
if device == "cuda":
    gc.collect()
    torch.cuda.empty_cache()

Using device: cuda


In [29]:
# Four CIFAR10 classifiers: {pretrained, random} ResNet-18 backbone at two
# learning rates. The models are still created in the order model1..model4.
# NOTE(review): max_epochs=50 is passed to the model while the trainers in this
# notebook stop after 15 epochs - if the model uses it for an LR schedule,
# confirm that the mismatch is intended.

# model1 / model2 use the LEARNING_RATE currently set in the kernel.
model1, model2 = [
    ClassifierModel(
        backbone_type=backbone,
        num_classes=10,
        input_dim=3 * 224 * 224,
        max_epochs=50,
        weight_decay=1e-4,
        lr=LEARNING_RATE,
    )
    for backbone in ("pretrained_resnet18", "random_resnet18")
]

# From here on the notebook-wide LEARNING_RATE is 0.01 (this rebinds the global,
# so later cells that read it see the new value as well).
LEARNING_RATE = 0.01

# model3 / model4 repeat the two backbones at the higher learning rate.
model3, model4 = [
    ClassifierModel(
        backbone_type=backbone,
        num_classes=10,
        input_dim=3 * 224 * 224,
        max_epochs=50,
        weight_decay=1e-4,
        lr=LEARNING_RATE,
    )
    for backbone in ("pretrained_resnet18", "random_resnet18")
]

## Trainery

In [30]:
# One checkpoint callback and one trainer per CIFAR10 model. Every trainer runs
# 15 epochs on a single device with 16-bit mixed precision and saves a
# checkpoint after epochs 5, 10 and 15 into its own directory.
# The inner generator is lazy, so each callback is created immediately before
# the trainer that uses it - the same order as writing the pairs out by hand.
(
    (checkpoint_callback_1, trainer1),
    (checkpoint_callback_2, trainer2),
    (checkpoint_callback_3, trainer3),
    (checkpoint_callback_4, trainer4),
) = [
    (
        callback,
        pl.Trainer(
            max_epochs=15,
            callbacks=[callback],
            accelerator='auto',
            devices=1,
            precision="16-mixed",
            log_every_n_steps=10,
        ),
    )
    for callback in (
        SaveAtEpochsCallback(save_epochs=[5, 10, 15], dirpath=checkpoint_dir)
        for checkpoint_dir in (
            "checkpoints10/scratch/model1",
            "checkpoints10/scratch/model2",
            "checkpoints10/scratch/model3",
            "checkpoints10/scratch/model4",
        )
    )
]

Using 16bit Automatic Mixed Precision (AMP)
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Using 16bit Automatic Mixed Precision (AMP)
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Using 16bit Automatic Mixed Precision (AMP)
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Using 16bit Automatic Mixed Precision (AMP)
Us

In [31]:
# Train the four CIFAR10 model/trainer pairs one after another and release each
# pair's GPU memory before the next pair starts.
# NOTE: on CUDA the loop removes model1..model4 and trainer1..trainer4 from the
# notebook namespace, so re-running this cell requires re-creating them first.
for i in range(4):
    model_str = f"model{i+1}"
    trainer_str = f"trainer{i+1}"
    # The pairs live in the notebook namespace under numbered names.
    model_cur = globals()[model_str]
    trainer_cur = globals()[trainer_str]
    # test_dl is passed in the validation-dataloader position of Trainer.fit.
    trainer_cur.fit(model_cur, train_dl, test_dl)
    if device == "cuda":
        # Drop every reference to the finished pair first; only then can the
        # garbage collector reclaim it and empty_cache() return its memory.
        # Cleaning up before the `del` leaves the model's memory allocated.
        del globals()[model_str], model_cur
        del globals()[trainer_str], trainer_cur
        gc.collect()
        torch.cuda.empty_cache()

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type               | Params | Mode 
------------------------------------------------------------
0 | model        | ResNet             | 11.2 M | train
1 | criterion    | CrossEntropyLoss   | 0      | train
2 | train_metric | MulticlassAccuracy | 0      | train
3 | val_metric   | MulticlassAccuracy | 0      | train
4 | test_metric  | MulticlassAccuracy | 0      | train
------------------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.727    Total estimated model params size (MB)
72        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints10/scratch/model1/model_epoch_5.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 10: checkpoints10/scratch/model1/model_epoch_10.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 15: checkpoints10/scratch/model1/model_epoch_15.ckpt


`Trainer.fit` stopped: `max_epochs=15` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type               | Params | Mode 
------------------------------------------------------------
0 | model        | ResNet             | 11.2 M | train
1 | criterion    | CrossEntropyLoss   | 0      | train
2 | train_metric | MulticlassAccuracy | 0      | train
3 | val_metric   | MulticlassAccuracy | 0      | train
4 | test_metric  | MulticlassAccuracy | 0      | train
------------------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.727    Total estimated model params size (MB)
72        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints10/scratch/model2/model_epoch_5.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 10: checkpoints10/scratch/model2/model_epoch_10.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 15: checkpoints10/scratch/model2/model_epoch_15.ckpt


`Trainer.fit` stopped: `max_epochs=15` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type               | Params | Mode 
------------------------------------------------------------
0 | model        | ResNet             | 11.2 M | train
1 | criterion    | CrossEntropyLoss   | 0      | train
2 | train_metric | MulticlassAccuracy | 0      | train
3 | val_metric   | MulticlassAccuracy | 0      | train
4 | test_metric  | MulticlassAccuracy | 0      | train
------------------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.727    Total estimated model params size (MB)
72        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints10/scratch/model3/model_epoch_5.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 10: checkpoints10/scratch/model3/model_epoch_10.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 15: checkpoints10/scratch/model3/model_epoch_15.ckpt


`Trainer.fit` stopped: `max_epochs=15` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type               | Params | Mode 
------------------------------------------------------------
0 | model        | ResNet             | 11.2 M | train
1 | criterion    | CrossEntropyLoss   | 0      | train
2 | train_metric | MulticlassAccuracy | 0      | train
3 | val_metric   | MulticlassAccuracy | 0      | train
4 | test_metric  | MulticlassAccuracy | 0      | train
------------------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.727    Total estimated model params size (MB)
72        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints10/scratch/model4/model_epoch_5.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 10: checkpoints10/scratch/model4/model_epoch_10.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 15: checkpoints10/scratch/model4/model_epoch_15.ckpt


`Trainer.fit` stopped: `max_epochs=15` reached.


# CIFAR100 SCRATCH

## Datasety

In [32]:
# Build the CIFAR100 splits used by the from-scratch classifiers.
# 0.9 is the share of the training set that ends up in the SSL subset (the cell
# output reports 45000 of 50000 samples); the remaining 5000 samples form the
# classification training set.
# NOTE(review): the trailing "data" / False look like a root directory and a
# download flag - confirm against create_dataset in datasets.py.
(
    train_full_cifar100_scratch,
    train_ssl_cifar100_scratch,
    train_cifar100_scratch,
    test_cifar100_scratch,
    targets_cifar100,
) = create_dataset(
    "CIFAR100",
    0.9,
    scratch_transform,
    scratch_transform,
    test_transform,
    "data",
    False,
)

Length of entire train dataset:  50000
Length of SSL train dataset:  45000
Length of classification train dataset:  5000
Length of test dataset:  10000


## Dataloadery

In [33]:
# NOTE: this cell relies on BATCH_SIZE and NUM_WORKERS already being defined in
# the kernel; it rebinds train_dl / test_dl, replacing the CIFAR10 loaders.

# Training batches for the CIFAR100 classification subset: reshuffled on every
# pass, with the trailing incomplete batch dropped.
train_dl = DataLoader(
    dataset=train_cifar100_scratch,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    shuffle=True,
    drop_last=True,
    pin_memory=True,
)

# Evaluation batches for the CIFAR100 test split: fixed order, worker processes
# are kept alive between passes.
test_dl = DataLoader(
    dataset=test_cifar100_scratch,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    shuffle=False,
    persistent_workers=True,
    pin_memory=True,
)

## Modele

In [34]:
# Allocator option for PyTorch's CUDA caching allocator.
# NOTE(review): this is presumably only honoured when it is set before PyTorch
# initialises CUDA; by this point earlier cells have already trained on the GPU,
# so consider moving it to the very first cell - confirm.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

# Run configuration for the CIFAR100 from-scratch experiments. LEARNING_RATE is
# reset to 0.001 here because the preceding models cell raised it to 0.01.
BATCH_SIZE = 128
NUM_EPOCHS = 20
LEARNING_RATE = 0.001
NUM_WORKERS = 8
TAU = 0.98

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# The garbage collector matters here - without it we run out of VRAM.
# Collect first so that tensors kept alive only by unreachable Python objects
# are released, and only then return the now-unused cached blocks to the driver;
# emptying the cache before collecting would leave that memory cached.
if device == "cuda":
    gc.collect()
    torch.cuda.empty_cache()

Using device: cuda


In [35]:
# Four CIFAR100 classifiers: {pretrained, random} ResNet-18 backbone at two
# learning rates. The models are still created in the order model1..model4.
# NOTE(review): max_epochs=50 is passed to the model while the trainers in this
# notebook stop after 15 epochs - if the model uses it for an LR schedule,
# confirm that the mismatch is intended.

# model1 / model2 use the LEARNING_RATE currently set in the kernel.
model1, model2 = [
    ClassifierModel(
        backbone_type=backbone,
        num_classes=100,
        input_dim=3 * 224 * 224,
        max_epochs=50,
        weight_decay=1e-4,
        lr=LEARNING_RATE,
    )
    for backbone in ("pretrained_resnet18", "random_resnet18")
]

# From here on the notebook-wide LEARNING_RATE is 0.01 (this rebinds the global,
# so later cells that read it see the new value as well).
LEARNING_RATE = 0.01

# model3 / model4 repeat the two backbones at the higher learning rate.
model3, model4 = [
    ClassifierModel(
        backbone_type=backbone,
        num_classes=100,
        input_dim=3 * 224 * 224,
        max_epochs=50,
        weight_decay=1e-4,
        lr=LEARNING_RATE,
    )
    for backbone in ("pretrained_resnet18", "random_resnet18")
]



## Trainery

In [36]:
# One checkpoint callback and one trainer per CIFAR100 model. Every trainer runs
# 15 epochs on a single device with 16-bit mixed precision and saves a
# checkpoint after epochs 5, 10 and 15 into its own directory.
# The inner generator is lazy, so each callback is created immediately before
# the trainer that uses it - the same order as writing the pairs out by hand.
(
    (checkpoint_callback_1, trainer1),
    (checkpoint_callback_2, trainer2),
    (checkpoint_callback_3, trainer3),
    (checkpoint_callback_4, trainer4),
) = [
    (
        callback,
        pl.Trainer(
            max_epochs=15,
            callbacks=[callback],
            accelerator='auto',
            devices=1,
            precision="16-mixed",
            log_every_n_steps=10,
        ),
    )
    for callback in (
        SaveAtEpochsCallback(save_epochs=[5, 10, 15], dirpath=checkpoint_dir)
        for checkpoint_dir in (
            "checkpoints100/scratch/model1",
            "checkpoints100/scratch/model2",
            "checkpoints100/scratch/model3",
            "checkpoints100/scratch/model4",
        )
    )
]

Using 16bit Automatic Mixed Precision (AMP)
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Using 16bit Automatic Mixed Precision (AMP)
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Using 16bit Automatic Mixed Precision (AMP)
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Using 16bit Automatic Mixed Precision (AMP)
Us

In [37]:
# Train the four CIFAR100 model/trainer pairs one after another and release
# each pair's GPU memory before the next pair starts.
# NOTE: on CUDA the loop removes model1..model4 and trainer1..trainer4 from the
# notebook namespace, so re-running this cell requires re-creating them first.
for i in range(4):
    model_str = f"model{i+1}"
    trainer_str = f"trainer{i+1}"
    # The pairs live in the notebook namespace under numbered names.
    model_cur = globals()[model_str]
    trainer_cur = globals()[trainer_str]
    # test_dl is passed in the validation-dataloader position of Trainer.fit.
    trainer_cur.fit(model_cur, train_dl, test_dl)
    if device == "cuda":
        # Drop every reference to the finished pair first; only then can the
        # garbage collector reclaim it and empty_cache() return its memory.
        # Cleaning up before the `del` leaves the model's memory allocated.
        del globals()[model_str], model_cur
        del globals()[trainer_str], trainer_cur
        gc.collect()
        torch.cuda.empty_cache()

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type               | Params | Mode 
------------------------------------------------------------
0 | model        | ResNet             | 11.2 M | train
1 | criterion    | CrossEntropyLoss   | 0      | train
2 | train_metric | MulticlassAccuracy | 0      | train
3 | val_metric   | MulticlassAccuracy | 0      | train
4 | test_metric  | MulticlassAccuracy | 0      | train
------------------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.911    Total estimated model params size (MB)
72        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]



Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints100/scratch/model1/model_epoch_5.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 10: checkpoints100/scratch/model1/model_epoch_10.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 15: checkpoints100/scratch/model1/model_epoch_15.ckpt


`Trainer.fit` stopped: `max_epochs=15` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type               | Params | Mode 
------------------------------------------------------------
0 | model        | ResNet             | 11.2 M | train
1 | criterion    | CrossEntropyLoss   | 0      | train
2 | train_metric | MulticlassAccuracy | 0      | train
3 | val_metric   | MulticlassAccuracy | 0      | train
4 | test_metric  | MulticlassAccuracy | 0      | train
------------------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.911    Total estimated model params size (MB)
72        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints100/scratch/model2/model_epoch_5.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 10: checkpoints100/scratch/model2/model_epoch_10.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 15: checkpoints100/scratch/model2/model_epoch_15.ckpt


`Trainer.fit` stopped: `max_epochs=15` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type               | Params | Mode 
------------------------------------------------------------
0 | model        | ResNet             | 11.2 M | train
1 | criterion    | CrossEntropyLoss   | 0      | train
2 | train_metric | MulticlassAccuracy | 0      | train
3 | val_metric   | MulticlassAccuracy | 0      | train
4 | test_metric  | MulticlassAccuracy | 0      | train
------------------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.911    Total estimated model params size (MB)
72        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints100/scratch/model3/model_epoch_5.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 10: checkpoints100/scratch/model3/model_epoch_10.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=15` reached.


Zapisano model po epoce 15: checkpoints100/scratch/model3/model_epoch_15.ckpt


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type               | Params | Mode 
------------------------------------------------------------
0 | model        | ResNet             | 11.2 M | train
1 | criterion    | CrossEntropyLoss   | 0      | train
2 | train_metric | MulticlassAccuracy | 0      | train
3 | val_metric   | MulticlassAccuracy | 0      | train
4 | test_metric  | MulticlassAccuracy | 0      | train
------------------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.911    Total estimated model params size (MB)
72        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: checkpoints100/scratch/model4/model_epoch_5.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 10: checkpoints100/scratch/model4/model_epoch_10.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 15: checkpoints100/scratch/model4/model_epoch_15.ckpt


`Trainer.fit` stopped: `max_epochs=15` reached.


# Dla Agaty

In [7]:
# Run configuration for the MAE experiments in this section.
BATCH_SIZE = 128
NUM_EPOCHS = 15
LEARNING_RATE = 0.001
NUM_WORKERS = 4

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# The garbage collector matters here - without it we run out of VRAM.
# `device` is only ever "cuda" or "cpu", so the previous comparison against
# "gpu" never matched and this cleanup was silently skipped.
# Collect first so freed tensors can be handed back by empty_cache().
if device == "cuda":
    gc.collect()
    torch.cuda.empty_cache()

Using device: cuda


In [8]:
# Loaders for MAE pre-training on CIFAR100. Both keep their worker processes
# alive between passes.
# NOTE(review): train_ssl_cifar100_mae / test_cifar100_mae are not created in
# this part of the notebook - make sure the cell that builds them has run.

# SSL training subset, reshuffled on every pass.
train_loader = DataLoader(
    dataset=train_ssl_cifar100_mae,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    persistent_workers=True,
    shuffle=True,
)

# Test split in fixed order, used as the validation loader.
val_loader = DataLoader(
    dataset=test_cifar100_mae,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    persistent_workers=True,
    shuffle=False,
)

In [9]:


# Two MAE models that differ in both backbone and learning rate.
# NOTE(review): max_epochs=50 is passed to the models while the trainers in this
# section stop after 15 epochs - confirm that the mismatch is intended.

# Randomly initialised ResNet-18 at the LEARNING_RATE currently set in the kernel.
model2 = MAEModel(
    backbone_type="random_resnet18",
    mask_ratio=0.75,
    input_dim=3 * 224 * 224,
    max_epochs=50,
    weight_decay=1e-4,
    lr=LEARNING_RATE,
)

# From here on the notebook-wide LEARNING_RATE is 0.01 (this rebinds the global).
LEARNING_RATE = 0.01

# Pretrained ResNet-18 at the higher learning rate.
model3 = MAEModel(
    backbone_type="pretrained_resnet18",
    mask_ratio=0.75,
    input_dim=3 * 224 * 224,
    max_epochs=50,
    weight_decay=1e-4,
    lr=LEARNING_RATE,
)



In [10]:


# Checkpointing and trainer for model2 (random backbone, lr 0.001): 15 epochs on
# a single device, with checkpoints written after epochs 5, 10 and 15.
checkpoint_callback_2 = SaveAtEpochsCallback(
    dirpath="dlagata/lr_0.001_mae_cifar100_random",
    save_epochs=[5, 10, 15],
)
trainer2 = pl.Trainer(
    callbacks=[checkpoint_callback_2],
    max_epochs=15,
    log_every_n_steps=10,
    devices=1,
    accelerator='auto',
)

# Same setup for model3 (pretrained backbone, lr 0.01), saved to its own directory.
checkpoint_callback_3 = SaveAtEpochsCallback(
    dirpath="dlagata/lr_0.01_mae_cifar100",
    save_epochs=[5, 10, 15],
)
trainer3 = pl.Trainer(
    callbacks=[checkpoint_callback_3],
    max_epochs=15,
    log_every_n_steps=10,
    devices=1,
    accelerator='auto',
)



Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/hussein/pytoniec/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used

In [11]:
# Pre-train the lr=0.01 MAE model; val_loader (the CIFAR100 test split) is used
# for the per-epoch validation pass.
# NOTE(review): only model3 is trained here - model2 / trainer2 are created in
# the preceding cells but never fitted in this part of the notebook.
trainer3.fit(
    model=model3,
    train_dataloaders=train_loader,
    val_dataloaders=val_loader,
)

You are using a CUDA device ('NVIDIA GeForce RTX 4070 SUPER') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type       | Params | Mode 
-----------------------------------------------
0 | encoder | MAEEncoder | 11.2 M | train
1 | decoder | MAEDecoder | 787 K  | train
-----------------------------------------------
12.0 M    Trainable params
0         Non-trainable params
12.0 M    Total params
47.858    Total estimated model params size (MB)
75        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 5: dlagata/lr_0.01_mae_cifar100/model_epoch_5.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 10: dlagata/lr_0.01_mae_cifar100/model_epoch_10.ckpt


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Zapisano model po epoce 15: dlagata/lr_0.01_mae_cifar100/model_epoch_15.ckpt


`Trainer.fit` stopped: `max_epochs=15` reached.
