In [None]:
!pip install --upgrade ipykernel



In [None]:
!pip install -q pytorch-lightning wandb

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:

#Unzip and load file data file onto server, then delete zip file for optimizing performance
zip_path = "drive/MyDrive/nature_12K.zip"
!cp "{zip_path}" .
!unzip -qn nature_12K.zip
!rm nature_12K.zip

In [None]:
import os
import torch
import wandb
import torch.nn as nn
import torchvision
import pytorch_lightning as pl
import torch.nn.functional as F

from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import DataLoader, random_split

from torchvision.models import resnet50, inception_v3, googlenet, vgg16, efficientnet_v2_s
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import EarlyStopping

In [None]:

BASE_MODELS = {
    "RN50": resnet50,
    "IV3": inception_v3,
    "GOOGLENET": googlenet,
    "VGG16": vgg16,
    "EFFICIENTNETV2": efficientnet_v2_s,
}

DATA_DIR = "./inaturalist_12K"
IMG_SIZE = (224, 224)


NUM_CLASSES = 10


# -------------------  Transforms & Dataloaders -------------------
def get_transforms(augment=False, IMG_SIZE =(224, 224)):

# def get_transforms(augment=False):
    if augment:
        return transforms.Compose([
            transforms.Resize(IMG_SIZE),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(15),
            transforms.ToTensor(),
        ])
    else:
        return transforms.Compose([
            transforms.Resize(IMG_SIZE),
            transforms.ToTensor(),
        ])

# def get_dataloaders(batch_size, augment):
def get_dataloaders(batch_size, augment, IMG_SIZE=(224, 224)):
    transform = get_transforms(augment, IMG_SIZE)
    full_dataset = ImageFolder(os.path.join(DATA_DIR, 'train'), transform=transform)
    train_size = int(0.9 * len(full_dataset))
    val_size = len(full_dataset) - train_size
    train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])
    test_dataset = ImageFolder(os.path.join(DATA_DIR, 'val'), transform=transform)

    return (
        DataLoader(train_dataset, batch_size=batch_size, shuffle=True),
        DataLoader(val_dataset, batch_size=batch_size, shuffle=False),
        DataLoader(test_dataset, batch_size=batch_size, shuffle=False),
    )


# -------------------  TransferModel with Fine-Tuning Strategy -------------------
class TransferModel(pl.LightningModule):
    def __init__(self, base_model_name='RN50', dense_neurons=256, optimizer_name='adam', lr=1e-3,
                 finetune_strategy='freeze_all', unfreeze_k=0):
        super().__init__()
        self.save_hyperparameters()

        base_model = BASE_MODELS[base_model_name](weights='IMAGENET1K_V1')
        self.finetune_strategy = finetune_strategy




        # Handle model structure for final feature layer
        if hasattr(base_model, 'fc') and isinstance(base_model.fc, nn.Linear):
            # ResNet, GoogLeNet, InceptionV3
            in_features = base_model.fc.in_features
            base_model.fc = nn.Identity()

        elif hasattr(base_model, 'classifier') and isinstance(base_model.classifier, nn.Sequential):
            # VGG
            in_features = base_model.classifier[-1].in_features
            base_model.classifier = nn.Identity()

        elif hasattr(base_model, 'classifier') and isinstance(base_model.classifier, nn.Module):
            # EfficientNetV2
            linear_layers = [m for m in base_model.classifier.modules() if isinstance(m, nn.Linear)]
            if len(linear_layers) == 1:
                in_features = linear_layers[0].in_features
                base_model.classifier = nn.Identity()
            else:
                raise ValueError(f"Couldn't find a single Linear layer in {base_model}. Please check the model definition.")

        else:
            raise ValueError(f"Unknown model structure for: {base_model}")


        self.base_model = base_model
        self.classifier = nn.Sequential(
            nn.Linear(in_features, dense_neurons),
            nn.ReLU(),
            nn.Linear(dense_neurons, NUM_CLASSES)
        )
        self.loss_fn = nn.CrossEntropyLoss()

        self.configure_finetune(finetune_strategy, unfreeze_k)

    def configure_finetune(self, strategy, k):
        all_params = list(self.base_model.parameters())
        if strategy == 'freeze_all':
            for p in all_params:
                p.requires_grad = False
        elif strategy == 'unfreeze_all':
            for p in all_params:
                p.requires_grad = True
        elif strategy == 'unfreeze_last_k':
            for p in all_params[:-k]:
                p.requires_grad = False
            for p in all_params[-k:]:
                p.requires_grad = True
        else:
            raise ValueError(f"Invalid strategy: {strategy}")



    def forward(self, x):
        base_model = getattr(self.hparams, "base_model", None)
        if base_model == "IV3":
      #  if self.hparams.base_model == "IV3":
           outputs = self.base_model(x)
           if isinstance(outputs, tuple):
               return outputs[0]  # InceptionV3 returns (logits, aux)
           return outputs
        else:
           return self.base_model(x)



    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = self.loss_fn(logits, y)
        acc = (logits.argmax(dim=1) == y).float().mean()
        self.log('train_loss', loss, prog_bar=True)
        self.log('train_acc', acc, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = self.loss_fn(logits, y)
        acc = (logits.argmax(dim=1) == y).float().mean()
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)

    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        acc = (logits.argmax(dim=1) == y).float().mean()
        self.log('test_acc', acc, prog_bar=True)

    def configure_optimizers(self):
        lr = self.hparams.lr
        if self.hparams.optimizer_name == 'adam':
            return torch.optim.Adam(self.parameters(), lr=lr)
        elif self.hparams.optimizer_name == 'nadam':
            return torch.optim.NAdam(self.parameters(), lr=lr)
        elif self.hparams.optimizer_name == 'rmsprop':
            return torch.optim.RMSprop(self.parameters(), lr=lr)
        else:
            raise ValueError(f"Unsupported optimizer: {self.hparams.optimizer_name}")

In [None]:
def train_wandb():
    wandb.init(project="iNat12k-transfer", job_type="sweep", entity = "ma23c044-indian-institute-of-technology-madras")
    config = wandb.config

    wandb.run.name=f"bm_{wandb.config.base_model}_opt_{wandb.config.optimizer}_lr_{wandb.config.lr:.1e}_strat_{wandb.config.finetune_strategy}_dn_{wandb.config.dense_neurons}_bs_{wandb.config.batch_size}_aug_{wandb.config.augment}"



    # Log sweep strategy
    wandb.log({
        'finetune_strategy': config.finetune_strategy,
        'unfreeze_k': config.unfreeze_k if config.finetune_strategy == 'unfreeze_last_k' else 0
    })

    train_loader, val_loader, test_loader = get_dataloaders(
        batch_size=config.batch_size,
        augment=config.augment
    )

    model = TransferModel(
        base_model_name=config.base_model,
        dense_neurons=config.dense_neurons,
        optimizer_name=config.optimizer,
        lr=config.lr,
        finetune_strategy=config.finetune_strategy,
        unfreeze_k=config.unfreeze_k
    )

    wandb_logger = WandbLogger()
    trainer = pl.Trainer(
        max_epochs=config.epochs,
        logger=wandb_logger,
        callbacks=[EarlyStopping(monitor='val_acc', mode='max', patience=3)],
        accelerator='auto'
    )

    trainer.fit(model, train_loader, val_loader)
    trainer.test(model, test_loader)
    wandb.finish()


# -------------------  Sweep Config -------------------
sweep_config = {
    'method': 'random',
    'metric': {'name': 'val_acc', 'goal': 'maximize'},
    'parameters': {
        'base_model': {'values': ['RN50', 'VGG16', 'GOOGLENET','EFFICIENTNETV2','IV3']},
        'dense_neurons': {'values': [128, 256]},
        'optimizer': {'values': ['adam', 'nadam', 'rmsprop']},
        'lr': {'min': 1e-5, 'max': 1e-3},
        'batch_size': {'values': [32, 64]},
        'augment': {'values': [True, False]},
        'epochs': {'value': 3},
        'finetune_strategy': {'values': ['freeze_all', 'unfreeze_all', 'unfreeze_last_k']},
        'unfreeze_k': {'values': [5, 10, 20]}
    }
}

# Launch sweep
sweep_id = wandb.sweep(sweep_config, project="iNat12k-transfer",entity = "ma23c044-indian-institute-of-technology-madras")
wandb.agent(sweep_id, function=train_wandb, count=5)


Create sweep with ID: 99ka72qd
Sweep URL: https://wandb.ai/ma23c044-indian-institute-of-technology-madras/iNat12k-transfer/sweeps/99ka72qd


[34m[1mwandb[0m: Agent Starting Run: 2fqunxwx with config:
[34m[1mwandb[0m: 	augment: True
[34m[1mwandb[0m: 	base_model: GOOGLENET
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	finetune_strategy: unfreeze_last_k
[34m[1mwandb[0m: 	lr: 0.0005727118338956798
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	unfreeze_k: 10


INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
/usr/local/lib/python3.11/dist-packages/pytorch_lightning/loggers/wandb.py:397: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | GoogLeNet        | 5.6 M  | train
1 | classifier | Seq

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▃▃▃▃▅▅▅▅▆▆▆▆█
test_acc,▁
train_acc,▁█▁█▁▁▁▁█▁▁
train_loss,▇▅▆▇▄▃▄█▁▃▆
trainer/global_step,▁▂▂▂▃▄▄▄▅▆▆▆▇███
unfreeze_k,▁
val_acc,██▁█
val_loss,█▅▃▁

0,1
epoch,4
finetune_strategy,unfreeze_last_k
test_acc,0.002
train_acc,0
train_loss,6.92824
trainer/global_step,564
unfreeze_k,10
val_acc,0.002
val_loss,6.84861


[34m[1mwandb[0m: Agent Starting Run: e93fbfux with config:
[34m[1mwandb[0m: 	augment: False
[34m[1mwandb[0m: 	base_model: RN50
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	finetune_strategy: freeze_all
[34m[1mwandb[0m: 	lr: 0.00035651950581139566
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	unfreeze_k: 10


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth

  0%|          | 0.00/97.8M [00:00<?, ?B/s][A
  2%|▏         | 1.50M/97.8M [00:00<00:06, 15.2MB/s][A
  5%|▌         | 5.38M/97.8M [00:00<00:03, 29.9MB/s][A
 17%|█▋        | 16.5M/97.8M [00:00<00:01, 69.0MB/s][A
 36%|███▌      | 35.0M/97.8M [00:00<00:00, 118MB/s] [A
 54%|█████▍    | 53.2M/97.8M [00:00<00:00, 144MB/s][A
 76%|███████▌  | 74.0M/97.8M [00:00<00:00, 169MB/s][A
100%|██████████| 97.8M/97.8M [00:00<00:00, 135MB/s]
INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

0,1
unfreeze_k,▁

0,1
finetune_strategy,freeze_all
unfreeze_k,0


[34m[1mwandb[0m: [32m[41mERROR[0m Run e93fbfux errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-13-3f0963644e81>", line 38, in train_wandb
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, train_loader, val_loader)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
[34m[1mwandb[0m: [32m[41mERROR[0m     return trainer_fn(*args, **kwargs)
[34m[1

INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | EfficientNet     | 20.2 M | train
1 | classifier | Sequential       | 165 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
20.3 M    Trainable params
0         Non-trainable params
20.3 M    Total params
81.371    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▁▂▂▂▂▂▂▂▄▄▄▄▄▄▅▅▅▅▅▅▅▇▇▇▇▇▇▇█
test_acc,▁
train_acc,▁▂▃▃▅▂▂▃▅▄▄▅▆▅▃▁▆▆▅█▅▄▅▅▅▄▃▆
train_loss,██▆▆▆▆▇▅▅▅▅▄▃▄▄▇▃▃▃▁▂▃▃▃▃▂▃▁
trainer/global_step,▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇████
unfreeze_k,▁
val_acc,▂▁▄█▇
val_loss,▇█▃▁▁

0,1
epoch,5
finetune_strategy,unfreeze_all
test_acc,0.527
train_acc,0.625
train_loss,2.29847
trainer/global_step,1410
unfreeze_k,0
val_acc,0.51
val_loss,3.43846


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 2wsenr4w with config:
[34m[1mwandb[0m: 	augment: True
[34m[1mwandb[0m: 	base_model: VGG16
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	finetune_strategy: unfreeze_all
[34m[1mwandb[0m: 	lr: 0.00010567080831926448
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	unfreeze_k: 20


INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | VGG              | 14.7 M | train
1 | classifier | Sequential       | 525 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
15.2 M    Trainable params
0         Non-trainable params
15.2 M    Total params
60.962    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▂▂▂▂▄▄▄▄▅▅▅▅▇▇▇▇█
test_acc,▁
train_acc,▁▂▂▂▁▆▃▆▆▂▄█▇▃
train_loss,█▅▆▇▆▂▅▁▂▆▃▁▂▂
trainer/global_step,▁▂▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇███
unfreeze_k,▁
val_acc,▁▄▅██
val_loss,█▅▂▂▁

0,1
epoch,5
finetune_strategy,unfreeze_all
test_acc,0.311
train_acc,0.20312
train_loss,1.9606
trainer/global_step,705
unfreeze_k,0
val_acc,0.322
val_loss,1.95693


[34m[1mwandb[0m: Agent Starting Run: u163io6v with config:
[34m[1mwandb[0m: 	augment: False
[34m[1mwandb[0m: 	base_model: IV3
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	finetune_strategy: unfreeze_last_k
[34m[1mwandb[0m: 	lr: 0.0005070831121111056
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	unfreeze_k: 5


INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | Inception3       | 25.1 M | train
1 | classifier | Sequential       | 263 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
657 K     Trainable params
24.7 M    Non-trainable params
25.4 M    Total params
101.503   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

0,1
unfreeze_k,▁

0,1
finetune_strategy,unfreeze_last_k
unfreeze_k,5


[34m[1mwandb[0m: [32m[41mERROR[0m Run u163io6v errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-13-3f0963644e81>", line 38, in train_wandb
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, train_loader, val_loader)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
[34m[1mwandb[0m: [32m[41mERROR[0m     return trainer_fn(*args, **kwargs)
[34m[1

INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | GoogLeNet        | 5.6 M  | train
1 | classifier | Sequential       | 264 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
1.6 M     Trainable params
4.3 M     Non-trainable params
5.9 M     Total params
23.459    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▂▂▂▂▄▄▄▄▅▅▅▅▇▇▇▇█
test_acc,▁
train_acc,▁▃▆▄▇▆▆▆▆▆▆▆▇█
train_loss,█▆▄▆▄▄▃▄▃▂▃▂▁▁
trainer/global_step,▁▂▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇███
unfreeze_k,▁
val_acc,▁▆▇▇█
val_loss,█▅▃▂▁

0,1
epoch,5
finetune_strategy,unfreeze_last_k
test_acc,0.5735
train_acc,0.57812
train_loss,5.35516
trainer/global_step,705
unfreeze_k,20
val_acc,0.57
val_loss,5.32414


[34m[1mwandb[0m: Agent Starting Run: 01rogj8g with config:
[34m[1mwandb[0m: 	augment: True
[34m[1mwandb[0m: 	base_model: IV3
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	finetune_strategy: freeze_all
[34m[1mwandb[0m: 	lr: 0.0009096894074180624
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	unfreeze_k: 5


INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | Inception3       | 25.1 M | train
1 | classifier | Sequential       | 263 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
263 K     Trainable params
25.1 M    Non-trainable params
25.4 M    Total params
101.503   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

0,1
unfreeze_k,▁

0,1
finetune_strategy,freeze_all
unfreeze_k,0


[34m[1mwandb[0m: [32m[41mERROR[0m Run 01rogj8g errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-13-3f0963644e81>", line 38, in train_wandb
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, train_loader, val_loader)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
[34m[1mwandb[0m: [32m[41mERROR[0m     return trainer_fn(*args, **kwargs)
[34m[1

INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | ResNet           | 23.5 M | train
1 | classifier | Sequential       | 527 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
24.0 M    Trainable params
0         Non-trainable params
24.0 M    Total params
96.141    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▂▂▂▂▄▄▄▄▅▅▅▅▇▇▇▇█
test_acc,▁
train_acc,▃▃▁▃▃▅▅▄▆▇▅▇█▅
train_loss,█▇▇▆▅▄▄▃▂▂▃▂▁▂
trainer/global_step,▁▂▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇███
unfreeze_k,▁
val_acc,▄▇█▆▁
val_loss,█▃▁▁▃

0,1
epoch,5
finetune_strategy,unfreeze_all
test_acc,0.342
train_acc,0.67188
train_loss,1.88297
trainer/global_step,705
unfreeze_k,0
val_acc,0.315
val_loss,4.26097


[34m[1mwandb[0m: Agent Starting Run: 3f44cdqf with config:
[34m[1mwandb[0m: 	augment: False
[34m[1mwandb[0m: 	base_model: EFFICIENTNETV2
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	finetune_strategy: unfreeze_all
[34m[1mwandb[0m: 	lr: 0.00044995871350447656
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	unfreeze_k: 20


INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | EfficientNet     | 20.2 M | train
1 | classifier | Sequential       | 330 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
20.5 M    Trainable params
0         Non-trainable params
20.5 M    Total params
82.032    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▁▂▂▂▂▂▂▂▄▄▄▄▄▄▅▅▅▅▅▅▅▇▇▇▇▇▇▇█
test_acc,▁
train_acc,▁▂▃▅▃▃▄▅▄▅▄▄▄▅▅▆▆▅▄▇▄▇▅▇▇▇█▅
train_loss,█▆▇▅▆▆▆▄▆▄▆▅▄▄▅▃▃▃▄▂▅▂▄▃▂▂▁▃
trainer/global_step,▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇████
unfreeze_k,▁
val_acc,▁▄▆█▆
val_loss,█▅▄▁▂

0,1
epoch,5
finetune_strategy,unfreeze_all
test_acc,0.5745
train_acc,0.59375
train_loss,2.87113
trainer/global_step,1410
unfreeze_k,0
val_acc,0.568
val_loss,3.42546


[34m[1mwandb[0m: Agent Starting Run: urlpltax with config:
[34m[1mwandb[0m: 	augment: True
[34m[1mwandb[0m: 	base_model: GOOGLENET
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	finetune_strategy: unfreeze_all
[34m[1mwandb[0m: 	lr: 0.0002036412332853036
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	unfreeze_k: 5


INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | GoogLeNet        | 5.6 M  | train
1 | classifier | Sequential       | 132 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
5.7 M     Trainable params
0         Non-trainable params
5.7 M     Total params
22.930    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▂▂▂▂▄▄▄▄▅▅▅▅▇▇▇▇█
test_acc,▁
train_acc,▁▂▃▄▃▆▃▂▆▅▆█▃▅
train_loss,█▇▇▆▆▃▆▅▂▃▃▁▅▃
trainer/global_step,▁▂▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇███
unfreeze_k,▁
val_acc,▁▄▄▇█
val_loss,█▆▄▂▁

0,1
epoch,5
finetune_strategy,unfreeze_all
test_acc,0.7185
train_acc,0.59375
train_loss,4.62909
trainer/global_step,705
unfreeze_k,0
val_acc,0.712
val_loss,4.51915


[34m[1mwandb[0m: Agent Starting Run: gahu1cuj with config:
[34m[1mwandb[0m: 	augment: True
[34m[1mwandb[0m: 	base_model: GOOGLENET
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	finetune_strategy: freeze_all
[34m[1mwandb[0m: 	lr: 0.0006788470389309296
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	unfreeze_k: 20


INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | GoogLeNet        | 5.6 M  | train
1 | classifier | Sequential       | 264 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
264 K     Trainable params
5.6 M     Non-trainable params
5.9 M     Total params
23.459    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

0,1
unfreeze_k,▁

0,1
finetune_strategy,freeze_all
unfreeze_k,0


[34m[1mwandb[0m: [32m[41mERROR[0m Run gahu1cuj errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-13-3f0963644e81>", line 38, in train_wandb
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, train_loader, val_loader)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
[34m[1mwandb[0m: [32m[41mERROR[0m     return trainer_fn(*args, **kwargs)
[34m[1

INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | VGG              | 14.7 M | train
1 | classifier | Sequential       | 1.1 M  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
1.1 M     Trainable params
14.7 M    Non-trainable params
15.8 M    Total params
63.064    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

0,1
unfreeze_k,▁

0,1
finetune_strategy,freeze_all
unfreeze_k,0


[34m[1mwandb[0m: [32m[41mERROR[0m Run 6rflmxe1 errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-13-3f0963644e81>", line 38, in train_wandb
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, train_loader, val_loader)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
[34m[1mwandb[0m: [32m[41mERROR[0m     return trainer_fn(*args, **kwargs)
[34m[1

INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | VGG              | 14.7 M | train
1 | classifier | Sequential       | 525 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
525 K     Trainable params
14.7 M    Non-trainable params
15.2 M    Total params
60.962    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

0,1
unfreeze_k,▁

0,1
finetune_strategy,freeze_all
unfreeze_k,0


[34m[1mwandb[0m: [32m[41mERROR[0m Run elm3kmi2 errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-13-3f0963644e81>", line 38, in train_wandb
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, train_loader, val_loader)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
[34m[1mwandb[0m: [32m[41mERROR[0m     return trainer_fn(*args, **kwargs)
[34m[1

INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | EfficientNet     | 20.2 M | train
1 | classifier | Sequential       | 330 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
330 K     Trainable params
20.2 M    Non-trainable params
20.5 M    Total params
82.032    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

0,1
unfreeze_k,▁

0,1
finetune_strategy,freeze_all
unfreeze_k,0


[34m[1mwandb[0m: [32m[41mERROR[0m Run x1dumzik errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-13-3f0963644e81>", line 38, in train_wandb
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, train_loader, val_loader)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
[34m[1mwandb[0m: [32m[41mERROR[0m     return trainer_fn(*args, **kwargs)
[34m[1

INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | ResNet           | 23.5 M | train
1 | classifier | Sequential       | 263 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
1.3 M     Trainable params
22.5 M    Non-trainable params
23.8 M    Total params
95.086    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▂▂▂▂▄▄▄▄▅▅▅▅▇▇▇▇█
test_acc,▁
train_acc,▁▂▃▄▃▅▅▆▆█▇▇▇▇
train_loss,█▇▅▅▆▄▄▃▃▂▂▂▁▁
trainer/global_step,▁▂▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇███
unfreeze_k,▁
val_acc,▁▄▆▇█
val_loss,█▆▄▂▁

0,1
epoch,5
finetune_strategy,unfreeze_last_k
test_acc,0.684
train_acc,0.6875
train_loss,5.67453
trainer/global_step,705
unfreeze_k,5
val_acc,0.705
val_loss,5.63449


[34m[1mwandb[0m: Agent Starting Run: yk31dysl with config:
[34m[1mwandb[0m: 	augment: False
[34m[1mwandb[0m: 	base_model: VGG16
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	finetune_strategy: unfreeze_all
[34m[1mwandb[0m: 	lr: 0.0009170155232621794
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	unfreeze_k: 10


INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | VGG              | 14.7 M | train
1 | classifier | Sequential       | 1.1 M  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
15.8 M    Trainable params
0         Non-trainable params
15.8 M    Total params
63.064    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▂▂▂▂▄▄▄▄▅▅▅▅▇▇▇▇█
test_acc,▁
train_acc,▁▁▂▂▅▂▅▃▄▅▆▆█▅
train_loss,█▇▆▇▆▅▅▅▃▄▇▂▁▃
trainer/global_step,▁▂▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇███
unfreeze_k,▁
val_acc,▁▅▆▄█
val_loss,█▅▇▅▁

0,1
epoch,5
finetune_strategy,unfreeze_all
test_acc,0.202
train_acc,0.21875
train_loss,2.10672
trainer/global_step,705
unfreeze_k,0
val_acc,0.225
val_loss,2.18317


[34m[1mwandb[0m: Agent Starting Run: 4gslwzid with config:
[34m[1mwandb[0m: 	augment: False
[34m[1mwandb[0m: 	base_model: GOOGLENET
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	finetune_strategy: unfreeze_all
[34m[1mwandb[0m: 	lr: 0.0008691178710473373
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	unfreeze_k: 10


INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | GoogLeNet        | 5.6 M  | train
1 | classifier | Sequential       | 132 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
5.7 M     Trainable params
0         Non-trainable params
5.7 M     Total params
22.930    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▂▂▂▂▄▄▄▄▅▅▅▅▇▇▇▇█
test_acc,▁
train_acc,▂▃▁▇▆▆█▅▅▆▅█▆█
train_loss,█▇█▅▅▅▃▅▄▃▃▂▂▁
trainer/global_step,▁▂▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇███
unfreeze_k,▁
val_acc,▃▁▇█▇
val_loss,█▆▄▁▁

0,1
epoch,5
finetune_strategy,unfreeze_all
test_acc,0.59
train_acc,0.67188
train_loss,3.09749
trainer/global_step,705
unfreeze_k,0
val_acc,0.573
val_loss,3.98857


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kg0nqkjd with config:
[34m[1mwandb[0m: 	augment: True
[34m[1mwandb[0m: 	base_model: GOOGLENET
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	finetune_strategy: unfreeze_last_k
[34m[1mwandb[0m: 	lr: 0.0006936956012185921
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	unfreeze_k: 10


INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | GoogLeNet        | 5.6 M  | train
1 | classifier | Sequential       | 264 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
467 K     Trainable params
5.4 M     Non-trainable params
5.9 M     Total params
23.459    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▂▂▂▂▄▄▄▄▅▅▅▅▇▇▇▇█
test_acc,▁
train_acc,▁█▁▁▁▁█▁▁▁▁▁▁▁
train_loss,▆▄▆▅▅▃▆▄▁▃▆█▆▆
trainer/global_step,▁▂▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇███
unfreeze_k,▁
val_acc,▁▁██▁
val_loss,█▇▄▃▁

0,1
epoch,5
finetune_strategy,unfreeze_last_k
test_acc,0.0025
train_acc,0
train_loss,6.93339
trainer/global_step,705
unfreeze_k,10
val_acc,0
val_loss,6.83706


[34m[1mwandb[0m: Agent Starting Run: 2zka48yc with config:
[34m[1mwandb[0m: 	augment: True
[34m[1mwandb[0m: 	base_model: RN50
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	finetune_strategy: freeze_all
[34m[1mwandb[0m: 	lr: 0.0008901313948210957
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	unfreeze_k: 10


INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | ResNet           | 23.5 M | train
1 | classifier | Sequential       | 527 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
527 K     Trainable params
23.5 M    Non-trainable params
24.0 M    Total params
96.141    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

0,1
unfreeze_k,▁

0,1
finetune_strategy,freeze_all
unfreeze_k,0


[34m[1mwandb[0m: [32m[41mERROR[0m Run 2zka48yc errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-13-3f0963644e81>", line 38, in train_wandb
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, train_loader, val_loader)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
[34m[1mwandb[0m: [32m[41mERROR[0m     return trainer_fn(*args, **kwargs)
[34m[1

INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | ResNet           | 23.5 M | train
1 | classifier | Sequential       | 527 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
527 K     Trainable params
23.5 M    Non-trainable params
24.0 M    Total params
96.141    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

0,1
unfreeze_k,▁

0,1
finetune_strategy,freeze_all
unfreeze_k,0


[34m[1mwandb[0m: [32m[41mERROR[0m Run a9dk0g1u errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-13-3f0963644e81>", line 38, in train_wandb
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, train_loader, val_loader)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
[34m[1mwandb[0m: [32m[41mERROR[0m     return trainer_fn(*args, **kwargs)
[34m[1

INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | Inception3       | 25.1 M | train
1 | classifier | Sequential       | 263 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
263 K     Trainable params
25.1 M    Non-trainable params
25.4 M    Total params
101.503   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

0,1
unfreeze_k,▁

0,1
finetune_strategy,freeze_all
unfreeze_k,0


[34m[1mwandb[0m: [32m[41mERROR[0m Run dxv6y384 errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-13-3f0963644e81>", line 38, in train_wandb
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, train_loader, val_loader)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
[34m[1mwandb[0m: [32m[41mERROR[0m     return trainer_fn(*args, **kwargs)
[34m[1

INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | Inception3       | 25.1 M | train
1 | classifier | Sequential       | 527 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
25.6 M    Trainable params
0         Non-trainable params
25.6 M    Total params
102.558   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

0,1
unfreeze_k,▁

0,1
finetune_strategy,unfreeze_all
unfreeze_k,0


[34m[1mwandb[0m: [32m[41mERROR[0m Run xm4eordc errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-13-3f0963644e81>", line 38, in train_wandb
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, train_loader, val_loader)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
[34m[1mwandb[0m: [32m[41mERROR[0m     return trainer_fn(*args, **kwargs)
[34m[1

INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | Inception3       | 25.1 M | train
1 | classifier | Sequential       | 527 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
527 K     Trainable params
25.1 M    Non-trainable params
25.6 M    Total params
102.558   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

0,1
unfreeze_k,▁

0,1
finetune_strategy,freeze_all
unfreeze_k,0


[34m[1mwandb[0m: [32m[41mERROR[0m Run hb01k86e errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-13-3f0963644e81>", line 38, in train_wandb
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, train_loader, val_loader)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
[34m[1mwandb[0m: [32m[41mERROR[0m     return trainer_fn(*args, **kwargs)
[34m[1

INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | ResNet           | 23.5 M | train
1 | classifier | Sequential       | 263 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
263 K     Trainable params
23.5 M    Non-trainable params
23.8 M    Total params
95.086    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

0,1
unfreeze_k,▁

0,1
finetune_strategy,freeze_all
unfreeze_k,0


[34m[1mwandb[0m: [32m[41mERROR[0m Run fsgsrjul errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-13-3f0963644e81>", line 38, in train_wandb
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, train_loader, val_loader)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
[34m[1mwandb[0m: [32m[41mERROR[0m     return trainer_fn(*args, **kwargs)
[34m[1

INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | VGG              | 14.7 M | train
1 | classifier | Sequential       | 525 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
5.2 M     Trainable params
10.0 M    Non-trainable params
15.2 M    Total params
60.962    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

- Using generates sweep Id = "zhiduixn"
- Running more epochs to generate plots in wandb
**(Since code is crashed due to runtime issue) **


In [None]:
def train_wandb():
    wandb.init(project="iNat12k-transfer", job_type="sweep", entity = "ma23c044-indian-institute-of-technology-madras")
    config = wandb.config

    wandb.run.name=f"bm_{wandb.config.base_model}_opt_{wandb.config.optimizer}_lr_{wandb.config.lr:.1e}_strat_{wandb.config.finetune_strategy}_dn_{wandb.config.dense_neurons}_bs_{wandb.config.batch_size}_aug_{wandb.config.augment}"



    # Log sweep strategy
    wandb.log({
        'finetune_strategy': config.finetune_strategy,
        'unfreeze_k': config.unfreeze_k if config.finetune_strategy == 'unfreeze_last_k' else 0
    })

    train_loader, val_loader, test_loader = get_dataloaders(
        batch_size=config.batch_size,
        augment=config.augment
    )

    model = TransferModel(
        base_model_name=config.base_model,
        dense_neurons=config.dense_neurons,
        optimizer_name=config.optimizer,
        lr=config.lr,
        finetune_strategy=config.finetune_strategy,
        unfreeze_k=config.unfreeze_k
    )

    wandb_logger = WandbLogger()
    trainer = pl.Trainer(
        max_epochs=config.epochs,
        logger=wandb_logger,
        callbacks=[EarlyStopping(monitor='val_acc', mode='max', patience=3)],
        accelerator='auto'
    )

    trainer.fit(model, train_loader, val_loader)
    trainer.test(model, test_loader)
    wandb.finish()


# -------------------  Sweep Config -------------------
sweep_config = {
    'method': 'random',
    'metric': {'name': 'val_acc', 'goal': 'maximize'},
    'parameters': {
        'base_model': {'values': ['RN50', 'VGG16', 'GOOGLENET','EFFICIENTNETV2','IV3']},
        'dense_neurons': {'values': [128, 256]},
        'optimizer': {'values': ['adam', 'nadam', 'rmsprop']},
        'lr': {'min': 1e-5, 'max': 1e-3},
        'batch_size': {'values': [32, 64]},
        'augment': {'values': [True, False]},
        'epochs': {'value': 3},
        'finetune_strategy': {'values': ['freeze_all', 'unfreeze_all', 'unfreeze_last_k']},
        'unfreeze_k': {'values': [5, 10, 20]}
    }
}

# Launch sweep
# sweep_id = wandb.sweep(sweep_config, project="iNat12k-transfer",entity = "ma23c044-indian-institute-of-technology-madras")
wandb.agent(sweep_id= "zhiduixn",project="iNat12k-transfer",entity = "ma23c044-indian-institute-of-technology-madras", function=train_wandb, count=5)


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: zhiduixn
Sweep URL: https://wandb.ai/ma23c044-indian-institute-of-technology-madras/iNat12k-transfer/sweeps/zhiduixn


[34m[1mwandb[0m: Agent Starting Run: vee5enf6 with config:
[34m[1mwandb[0m: 	augment: True
[34m[1mwandb[0m: 	base_model: EFFICIENTNETV2
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	finetune_strategy: unfreeze_all
[34m[1mwandb[0m: 	lr: 0.0006797018654472736
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	unfreeze_k: 5
[34m[1mwandb[0m: Currently logged in as: [33mma23c044[0m ([33mma23c044-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Downloading: "https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_v2_s-dd5fe13b.pth
100%|██████████| 82.7M/82.7M [00:01<00:00, 83.6MB/s]
INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
/usr/local/lib/python3.11/dist-packages/pytorch_lightning/loggers/wandb.py:397: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=3` reached.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▃▃▃▃▆▆▆▆█
test_acc,▁
train_acc,▁▅▅▆▅▇█▆
train_loss,█▆▅▅▅▁▁▂
trainer/global_step,▁▂▃▃▄▅▅▆▇███
unfreeze_k,▁
val_acc,▁▇█
val_loss,█▃▁

0,1
epoch,3
finetune_strategy,unfreeze_all
test_acc,0.417
train_acc,0.375
train_loss,4.76825
trainer/global_step,423
unfreeze_k,0
val_acc,0.404
val_loss,4.61102


[34m[1mwandb[0m: Agent Starting Run: ihst9hvi with config:
[34m[1mwandb[0m: 	augment: False
[34m[1mwandb[0m: 	base_model: VGG16
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	finetune_strategy: unfreeze_all
[34m[1mwandb[0m: 	lr: 0.0009163160127609048
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	unfreeze_k: 5


Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:04<00:00, 124MB/s]
INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | VGG              | 14.7 M | train
1 | classifier | Sequential       | 525 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
----------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=3` reached.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▁▃▃▃▃▃▃▃▆▆▆▆▆▆█
test_acc,▁
train_acc,▃▃▆▅▁▃▅▆▂▆▅▇▅▄█▂
train_loss,▅▆▅▃█▆▄▄▃▂▄▂▃▃▁▅
trainer/global_step,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇███
unfreeze_k,▁
val_acc,▁▅█
val_loss,█▄▁

0,1
epoch,3
finetune_strategy,unfreeze_all
test_acc,0.225
train_acc,0.09375
train_loss,2.36321
trainer/global_step,846
unfreeze_k,0
val_acc,0.229
val_loss,2.21718


[34m[1mwandb[0m: Agent Starting Run: 5a529ol3 with config:
[34m[1mwandb[0m: 	augment: True
[34m[1mwandb[0m: 	base_model: VGG16
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	finetune_strategy: unfreeze_all
[34m[1mwandb[0m: 	lr: 0.0006363452421404956
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	unfreeze_k: 5


INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | VGG              | 14.7 M | train
1 | classifier | Sequential       | 1.1 M  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
15.8 M    Trainable params
0         Non-trainable params
15.8 M    Total params
63.064    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=3` reached.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▁▃▃▃▃▃▃▃▆▆▆▆▆▆█
test_acc,▁
train_acc,█▅▇▁▂▇▅▆▅▂▃▇█▂▅▂
train_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇███
unfreeze_k,▁
val_acc,▁▁▁
val_loss,▁▁▁

0,1
epoch,3
finetune_strategy,unfreeze_all
test_acc,0.1
train_acc,0.03125
train_loss,10.13015
trainer/global_step,846
unfreeze_k,0
val_acc,0.097
val_loss,10.13015


[34m[1mwandb[0m: Agent Starting Run: uiz2zdpl with config:
[34m[1mwandb[0m: 	augment: True
[34m[1mwandb[0m: 	base_model: IV3
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	finetune_strategy: unfreeze_last_k
[34m[1mwandb[0m: 	lr: 0.0004625827515610887
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	unfreeze_k: 5


Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth
100%|██████████| 104M/104M [00:00<00:00, 126MB/s] 
INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | Inception3       | 25.1 M | train
1 | classifier | Sequential       | 263 K  | train
2 | loss_fn    | CrossEntropyLos

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

0,1
unfreeze_k,▁

0,1
finetune_strategy,unfreeze_last_k
unfreeze_k,5


[34m[1mwandb[0m: [32m[41mERROR[0m Run uiz2zdpl errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-7-bfd61fafc3ac>", line 38, in train_wandb
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, train_loader, val_loader)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
[34m[1mwandb[0m: [32m[41mERROR[0m     return trainer_fn(*args, **kwargs)
[34m[1m

INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | EfficientNet     | 20.2 M | train
1 | classifier | Sequential       | 165 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
165 K     Trainable params
20.2 M    Non-trainable params
20.3 M    Total params
81.371    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

0,1
unfreeze_k,▁

0,1
finetune_strategy,freeze_all
unfreeze_k,0


[34m[1mwandb[0m: [32m[41mERROR[0m Run c6o6psbz errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-7-bfd61fafc3ac>", line 38, in train_wandb
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, train_loader, val_loader)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
[34m[1mwandb[0m: [32m[41mERROR[0m     return trainer_fn(*args, **kwargs)
[34m[1m

In [None]:
def train_wandb():
    wandb.init(project="iNat12k-transfer", job_type="sweep", entity = "ma23c044-indian-institute-of-technology-madras")
    config = wandb.config

    wandb.run.name=f"bm_{wandb.config.base_model}_opt_{wandb.config.optimizer}_lr_{wandb.config.lr:.1e}_strat_{wandb.config.finetune_strategy}_dn_{wandb.config.dense_neurons}_bs_{wandb.config.batch_size}_aug_{wandb.config.augment}"



    # Log sweep strategy
    wandb.log({
        'finetune_strategy': config.finetune_strategy,
        'unfreeze_k': config.unfreeze_k if config.finetune_strategy == 'unfreeze_last_k' else 0
    })

    train_loader, val_loader, test_loader = get_dataloaders(
        batch_size=config.batch_size,
        augment=config.augment
    )

    model = TransferModel(
        base_model_name=config.base_model,
        dense_neurons=config.dense_neurons,
        optimizer_name=config.optimizer,
        lr=config.lr,
        finetune_strategy=config.finetune_strategy,
        unfreeze_k=config.unfreeze_k
    )

    wandb_logger = WandbLogger()
    trainer = pl.Trainer(
        max_epochs=config.epochs,
        logger=wandb_logger,
        callbacks=[EarlyStopping(monitor='val_acc', mode='max', patience=3)],
        accelerator='auto'
    )

    trainer.fit(model, train_loader, val_loader)
    trainer.test(model, test_loader)
    wandb.finish()


# -------------------  Sweep Config -------------------
sweep_config = {
    'method': 'random',
    'metric': {'name': 'val_acc', 'goal': 'maximize'},
    'parameters': {
        'base_model': {'values': ['RN50', 'VGG16', 'GOOGLENET','EFFICIENTNETV2','IV3']},
        'dense_neurons': {'values': [128, 256]},
        'optimizer': {'values': ['adam', 'nadam', 'rmsprop']},
        'lr': {'min': 1e-5, 'max': 1e-3},
        'batch_size': {'values': [32, 64]},
        'augment': {'values': [True, False]},
        'epochs': {'value': 3},
        'finetune_strategy': {'values': ['freeze_all', 'unfreeze_all', 'unfreeze_last_k']},
        'unfreeze_k': {'values': [5, 10, 20]}
    }
}

# Launch sweep
# sweep_id = wandb.sweep(sweep_config, project="iNat12k-transfer",entity = "ma23c044-indian-institute-of-technology-madras")
wandb.agent(sweep_id = "zhiduixn", ,project="iNat12k-transfer",entity = "ma23c044-indian-institute-of-technology-madras",function= train_wandb, count=5)


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: zhiduixn
Sweep URL: https://wandb.ai/ma23c044-indian-institute-of-technology-madras/iNat12k-transfer/sweeps/zhiduixn


[34m[1mwandb[0m: Agent Starting Run: vee5enf6 with config:
[34m[1mwandb[0m: 	augment: True
[34m[1mwandb[0m: 	base_model: EFFICIENTNETV2
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	finetune_strategy: unfreeze_all
[34m[1mwandb[0m: 	lr: 0.0006797018654472736
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	unfreeze_k: 5
[34m[1mwandb[0m: Currently logged in as: [33mma23c044[0m ([33mma23c044-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Downloading: "https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_v2_s-dd5fe13b.pth
100%|██████████| 82.7M/82.7M [00:01<00:00, 83.6MB/s]
INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
/usr/local/lib/python3.11/dist-packages/pytorch_lightning/loggers/wandb.py:397: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=3` reached.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▃▃▃▃▆▆▆▆█
test_acc,▁
train_acc,▁▅▅▆▅▇█▆
train_loss,█▆▅▅▅▁▁▂
trainer/global_step,▁▂▃▃▄▅▅▆▇███
unfreeze_k,▁
val_acc,▁▇█
val_loss,█▃▁

0,1
epoch,3
finetune_strategy,unfreeze_all
test_acc,0.417
train_acc,0.375
train_loss,4.76825
trainer/global_step,423
unfreeze_k,0
val_acc,0.404
val_loss,4.61102


[34m[1mwandb[0m: Agent Starting Run: ihst9hvi with config:
[34m[1mwandb[0m: 	augment: False
[34m[1mwandb[0m: 	base_model: VGG16
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	finetune_strategy: unfreeze_all
[34m[1mwandb[0m: 	lr: 0.0009163160127609048
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	unfreeze_k: 5


Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:04<00:00, 124MB/s]
INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | VGG              | 14.7 M | train
1 | classifier | Sequential       | 525 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
----------

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=3` reached.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▁▃▃▃▃▃▃▃▆▆▆▆▆▆█
test_acc,▁
train_acc,▃▃▆▅▁▃▅▆▂▆▅▇▅▄█▂
train_loss,▅▆▅▃█▆▄▄▃▂▄▂▃▃▁▅
trainer/global_step,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇███
unfreeze_k,▁
val_acc,▁▅█
val_loss,█▄▁

0,1
epoch,3
finetune_strategy,unfreeze_all
test_acc,0.225
train_acc,0.09375
train_loss,2.36321
trainer/global_step,846
unfreeze_k,0
val_acc,0.229
val_loss,2.21718


[34m[1mwandb[0m: Agent Starting Run: 5a529ol3 with config:
[34m[1mwandb[0m: 	augment: True
[34m[1mwandb[0m: 	base_model: VGG16
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	finetune_strategy: unfreeze_all
[34m[1mwandb[0m: 	lr: 0.0006363452421404956
[34m[1mwandb[0m: 	optimizer: rmsprop
[34m[1mwandb[0m: 	unfreeze_k: 5


INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | VGG              | 14.7 M | train
1 | classifier | Sequential       | 1.1 M  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
15.8 M    Trainable params
0         Non-trainable params
15.8 M    Total params
63.064    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=3` reached.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

0,1
epoch,▁▁▁▁▁▁▃▃▃▃▃▃▃▆▆▆▆▆▆█
test_acc,▁
train_acc,█▅▇▁▂▇▅▆▅▂▃▇█▂▅▂
train_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇███
unfreeze_k,▁
val_acc,▁▁▁
val_loss,▁▁▁

0,1
epoch,3
finetune_strategy,unfreeze_all
test_acc,0.1
train_acc,0.03125
train_loss,10.13015
trainer/global_step,846
unfreeze_k,0
val_acc,0.097
val_loss,10.13015


[34m[1mwandb[0m: Agent Starting Run: uiz2zdpl with config:
[34m[1mwandb[0m: 	augment: True
[34m[1mwandb[0m: 	base_model: IV3
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	finetune_strategy: unfreeze_last_k
[34m[1mwandb[0m: 	lr: 0.0004625827515610887
[34m[1mwandb[0m: 	optimizer: nadam
[34m[1mwandb[0m: 	unfreeze_k: 5


Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth
100%|██████████| 104M/104M [00:00<00:00, 126MB/s] 
INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | Inception3       | 25.1 M | train
1 | classifier | Sequential       | 263 K  | train
2 | loss_fn    | CrossEntropyLos

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

0,1
unfreeze_k,▁

0,1
finetune_strategy,unfreeze_last_k
unfreeze_k,5


[34m[1mwandb[0m: [32m[41mERROR[0m Run uiz2zdpl errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-7-bfd61fafc3ac>", line 38, in train_wandb
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, train_loader, val_loader)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
[34m[1mwandb[0m: [32m[41mERROR[0m     return trainer_fn(*args, **kwargs)
[34m[1m

INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type             | Params | Mode 
--------------------------------------------------------
0 | base_model | EfficientNet     | 20.2 M | train
1 | classifier | Sequential       | 165 K  | train
2 | loss_fn    | CrossEntropyLoss | 0      | train
--------------------------------------------------------
165 K     Trainable params
20.2 M    Non-trainable params
20.3 M    Total params
81.371    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

0,1
unfreeze_k,▁

0,1
finetune_strategy,freeze_all
unfreeze_k,0


[34m[1mwandb[0m: [32m[41mERROR[0m Run c6o6psbz errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-7-bfd61fafc3ac>", line 38, in train_wandb
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, train_loader, val_loader)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
[34m[1mwandb[0m: [32m[41mERROR[0m     return trainer_fn(*args, **kwargs)
[34m[1m