# Copyright

<PRE>
This notebook was created as part of the "Deep learning / VITMMA19" class at
Budapest University of Technology and Economics, Hungary,
https://portal.vik.bme.hu/kepzes/targyak/VITMMA19

Any re-use or publication of any part of the notebook is only allowed with the
 written consent of the authors.

2024 (c) Mohammed Salah Al-Radhi and Tamás Gábor Csapó (malradhi@tmit.bme.hu)
</PRE>

In [1]:
### HYPEROPT: task during the class - we will do this together
# add WandB.ai integration to the code
# (help: https://docs.wandb.ai/guides/integrations/lightning )
# run at least 3 different trainings

In [2]:
# install pytorch lightning
!pip install pytorch-lightning --quiet
## high level interface for Pytorch
!pip install wandb --quiet

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/815.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━[0m [32m399.4/815.2 kB[0m [31m11.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m815.2/815.2 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m869.5/869.5 kB[0m [31m32.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.0/13.0 MB[0m [31m91.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m313.8/313.8 kB[0m [31m23.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
import pytorch_lightning as pl
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader,random_split
from torchmetrics import Accuracy
from torchvision import transforms
from torchvision.datasets import CIFAR10
from pytorch_lightning.loggers import WandbLogger ## interface for logging experiments to W&B
import wandb


In [4]:
# create one class to deal with data
class CifarDataModule(pl.LightningDataModule):
  def __init__(self, batch_size, data_dir="./"):
    super().__init__()
    self.data_dir=data_dir
    self.batch_size=batch_size
    self.transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))])
    self.num_classes=10

  def prepare_data(self):
    CIFAR10(self.data_dir,train=True,download=True)
    CIFAR10(self.data_dir,train=False,download=True)

  def setup(self, stage=None):
    if stage=='fit' or stage is None:
      cifar_full=CIFAR10(self.data_dir,train=True,transform=self.transform)
      self.cifar_train,self.cifar_val=random_split(cifar_full,[45000,5000])

    if stage=='test' or stage is None:
      self.cifar_test=CIFAR10(self.data_dir,train=False,transform=self.transform)

  def train_dataloader(self):
    return DataLoader(self.cifar_train,batch_size=self.batch_size,shuffle=True,num_workers=2)

  def val_dataloader(self):
    return DataLoader(self.cifar_val,batch_size=self.batch_size,shuffle=False,num_workers=2)

  def test_dataloader(self):
    return DataLoader(self.cifar_test,batch_size=self.batch_size,shuffle=False,num_workers=2)

In [5]:
class CIFAR10LitModel(pl.LightningModule):
    def __init__(self, input_shape, num_classes, learning_rate=3e-4, neurons_FC1=512, neurons_FC2=256,
                 conv_filter_1=32, conv_filter_2=64, conv_filter_3=128, conv_filter_4=256,
                 dropout_rate=0.25, optimizer_type='adam', weight_decay=0, scheduler=None):
        super().__init__()
        self.save_hyperparameters()

        # Convolutional layers
        self.conv1 = nn.Conv2d(3, self.hparams.conv_filter_1, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(self.hparams.conv_filter_1, self.hparams.conv_filter_2, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool2d(2)

        self.conv3 = nn.Conv2d(self.hparams.conv_filter_2, self.hparams.conv_filter_3, kernel_size=3, stride=1, padding=1)
        # Only one pooling layer after conv3
        self.pool2 = nn.MaxPool2d(2)

        self.conv4 = nn.Conv2d(self.hparams.conv_filter_3, self.hparams.conv_filter_4, kernel_size=3, stride=1, padding=1)

        n_sizes = self._get_output_shape(input_shape)

        # Fully connected layers
        self.fc1 = nn.Linear(n_sizes, self.hparams.neurons_FC1)
        self.fc2 = nn.Linear(self.hparams.neurons_FC1, self.hparams.neurons_FC2)
        self.fc3 = nn.Linear(self.hparams.neurons_FC2, num_classes)

        # Dropout layer
        self.dropout = nn.Dropout(self.hparams.dropout_rate)

        # Accuracy metrics
        self.train_acc = Accuracy(task='multiclass', num_classes=num_classes)
        self.val_acc = Accuracy(task='multiclass', num_classes=num_classes)
        self.test_acc = Accuracy(task='multiclass', num_classes=num_classes)

    def _get_output_shape(self, shape):
        '''Returns the size of the output tensor from the conv layers'''
        batch_size = 1
        input_tensor = torch.autograd.Variable(torch.rand(batch_size, *shape))
        output_feat = self._feature_extractor(input_tensor)
        n_size = output_feat.data.view(batch_size, -1).size(1)
        print("Output feature shape:", output_feat.shape)  # Debugging statement
        return n_size

    def _feature_extractor(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool1(F.relu(self.conv2(x)))  # Use pool1 after conv2
        x = self.pool2(F.relu(self.conv3(x)))
        x = F.relu(self.conv4(x))
        return x

    def forward(self, x):
        x = self._feature_extractor(x)
        x = x.view(x.size(0), -1)  # Flatten for fully connected layers
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.dropout(x)  # Applying dropout
        x = F.log_softmax(self.fc3(x), dim=1)  # Final classification layer
        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        # Metric
        preds = torch.argmax(logits, dim=1)
        acc = self.train_acc(preds, y)
        self.log('train_loss', loss, on_step=True, on_epoch=True, logger=True)
        self.log('train_acc', acc, on_step=True, on_epoch=True, logger=True)
        return loss

    # Validation loop
    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = self.val_acc(preds, y)
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)
        return loss

    # Test loop
    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)

        preds = torch.argmax(logits, dim=1)
        acc = self.test_acc(preds, y)
        self.log('test_loss', loss, on_epoch=True)
        self.log('test_acc', acc, on_epoch=True)
        return loss

    def configure_optimizers(self):
        if self.hparams.optimizer_type == 'adam':
            optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate,
                                           weight_decay=self.hparams.weight_decay)
        elif self.hparams.optimizer_type == 'adamw':
            optimizer = torch.optim.AdamW(self.parameters(), lr=self.hparams.learning_rate,
                                           weight_decay=self.hparams.weight_decay)
        elif self.hparams.optimizer_type == 'sgd':
            optimizer = torch.optim.SGD(self.parameters(), lr=self.hparams.learning_rate,
                                         weight_decay=self.hparams.weight_decay, momentum=0.9)

        if self.hparams.scheduler == 'StepLR':
            scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
            return [optimizer], [scheduler]
        elif self.hparams.scheduler == 'CosineAnnealingLR':
            scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
            return [optimizer], [scheduler]
        else:
            return optimizer


In [6]:
sweep_config = {
    'method': 'random',  # 'random' or 'bayes'
    'metric': {
        'name': 'val_acc',  # Metric to optimize
        'goal': 'maximize'   # Aim to maximize the metric
    },
    'parameters': {
        'learning_rate': {
            'values': [0.001, 0.0001, 0.00001]  # Learning rates to explore
        },
        'optimizer_type': {
            'values': ['adam', 'sgd', 'adamw']  # Optimizers to try
        },
        'neurons_FC1': {  # First fully connected layer neurons
            'values': [128, 256, 512]
        },
        'neurons_FC2': {  # Second fully connected layer neurons
            'values': [64, 128]
        },
        'conv_filter_1': {  # First convolutional layer filter count
            'values': [32, 64, 128]
        },
        'conv_filter_2': {  # Second convolutional layer filter count
            'values': [64, 128, 256]
        },
        'conv_filter_3': {  # Third convolutional layer filter count
            'values': [128, 256]
        },
        'conv_filter_4': {  # Fourth convolutional layer filter count
            'values': [256, 512]
        },
        'dropout_rate': {  # Dropout rate for regularization
            'values': [0.2, 0.25, 0.3]
        },
        'weight_decay': {  # Weight decay for regularization
            'values': [0, 1e-4, 1e-5]  # Different values of weight decay to try
        },
        'batch_size': {  # Batch sizes to experiment with
            'values': [32, 64, 128]
        },
        'scheduler': {  # Learning rate scheduler options
            'values': ['None', 'StepLR', 'CosineAnnealingLR']
        }
    }
    # ,
    # 'early_terminate': {
    #     'type': 'hyperband',  # Hyperband for early termination
    #     'min_iter': 3  # Minimum iterations for early termination
    # }
}


In [7]:
# class for visualizing one batch of validation images along with predicted and rall class label
class ImagePredictionLogger(pl.Callback):
    def __init__(self, val_samples, num_samples=32):
        super().__init__()
        self.val_imgs, self.val_labels = val_samples
        self.val_imgs = self.val_imgs[:num_samples]
        self.val_labels = self.val_labels[:num_samples]

    def on_validation_epoch_end(self, trainer, pl_module):
        val_imgs = self.val_imgs.to(device=pl_module.device)
        logits = pl_module(val_imgs)
        preds = torch.argmax(logits, 1)

        trainer.logger.experiment.log({
            "examples": [wandb.Image(x, caption=f"Pred:{pred}, Label:{y}")
                            for x, pred, y in zip(val_imgs, preds, self.val_labels)],
            "global_step": trainer.global_step
            })

In [8]:
# Instantiate the cifar and model
cifar = CifarDataModule(batch_size=32)
cifar.prepare_data()
cifar.setup()

# Grab samples to log predictions on
samples = next(iter(cifar.val_dataloader()))

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 77652483.04it/s]


Extracting ./cifar-10-python.tar.gz to ./
Files already downloaded and verified


In [None]:
# ## WandB, you have have an account(if you don't, create one)
# def train_model(learning_rate=1e-3):
#     wandb.login(key='')
#     config=wandb.config
#     wandb_logger = WandbLogger(project='lastt', job_type='train', log_model="all")
#     # instantiate classes
#     dm = CifarDataModule(32)
#     dm.prepare_data()
#     dm.setup()
#     model = CIFAR10LitModel((3, 32, 32), dm.num_classes, learning_rate)
#     wandb_logger.watch(model)
#     # Initialize Callbacks
#     checkpoint_callback = pl.callbacks.ModelCheckpoint()
#     early_stop_callback = pl.callbacks.EarlyStopping(monitor="val_acc", patience=3, verbose=False, mode="max")
#     ### WandB
#     trainer = pl.Trainer(max_epochs=5,
#                      logger=wandb_logger,
#                      callbacks=[checkpoint_callback, early_stop_callback,ImagePredictionLogger(samples)]
#                     )
#     # Train the model
#     trainer.fit(model, dm)

#     # Evaluate the model
#     trainer.test(dataloaders=cifar.test_dataloader())
#     # tell the WandB you have finished
#     wandb.finish()

# # 762b2006c3b890ea691d140986f83537087abf7d

In [9]:
# Starting sweep
sweep_id = wandb.sweep(sweep=sweep_config, project='assignment_2')

def train_model():
    # Initialize a new WandB run
    wandb.init()

    config = wandb.config  # Accessing the config after wandb.init()
    wandb_logger = WandbLogger(project='lastt', job_type='train', log_model="all")

    # Instantiate classes
    dm = CifarDataModule(config.batch_size)  # Use batch size from config
    dm.prepare_data()
    dm.setup()

    # Instantiate model with hyperparameters from config
    model = CIFAR10LitModel(
        (3, 32, 32),
        dm.num_classes,
        learning_rate=config.learning_rate,
        neurons_FC1=config.neurons_FC1,
        neurons_FC2=config.neurons_FC2,
        conv_filter_1=config.conv_filter_1,
        conv_filter_2=config.conv_filter_2,
        dropout_rate=config.dropout_rate,
        optimizer_type=config.optimizer_type
    )

    wandb_logger.watch(model)

    # Initialize Callbacks
    checkpoint_callback = pl.callbacks.ModelCheckpoint()
    early_stop_callback = pl.callbacks.EarlyStopping(monitor="val_acc", patience=3, verbose=False, mode="max")

    # Trainer
    trainer = pl.Trainer(
        max_epochs=5,
        logger=wandb_logger,
        callbacks=[checkpoint_callback, early_stop_callback, ImagePredictionLogger(samples)]
    )

    # Training the model
    trainer.fit(model, dm)

    # Evaluate the model
    trainer.test(dataloaders=dm.test_dataloader())

    wandb.finish()

# Use the WandB agent to run the training function
wandb.agent(sweep_id, function=train_model, count=10)  # This will run 10 different training jobs


[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: 4s3wp7hw
Sweep URL: https://wandb.ai/dhyaneswarpraneshraj-bme/assignment_2/sweeps/4s3wp7hw


[34m[1mwandb[0m: Agent Starting Run: 5j8nj4mx with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_filter_1: 64
[34m[1mwandb[0m: 	conv_filter_2: 128
[34m[1mwandb[0m: 	conv_filter_3: 128
[34m[1mwandb[0m: 	conv_filter_4: 512
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons_FC1: 256
[34m[1mwandb[0m: 	neurons_FC2: 64
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	scheduler: CosineAnnealingLR
[34m[1mwandb[0m: 	weight_decay: 1e-05
[34m[1mwandb[0m: Currently logged in as: [33mdhyaneswarpraneshraj[0m ([33mdhyaneswarpraneshraj-bme[0m). Use [1m`wandb login --relogin`[0m to force relogin


Files already downloaded and verified
Files already downloaded and verified


/usr/local/lib/python3.10/dist-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Output feature shape: torch.Size([1, 256, 4, 4])
Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
   | Name      | Type               | Params | Mode 
----------------------------------------------------------
0  | conv1     | Conv2d             | 1.8 K  | train
1  | conv2     | Conv2d             | 73.9 K | train
2  | pool1     | MaxPool2d          | 0      | train
3  | conv3     | Conv2d             | 147 K  | train
4  | pool2     | MaxPool2d          | 0      | train
5  | conv4     | Conv2d             | 295 K  | train
6  | fc1       | Linear             | 1.0 M  | train
7  | fc2       | Linear             | 16.4 K | train
8  | fc3       | Linear             | 650    | train
9  | dropout   | Dropout            | 0      | train
10 | train_acc | MulticlassAccuracy | 0      | train
11 | val_acc   | MulticlassAccuracy | 0      | train
12 | test_acc  | MulticlassAccuracy | 0      | train
----------------------------------------------------------
1.6 M    

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at ./lastt/5j8nj4mx/checkpoints/epoch=4-step=7035.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at ./lastt/5j8nj4mx/checkpoints/epoch=4-step=7035.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

VBox(children=(Label(value='91.219 MB of 91.219 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▁▁▁▁▃▃▃▃▃▃▅▅▅▅▅▅▅▅▅▅▅▅▆▆▆▆████████
global_step,▁▂▄▅▇█
test_acc,▁
test_loss,▁
train_acc_epoch,▁▅▆▇█
train_acc_step,▂▁▃▄▆▅▆▆▆▆▅▆▆▅▇▆▆▇▇▆█▇▆▆▅▇█▆█▇▇▇▆█▆██▆▇▇
train_loss_epoch,█▅▃▂▁
train_loss_step,██▇▇█▇▅▃▄▅▄▅▃▂▃▅▄▅▁▃▃▃▂▁▁▃▂▂▅▂▂▁▂▂▂▅▁▂▂▂
trainer/global_step,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▇▇▇▇▇▇███
val_acc,▁▅▇██

0,1
epoch,5.0
global_step,7035.0
test_acc,0.7343
test_loss,0.79676
train_acc_epoch,0.80413
train_acc_step,0.875
train_loss_epoch,0.57316
train_loss_step,0.35696
trainer/global_step,7035.0
val_acc,0.7272


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: sq9it9io with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	conv_filter_1: 128
[34m[1mwandb[0m: 	conv_filter_2: 128
[34m[1mwandb[0m: 	conv_filter_3: 256
[34m[1mwandb[0m: 	conv_filter_4: 512
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	neurons_FC1: 128
[34m[1mwandb[0m: 	neurons_FC2: 128
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	scheduler: None
[34m[1mwandb[0m: 	weight_decay: 0


Files already downloaded and verified
Files already downloaded and verified


/usr/local/lib/python3.10/dist-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Output feature shape: torch.Size([1, 256, 4, 4])
Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
   | Name      | Type               | Params | Mode 
----------------------------------------------------------
0  | conv1     | Conv2d             | 3.6 K  | train
1  | conv2     | Conv2d             | 147 K  | train
2  | pool1     | MaxPool2d          | 0      | train
3  | conv3     | Conv2d             | 147 K  | train
4  | pool2     | MaxPool2d          | 0      | train
5  | conv4     | Conv2d             | 295 K  | train
6  | fc1       | Linear             | 524 K  | train
7  | fc2       | Linear             | 16.5 K | train
8  | fc3       | Linear             | 1.3 K  | train
9  | dropout   | Dropout            | 0      | train
10 | train_acc | MulticlassAccuracy | 0      | train
11 | val_acc   | MulticlassAccuracy | 0      | train
12 | test_acc  | MulticlassAccuracy | 0      | train
----------------------------------------------------------
1.1 M    

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at ./lastt/sq9it9io/checkpoints/epoch=4-step=1760.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at ./lastt/sq9it9io/checkpoints/epoch=4-step=1760.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

VBox(children=(Label(value='65.574 MB of 65.574 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▁▁▃▃▃▃▃▃▃▃▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆████████
global_step,▁▂▄▅▇█
test_acc,▁
test_loss,▁
train_acc_epoch,▁▅▆▇█
train_acc_step,▁▁▃▂▃▄▄▅▅▅▅▄▄▆▄▅▅▅▅▇▅▇▆▅▇█▅▆▆▇▆▇█▅▆
train_loss_epoch,█▄▃▂▁
train_loss_step,███▇▅▅▆▅▄▃▃▅▄▄▄▃▃▃▂▃▃▂▃▂▃▁▃▂▁▁▂▂▁▂▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇█████
val_acc,▁▄▆▇█

0,1
epoch,5.0
global_step,1760.0
test_acc,0.3889
test_loss,1.65856
train_acc_epoch,0.35962
train_acc_step,0.35938
train_loss_epoch,1.73102
train_loss_step,1.6738
trainer/global_step,1760.0
val_acc,0.3866


[34m[1mwandb[0m: Agent Starting Run: 5i9r0p2c with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_filter_1: 32
[34m[1mwandb[0m: 	conv_filter_2: 128
[34m[1mwandb[0m: 	conv_filter_3: 256
[34m[1mwandb[0m: 	conv_filter_4: 256
[34m[1mwandb[0m: 	dropout_rate: 0.25
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons_FC1: 128
[34m[1mwandb[0m: 	neurons_FC2: 128
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	scheduler: StepLR
[34m[1mwandb[0m: 	weight_decay: 0


Files already downloaded and verified
Files already downloaded and verified


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Output feature shape: torch.Size([1, 256, 4, 4])
Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
   | Name      | Type               | Params | Mode 
----------------------------------------------------------
0  | conv1     | Conv2d             | 896    | train
1  | conv2     | Conv2d             | 37.0 K | train
2  | pool1     | MaxPool2d          | 0      | train
3  | conv3     | Conv2d             | 147 K  | train
4  | pool2     | MaxPool2d          | 0      | train
5  | conv4     | Conv2d             | 295 K  | train
6  | fc1       | Linear             | 524 K  | train
7  | fc2       | Linear             | 16.5 K | train
8  | fc3       | Linear             | 1.3 K  | train
9  | dropout   | Dropout            | 0      | train
10 | train_acc | MulticlassAccuracy | 0      | train
11 | val_acc   | MulticlassAccuracy | 0      | train
12 | test_acc  | MulticlassAccuracy | 0      | train
----------------------------------------------------------
1.0 M    

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at ./lastt/5i9r0p2c/checkpoints/epoch=4-step=7035.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at ./lastt/5i9r0p2c/checkpoints/epoch=4-step=7035.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

VBox(children=(Label(value='59.091 MB of 59.091 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▇▇▇▇▇▇█
global_step,▁▂▄▅▇█
test_acc,▁
test_loss,▁
train_acc_epoch,▁▄▆▇█
train_acc_step,▁▂▂▅▂▃▃▂▅▄▄▆▄▆▂▅▅▆▄▄▄▅▂▅▅▅▇▅▆▄▅▆▆▆▄▇█▇▆▆
train_loss_epoch,█▅▃▂▁
train_loss_step,█▅▅▅▆▄▄▆▄▅▄▃▃▄▃▄▅▂▃▄▅▃▄▃▃▃▄▃▃▄▂▃▃▃▃▂▂▃▁▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇█
val_acc,▁▄▅▇█

0,1
epoch,5.0
global_step,7035.0
test_acc,0.6244
test_loss,1.04396
train_acc_epoch,0.60189
train_acc_step,0.625
train_loss_epoch,1.10916
train_loss_step,0.97866
trainer/global_step,7035.0
val_acc,0.63


[34m[1mwandb[0m: Agent Starting Run: htoq3o4m with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_filter_1: 32
[34m[1mwandb[0m: 	conv_filter_2: 128
[34m[1mwandb[0m: 	conv_filter_3: 128
[34m[1mwandb[0m: 	conv_filter_4: 512
[34m[1mwandb[0m: 	dropout_rate: 0.25
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons_FC1: 256
[34m[1mwandb[0m: 	neurons_FC2: 64
[34m[1mwandb[0m: 	optimizer_type: adamw
[34m[1mwandb[0m: 	scheduler: StepLR
[34m[1mwandb[0m: 	weight_decay: 0.0001


Files already downloaded and verified
Files already downloaded and verified


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Output feature shape: torch.Size([1, 256, 4, 4])
Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
   | Name      | Type               | Params | Mode 
----------------------------------------------------------
0  | conv1     | Conv2d             | 896    | train
1  | conv2     | Conv2d             | 37.0 K | train
2  | pool1     | MaxPool2d          | 0      | train
3  | conv3     | Conv2d             | 147 K  | train
4  | pool2     | MaxPool2d          | 0      | train
5  | conv4     | Conv2d             | 295 K  | train
6  | fc1       | Linear             | 1.0 M  | train
7  | fc2       | Linear             | 16.4 K | train
8  | fc3       | Linear             | 650    | train
9  | dropout   | Dropout            | 0      | train
10 | train_acc | MulticlassAccuracy | 0      | train
11 | val_acc   | MulticlassAccuracy | 0      | train
12 | test_acc  | MulticlassAccuracy | 0      | train
----------------------------------------------------------
1.5 M    

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at ./lastt/htoq3o4m/checkpoints/epoch=4-step=7035.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at ./lastt/htoq3o4m/checkpoints/epoch=4-step=7035.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

VBox(children=(Label(value='89.058 MB of 89.058 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▁▁▁▃▃▃▃▃▃▃▃▃▃▅▅▅▅▅▅▆▆▆▆▆▆▆▆███████
global_step,▁▂▄▅▇█
test_acc,▁
test_loss,▁
train_acc_epoch,▁▅▆▇█
train_acc_step,▂▁▁▄▂▄▂▃▂▂▄▅▆▆▃▆▄▅▆▆▆▅▅█▆██▇▆▆▅▇▅▅▇▇█▅▆█
train_loss_epoch,█▅▃▂▁
train_loss_step,█▇▆▆▅▆▆▅▅▆▄▄▄▅▄▃▄▂▃▃▃▂▃▂▄▃▂▃▂▁▂▃▁▂▃▂▂▂▂▂
trainer/global_step,▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇█████
val_acc,▁▅▇██

0,1
epoch,5.0
global_step,7035.0
test_acc,0.7359
test_loss,0.80785
train_acc_epoch,0.79004
train_acc_step,0.8125
train_loss_epoch,0.60587
train_loss_step,0.62046
trainer/global_step,7035.0
val_acc,0.735


[34m[1mwandb[0m: Agent Starting Run: qz2dz2mf with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_filter_1: 128
[34m[1mwandb[0m: 	conv_filter_2: 128
[34m[1mwandb[0m: 	conv_filter_3: 256
[34m[1mwandb[0m: 	conv_filter_4: 256
[34m[1mwandb[0m: 	dropout_rate: 0.25
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	neurons_FC1: 256
[34m[1mwandb[0m: 	neurons_FC2: 64
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	scheduler: StepLR
[34m[1mwandb[0m: 	weight_decay: 0


Files already downloaded and verified
Files already downloaded and verified


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Output feature shape: torch.Size([1, 256, 4, 4])
Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
   | Name      | Type               | Params | Mode 
----------------------------------------------------------
0  | conv1     | Conv2d             | 3.6 K  | train
1  | conv2     | Conv2d             | 147 K  | train
2  | pool1     | MaxPool2d          | 0      | train
3  | conv3     | Conv2d             | 147 K  | train
4  | pool2     | MaxPool2d          | 0      | train
5  | conv4     | Conv2d             | 295 K  | train
6  | fc1       | Linear             | 1.0 M  | train
7  | fc2       | Linear             | 16.4 K | train
8  | fc3       | Linear             | 650    | train
9  | dropout   | Dropout            | 0      | train
10 | train_acc | MulticlassAccuracy | 0      | train
11 | val_acc   | MulticlassAccuracy | 0      | train
12 | test_acc  | MulticlassAccuracy | 0      | train
----------------------------------------------------------
1.7 M    

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at ./lastt/qz2dz2mf/checkpoints/epoch=4-step=7035.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at ./lastt/qz2dz2mf/checkpoints/epoch=4-step=7035.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

VBox(children=(Label(value='95.540 MB of 95.540 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▁▃▃▃▃▃▃▃▃▃▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆██████
global_step,▁▂▄▅▇█
test_acc,▁
test_loss,▁
train_acc_epoch,▁▅▆▇█
train_acc_step,▁▁▁▃▃▃▅▅▅▅▃▄▅▅▅▆▆▅▆▅▅▅▅▆▇▆▆▇▅▇▇██▇▇█▆▇▆█
train_loss_epoch,█▄▃▂▁
train_loss_step,█▇▆▆▆▅▄▄▄▄▃▅▂▄▃▃▃▃▃▂▂▃▂▄▂▁▁▂▄▁▃▂▂▂▂▂▂▂▁▁
trainer/global_step,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▇▇▇▇▇███████
val_acc,▁▅▇██

0,1
epoch,5.0
global_step,7035.0
test_acc,0.7391
test_loss,0.78482
train_acc_epoch,0.78031
train_acc_step,0.71875
train_loss_epoch,0.63605
train_loss_step,0.54526
trainer/global_step,7035.0
val_acc,0.7398


[34m[1mwandb[0m: Agent Starting Run: f0yjdz3g with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_filter_1: 128
[34m[1mwandb[0m: 	conv_filter_2: 256
[34m[1mwandb[0m: 	conv_filter_3: 256
[34m[1mwandb[0m: 	conv_filter_4: 256
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	neurons_FC1: 512
[34m[1mwandb[0m: 	neurons_FC2: 128
[34m[1mwandb[0m: 	optimizer_type: adamw
[34m[1mwandb[0m: 	scheduler: CosineAnnealingLR
[34m[1mwandb[0m: 	weight_decay: 0.0001


Files already downloaded and verified
Files already downloaded and verified


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Output feature shape: torch.Size([1, 256, 4, 4])
Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
   | Name      | Type               | Params | Mode 
----------------------------------------------------------
0  | conv1     | Conv2d             | 3.6 K  | train
1  | conv2     | Conv2d             | 295 K  | train
2  | pool1     | MaxPool2d          | 0      | train
3  | conv3     | Conv2d             | 295 K  | train
4  | pool2     | MaxPool2d          | 0      | train
5  | conv4     | Conv2d             | 295 K  | train
6  | fc1       | Linear             | 2.1 M  | train
7  | fc2       | Linear             | 65.7 K | train
8  | fc3       | Linear             | 1.3 K  | train
9  | dropout   | Dropout            | 0      | train
10 | train_acc | MulticlassAccuracy | 0      | train
11 | val_acc   | MulticlassAccuracy | 0      | train
12 | test_acc  | MulticlassAccuracy | 0      | train
----------------------------------------------------------
3.1 M    

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at ./lastt/f0yjdz3g/checkpoints/epoch=4-step=7035.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at ./lastt/f0yjdz3g/checkpoints/epoch=4-step=7035.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

VBox(children=(Label(value='175.290 MB of 175.290 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▁▁▁▁▃▃▃▃▃▃▃▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▆█████
global_step,▁▂▄▅▇█
test_acc,▁
test_loss,▁
train_acc_epoch,▁▅▆▇█
train_acc_step,▁▃▂▂▄▂▅▄▅▁▆█▅▄▄▇▅▅█▅▆▄▇▄▃▄▅▃▇▅▆▅▆▅▅▆▄▇▆▆
train_loss_epoch,█▄▃▂▁
train_loss_step,█▇▆▅▆▅▄▆▅▅▃▅▄▅▃▅▂▄▅▄▄▅▅▄▄▃▄▅▃▃▄▃▃▃▃▃▁▄▃▃
trainer/global_step,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
val_acc,▁▃▅▆█

0,1
epoch,5.0
global_step,7035.0
test_acc,0.4768
test_loss,1.43849
train_acc_epoch,0.4452
train_acc_step,0.5625
train_loss_epoch,1.50931
train_loss_step,1.37893
trainer/global_step,7035.0
val_acc,0.4718


[34m[1mwandb[0m: Agent Starting Run: 82emtu1t with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_filter_1: 64
[34m[1mwandb[0m: 	conv_filter_2: 128
[34m[1mwandb[0m: 	conv_filter_3: 256
[34m[1mwandb[0m: 	conv_filter_4: 256
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	neurons_FC1: 128
[34m[1mwandb[0m: 	neurons_FC2: 128
[34m[1mwandb[0m: 	optimizer_type: adam
[34m[1mwandb[0m: 	scheduler: None
[34m[1mwandb[0m: 	weight_decay: 0.0001


Files already downloaded and verified
Files already downloaded and verified


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Output feature shape: torch.Size([1, 256, 4, 4])
Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
   | Name      | Type               | Params | Mode 
----------------------------------------------------------
0  | conv1     | Conv2d             | 1.8 K  | train
1  | conv2     | Conv2d             | 73.9 K | train
2  | pool1     | MaxPool2d          | 0      | train
3  | conv3     | Conv2d             | 147 K  | train
4  | pool2     | MaxPool2d          | 0      | train
5  | conv4     | Conv2d             | 295 K  | train
6  | fc1       | Linear             | 524 K  | train
7  | fc2       | Linear             | 16.5 K | train
8  | fc3       | Linear             | 1.3 K  | train
9  | dropout   | Dropout            | 0      | train
10 | train_acc | MulticlassAccuracy | 0      | train
11 | val_acc   | MulticlassAccuracy | 0      | train
12 | test_acc  | MulticlassAccuracy | 0      | train
----------------------------------------------------------
1.1 M    

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at ./lastt/82emtu1t/checkpoints/epoch=4-step=3520.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at ./lastt/82emtu1t/checkpoints/epoch=4-step=3520.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

VBox(children=(Label(value='61.252 MB of 61.252 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▃▃▃▃▃▃▃▃▃▃▃▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆████████
global_step,▁▂▄▅▇█
test_acc,▁
test_loss,▁
train_acc_epoch,▁▅▆▇█
train_acc_step,▁▂▄▄▃▄▃▃▅▅▆▆▇▅▅▇▆▆▅▆▇▇▇▅▅▅▆▅▇▆▇█▆▆▅▆█▆█▇
train_loss_epoch,█▅▃▂▁
train_loss_step,███▆▆▆▆▅▅▅▆▄▅▅▃▅▃▄▄▄▃▄▂▃▃▃▃▃▂▂▃▄▄▂▅▂▃▂▁▂
trainer/global_step,▁▁▁▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_acc,▁▅▇▇█

0,1
epoch,5.0
global_step,3520.0
test_acc,0.3937
test_loss,1.62908
train_acc_epoch,0.36867
train_acc_step,0.35938
train_loss_epoch,1.69901
train_loss_step,1.66064
trainer/global_step,3520.0
val_acc,0.3822


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qji8hj0c with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_filter_1: 32
[34m[1mwandb[0m: 	conv_filter_2: 64
[34m[1mwandb[0m: 	conv_filter_3: 128
[34m[1mwandb[0m: 	conv_filter_4: 512
[34m[1mwandb[0m: 	dropout_rate: 0.25
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons_FC1: 512
[34m[1mwandb[0m: 	neurons_FC2: 128
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	scheduler: CosineAnnealingLR
[34m[1mwandb[0m: 	weight_decay: 0


Files already downloaded and verified
Files already downloaded and verified


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Output feature shape: torch.Size([1, 256, 4, 4])
Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
   | Name      | Type               | Params | Mode 
----------------------------------------------------------
0  | conv1     | Conv2d             | 896    | train
1  | conv2     | Conv2d             | 18.5 K | train
2  | pool1     | MaxPool2d          | 0      | train
3  | conv3     | Conv2d             | 73.9 K | train
4  | pool2     | MaxPool2d          | 0      | train
5  | conv4     | Conv2d             | 295 K  | train
6  | fc1       | Linear             | 2.1 M  | train
7  | fc2       | Linear             | 65.7 K | train
8  | fc3       | Linear             | 1.3 K  | train
9  | dropout   | Dropout            | 0      | train
10 | train_acc | MulticlassAccuracy | 0      | train
11 | val_acc   | MulticlassAccuracy | 0      | train
12 | test_acc  | MulticlassAccuracy | 0      | train
----------------------------------------------------------
2.6 M    

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at ./lastt/qji8hj0c/checkpoints/epoch=3-step=2816.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at ./lastt/qji8hj0c/checkpoints/epoch=3-step=2816.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

VBox(children=(Label(value='78.357 MB of 78.357 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▁▁▁▁▃▃▃▃▃▃▃▃▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▆█
global_step,▁▃▅▆█
test_acc,▁
test_loss,▁
train_acc_epoch,▁▃▅█
train_acc_step,▇▆▅▅▃▃▃▃▃▃▃▃▆▇▄▇▁▆▃▇▃█▄▅▃▆▃▃▄▂▄█▃▄▄▅▃▅▆▃
train_loss_epoch,█▅▃▁
train_loss_step,▄▁▄▇▃▆▇▆▇▆▅▄▇█▂▇▇▅▆▇▂▄▆▇▆▄▃▅▅█▆▃▅▅▆▆▆▆▅▃
trainer/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇███
val_acc,▁▁▁▁

0,1
epoch,4.0
global_step,2816.0
test_acc,0.1
test_loss,2.30282
train_acc_epoch,0.10118
train_acc_step,0.0625
train_loss_epoch,2.30304
train_loss_step,2.31173
trainer/global_step,2816.0
val_acc,0.1036


[34m[1mwandb[0m: Agent Starting Run: q0l5agjy with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	conv_filter_1: 128
[34m[1mwandb[0m: 	conv_filter_2: 128
[34m[1mwandb[0m: 	conv_filter_3: 128
[34m[1mwandb[0m: 	conv_filter_4: 256
[34m[1mwandb[0m: 	dropout_rate: 0.25
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	neurons_FC1: 128
[34m[1mwandb[0m: 	neurons_FC2: 128
[34m[1mwandb[0m: 	optimizer_type: adamw
[34m[1mwandb[0m: 	scheduler: StepLR
[34m[1mwandb[0m: 	weight_decay: 1e-05


Files already downloaded and verified
Files already downloaded and verified


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Output feature shape: torch.Size([1, 256, 4, 4])
Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
   | Name      | Type               | Params | Mode 
----------------------------------------------------------
0  | conv1     | Conv2d             | 3.6 K  | train
1  | conv2     | Conv2d             | 147 K  | train
2  | pool1     | MaxPool2d          | 0      | train
3  | conv3     | Conv2d             | 147 K  | train
4  | pool2     | MaxPool2d          | 0      | train
5  | conv4     | Conv2d             | 295 K  | train
6  | fc1       | Linear             | 524 K  | train
7  | fc2       | Linear             | 16.5 K | train
8  | fc3       | Linear             | 1.3 K  | train
9  | dropout   | Dropout            | 0      | train
10 | train_acc | MulticlassAccuracy | 0      | train
11 | val_acc   | MulticlassAccuracy | 0      | train
12 | test_acc  | MulticlassAccuracy | 0      | train
----------------------------------------------------------
1.1 M    

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at ./lastt/q0l5agjy/checkpoints/epoch=4-step=1760.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at ./lastt/q0l5agjy/checkpoints/epoch=4-step=1760.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

VBox(children=(Label(value='65.573 MB of 65.573 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▁▁▃▃▃▃▃▃▃▃▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆████████
global_step,▁▂▄▅▇█
test_acc,▁
test_loss,▁
train_acc_epoch,▁▅▆▇█
train_acc_step,▁▃▂▄▃▄▅▄▄▅▅▅▄▅▆▅▆▅▅▆▇▆▆▇▅▇█▆▅▆▅██▆▇
train_loss_epoch,█▄▃▂▁
train_loss_step,█▇▇▆▆▅▄▅▄▅▄▄▄▄▃▃▃▃▃▃▃▂▃▂▄▂▁▂▃▂▃▁▁▂▃
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇█████
val_acc,▁▄▆▇█

0,1
epoch,5.0
global_step,1760.0
test_acc,0.5564
test_loss,1.22927
train_acc_epoch,0.5346
train_acc_step,0.53906
train_loss_epoch,1.28955
train_loss_step,1.42178
trainer/global_step,1760.0
val_acc,0.5452


[34m[1mwandb[0m: Agent Starting Run: 6ayti1zg with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_filter_1: 64
[34m[1mwandb[0m: 	conv_filter_2: 256
[34m[1mwandb[0m: 	conv_filter_3: 256
[34m[1mwandb[0m: 	conv_filter_4: 512
[34m[1mwandb[0m: 	dropout_rate: 0.2
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	neurons_FC1: 512
[34m[1mwandb[0m: 	neurons_FC2: 128
[34m[1mwandb[0m: 	optimizer_type: sgd
[34m[1mwandb[0m: 	scheduler: StepLR
[34m[1mwandb[0m: 	weight_decay: 0


Files already downloaded and verified
Files already downloaded and verified


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Output feature shape: torch.Size([1, 256, 4, 4])
Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
   | Name      | Type               | Params | Mode 
----------------------------------------------------------
0  | conv1     | Conv2d             | 1.8 K  | train
1  | conv2     | Conv2d             | 147 K  | train
2  | pool1     | MaxPool2d          | 0      | train
3  | conv3     | Conv2d             | 295 K  | train
4  | pool2     | MaxPool2d          | 0      | train
5  | conv4     | Conv2d             | 295 K  | train
6  | fc1       | Linear             | 2.1 M  | train
7  | fc2       | Linear             | 65.7 K | train
8  | fc3       | Linear             | 1.3 K  | train
9  | dropout   | Dropout            | 0      | train
10 | train_acc | MulticlassAccuracy | 0      | train
11 | val_acc   | MulticlassAccuracy | 0      | train
12 | test_acc  | MulticlassAccuracy | 0      | train
----------------------------------------------------------
2.9 M    

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Files already downloaded and verified
Files already downloaded and verified


INFO:pytorch_lightning.utilities.rank_zero:Restoring states from the checkpoint path at ./lastt/6ayti1zg/checkpoints/epoch=3-step=2816.ckpt
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:Loaded model weights from the checkpoint at ./lastt/6ayti1zg/checkpoints/epoch=3-step=2816.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

VBox(children=(Label(value='89.078 MB of 89.078 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▁▁▁▁▁▃▃▃▃▃▃▃▃▃▃▃▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆█
global_step,▁▃▅▆█
test_acc,▁
test_loss,▁
train_acc_epoch,▁▅▃█
train_acc_step,▇▄▆▅█▅▆▇▄▅▄▄▅▄▂▆▃▇▇▅▅▄▃▃▂▅▅▅▅▅▆▄▅▅▆▃▃▂▅▁
train_loss_epoch,█▄▂▁
train_loss_step,▄▇▄▄▂▂▄▃▃▁▄▂▄▇█▆▄▄▆▅▄▁▅▃▇▅▅▄▂▄▃▄▆▄▃▅▆▅▄▄
trainer/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇██
val_acc,█▇▇▁

0,1
epoch,4.0
global_step,2816.0
test_acc,0.1104
test_loss,2.3037
train_acc_epoch,0.10144
train_acc_step,0.01562
train_loss_epoch,2.30375
train_loss_step,2.3232
trainer/global_step,2816.0
val_acc,0.1044


In [None]:
### TASK OF THE STUDENT

# extend WandB.ai integration in the code with sweeps
# (e.g. add variables like learning rate, optimizer, neurons_FC1, neurons_FC2)
# help: https://docs.wandb.ai/guides/sweeps and
#       https://github.com/wandb/wandb/issues/5003
# store the hyperparameters and val_acc to wandb
# run at least 10 trainings
# in wandb.ai, export the result of the runs as a .csv file,
# in wandb.ai, create a report from the sweep results and share it by submitting
# the link in Moodle.

In [None]:
## Parameters used in configuration dictionary

# 1. learning_rate
# 2. Number of neurons in Fully connected layer 1
# 3. Number of neurons in Fully connected layer 2
# 4. optimiser
# 5. convolution filters for layers 1 to 4
# 6. weight_decay