<a href="https://colab.research.google.com/github/Tejaswini170104/DA6401-A2/blob/main/PartA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Question 01

In [5]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch
import os
from google.colab import drive

drive.mount('/content/drive', force_remount=True)

# Extract dataset
zip_path = "/content/drive/MyDrive/nature_12K.zip"
data_dir = "/content/inaturalist_12K"
if not os.path.exists(data_dir):
    !cp "{zip_path}" .
    !unzip -q nature_12K.zip
    !rm nature_12K.zip

# Function to compute dataset mean & std
def compute_mean_std(data_dir, input_size=224):
    transform = transforms.Compose([
        transforms.Resize((input_size, input_size)),
        transforms.ToTensor()
    ])
    dataset = datasets.ImageFolder(root=data_dir, transform=transform)
    loader = DataLoader(dataset, batch_size=64, shuffle=False, num_workers=0)  # Colab: num_workers=0

    sum_rgb = torch.zeros(3)
    sum_sq_rgb = torch.zeros(3)
    total_pixels = 0

    for images, _ in loader:
        batch_samples = images.size(0)
        pixels_per_image = images.size(2) * images.size(3)
        total_pixels += batch_samples * pixels_per_image

        sum_rgb += images.sum(dim=[0, 2, 3])
        sum_sq_rgb += (images ** 2).sum(dim=[0, 2, 3])

    mean = sum_rgb / total_pixels
    std = torch.sqrt((sum_sq_rgb / total_pixels) - (mean ** 2))

    return mean.tolist(), std.tolist()

# Compute and print
data_dir_train = os.path.join(data_dir, "train")
inat_mean, inat_std = compute_mean_std(data_dir_train)
print(f"Mean: {inat_mean}")
print(f"Std: {inat_std}")

Mounted at /content/drive
Mean: [0.47122955322265625, 0.46000856161117554, 0.3896463215351105]
Std: [0.24068380892276764, 0.23018933832645416, 0.2406243085861206]


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
import torchvision.transforms as transforms

class FlexibleCNN(pl.LightningModule):
    def __init__(self, input_channels=3, num_classes=10, conv_filters=[32, 64, 128, 256, 512],
                 kernel_size=3, activation=F.relu, dense_neurons=256, input_size=224):
        super(FlexibleCNN, self).__init__()

        self.activation = activation
        layers = []
        in_channels = input_channels

        # Adding 5 convolutional blocks
        for out_channels in conv_filters:
            layers.append(nn.Conv2d(in_channels, out_channels, kernel_size, padding=1))
            layers.append(nn.BatchNorm2d(out_channels))
            layers.append(nn.ReLU())
            layers.append(nn.MaxPool2d(2, 2))
            in_channels = out_channels

        self.conv_layers = nn.Sequential(*layers)

        # Compute final feature map size after 5 max-pool layers
        final_size = input_size // (2 ** len(conv_filters))

        # Fully connected layers
        self.fc1 = nn.Linear(conv_filters[-1] * final_size * final_size, dense_neurons)
        self.fc2 = nn.Linear(dense_neurons, num_classes)

        # Compute parameters and computations
        self.total_parameters = self.compute_parameters(conv_filters, kernel_size, dense_neurons, num_classes)
        self.total_computations = self.compute_computations(conv_filters, kernel_size, input_size)

        # Define preprocessing transform for iNaturalist dataset
        self.transform = transforms.Compose([
            transforms.Resize((input_size, input_size)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.47122955322265625, 0.46000856161117554, 0.3896463215351105],
                                 std=[0.24068380892276764, 0.23018933832645416, 0.2406243085861206])
        ])

    def forward(self, x):
        x = self.conv_layers(x)
        x = torch.flatten(x, start_dim=1)
        x = self.activation(self.fc1(x))
        x = self.fc2(x)
        return x

    def compute_parameters(self, conv_filters, kernel_size, dense_neurons, num_classes):
        total_params = 0
        in_channels = 3  # Input image channels

        # Compute parameters for convolution layers
        for out_channels in conv_filters:
            total_params += (in_channels * kernel_size * kernel_size + 1) * out_channels
            in_channels = out_channels

        # Compute parameters for fully connected layers
        total_params += (conv_filters[-1] * (224 // (2**5)) * (224 // (2**5))) * dense_neurons + dense_neurons
        total_params += dense_neurons * num_classes + num_classes

        return total_params

    def compute_computations(self, conv_filters, kernel_size, input_size):
        total_computations = 0
        in_channels = 3  # Input image channels
        feature_map_size = input_size

        # Compute computations for convolution layers
        for out_channels in conv_filters:
            feature_map_size //= 2  # Max-pooling reduces size by half
            total_computations += out_channels * feature_map_size * feature_map_size * (in_channels * kernel_size * kernel_size)
            in_channels = out_channels

        return total_computations

# Example usage:
model = FlexibleCNN(input_channels=3, num_classes=10, conv_filters=[32, 64, 128, 256, 512],
                    kernel_size=3, activation=F.relu, dense_neurons=256, input_size=224)
print(model)
print(f"Total Parameters: {model.total_parameters}")
print(f"Total Computations: {model.total_computations}")

FlexibleCNN(
  (conv_layers): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=

Question 02

In [None]:
import os
import torch
import random
import wandb
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
import torchvision.transforms as transforms
from torch.utils.data import Subset, DataLoader
from torchvision.datasets import ImageFolder
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint

# Set seed for reproducibility
pl.seed_everything(42)

# Dataset path (update if needed)
DATA_DIR = "/content/inaturalist_12K/train"

# Sample 400 images per class
def get_balanced_subset(dataset, samples_per_class=400):
    class_indices = {cls: [] for cls in range(len(dataset.classes))}
    for idx, (_, label) in enumerate(dataset.samples):
        if len(class_indices[label]) < samples_per_class:
            class_indices[label].append(idx)
    indices = sum(class_indices.values(), [])
    random.shuffle(indices)
    return Subset(dataset, indices)

# Activation mapping
ACTIVATION_MAP = {
    "ReLU": nn.ReLU(),
    "GELU": nn.GELU(),
    "SiLU": nn.SiLU(),
    "Mish": nn.Mish()
}

# Flexible CNN Model
class FlexibleCNN(pl.LightningModule):
    def __init__(self, config):
        super().__init__()
        self.save_hyperparameters()

        input_channels = 3
        num_classes = 10
        kernel_size = config.get("kernel_size")
        dense_neurons = config.get("dense_neurons")
        input_size = 224
        filter_type = config.get("filter_type")
        base_filter = config.get("base_filter")

        # Set convolutional filters
        if filter_type == "same":
            conv_filters = [base_filter] * 5
        elif filter_type == "double":
            conv_filters = [base_filter * (2 ** i) for i in range(5)]
        elif filter_type == "half":
            conv_filters = [base_filter // (2 ** i) for i in range(5)]
        conv_filters = [max(4, f) for f in conv_filters]  # avoid 0 filters

        layers = []
        in_channels = input_channels

        for out_channels in conv_filters:
            layers.append(nn.Conv2d(in_channels, out_channels, kernel_size, padding=1))
            if config.get("batch_norm"):
                layers.append(nn.BatchNorm2d(out_channels))
            layers.append(ACTIVATION_MAP[config.get("activation")])
            layers.append(nn.MaxPool2d(2, 2))
            in_channels = out_channels

        self.conv = nn.Sequential(*layers)

        final_size = input_size // (2 ** len(conv_filters))
        fc_in = conv_filters[-1] * final_size * final_size
        self.fc1 = nn.Linear(fc_in, dense_neurons)
        self.dropout = nn.Dropout(config.get("dropout"))
        self.fc2 = nn.Linear(dense_neurons, num_classes)

        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, x):
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = self.loss_fn(logits, y)
        acc = (logits.argmax(1) == y).float().mean()
        self.log("train_loss", loss, prog_bar=True)
        self.log("train_acc", acc, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = self.loss_fn(logits, y)
        acc = (logits.argmax(1) == y).float().mean()
        self.log("val_loss", loss, prog_bar=True)
        self.log("val_acc", acc, prog_bar=True)

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)

# DataModule
class INatDataModule(pl.LightningDataModule):
    def __init__(self, config):
        super().__init__()
        self.batch_size = 64
        self.aug = config.get("augmentation")

    def setup(self, stage=None):
        mean = [0.4712, 0.4600, 0.3896]
        std = [0.2406, 0.2301, 0.2406]
        base_transforms = [transforms.Resize((224, 224))]

        if self.aug:
            base_transforms += [
                transforms.RandomHorizontalFlip(),
                transforms.RandomRotation(15)
            ]

        base_transforms += [transforms.ToTensor(), transforms.Normalize(mean, std)]

        transform = transforms.Compose(base_transforms)
        dataset = ImageFolder(DATA_DIR, transform=transform)
        subset = get_balanced_subset(dataset)
        val_size = int(0.2 * len(subset))
        train_size = len(subset) - val_size
        self.train_set, self.val_set = torch.utils.data.random_split(subset, [train_size, val_size])

    def train_dataloader(self):
        return DataLoader(self.train_set, batch_size=self.batch_size, shuffle=True, num_workers=2)

    def val_dataloader(self):
        return DataLoader(self.val_set, batch_size=self.batch_size, shuffle=False, num_workers=2)

# Training function
def train_wandb(config=None):
    with wandb.init(config=config):
        config = dict(wandb.config)  # Convert to dict to use .get()
        run_name = "_".join([f"{k}-{v}" for k, v in config.items()])
        wandb.run.name = run_name

        model = FlexibleCNN(config)
        datamodule = INatDataModule(config)
        wandb_logger = WandbLogger(project="inat-sweep", log_model=False)
        trainer = pl.Trainer(max_epochs=10, logger=wandb_logger, accelerator="auto")
        trainer.fit(model, datamodule=datamodule)

# Sweep config with dense_neurons added
sweep_config = {
    "method": "bayes",
    "metric": {"goal": "maximize", "name": "val_acc"},
    "parameters": {
        "base_filter": {"values": [32, 64, 128]},
        "kernel_size": {"values": [3, 5]},
        "activation": {"values": ["ReLU", "GELU", "SiLU", "Mish"]},
        "filter_type": {"values": ["same", "double", "half"]},
        "batch_norm": {"values": [True, False]},
        "augmentation": {"values": [True, False]},
        "dropout": {"values": [0.2, 0.3]},
        "dense_neurons": {"values": [128, 256, 512]}  # ✅ added this
    }
}

# Start sweep
sweep_id = wandb.sweep(sweep_config, project="inat-sweep")
wandb.agent(sweep_id, function=train_wandb, count=20)


INFO:lightning_fabric.utilities.seed:Seed set to 42


Create sweep with ID: ealwwqfg
Sweep URL: https://wandb.ai/tejaswiniksssn-indian-institute-of-technology-madras/inat-sweep/sweeps/ealwwqfg


[34m[1mwandb[0m: Agent Starting Run: ywc4216d with config:
[34m[1mwandb[0m: 	activation: SiLU
[34m[1mwandb[0m: 	augmentation: False
[34m[1mwandb[0m: 	base_filter: 128
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	filter_type: half
[34m[1mwandb[0m: 	kernel_size: 5


INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
/usr/local/lib/python3.11/dist-packages/pytorch_lightning/loggers/wandb.py:397: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | conv    | Sequential       | 281 K  | train
1 | fc1     | Linear           | 50.3 K | train
2 | dropout | Dropout          | 0      | train
3 | fc2     | Linear 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Traceback (most recent call last):
  File "<ipython-input-10-9433b7f550bf>", line 153, in train_wandb
    trainer.fit(model, datamodule=datamodule)
  File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
    call._call_and_handle_interrupt(
  File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 599, in _fit_impl
    self._run(model, ckpt_path=ckpt_path)
  File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 1012, in _run
    results = self._run_stage()
              ^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 1054, in _run_stage
    self._run_sanity_check()
  File "/usr/local/lib/python3.11/dist-packages

[34m[1mwandb[0m: [32m[41mERROR[0m Run ywc4216d errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-10-9433b7f550bf>", line 153, in train_wandb
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, datamodule=datamodule)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
[34m[1mwandb[0m: [32m[41mERROR[0m     return trainer_fn(*args, **kwargs)
[34m[1mw

INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | conv    | Sequential       | 69.6 M | train
1 | fc1     | Linear           | 25.7 M | train
2 | dropout | Dropout          | 0      | train
3 | fc2     | Linear           | 2.6 K  | train
4 | loss_fn | CrossEntropyLoss | 0      | train
-----------------------------------------------------
95.3 M    Trainable params
0         Non-trainable params
95.3 M    Total params
381.354   Total estimated model params size (MB)
16        

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Traceback (most recent call last):
  File "<ipython-input-10-9433b7f550bf>", line 153, in train_wandb
    trainer.fit(model, datamodule=datamodule)
  File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
    call._call_and_handle_interrupt(
  File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 599, in _fit_impl
    self._run(model, ckpt_path=ckpt_path)
  File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 1012, in _run
    results = self._run_stage()
              ^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 1054, in _run_stage
    self._run_sanity_check()
  File "/usr/local/lib/python3.11/dist-packages

[34m[1mwandb[0m: [32m[41mERROR[0m Run my3w0fky errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-10-9433b7f550bf>", line 153, in train_wandb
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, datamodule=datamodule)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
[34m[1mwandb[0m: [32m[41mERROR[0m     return trainer_fn(*args, **kwargs)
[34m[1mw

INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | conv    | Sequential       | 19.7 K | train
1 | fc1     | Linear           | 50.4 K | train
2 | dropout | Dropout          | 0      | train
3 | fc2     | Linear           | 2.6 K  | train
4 | loss_fn | CrossEntropyLoss | 0      | train
-----------------------------------------------------
72.7 K    Trainable params
0         Non-trainable params
72.7 K    Total params
0.291     Total estimated model params size (MB)
15        

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Traceback (most recent call last):
  File "<ipython-input-10-9433b7f550bf>", line 153, in train_wandb
    trainer.fit(model, datamodule=datamodule)
  File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
    call._call_and_handle_interrupt(
  File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 599, in _fit_impl
    self._run(model, ckpt_path=ckpt_path)
  File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 1012, in _run
    results = self._run_stage()
              ^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 1054, in _run_stage
    self._run_sanity_check()
  File "/usr/local/lib/python3.11/dist-packages

[34m[1mwandb[0m: [32m[41mERROR[0m Run 8qd9j75l errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-10-9433b7f550bf>", line 153, in train_wandb
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, datamodule=datamodule)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
[34m[1mwandb[0m: [32m[41mERROR[0m     return trainer_fn(*args, **kwargs)
[34m[1mw

INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | conv    | Sequential       | 73.2 K | train
1 | fc1     | Linear           | 50.4 K | train
2 | dropout | Dropout          | 0      | train
3 | fc2     | Linear           | 2.6 K  | train
4 | loss_fn | CrossEntropyLoss | 0      | train
-----------------------------------------------------
126 K     Trainable params
0         Non-trainable params
126 K     Total params
0.505     Total estimated model params size (MB)
21        

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Traceback (most recent call last):
  File "<ipython-input-10-9433b7f550bf>", line 153, in train_wandb
    trainer.fit(model, datamodule=datamodule)
  File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
    call._call_and_handle_interrupt(
  File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 599, in _fit_impl
    self._run(model, ckpt_path=ckpt_path)
  File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 1012, in _run
    results = self._run_stage()
              ^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 1054, in _run_stage
    self._run_sanity_check()
  File "/usr/local/lib/python3.11/dist-packages

[34m[1mwandb[0m: [32m[41mERROR[0m Run 83l8ng2e errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 306, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "<ipython-input-10-9433b7f550bf>", line 153, in train_wandb
[34m[1mwandb[0m: [32m[41mERROR[0m     trainer.fit(model, datamodule=datamodule)
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
[34m[1mwandb[0m: [32m[41mERROR[0m     call._call_and_handle_interrupt(
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
[34m[1mwandb[0m: [32m[41mERROR[0m     return trainer_fn(*args, **kwargs)
[34m[1mw

INFO:pytorch_lightning.utilities.rank_zero:You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | conv    | Sequential       | 7.1 K  | train
1 | fc1     | Linear           | 50.4 K | train
2 | dropout | Dropout          | 0      | train
3 | fc2     | Linear           | 2.6 K  | train
4 | loss_fn | CrossEntropyLoss | 0      | train
-----------------------------------------------------
60.1 K    Trainable params
0         Non-trainable params
60.1 K    Total params
0.240     Total estimated model params size (MB)
15        

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]