## Importing Modules

필요한 모듈을 Import 합니다.


In [1]:
# Modules About Hydra
from hydra import initialize, initialize_config_module, initialize_config_dir, compose
from hydra.utils import instantiate
from omegaconf import DictConfig, OmegaConf

# Modules About Torch, Numpy
import numpy as np
import torch
import torch.nn.functional as F
import torchmetrics
import torchvision
from torch import nn
from torch.utils.data import TensorDataset, DataLoader, random_split
from torchvision import datasets, transforms

# Modules About Pytorch Lightning
import pytorch_lightning as pl
from pytorch_lightning import LightningModule, LightningDataModule
from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, ProgressBar

# Modules About Pandas, Matplotlib, Numpy
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Others
from PIL import Image
from typing import List, Any
import sys
import traceback
import yaml
import wandb
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

  from .autonotebook import tqdm as notebook_tqdm


## Configure Dataset

Custom Dataset을 구성합니다.


In [2]:

class MNISTDataModule(pl.LightningDataModule):
    def __init__(self, data_dir, batch_size, pred_batch_size, train_ratio, pred_dataset=None):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.pred_batch_size = pred_batch_size
        self.train_ratio = train_ratio
        self.pred_dataset = pred_dataset

        # Define Transforms
        def repeat_channels(x):
            return x.repeat(3, 1, 1)

        self.transform = transforms.Compose([
            transforms.ToTensor()
            # ViT expects 224x224 images
            # transforms.Resize((224, 224), antialias=True),
            # transforms.Lambda(repeat_channels)  # ViT expects 3 channels
        ])

    def prepare_data(self):
        # Download MNIST Data
        datasets.MNIST(
            self.data_dir, train=True, download=True)
        datasets.MNIST(
            self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        mnist_train = datasets.MNIST(
            self.data_dir, train=True, transform=self.transform)
        mnist_test = datasets.MNIST(
            self.data_dir, train=False, transform=self.transform)

        # Split Dataset

        self.train_dataset, self.val_dataset = random_split(
            mnist_train, list(map(lambda x: int(x * len(mnist_train)), [self.train_ratio, 1-self.train_ratio])))
        self.test_dataset = mnist_test

    # def _collate_fn(self, samples):
    #     이 함수를 사용할 경우
    #     DataLoader에 인자로 collate_fn=_collate_fn 를 추가해야합니다.
    #     pass

    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size)

    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size)

    def test_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=self.batch_size)

    def predict_dataloader(self):
        return DataLoader(self.pred_dataset, batch_size=self.pred_batch_size)

    def predict_instantly(self, x: List[Any], y: List[int]):
        to_tensor = torchvision.transforms.ToTensor()
        tensor_x = torch.stack([to_tensor(item) for item in x])
        tensor_y = torch.tensor(y)

        return tensor_x, tensor_y

## Design Model

Model 구조를 정의합니다.


In [3]:
class CNNModel(pl.LightningModule):
    def __init__(self, type):
        super().__init__()
        self.type = type
        self.save_hyperparameters("type")

        self.model_list = {"small": (32, 64), "large": (64, 128)}
        self.id2label = {i: i for i in range(10)}
        self.label2id = {i: i for i in range(10)}
        self.loss_func = nn.CrossEntropyLoss()
        self.model = nn.Sequential(
            # Convolutional layer 1
            nn.Conv2d(1, self.model_list[self.type][0],
                      kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Convolutional layer 2
            nn.Conv2d(self.model_list[self.type][0], self.model_list[self.type]
                      [1], kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Fully connected layers
            nn.Flatten(),

            nn.Linear(self.model_list[self.type][1] * 7 * 7, 128),
            nn.ReLU(),

            nn.Linear(128, 64),
            nn.ReLU(),

            nn.Linear(64, 10),  # assuming output has 10 classes
        )

    def forward(self, x, y):
        logits = self.model(x)
        loss = self.loss_func(logits, y)
        return loss, logits

## Task Model

Task 구조를 정의합니다.


In [4]:
class ClassificationTask(pl.LightningModule):
    def __init__(self, model, num_classes, optimizer, lr_scheduler=None):
        super().__init__()
        self.model = model
        self.num_classes = num_classes
        self.optimizer = optimizer
        self.lr_scheduler = lr_scheduler
        self.training_step_outputs = []
        self.validation_step_outputs = []
        self.save_hyperparameters("num_classes", "optimizer", "lr_scheduler")

    def forward(self, x, y):
        x, y = x.to(self.device), y.to(self.device)
        loss, logits = self.model(x, y)
        return loss, logits

    def training_step(self, batch, batch_idx):
        loss, acc = self._shared_step(batch)
        metrics = {"train_acc": acc, "train_loss": loss}
        self.training_step_outputs.append(metrics)
        self.log_dict(metrics, prog_bar=True)
        return loss

    # def on_train_epoch_end(self):
    #     pass

    def validation_step(self, batch, batch_idx):
        loss, acc = self._shared_step(batch)
        metrics = {"val_acc": acc, "val_loss": loss}
        self.validation_step_outputs.append(metrics)
        self.log_dict(metrics)

    def on_validation_epoch_end(self):
        if self.training_step_outputs:
            train_avg_loss = torch.stack([x["train_loss"]
                                          for x in self.training_step_outputs]).mean()
            train_avg_acc = torch.stack([x["train_acc"]
                                        for x in self.training_step_outputs]).mean()
            metrics = {"train_avg_acc": train_avg_acc,
                       "train_avg_loss": train_avg_loss}
            self.log_dict(metrics)
        else:
            return
        if self.validation_step_outputs:
            val_avg_loss = torch.stack([x["val_loss"]
                                        for x in self.validation_step_outputs]).mean()
            val_avg_acc = torch.stack([x["val_acc"]
                                       for x in self.validation_step_outputs]).mean()
            metrics = {"val_avg_acc": val_avg_acc,
                       "val_avg_loss": val_avg_loss}
            self.log_dict(metrics)
        else:
            return
        print("\n" +
              (f'Epoch {self.current_epoch}, Avg. Training Loss: {train_avg_loss:.3f}, Avg. Training Accuracy: {train_avg_acc:.3f} ' +
               f'Avg. Validation Loss: {val_avg_loss:.3f}, Avg. Validation Accuracy: {val_avg_acc:.3f}'), flush=True)
        self.training_step_outputs.clear()
        self.validation_step_outputs.clear()

    def test_step(self, batch, batch_idx):
        loss, acc = self._shared_step(batch)
        metrics = {"test_acc": acc, "test_loss": loss}
        self.log_dict(metrics, prog_bar=True)

    def _shared_step(self, batch):
        x, y = batch
        loss, logits = self.model(x, y)
        acc_fn = torchmetrics.classification.MulticlassAccuracy(
            num_classes=self.num_classes).to(self.device)
        acc = acc_fn(logits, y)
        return loss, acc

    def predict_step(self, batch, batch_idx, dataloader_idx=0):
        x, y = batch
        loss, logits = self.model(x, y)
        return loss, logits

    def configure_optimizers(self):
        optimizer = self.optimizer
        if self.lr_scheduler is not None:
            return [optimizer], [self.lr_scheduler]
        else:
            return optimizer

        # return torch.optim.AdamW(self.model.parameters(), lr=2e-5)


In [5]:
with initialize(version_base=None, config_path="./"):
    cfg = compose(config_name="config.yaml")
if "batch_size" in cfg.data:
    print("⭐️")
else:
    print("🔥")


⭐️


## Model Training

Model Training을 수행합니다.


In [6]:
def generate_train_func(cfg):
    def find_key(cfg, query, new_value):
        for key, value in cfg.items():
            if key == query:
                cfg[key] = new_value
                return True
            elif isinstance(value, DictConfig):
                if find_key(value, query, new_value):
                    return True
        return False

    def train():
        try:
            # Set Constant
            CHECKPOINT_PATH = "./checkpoints"
            CONFIGS_PATH = "./configs"

            # Get Global Version Info
            with open("global.yaml", "r") as f:
                global_data = yaml.safe_load(f)
            version_count = global_data["next_version_count"]
            sweep_count = global_data["next_sweep_count"]

            # Initalize Wandb
            if "name" in cfg.train:
                name = cfg.train.name + f"_s{sweep_count}"
            else:
                name = f"v{version_count}_s{sweep_count}"
            wandb.init(name=name)

            # Get config.yaml file
            with open("config.yaml", "r") as f:
                cfg_data = yaml.safe_load(f)

            # Save Version Config Info On Configs Folder
            if sweep_count == 0:
                with open(f"{CONFIGS_PATH}/version_{version_count}_config.yaml",
                          "w") as f:
                    yaml.dump(cfg_data, f)

            # Set Sweeping Setting
            for key, item in wandb.config.items():
                if not find_key(cfg, key, item):
                    print(
                        f"key: {key} in your sweeping configuration was not found in your configuration")

            # Load Data Module
            data_module = MNISTDataModule(
                **cfg.data)

            # Load Training Configuration
            models = [instantiate(cfg.models[model])
                      for model in dir(cfg.models)]

            # Add Callbacks
            cfg_callbacks = cfg.train.callbacks
            callbacks = []
            checkpoint_callback = ModelCheckpoint(**cfg_callbacks.checkpoint_callback,
                                                  dirpath=f"{CHECKPOINT_PATH}/v{version_count}_s{sweep_count}/"
                                                  )
            callbacks.append(checkpoint_callback)

            early_stop_callback = EarlyStopping(
                **cfg_callbacks.early_stop_callback)
            callbacks.append(early_stop_callback)

            # Set Logger
            logger = instantiate(
                cfg.train.logger, name=f"version_{version_count}")

            # Train
            for model in models:
                # Set Optimizer
                optimizer = instantiate(
                    cfg.task.optimizer, params=model.parameters())

                # Set Lr Scheduler If exists
                if cfg.task.lr_scheduler.scheduler._target_ is not None:
                    lr_scheduler = {}
                    lr_scheduler["scheduler"] = instantiate(
                        cfg.task.lr_scheduler.scheduler, optimizer=optimizer)
                    lr_scheduler["interval"] = cfg.task.lr_scheduler.interval
                else:
                    lr_scheduler = None

                # Define Task
                cfg_task = OmegaConf.to_container(cfg.task)
                cfg_task.pop("optimizer")
                if cfg_task["lr_scheduler"]["scheduler"]["_target_"] is not None:
                    cfg_task.pop("lr_scheduler")
                task = ClassificationTask(**cfg_task,
                                          model=model, optimizer=optimizer, lr_scheduler=lr_scheduler)

                # Train and Test
                trainer = pl.Trainer(**cfg.train.trainer,
                                     callbacks=callbacks, logger=logger)
                trainer.fit(task, data_module)
                trainer.test(task, datamodule=data_module)
                trainer.save_checkpoint(f"{CHECKPOINT_PATH}/best_model.ckpt")

            # Save Version Config Info On Checkpoints Folder
            with open(f"{CHECKPOINT_PATH}/v{version_count}_s{sweep_count}/version_config.yaml",
                      "w") as f:
                yaml.dump(cfg_data, f)

            # Set Sweep Info
            global_data["next_sweep_count"] += 1
            with open("global.yaml", "w") as f:
                yaml.dump(global_data, f)

            # Finish wandb
            if cfg.train.logger._target_ == "pytorch_lightning.loggers.WandbLogger":
                wandb.finish()
        except Exception:
            # Finish wandb
            if cfg.train.logger._target_ == "pytorch_lightning.loggers.WandbLogger":
                wandb.finish()

            print("An error occurred:")
            print(traceback.format_exc())
            return
    return train


# Load Configuration Object
with initialize(version_base=None, config_path="./"):
    cfg = compose(config_name="config.yaml")

# Get Sweep ID
sweep_id = wandb.sweep(OmegaConf.to_container(
    cfg.sweep), project="sweep-test")

# Apply Sweeping
wandb.agent(sweep_id, function=generate_train_func(cfg))

# Update Version / Sweep Info
with open("global.yaml", "r") as f:
    global_data = yaml.safe_load(f)
global_data["next_version_count"] += 1
global_data["next_sweep_count"] = 0
with open("global.yaml", "w") as f:
    yaml.dump(global_data, f)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: 0nf73hon
Sweep URL: https://wandb.ai/suwon-pabby/sweep-test/sweeps/0nf73hon


[34m[1mwandb[0m: Agent Starting Run: hecpj0v8 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	type: small
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33msuwon-pabby[0m. Use [1m`wandb login --relogin`[0m to force relogin


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type     | Params
-----------------------------------
0 | model | CNNModel | 429 K 
-----------------------------------
429 K     Trainable params
0         Non-trainable params
429 K     Total params
1.717     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 563/563 [00:21<00:00, 26.41it/s, v_num=j0v8, train_acc=0.780, train_loss=1.110]
Epoch 0, Avg. Training Loss: 1.870, Avg. Training Accuracy: 0.474 Avg. Validation Loss: 1.126, Avg. Validation Accuracy: 0.733
Epoch 1: 100%|██████████| 563/563 [00:21<00:00, 25.76it/s, v_num=j0v8, train_acc=0.897, train_loss=0.593]
Epoch 1, Avg. Training Loss: 0.743, Avg. Training Accuracy: 0.806 Avg. Validation Loss: 0.544, Avg. Validation Accuracy: 0.844
Epoch 1: 100%|██████████| 563/563 [00:33<00:00, 16.68it/s, v_num=j0v8, train_acc=0.897, train_loss=0.593]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 563/563 [00:33<00:00, 16.66it/s, v_num=j0v8, train_acc=0.897, train_loss=0.593]
Testing DataLoader 0: 100%|██████████| 157/157 [00:04<00:00, 35.46it/s]


0,1
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅█
test_acc,▁
test_loss,▁
train_acc,▁▁▂▄▅▅▅▆▆▆▇▇▅▇▇▆▇▆▆▇▆█
train_avg_acc,▁█
train_avg_loss,█▁
train_loss,████▇▇▇▅▅▅▃▃▄▃▂▂▂▂▃▂▂▁
trainer/global_step,▁▁▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇███
val_acc,▁█
val_avg_acc,▁█

0,1
epoch,2.0
test_acc,0.85229
test_loss,0.53164
train_acc,0.98333
train_avg_acc,0.8063
train_avg_loss,0.74269
train_loss,0.30483
trainer/global_step,1126.0
val_acc,0.84357
val_avg_acc,0.84357


[34m[1mwandb[0m: Agent Starting Run: q1a6q37e with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	type: small
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type     | Params
-----------------------------------
0 | model | CNNModel | 429 K 
-----------------------------------
429 K     Trainable params
0         Non-trainable params
429 K     Total params
1.717     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 282/282 [00:11<00:00, 24.53it/s, v_num=q37e, train_acc=0.800, train_loss=1.770]
Epoch 0, Avg. Training Loss: 2.151, Avg. Training Accuracy: 0.411 Avg. Validation Loss: 1.829, Avg. Validation Accuracy: 0.684
Epoch 1: 100%|██████████| 282/282 [00:11<00:00, 24.98it/s, v_num=q37e, train_acc=0.883, train_loss=0.759]
Epoch 1, Avg. Training Loss: 1.318, Avg. Training Accuracy: 0.737 Avg. Validation Loss: 0.925, Avg. Validation Accuracy: 0.779
Epoch 1: 100%|██████████| 282/282 [00:17<00:00, 16.43it/s, v_num=q37e, train_acc=0.883, train_loss=0.759]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 282/282 [00:17<00:00, 16.35it/s, v_num=q37e, train_acc=0.883, train_loss=0.759]
Testing DataLoader 0: 100%|██████████| 79/79 [00:02<00:00, 30.39it/s]


0,1
epoch,▁▁▁▁▁▁▅▅▅▅▅▅▅█
test_acc,▁
test_loss,▁
train_acc,▁▂▄▆▇█▇█▇█▇
train_avg_acc,▁█
train_avg_loss,█▁
train_loss,███▇▆▅▄▃▃▂▁
trainer/global_step,▁▂▂▃▄▄▄▅▆▆▇███
val_acc,▁█
val_avg_acc,▁█

0,1
epoch,2.0
test_acc,0.79186
test_loss,0.90653
train_acc,0.73004
train_avg_acc,0.73731
train_avg_loss,1.31821
train_loss,0.90916
trainer/global_step,564.0
val_acc,0.77941
val_avg_acc,0.77938


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8fykijpu with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	type: large
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type     | Params
-----------------------------------
0 | model | CNNModel | 886 K 
-----------------------------------
886 K     Trainable params
0         Non-trainable params
886 K     Total params
3.545     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 563/563 [00:22<00:00, 24.65it/s, v_num=ijpu, train_acc=0.845, train_loss=0.737]
Epoch 0, Avg. Training Loss: 1.487, Avg. Training Accuracy: 0.605 Avg. Validation Loss: 0.678, Avg. Validation Accuracy: 0.815
Epoch 1: 100%|██████████| 563/563 [00:24<00:00, 22.66it/s, v_num=ijpu, train_acc=0.885, train_loss=0.389]
Epoch 1, Avg. Training Loss: 0.505, Avg. Training Accuracy: 0.859 Avg. Validation Loss: 0.401, Avg. Validation Accuracy: 0.880
Epoch 1: 100%|██████████| 563/563 [00:37<00:00, 15.02it/s, v_num=ijpu, train_acc=0.885, train_loss=0.389]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 563/563 [00:37<00:00, 14.99it/s, v_num=ijpu, train_acc=0.885, train_loss=0.389]
Testing DataLoader 0: 100%|██████████| 157/157 [00:04<00:00, 32.89it/s]


0,1
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅█
test_acc,▁
test_loss,▁
train_acc,▁▂▃▄▆▆▆▆▆▇▆▇▇▇▇▇▇██▇█▇
train_avg_acc,▁█
train_avg_loss,█▁
train_loss,██▇▆▆▅▄▃▃▃▂▂▂▂▂▂▂▁▂▁▁▁
trainer/global_step,▁▁▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇███
val_acc,▁█
val_avg_acc,▁█

0,1
epoch,2.0
test_acc,0.89267
test_loss,0.38053
train_acc,0.87598
train_avg_acc,0.85884
train_avg_loss,0.50533
train_loss,0.35924
trainer/global_step,1126.0
val_acc,0.8799
val_avg_acc,0.8799


[34m[1mwandb[0m: Agent Starting Run: g08a8zuv with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	type: small
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type     | Params
-----------------------------------
0 | model | CNNModel | 429 K 
-----------------------------------
429 K     Trainable params
0         Non-trainable params
429 K     Total params
1.717     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 282/282 [00:13<00:00, 20.36it/s, v_num=8zuv, train_acc=0.650, train_loss=1.860]
Epoch 0, Avg. Training Loss: 2.135, Avg. Training Accuracy: 0.291 Avg. Validation Loss: 1.771, Avg. Validation Accuracy: 0.654
Epoch 1: 100%|██████████| 282/282 [00:12<00:00, 22.66it/s, v_num=8zuv, train_acc=0.843, train_loss=1.070]
Epoch 1, Avg. Training Loss: 1.239, Avg. Training Accuracy: 0.736 Avg. Validation Loss: 0.849, Avg. Validation Accuracy: 0.790
Epoch 1: 100%|██████████| 282/282 [00:19<00:00, 14.71it/s, v_num=8zuv, train_acc=0.843, train_loss=1.070]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 282/282 [00:19<00:00, 14.67it/s, v_num=8zuv, train_acc=0.843, train_loss=1.070]
Testing DataLoader 0: 100%|██████████| 79/79 [00:02<00:00, 27.22it/s]


0,1
epoch,▁▁▁▁▁▁▅▅▅▅▅▅▅█
test_acc,▁
test_loss,▁
train_acc,▁▁▁▅▆▆▇▇███
train_avg_acc,▁█
train_avg_loss,█▁
train_loss,██▇▇▆▅▄▃▂▁▁
trainer/global_step,▁▂▂▃▄▄▄▅▆▆▇███
val_acc,▁█
val_avg_acc,▁█

0,1
epoch,2.0
test_acc,0.79983
test_loss,0.8306
train_acc,0.77078
train_avg_acc,0.73636
train_avg_loss,1.23908
train_loss,0.86426
trainer/global_step,564.0
val_acc,0.79039
val_avg_acc,0.79046


[34m[1mwandb[0m: Agent Starting Run: gb13xdil with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	type: large
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type     | Params
-----------------------------------
0 | model | CNNModel | 886 K 
-----------------------------------
886 K     Trainable params
0         Non-trainable params
886 K     Total params
3.545     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 1125/1125 [00:45<00:00, 24.81it/s, v_num=xdil, train_acc=0.800, train_loss=0.382]
Epoch 0, Avg. Training Loss: 1.150, Avg. Training Accuracy: 0.637 Avg. Validation Loss: 0.490, Avg. Validation Accuracy: 0.825
Epoch 1: 100%|██████████| 1125/1125 [00:41<00:00, 27.20it/s, v_num=xdil, train_acc=0.825, train_loss=0.320] 
Epoch 1, Avg. Training Loss: 0.387, Avg. Training Accuracy: 0.855 Avg. Validation Loss: 0.334, Avg. Validation Accuracy: 0.867
Epoch 1: 100%|██████████| 1125/1125 [01:02<00:00, 18.09it/s, v_num=xdil, train_acc=0.825, train_loss=0.320]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 1125/1125 [01:02<00:00, 18.08it/s, v_num=xdil, train_acc=0.825, train_loss=0.320]
Testing DataLoader 0: 100%|██████████| 313/313 [00:08<00:00, 38.36it/s]


0,1
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅█
test_acc,▁
test_loss,▁
train_acc,▁▂▂▃▄▄▅▆▅▆▆▆▆▇▇▅▇▆▅▆▇▇▇▇▇▆▅▆▇▇▇▇▇▇▆█▇██▇
train_avg_acc,▁█
train_avg_loss,█▁
train_loss,███▇▆▆▅▄▅▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▃▂▂▃▃▂▁▂▂▂▂▁▂▁▁▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
val_acc,▁█
val_avg_acc,▁█

0,1
epoch,2.0
test_acc,0.88354
test_loss,0.31815
train_acc,0.825
train_avg_acc,0.85513
train_avg_loss,0.38653
train_loss,0.32003
trainer/global_step,2250.0
val_acc,0.86693
val_avg_acc,0.86693


[34m[1mwandb[0m: Agent Starting Run: fefm9dc4 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	type: large
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type     | Params
-----------------------------------
0 | model | CNNModel | 886 K 
-----------------------------------
886 K     Trainable params
0         Non-trainable params
886 K     Total params
3.545     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 563/563 [00:22<00:00, 24.87it/s, v_num=9dc4, train_acc=0.793, train_loss=0.714]
Epoch 0, Avg. Training Loss: 1.512, Avg. Training Accuracy: 0.632 Avg. Validation Loss: 0.706, Avg. Validation Accuracy: 0.809
Epoch 1: 100%|██████████| 563/563 [00:21<00:00, 26.20it/s, v_num=9dc4, train_acc=0.793, train_loss=0.509]
Epoch 1, Avg. Training Loss: 0.507, Avg. Training Accuracy: 0.859 Avg. Validation Loss: 0.392, Avg. Validation Accuracy: 0.885
Epoch 1: 100%|██████████| 563/563 [00:32<00:00, 17.27it/s, v_num=9dc4, train_acc=0.793, train_loss=0.509]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 563/563 [00:32<00:00, 17.23it/s, v_num=9dc4, train_acc=0.793, train_loss=0.509]
Testing DataLoader 0: 100%|██████████| 157/157 [00:04<00:00, 34.95it/s]


0,1
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅█
test_acc,▁
test_loss,▁
train_acc,▁▃▅▅▅▅▆▆█▇▇▆▇▇█▇████▇█
train_avg_acc,▁█
train_avg_loss,█▁
train_loss,██▇▇▆▅▄▄▃▃▃▂▂▂▂▂▂▂▁▂▂▁
trainer/global_step,▁▁▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇███
val_acc,▁█
val_avg_acc,▁█

0,1
epoch,2.0
test_acc,0.89073
test_loss,0.38107
train_acc,0.9196
train_avg_acc,0.85937
train_avg_loss,0.50685
train_loss,0.22193
trainer/global_step,1126.0
val_acc,0.88464
val_avg_acc,0.88464


[34m[1mwandb[0m: Agent Starting Run: 50r3k1qp with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	type: large
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type     | Params
-----------------------------------
0 | model | CNNModel | 886 K 
-----------------------------------
886 K     Trainable params
0         Non-trainable params
886 K     Total params
3.545     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 563/563 [00:21<00:00, 25.65it/s, v_num=k1qp, train_acc=0.942, train_loss=0.516]
Epoch 0, Avg. Training Loss: 1.447, Avg. Training Accuracy: 0.623 Avg. Validation Loss: 0.613, Avg. Validation Accuracy: 0.849
Epoch 1: 100%|██████████| 563/563 [00:21<00:00, 26.41it/s, v_num=k1qp, train_acc=0.942, train_loss=0.259]
Epoch 1, Avg. Training Loss: 0.463, Avg. Training Accuracy: 0.872 Avg. Validation Loss: 0.371, Avg. Validation Accuracy: 0.893
Epoch 1: 100%|██████████| 563/563 [00:32<00:00, 17.43it/s, v_num=k1qp, train_acc=0.942, train_loss=0.259]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 563/563 [00:32<00:00, 17.40it/s, v_num=k1qp, train_acc=0.942, train_loss=0.259]
Testing DataLoader 0: 100%|██████████| 157/157 [00:04<00:00, 36.40it/s]


0,1
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅█
test_acc,▁
test_loss,▁
train_acc,▁▃▄▅▆▆▇▇▇██▇▇██▇██▇▇██
train_avg_acc,▁█
train_avg_loss,█▁
train_loss,██▇▆▅▄▃▃▃▂▂▂▂▂▂▁▁▂▂▂▁▁
trainer/global_step,▁▁▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇███
val_acc,▁█
val_avg_acc,▁█

0,1
epoch,2.0
test_acc,0.89896
test_loss,0.36592
train_acc,0.85667
train_avg_acc,0.87161
train_avg_loss,0.46324
train_loss,0.39828
trainer/global_step,1126.0
val_acc,0.89341
val_avg_acc,0.89341


[34m[1mwandb[0m: Agent Starting Run: bnbbla4v with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	type: large
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type     | Params
-----------------------------------
0 | model | CNNModel | 886 K 
-----------------------------------
886 K     Trainable params
0         Non-trainable params
886 K     Total params
3.545     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 563/563 [00:21<00:00, 25.81it/s, v_num=la4v, train_acc=0.870, train_loss=0.629]
Epoch 0, Avg. Training Loss: 1.439, Avg. Training Accuracy: 0.629 Avg. Validation Loss: 0.628, Avg. Validation Accuracy: 0.832
Epoch 1: 100%|██████████| 563/563 [00:21<00:00, 26.41it/s, v_num=la4v, train_acc=0.915, train_loss=0.369]
Epoch 1, Avg. Training Loss: 0.453, Avg. Training Accuracy: 0.874 Avg. Validation Loss: 0.372, Avg. Validation Accuracy: 0.892
Epoch 1: 100%|██████████| 563/563 [00:32<00:00, 17.39it/s, v_num=la4v, train_acc=0.915, train_loss=0.369]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 563/563 [00:32<00:00, 17.36it/s, v_num=la4v, train_acc=0.915, train_loss=0.369]
Testing DataLoader 0: 100%|██████████| 157/157 [00:04<00:00, 33.36it/s]


0,1
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅█
test_acc,▁
test_loss,▁
train_acc,▁▁▂▅▆▅▆▆▇▇▇█▇█▇▇█▇▇▇▇█
train_avg_acc,▁█
train_avg_loss,█▁
train_loss,██▇▆▆▄▄▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁
trainer/global_step,▁▁▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇███
val_acc,▁█
val_avg_acc,▁█

0,1
epoch,2.0
test_acc,0.89653
test_loss,0.3534
train_acc,0.94389
train_avg_acc,0.87375
train_avg_loss,0.45287
train_loss,0.23457
trainer/global_step,1126.0
val_acc,0.89196
val_avg_acc,0.89196


[34m[1mwandb[0m: Agent Starting Run: zzm7uni0 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	type: large
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type     | Params
-----------------------------------
0 | model | CNNModel | 886 K 
-----------------------------------
886 K     Trainable params
0         Non-trainable params
886 K     Total params
3.545     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 563/563 [00:22<00:00, 25.46it/s, v_num=uni0, train_acc=0.638, train_loss=0.587]
Epoch 0, Avg. Training Loss: 1.390, Avg. Training Accuracy: 0.636 Avg. Validation Loss: 0.611, Avg. Validation Accuracy: 0.824
Epoch 1: 100%|██████████| 563/563 [00:21<00:00, 26.34it/s, v_num=uni0, train_acc=0.717, train_loss=0.397]
Epoch 1, Avg. Training Loss: 0.450, Avg. Training Accuracy: 0.871 Avg. Validation Loss: 0.370, Avg. Validation Accuracy: 0.887
Epoch 1: 100%|██████████| 563/563 [00:32<00:00, 17.38it/s, v_num=uni0, train_acc=0.717, train_loss=0.397]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 563/563 [00:32<00:00, 17.35it/s, v_num=uni0, train_acc=0.717, train_loss=0.397]
Testing DataLoader 0: 100%|██████████| 157/157 [00:04<00:00, 35.83it/s]


0,1
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅█
test_acc,▁
test_loss,▁
train_acc,▁▅▄▅▆▅▆▆▇▇█▇▇▆█▇▇█▇▇▇▇
train_avg_acc,▁█
train_avg_loss,█▁
train_loss,██▇▆▅▄▄▃▂▃▁▂▂▂▁▁▃▁▁▁▁▁
trainer/global_step,▁▁▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇███
val_acc,▁█
val_avg_acc,▁█

0,1
epoch,2.0
test_acc,0.89708
test_loss,0.3502
train_acc,0.81071
train_avg_acc,0.87129
train_avg_loss,0.44978
train_loss,0.42652
trainer/global_step,1126.0
val_acc,0.88651
val_avg_acc,0.88651


[34m[1mwandb[0m: Agent Starting Run: 3fmzz18s with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	type: large
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type     | Params
-----------------------------------
0 | model | CNNModel | 886 K 
-----------------------------------
886 K     Trainable params
0         Non-trainable params
886 K     Total params
3.545     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 563/563 [00:21<00:00, 25.70it/s, v_num=z18s, train_acc=0.722, train_loss=0.681]
Epoch 0, Avg. Training Loss: 1.406, Avg. Training Accuracy: 0.665 Avg. Validation Loss: 0.606, Avg. Validation Accuracy: 0.848
Epoch 1: 100%|██████████| 563/563 [00:21<00:00, 26.22it/s, v_num=z18s, train_acc=0.767, train_loss=0.370]
Epoch 1, Avg. Training Loss: 0.453, Avg. Training Accuracy: 0.874 Avg. Validation Loss: 0.360, Avg. Validation Accuracy: 0.894
Epoch 1: 100%|██████████| 563/563 [00:32<00:00, 17.27it/s, v_num=z18s, train_acc=0.767, train_loss=0.370]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 563/563 [00:32<00:00, 17.24it/s, v_num=z18s, train_acc=0.767, train_loss=0.370]
Testing DataLoader 0: 100%|██████████| 157/157 [00:04<00:00, 36.58it/s]


0,1
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅█
test_acc,▁
test_loss,▁
train_acc,▁▄▅▄▅▅▅▇▇███▇▇█▇███▇▇█
train_avg_acc,▁█
train_avg_loss,█▁
train_loss,██▇▆▅▄▄▂▂▂▁▂▁▁▁▂▁▁▂▁▁▁
trainer/global_step,▁▁▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇▇▇███
val_acc,▁█
val_avg_acc,▁█

0,1
epoch,2.0
test_acc,0.89889
test_loss,0.34859
train_acc,0.88249
train_avg_acc,0.87388
train_avg_loss,0.45333
train_loss,0.37701
trainer/global_step,1126.0
val_acc,0.89364
val_avg_acc,0.89364


[34m[1mwandb[0m: Agent Starting Run: ro43evrp with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	type: large
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type     | Params
-----------------------------------
0 | model | CNNModel | 886 K 
-----------------------------------
886 K     Trainable params
0         Non-trainable params
886 K     Total params
3.545     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 282/282 [00:12<00:00, 21.86it/s, v_num=evrp, train_acc=0.765, train_loss=1.180]
Epoch 0, Avg. Training Loss: 1.909, Avg. Training Accuracy: 0.527 Avg. Validation Loss: 1.261, Avg. Validation Accuracy: 0.733
Epoch 1: 100%|██████████| 282/282 [00:12<00:00, 22.61it/s, v_num=evrp, train_acc=0.952, train_loss=0.539]
Epoch 1, Avg. Training Loss: 0.829, Avg. Training Accuracy: 0.803 Avg. Validation Loss: 0.608, Avg. Validation Accuracy: 0.839
Epoch 1: 100%|██████████| 282/282 [00:18<00:00, 15.04it/s, v_num=evrp, train_acc=0.952, train_loss=0.539]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 282/282 [00:18<00:00, 15.00it/s, v_num=evrp, train_acc=0.952, train_loss=0.539]
Testing DataLoader 0: 100%|██████████| 79/79 [00:02<00:00, 33.59it/s]


0,1
epoch,▁▁▁▁▁▁▅▅▅▅▅▅▅█
test_acc,▁
test_loss,▁
train_acc,▁▄▅▅▆▇▇█▇█▇
train_avg_acc,▁█
train_avg_loss,█▁
train_loss,██▇▆▄▃▂▂▂▁▁
trainer/global_step,▁▂▂▃▄▄▄▅▆▆▇███
val_acc,▁█
val_avg_acc,▁█

0,1
epoch,2.0
test_acc,0.85284
test_loss,0.58208
train_acc,0.81151
train_avg_acc,0.80332
train_avg_loss,0.82851
train_loss,0.59882
trainer/global_step,564.0
val_acc,0.83921
val_avg_acc,0.83919


[34m[1mwandb[0m: Agent Starting Run: zm696k5e with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	type: small
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type     | Params
-----------------------------------
0 | model | CNNModel | 429 K 
-----------------------------------
429 K     Trainable params
0         Non-trainable params
429 K     Total params
1.717     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 1125/1125 [00:36<00:00, 30.60it/s, v_num=6k5e, train_acc=0.702, train_loss=0.499]
Epoch 0, Avg. Training Loss: 1.323, Avg. Training Accuracy: 0.603 Avg. Validation Loss: 0.567, Avg. Validation Accuracy: 0.809
Epoch 1: 100%|██████████| 1125/1125 [00:35<00:00, 31.31it/s, v_num=6k5e, train_acc=0.727, train_loss=0.319]
Epoch 1, Avg. Training Loss: 0.428, Avg. Training Accuracy: 0.842 Avg. Validation Loss: 0.360, Avg. Validation Accuracy: 0.864
Epoch 1: 100%|██████████| 1125/1125 [00:54<00:00, 20.75it/s, v_num=6k5e, train_acc=0.727, train_loss=0.319]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 1125/1125 [00:54<00:00, 20.74it/s, v_num=6k5e, train_acc=0.727, train_loss=0.319]
Testing DataLoader 0: 100%|██████████| 313/313 [00:07<00:00, 41.66it/s]


0,1
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅█
test_acc,▁
test_loss,▁
train_acc,▂▁▂▅▅▅▄▅▆▇▅▇▆▅▆▇▇▇▇▆▇█▆█▇▇▇▇▇██▆█████▆▇▆
train_avg_acc,▁█
train_avg_loss,█▁
train_loss,████▇▇▆▆▅▄▄▄▄▃▃▂▂▂▂▃▂▂▂▂▂▂▂▁▁▂▂▂▁▂▂▂▂▂▂▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
val_acc,▁█
val_avg_acc,▁█

0,1
epoch,2.0
test_acc,0.87899
test_loss,0.34169
train_acc,0.72667
train_avg_acc,0.84242
train_avg_loss,0.42758
train_loss,0.31894
trainer/global_step,2250.0
val_acc,0.8644
val_avg_acc,0.8644


[34m[1mwandb[0m: Agent Starting Run: g4m11t2i with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	type: small
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type     | Params
-----------------------------------
0 | model | CNNModel | 429 K 
-----------------------------------
429 K     Trainable params
0         Non-trainable params
429 K     Total params
1.717     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 1125/1125 [00:36<00:00, 30.73it/s, v_num=1t2i, train_acc=0.763, train_loss=0.604]
Epoch 0, Avg. Training Loss: 1.434, Avg. Training Accuracy: 0.576 Avg. Validation Loss: 0.629, Avg. Validation Accuracy: 0.793
Epoch 1: 100%|██████████| 1125/1125 [00:36<00:00, 31.16it/s, v_num=1t2i, train_acc=0.813, train_loss=0.381]
Epoch 1, Avg. Training Loss: 0.480, Avg. Training Accuracy: 0.832 Avg. Validation Loss: 0.399, Avg. Validation Accuracy: 0.856
Epoch 1: 100%|██████████| 1125/1125 [00:54<00:00, 20.76it/s, v_num=1t2i, train_acc=0.813, train_loss=0.381]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 1125/1125 [00:54<00:00, 20.75it/s, v_num=1t2i, train_acc=0.813, train_loss=0.381]
Testing DataLoader 0: 100%|██████████| 313/313 [00:07<00:00, 42.80it/s]


0,1
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅█
test_acc,▁
test_loss,▁
train_acc,▁▁▂▃▂▄▄▅▅▆▆▅▆▇▇▅▆▇▇▆▇█▇▇▆▇██▇█▆▆█▇▅▆▇▆▇▇
train_avg_acc,▁█
train_avg_loss,█▁
train_loss,████▇▇▇▆▆▅▄▄▄▃▂▃▂▂▂▃▂▁▂▂▃▃▂▁▁▁▂▂▂▁▂▂▁▁▁▁
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
val_acc,▁█
val_avg_acc,▁█

0,1
epoch,2.0
test_acc,0.86897
test_loss,0.38518
train_acc,0.8131
train_avg_acc,0.83178
train_avg_loss,0.47974
train_loss,0.38069
trainer/global_step,2250.0
val_acc,0.85586
val_avg_acc,0.85586


[34m[1mwandb[0m: Agent Starting Run: 72b90ygj with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	type: small
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type     | Params
-----------------------------------
0 | model | CNNModel | 429 K 
-----------------------------------
429 K     Trainable params
0         Non-trainable params
429 K     Total params
1.717     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 1125/1125 [00:36<00:00, 30.70it/s, v_num=0ygj, train_acc=0.833, train_loss=0.657]
Epoch 0, Avg. Training Loss: 1.383, Avg. Training Accuracy: 0.592 Avg. Validation Loss: 0.601, Avg. Validation Accuracy: 0.805
Epoch 1: 100%|██████████| 1125/1125 [00:36<00:00, 31.04it/s, v_num=0ygj, train_acc=0.883, train_loss=0.386]
Epoch 1, Avg. Training Loss: 0.452, Avg. Training Accuracy: 0.843 Avg. Validation Loss: 0.370, Avg. Validation Accuracy: 0.862
Epoch 1: 100%|██████████| 1125/1125 [00:54<00:00, 20.60it/s, v_num=0ygj, train_acc=0.883, train_loss=0.386]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 1125/1125 [00:54<00:00, 20.58it/s, v_num=0ygj, train_acc=0.883, train_loss=0.386]
Testing DataLoader 0: 100%|██████████| 313/313 [00:07<00:00, 42.07it/s]


0,1
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅█
test_acc,▁
test_loss,▁
train_acc,▁▁▂▄▃▃▆▄▅▆▅▄▆▇▇▇█▅▇▇▇▇▇█▇▆█▇▆▆▇█▆▆█▆▇█▇▇
train_avg_acc,▁█
train_avg_loss,█▁
train_loss,████▇▇▆▆▅▄▄▄▄▃▃▃▃▄▃▂▂▃▂▂▂▂▁▂▃▂▁▁▂▂▂▁▂▁▁▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
val_acc,▁█
val_avg_acc,▁█

0,1
epoch,2.0
test_acc,0.87415
test_loss,0.35573
train_acc,0.88333
train_avg_acc,0.84322
train_avg_loss,0.45195
train_loss,0.3857
trainer/global_step,2250.0
val_acc,0.8619
val_avg_acc,0.8619


[34m[1mwandb[0m: Agent Starting Run: e7y1racm with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	type: small
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type     | Params
-----------------------------------
0 | model | CNNModel | 429 K 
-----------------------------------
429 K     Trainable params
0         Non-trainable params
429 K     Total params
1.717     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 1125/1125 [00:36<00:00, 30.79it/s, v_num=racm, train_acc=0.817, train_loss=0.675]
Epoch 0, Avg. Training Loss: 1.477, Avg. Training Accuracy: 0.618 Avg. Validation Loss: 0.634, Avg. Validation Accuracy: 0.809
Epoch 1: 100%|██████████| 1125/1125 [00:36<00:00, 31.14it/s, v_num=racm, train_acc=0.950, train_loss=0.420]
Epoch 1, Avg. Training Loss: 0.475, Avg. Training Accuracy: 0.835 Avg. Validation Loss: 0.373, Avg. Validation Accuracy: 0.863
Epoch 1: 100%|██████████| 1125/1125 [00:54<00:00, 20.72it/s, v_num=racm, train_acc=0.950, train_loss=0.420]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 1125/1125 [00:54<00:00, 20.70it/s, v_num=racm, train_acc=0.950, train_loss=0.420]
Testing DataLoader 0: 100%|██████████| 313/313 [00:07<00:00, 44.07it/s]


0,1
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅█
test_acc,▁
test_loss,▁
train_acc,▁▁▃▂▂▄▄▄▅▆▆▇▄▇▆▇▆▆▇█▇▇▆▆▅█▆██▇▇▇▇▆▇▇▇▆▇█
train_avg_acc,▁█
train_avg_loss,█▁
train_loss,█████▇▇▆▆▅▅▃▄▃▃▃▃▃▂▂▂▂▂▂▂▁▂▁▁▁▂▂▁▂▂▂▁▂▂▂
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
val_acc,▁█
val_avg_acc,▁█

0,1
epoch,2.0
test_acc,0.87597
test_loss,0.36754
train_acc,0.95
train_avg_acc,0.83507
train_avg_loss,0.47481
train_loss,0.42017
trainer/global_step,2250.0
val_acc,0.86282
val_avg_acc,0.86282


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


In [7]:
wandb.finish()


In [8]:
# # 만일 이전 결과에서 Epoch을 이어서 실행하고 싶다면?
# model = CNNModel(config=config)  # 기존에 학습 때 사용한 모델
# task = ClassificationTask.load_from_checkpoint(
#     "test_checkpoints/checkpoints/last.ckpt", model=model)  # 기존 최신 모델에서 체크포인트를 가져옴
# trainer = pl.Trainer(max_epochs=30, callbacks=callbacks)
# trainer.fit(task, data_module)

## Model Prediction

직접 Model Prediction을 수행하여 모델이 제대로 동작하는지 검증합니다.


In [9]:
data_module = MNISTDataModule()
# MNIST 테스트 데이터셋 로드
predict_dataset = datasets.MNIST(
    root='./', train=False, download=True)

# 랜덤 이미지 선택
random_idx = torch.randint(len(predict_dataset), size=(1,)).item()
image, true_label = predict_dataset[random_idx]

# 이미지 확인 (optional)
transform = torchvision.transforms.ToTensor()
image_tensor = transform(image)
plt.imshow(image_tensor[0].squeeze(), cmap='gray')
plt.show()


# 모델 생성 및 학습된 가중치 로드
model_config = {}
model = ClassificationTask.load_from_checkpoint(
    "test_checkpoints/best_model.ckpt", model=CNNModel(config=model_config))

model.eval()
with torch.no_grad():
    x, y = data_module.predict_instantly([image], [true_label])
    loss, logits = model(x, y)

# Predict Data를 원하는 DataLoader로 직접 만들어서 predict를 수행하고자 할 경우
# data_module.predicted_dataloader_attr = your_dataloader
# trainer = pl.Trainer()
# loss, logits = trainer.predict(model, datamodule=data_module)


# 가장 높은 확률을 가진 클래스 예측
_, predicted_class = torch.max(logits, dim=1)

print(f'True label: {true_label}, Predicted label: {predicted_class.item()}')


TypeError: MNISTDataModule.__init__() missing 4 required positional arguments: 'data_dir', 'batch_size', 'pred_batch_size', and 'train_ratio'