In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import torchvision
import torchvision.transforms as T

import pytorch_lightning as pl
from pytorch_lightning import LightningModule, Trainer, LightningDataModule, seed_everything
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger

import torchmetrics

import hydra
from hydra.utils import instantiate

seed_everything(42, workers=True)

Seed set to 42


42

In [6]:
# 데이터를 불러올 때, 필요한 변환(transform)을 정의합니다.
mnist_transform = T.Compose([
    T.ToTensor(), # 텐서 형식으로 변환
])

In [9]:
download_root = './MNIST_DATASET'

train_dataset = torchvision.datasets.MNIST(download_root, transform=mnist_transform, train=True, download=True) # train dataset 다운로드
test_dataset = torchvision.datasets.MNIST(download_root, transform=mnist_transform, train=False, download=True) # test dataset 다운로드

In [10]:
# 데이터 셋을 학습 데이터 셋과 검증 데이터 셋으로 분리합니다.
total_size = len(train_dataset)
train_num, valid_num = int(total_size * 0.8), int(total_size * 0.2) # 8 : 2 = train : valid
print("Train dataset 개수 : ", train_num)
print("Validation dataset 개수 : ", valid_num)
train_dataset,valid_dataset = torch.utils.data.random_split(train_dataset, [train_num, valid_num]) # train - valid set 나누기

Train dataset 개수 :  48000
Validation dataset 개수 :  12000


In [13]:
batch_size = 32

train_dataloader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
valid_dataloader = DataLoader(valid_dataset, batch_size = batch_size, shuffle = False)
test_dataloader = DataLoader(test_dataset, batch_size = batch_size, shuffle = False)

In [22]:
class Classifier(LightningModule):
    def __init__(self, num_classes, dropout_ratio, lr = 0.001):
        super().__init__()
        self.learning_rate = lr
        self.accuracy = torchmetrics.Accuracy(task = 'multiclass', num_classes = num_classes)
        self.criterion = nn.CrossEntropyLoss()
        self.num_classes = num_classes
        self.dropout_ratio = dropout_ratio

        self.layer = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5),  # [BATCH_SIZE, 1, 28, 28] -> [BATCH_SIZE, 16, 24, 24]
            nn.ReLU(),  # ReLU 활성화 함수 적용
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5), # [BATCH_SIZE, 16, 24, 24] -> [BATCH_SIZE, 32, 20, 20]
            nn.ReLU(),  # ReLU 활성화 함수 적용
            nn.MaxPool2d(kernel_size=2), # [BATCH_SIZE, 32, 20, 20] -> [BATCH_SIZE, 32, 10, 10]
            nn.Dropout(dropout_ratio),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5), # [BATCH_SIZE, 32, 10, 10] -> [BATCH_SIZE, 64, 6, 6]
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2), # 크기를 1/2로 줄입니다. [BATCH_SIZE, 64, 6, 6] -> [BATCH_SIZE, 64, 3, 3]
            nn.Dropout(dropout_ratio),
        )

        self.fc_layer = nn.Linear(64*3*3, self.num_classes) # [BATCH_SIZE, 64*3*3] -> [BATCH_SIZE, num_classes]


    def forward(self, x):
        out = self.layer(x) # self.layer에 정의한 Sequential의 연산을 차례대로 다 실행합니다. [BATCH_SIZE, 64, 3, 3]
        out = out.view(x.size(0), -1)  # [BATCH_SIZE, 64*3*3]
        pred = self.fc_layer(out) # [BATCH_SIZE, num_classes]

        return pred
    
    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters(), lr = self.learning_rate)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)

        return [optimizer], [scheduler]
    
    def training_step(self, batch, batch_idx):
        images, labels = batch

        outputs = self(images)

        loss = self.criterion(outputs, labels)
        acc = self.accuracy(outputs, labels)
        self.log('train_loss', loss, on_step = False , on_epoch= True, logger=True)
        self.log('train_acc', acc, on_step = False , on_epoch= True, logger=True)

        return loss

    def validation_step(self, batch, batch_idx):
        images, labels = batch

        outputs = self(images)

        loss = self.criterion(outputs, labels)
        acc = self.accuracy(outputs, labels)

        self.log('valid_loss', loss, on_step = False , on_epoch= True, logger=True)
        self.log('valid_acc', acc, on_step = False , on_epoch= True, logger=True)

        return loss

    def test_step(self, batch, batch_idx):
        images, labels = batch
        outputs = self(images)

        loss = self.criterion(outputs, labels)
        acc = self.accuracy(outputs, labels)

        self.log('test_loss', loss, on_step= False, on_epoch = True , logger= True)
        self.log('test_acc', acc, on_step= False, on_epoch = True , logger= True)

        return loss


    def predict_step(self, batch, batch_idx):
        images, labels = batch
        outputs = self(images)
        _, pred = torch.max(outputs, dim=1)
        return pred

In [23]:
model = Classifier(num_classes= 10, dropout_ratio = 0.2)

early_stopping = EarlyStopping(monitor = 'valid_loss', mode = 'min', verbose = True)
tensor_logger = TensorBoardLogger(save_dir="./tensor_logger", name= 'test')

trainer = pl.Trainer(
            accelerator="cpu",
            max_epochs= 100,
            callbacks = [early_stopping],
            logger = tensor_logger

    )

trainer.fit(model, train_dataloader, valid_dataloader)
trainer.test(model, test_dataloader)

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\USER\.conda\envs\pytorch_test\lib\site-packages\pytorch_lightning\trainer\setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.

  | Name      | Type               | Params | Mode 
---------------------------------------------------------
0 | accuracy  | MulticlassAccuracy | 0      | train
1 | criterion | CrossEntropyLoss   | 0      | train
2 | layer     | Sequential         | 64.5 K | train
3 | fc_layer  | Linear             | 5.8 K  | train
---------------------------------------------------------
70.3 K    Trainable params
0         Non-trainable params
70.3 K    Total params
0.281     Total estimated model params si

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\USER\.conda\envs\pytorch_test\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:433: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
c:\Users\USER\.conda\envs\pytorch_test\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:433: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_loss improved. New best score: 0.058


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_loss improved by 0.012 >= min_delta = 0.0. New best score: 0.046


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_loss improved by 0.010 >= min_delta = 0.0. New best score: 0.036


Validation: |          | 0/? [00:00<?, ?it/s]

Metric valid_loss improved by 0.009 >= min_delta = 0.0. New best score: 0.027


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric valid_loss did not improve in the last 3 records. Best score: 0.027. Signaling Trainer to stop.
c:\Users\USER\.conda\envs\pytorch_test\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:433: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.9934999942779541
        test_loss           0.02021963894367218
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 0.02021963894367218, 'test_acc': 0.9934999942779541}]

In [24]:
# IPython 환경에서 tensorboard 확장 기능을 로드하는 역할을 합니다.
%load_ext tensorboard

# ./runs/tutorial 위치에 저장된 로그를 위치로 tensorboard를 실행합니다.
%tensorboard --logdir ./tensor_logger

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 8516), started 0:08:59 ago. (Use '!kill 8516' to kill it.)

In [25]:
cifar10_transform = T.Compose([
    T.ToTensor(),
])

In [29]:
# torchvision 라이브러리를 사용하여 CIFAR10 데이터 셋을 불러옵니다.
download_root = './CIFAR10_DATASET'

train_dataset = torchvision.datasets.CIFAR10(download_root, transform=cifar10_transform, train=True, download=True) # train dataset 다운로드
test_dataset = torchvision.datasets.CIFAR10(download_root, transform=cifar10_transform, train=False, download=True) # test dataset 다운로드

Files already downloaded and verified
Files already downloaded and verified


In [30]:
# 데이터 셋을 학습 데이터 셋과 검증 데이터 셋으로 분리합니다.
total_size = len(train_dataset)
train_num, valid_num = int(total_size * 0.8), int(total_size * 0.2) # 8 : 2 = train : valid
print("Train dataset 개수 : ", train_num)
print("Validation dataset 개수 : ", valid_num)
print("Test dataset 개수 :", len(test_dataset))
train_dataset, valid_dataset = torch.utils.data.random_split(train_dataset, [train_num, valid_num]) # train - valid set 나누기

Train dataset 개수 :  40000
Validation dataset 개수 :  10000
Test dataset 개수 : 10000


The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  @hydra.main(config_path = 'configs', config_name = 'config')


In [7]:
class SimpleCNN(LightningModule):
    def __init__(self, cfg):
        super().__init__()
        self.learning_rate = cfg.optimizer.lr
        self.accuracy = torchmetrics.Accuracy(task= 'multiclass', num_classes = cfg.model.model.num_classes)
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = cfg.optimizer
        self.scheduler = cfg.scheduler

        self.num_classes = cfg.model.model.num_classes
        self.dropout_ratio = cfg.model.model.dropout_ratio

        self.layer = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5),  # [BATCH_SIZE, 1, 28, 28] -> [BATCH_SIZE, 16, 24, 24]
            nn.ReLU(),  # ReLU 활성화 함수 적용
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5), # [BATCH_SIZE, 16, 24, 24] -> [BATCH_SIZE, 32, 20, 20]
            nn.ReLU(),  # ReLU 활성화 함수 적용
            nn.MaxPool2d(kernel_size=2), # [BATCH_SIZE, 32, 20, 20] -> [BATCH_SIZE, 32, 10, 10]
            nn.Dropout(self.dropout_ratio),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5), # [BATCH_SIZE, 32, 10, 10] -> [BATCH_SIZE, 64, 6, 6]
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2), # 크기를 1/2로 줄입니다. [BATCH_SIZE, 64, 6, 6] -> [BATCH_SIZE, 64, 3, 3]
            nn.Dropout(self.dropout_ratio),
        )

        self.fc_layer = nn.Linear(1024, self.num_classes)
        self.softmax = nn.Softmax(dim = 1)
        
    def forward(self, x):
        return self.model(x)
    






In [8]:
class ResNet(LightningModule):
    def __init__(self, cfg):
        super().__init__()
        self.accuracy = torchmetrics.Accuracy(task = 'multiclass', num_classes = cfg.model.model.num_classes)
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = cfg.optimizer
        self.scheduler = cfg.scheduler

        self.model = instantiate(cfg.model.model)
        self.softmax = nn.Softmax(dim= 1)

    def configure_optimizers(self):
        optimizer = instantiate(self.optimizer, self.parameters())
        scheduler = instantiate(self.scheduler, optimizer)
        
        return [optimizer], [scheduler]

In [9]:
@hydra.main(config_path = 'configs', config_name = 'config')
def main(cfg):

    train_dataset = instantiate(cfg.data.train_dataset)

    test_dataset = instantiate(cfg.data.test_dataset)

    train_num, valid_num = int(len(train_dataset) * (1 - cfg.data.dataloader.valid_split)), int(len(train_dataset) * cfg.data.dataloader.valid_split)
    print("Train dataset 개수 : ", train_num)
    print("Validation dataset 개수 : ", valid_num)
    print("Test dataset 개수 : ", len(test_dataset))
    train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [train_num, valid_num])

    train_dataloader = DataLoader(dataset=train_dataset, batch_size = cfg.data.dataloader.batch_size, shuffle= True)
    
    val_dataloader = DataLoader(dataset=val_dataset, batch_size = cfg.data.dataloader.batch_size, shuffle= False)
    
    test_dataloader = DataLoader(dataset=test_dataset, batch_size = cfg.data.dataloader.batch_size, shuffle= False)

    if cfg.model.model.model_name == 'simple_cnn':
        model = SimpleCNN(cfg)
    else:
        model = ResNet(cfg)

    early_stopping = EarlyStopping(monitor = cfg.callback.monitor,
                                   mode = cfg.callback.mode, patience = cfg.callback.patience)
    lr_monitor = LearningRateMonitor(logging_interval= cfg.callback.logging_interval)

    logger = TensorBoardLogger(**cfg.logger)

    trainer = Trainer(
                    **cfg.trainer,
                    callback = [early_stopping, lr_monitor],
                    logger = logger
    )

    trainer.fit(model, train_dataloader, val_dataloader)
    trainer.test(model, test_dataloader)


The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  @hydra.main(config_path = 'configs', config_name = 'config')
