# PyTorch Lightning 스타일로 코딩

1. **모델 정의 (MLP 클래스)**

- `pl.LightningModule`를 상속하여 모델 클래스 정의.

- `training_step()`, `validation_step()`, `configure_optimizers()` 메서드 구현.

2. **콜백 (Callback)**

- `ModelCheckpoint` 콜백을 사용하여 `val_loss`가 가장 낮은 모델을 저장.

3. **학습 및 테스트** (`Trainer`):
```python
trainer = Trainer(max_epochs=EPOCHS, callbacks=[checkpoint_callback])
trainer.fit(model, train_loader, test_loader)
trainer.test(model, test_loader)
```

4. **주요 기능**

- `ModelCheckpoint` 콜백을 통해 모델이 `best_model.ckpt` 파일로 자동 저장됩니다.

- 학습 루프가 깔끔하게 정리되었고, 학습률 스케줄러도 `configure_optimizers()` 내에서 정의됩니다.



In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import os
from torchviz import make_dot
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint

# Hyperparameters
BATCH_SIZE = 64
EPOCHS = 20  # increased number of epochs
LEARNING_RATE = 0.001
WEIGHT_DECAY = 1e-5  # reduced weight decay
DROPOUT_RATE = 0.3  # lowered dropout probability
NUM_CLASSES = 10  # CIFAR-10 has 10 classes
INPUT_SIZE = 3 * 32 * 32  # size of one flattened CIFAR-10 image
MODEL_PATH = './best_model.ckpt'  # where the model is stored

# Per-channel mean and standard deviation used to normalize CIFAR-10
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: random flip and padded random crop as augmentation,
# followed by tensor conversion and normalization.
transform_train = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor(),
        transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
    ]
)

# Evaluation pipeline: tensor conversion and normalization only.
transform_test = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
    ]
)

# Both splits live under ./data and are downloaded on demand.
train_dataset = datasets.CIFAR10(
    root="./data", train=True, download=True, transform=transform_train
)
test_dataset = datasets.CIFAR10(
    root="./data", train=False, download=True, transform=transform_test
)

# Only the training batches are shuffled.
train_loader = DataLoader(
    dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True
)
test_loader = DataLoader(
    dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False
)


class MLP(pl.LightningModule):
    """Multi-layer perceptron classifier for flattened CIFAR-10 images.

    The network flattens each input and feeds it through four hidden
    ``Linear -> BatchNorm1d -> ReLU`` stages (512, 256, 256 and 128 units;
    dropout follows the first three) and a final linear layer that emits
    ``NUM_CLASSES`` logits. Cross-entropy is used as the loss.
    """

    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(INPUT_SIZE, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(DROPOUT_RATE),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(128, NUM_CLASSES)
        )
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, x):
        """Return the class logits produced by the network for ``x``."""
        return self.model(x)

    def _loss_and_accuracy(self, batch):
        """Run the model on ``batch`` and return ``(loss, accuracy)``.

        ``batch`` is unpacked as ``(inputs, labels)``; accuracy is the
        fraction of rows whose arg-max logit equals the label.
        """
        inputs, labels = batch
        outputs = self(inputs)
        loss = self.criterion(outputs, labels)
        preds = torch.argmax(outputs, dim=1)
        acc = (preds == labels).float().mean()
        return loss, acc

    def training_step(self, batch, batch_idx):
        """Compute, log (``train_loss``) and return the training loss."""
        inputs, labels = batch
        outputs = self(inputs)
        loss = self.criterion(outputs, labels)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log ``val_loss`` and ``val_acc`` for one validation batch."""
        loss, acc = self._loss_and_accuracy(batch)
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)

    def test_step(self, batch, batch_idx):
        """Log ``test_loss`` and ``test_acc`` for one test batch.

        Needed because the script calls ``trainer.test(model, ...)``,
        which requires the module to define ``test_step``.
        """
        loss, acc = self._loss_and_accuracy(batch)
        self.log('test_loss', loss)
        self.log('test_acc', acc)

    def configure_optimizers(self):
        """Return Adam plus a StepLR schedule (halve the LR every 5 steps)."""
        optimizer = optim.Adam(self.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
        return [optimizer], [scheduler]


# Keep only the single checkpoint with the lowest validation loss, stored
# in the current directory under the file stem "best_model".
checkpoint_callback = ModelCheckpoint(monitor="val_loss", save_top_k=1, mode="min", dirpath="./", filename="best_model")

model = MLP()
trainer = Trainer(max_epochs=EPOCHS, callbacks=[checkpoint_callback])

# Run training.
# NOTE(review): the test loader doubles as the validation loader here, so
# checkpoint selection is driven by test data -- consider a held-out split.
trainer.fit(model, train_loader, test_loader)

# Run the test pass on the best checkpoint saved by the callback instead of
# on whatever weights the final epoch happened to leave in `model`.
trainer.test(model, test_loader, ckpt_path="best")


## ✅ PyTorch Lightning에 Optuna 통합

In [None]:
# !pip install optuna

import os
import json
import platform
import pkg_resources
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, random_split
import optuna
from sklearn.metrics import precision_score, recall_score, f1_score
from pytorch_lightning import LightningModule, Trainer, seed_everything
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from torchmetrics import Accuracy, Precision, Recall, F1Score
from optuna.integration import PyTorchLightningPruningCallback


# Hyperparameters
# -- optimisation
EPOCHS = 20
BATCH_SIZE = 64
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 0.00001
PATIENCE = 3
# -- model
NUM_CLASSES = 10
INPUT_SIZE = 3 * 32 * 32
DROPOUT_RATE = 0.3
# -- data split and checkpoint location
SPLIT_RATIO = 0.8
MODEL_PATH = './best_model.ckpt'

# Mean and standard deviation of the CIFAR10 dataset (normalization basis)
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)


class MLP(LightningModule):
    """Multi-layer perceptron CIFAR-10 classifier with tunable LR and dropout.

    Args:
        learning_rate: Learning rate handed to Adam in
            ``configure_optimizers``.
        dropout_rate: Dropout probability used after the first three hidden
            stages.

    Validation logs ``val_loss``, ``val_acc``, ``val_precision``,
    ``val_recall`` and ``val_f1``; testing logs ``test_loss`` and
    ``test_acc``.
    """

    def __init__(self, learning_rate=LEARNING_RATE, dropout_rate=DROPOUT_RATE):
        super().__init__()
        # Record the constructor arguments in checkpoints so that
        # load_from_checkpoint() can rebuild the model with the same values.
        self.save_hyperparameters()
        self.learning_rate = learning_rate
        self.dropout_rate = dropout_rate

        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(INPUT_SIZE, 512), nn.BatchNorm1d(512), nn.ReLU(), nn.Dropout(self.dropout_rate),
            nn.Linear(512, 256), nn.BatchNorm1d(256), nn.ReLU(), nn.Dropout(self.dropout_rate),
            nn.Linear(256, 256), nn.BatchNorm1d(256), nn.ReLU(), nn.Dropout(self.dropout_rate),
            nn.Linear(256, 128), nn.BatchNorm1d(128), nn.ReLU(),
            nn.Linear(128, NUM_CLASSES)
        )

        self.criterion = nn.CrossEntropyLoss()

        # Metric attributes carry a stage prefix. In particular the name
        # `precision` is avoided: Lightning 1.x keeps its own `precision`
        # attribute on the module, and registering a metric under that name
        # can collide with it. Validation and test also get separate
        # accuracy objects so their accumulated state never mixes.
        self.val_accuracy = Accuracy()
        self.val_precision = Precision(average='macro', num_classes=NUM_CLASSES)
        self.val_recall = Recall(average='macro', num_classes=NUM_CLASSES)
        self.val_f1 = F1Score(average='macro', num_classes=NUM_CLASSES)
        self.test_accuracy = Accuracy()

    def forward(self, x):
        """Return the class logits produced by the network for ``x``."""
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """Compute, log (``train_loss``) and return the training loss."""
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)

        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log the validation loss and metrics for one batch; return the loss."""
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)

        preds = torch.argmax(y_hat, dim=1)
        self.log('val_loss', loss, prog_bar=True)

        # Accumulate each metric and log the metric object itself so the
        # epoch value is computed over the whole validation set. Averaging
        # per-batch macro precision/recall/F1 does not give that value.
        val_metrics = (
            ('val_acc', self.val_accuracy),
            ('val_precision', self.val_precision),
            ('val_recall', self.val_recall),
            ('val_f1', self.val_f1),
        )
        for name, metric in val_metrics:
            metric.update(preds, y)
            self.log(name, metric, on_step=False, on_epoch=True, prog_bar=True)

        return loss

    def test_step(self, batch, batch_idx):
        """Log the test loss and accuracy for one batch; return the loss."""
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)

        preds = torch.argmax(y_hat, dim=1)
        self.log('test_loss', loss)
        self.test_accuracy.update(preds, y)
        self.log('test_acc', self.test_accuracy, on_step=False, on_epoch=True)

        return loss

    def configure_optimizers(self):
        """Return Adam plus a StepLR schedule (halve the LR every 5 steps)."""
        optimizer = optim.Adam(self.parameters(), lr=self.learning_rate, weight_decay=WEIGHT_DECAY)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
        return [optimizer], [scheduler]


def objective(trial):
    """Optuna objective: train one MLP and return its validation accuracy.

    Samples ``dropout_rate`` uniformly from [0.1, 0.5] and
    ``learning_rate`` log-uniformly from [1e-4, 1e-2], trains for at most
    ``EPOCHS`` epochs with early stopping and Optuna pruning on ``val_acc``,
    and returns the ``val_acc`` left in ``trainer.callback_metrics``.

    Reads ``train_loader`` and ``val_loader`` from module scope.

    Args:
        trial: The Optuna trial used to sample hyperparameters and to
            report pruning decisions.

    Returns:
        The validation accuracy as a Python float.
    """
    # suggest_float replaces the deprecated suggest_uniform /
    # suggest_loguniform; parameter names and ranges are unchanged.
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)

    model = MLP(learning_rate=learning_rate, dropout_rate=dropout_rate)

    trainer = Trainer(
        max_epochs=EPOCHS,
        callbacks=[
            EarlyStopping(monitor="val_acc", patience=PATIENCE, mode="max"),
            PyTorchLightningPruningCallback(trial, monitor="val_acc")
        ],
        # Trials are throwaway runs: no checkpoints and no logger output.
        enable_checkpointing=False,
        logger=False
    )

    trainer.fit(model, train_loader, val_loader)

    return trainer.callback_metrics['val_acc'].item()


if __name__ == "__main__":
    # Seed first so the random split below and training are repeatable.
    seed_everything(42)

    # No augmentation in this cell: tensor conversion plus normalization.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
    ])

    train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
    test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

    # Hold out (1 - SPLIT_RATIO) of the training images for validation.
    train_size = int(SPLIT_RATIO * len(train_dataset))
    val_size = len(train_dataset) - train_size
    train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

    # objective() picks up train_loader / val_loader from module scope.
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=20)

    print("Best Hyperparameters:", study.best_params)
    print("Best Validation Accuracy:", study.best_value)

    # Every trial runs with checkpointing disabled, so nothing has written
    # a checkpoint at this point. Retrain once with the best hyperparameters
    # and keep the epoch with the highest validation accuracy.
    checkpoint_dir, checkpoint_file = os.path.split(MODEL_PATH)
    checkpoint_callback = ModelCheckpoint(
        monitor="val_acc",
        mode="max",
        save_top_k=1,
        dirpath=checkpoint_dir or ".",
        filename=os.path.splitext(checkpoint_file)[0],
    )
    # The study's parameter names match the MLP constructor arguments.
    best_model = MLP(**study.best_params)
    trainer = Trainer(
        max_epochs=EPOCHS,
        callbacks=[
            checkpoint_callback,
            EarlyStopping(monitor="val_acc", patience=PATIENCE, mode="max"),
        ],
    )
    trainer.fit(best_model, train_loader, val_loader)

    # Test stage: evaluate the checkpoint that was actually written (the
    # callback may add a version suffix if the file name is already taken).
    model = MLP.load_from_checkpoint(checkpoint_callback.best_model_path, **study.best_params)
    trainer.test(model, test_loader)


## ✅ GPU 설정 방법

In [None]:
# !pip install optuna

import os
import json
import platform
import pkg_resources
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, random_split
import optuna
from sklearn.metrics import precision_score, recall_score, f1_score
from pytorch_lightning import LightningModule, Trainer, seed_everything
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from torchmetrics import Accuracy, Precision, Recall, F1Score
from optuna.integration import PyTorchLightningPruningCallback


# Hyperparameters
# -- optimisation
EPOCHS = 20
BATCH_SIZE = 64
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 0.00001
PATIENCE = 3
# -- model
NUM_CLASSES = 10
INPUT_SIZE = 3 * 32 * 32
DROPOUT_RATE = 0.3
# -- data split and checkpoint location
SPLIT_RATIO = 0.8
MODEL_PATH = './best_model.ckpt'

# Mean and standard deviation of the CIFAR10 dataset (normalization basis)
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)


class MLP(LightningModule):
    """Multi-layer perceptron CIFAR-10 classifier with tunable LR and dropout.

    Args:
        learning_rate: Learning rate handed to Adam in
            ``configure_optimizers``.
        dropout_rate: Dropout probability used after the first three hidden
            stages.

    Validation logs ``val_loss``, ``val_acc``, ``val_precision``,
    ``val_recall`` and ``val_f1``; testing logs ``test_loss`` and
    ``test_acc``.
    """

    def __init__(self, learning_rate=LEARNING_RATE, dropout_rate=DROPOUT_RATE):
        super().__init__()
        # Record the constructor arguments in checkpoints so that
        # load_from_checkpoint() can rebuild the model with the same values.
        self.save_hyperparameters()
        self.learning_rate = learning_rate
        self.dropout_rate = dropout_rate

        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(INPUT_SIZE, 512), nn.BatchNorm1d(512), nn.ReLU(), nn.Dropout(self.dropout_rate),
            nn.Linear(512, 256), nn.BatchNorm1d(256), nn.ReLU(), nn.Dropout(self.dropout_rate),
            nn.Linear(256, 256), nn.BatchNorm1d(256), nn.ReLU(), nn.Dropout(self.dropout_rate),
            nn.Linear(256, 128), nn.BatchNorm1d(128), nn.ReLU(),
            nn.Linear(128, NUM_CLASSES)
        )

        self.criterion = nn.CrossEntropyLoss()

        # Metric attributes carry a stage prefix. In particular the name
        # `precision` is avoided: Lightning 1.x keeps its own `precision`
        # attribute on the module, and registering a metric under that name
        # can collide with it. Validation and test also get separate
        # accuracy objects so their accumulated state never mixes.
        self.val_accuracy = Accuracy()
        self.val_precision = Precision(average='macro', num_classes=NUM_CLASSES)
        self.val_recall = Recall(average='macro', num_classes=NUM_CLASSES)
        self.val_f1 = F1Score(average='macro', num_classes=NUM_CLASSES)
        self.test_accuracy = Accuracy()

    def forward(self, x):
        """Return the class logits produced by the network for ``x``."""
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """Compute, log (``train_loss``) and return the training loss."""
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)

        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log the validation loss and metrics for one batch; return the loss."""
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)

        preds = torch.argmax(y_hat, dim=1)
        self.log('val_loss', loss, prog_bar=True)

        # Accumulate each metric and log the metric object itself so the
        # epoch value is computed over the whole validation set. Averaging
        # per-batch macro precision/recall/F1 does not give that value.
        val_metrics = (
            ('val_acc', self.val_accuracy),
            ('val_precision', self.val_precision),
            ('val_recall', self.val_recall),
            ('val_f1', self.val_f1),
        )
        for name, metric in val_metrics:
            metric.update(preds, y)
            self.log(name, metric, on_step=False, on_epoch=True, prog_bar=True)

        return loss

    def test_step(self, batch, batch_idx):
        """Log the test loss and accuracy for one batch; return the loss."""
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)

        preds = torch.argmax(y_hat, dim=1)
        self.log('test_loss', loss)
        self.test_accuracy.update(preds, y)
        self.log('test_acc', self.test_accuracy, on_step=False, on_epoch=True)

        return loss

    def configure_optimizers(self):
        """Return Adam plus a StepLR schedule (halve the LR every 5 steps)."""
        optimizer = optim.Adam(self.parameters(), lr=self.learning_rate, weight_decay=WEIGHT_DECAY)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)
        return [optimizer], [scheduler]


def objective(trial):
    """Optuna objective: train one MLP on a single GPU, return its val accuracy.

    Samples ``dropout_rate`` uniformly from [0.1, 0.5] and
    ``learning_rate`` log-uniformly from [1e-4, 1e-2], trains for at most
    ``EPOCHS`` epochs with early stopping and Optuna pruning on ``val_acc``,
    and returns the ``val_acc`` left in ``trainer.callback_metrics``.

    Reads ``train_loader`` and ``val_loader`` from module scope.

    Args:
        trial: The Optuna trial used to sample hyperparameters and to
            report pruning decisions.

    Returns:
        The validation accuracy as a Python float.
    """
    # suggest_float replaces the deprecated suggest_uniform /
    # suggest_loguniform; parameter names and ranges are unchanged.
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)

    model = MLP(learning_rate=learning_rate, dropout_rate=dropout_rate)

    # `accelerator` and `devices` are passed exactly once; repeating a
    # keyword argument in a call is a SyntaxError.
    trainer = Trainer(
        max_epochs=EPOCHS,
        callbacks=[
            EarlyStopping(monitor="val_acc", patience=PATIENCE, mode="max"),
            PyTorchLightningPruningCallback(trial, monitor="val_acc")
        ],
        # Trials are throwaway runs: no checkpoints and no logger output.
        enable_checkpointing=False,
        logger=False,
        accelerator="gpu",  # run on GPU
        devices=1           # number of GPUs to use
    )

    trainer.fit(model, train_loader, val_loader)

    return trainer.callback_metrics['val_acc'].item()


if __name__ == "__main__":
    # Seed first so the random split below and training are repeatable.
    seed_everything(42)

    # No augmentation in this cell: tensor conversion plus normalization.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
    ])

    train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
    test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

    # Hold out (1 - SPLIT_RATIO) of the training images for validation.
    train_size = int(SPLIT_RATIO * len(train_dataset))
    val_size = len(train_dataset) - train_size
    train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

    # objective() picks up train_loader / val_loader from module scope.
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=20)

    print("Best Hyperparameters:", study.best_params)
    print("Best Validation Accuracy:", study.best_value)

    # Every trial runs with checkpointing disabled, so nothing has written
    # a checkpoint at this point. Retrain once with the best hyperparameters
    # and keep the epoch with the highest validation accuracy.
    checkpoint_dir, checkpoint_file = os.path.split(MODEL_PATH)
    checkpoint_callback = ModelCheckpoint(
        monitor="val_acc",
        mode="max",
        save_top_k=1,
        dirpath=checkpoint_dir or ".",
        filename=os.path.splitext(checkpoint_file)[0],
    )
    # The study's parameter names match the MLP constructor arguments.
    best_model = MLP(**study.best_params)
    # `accelerator` and `devices` are passed exactly once; repeating a
    # keyword argument in a call is a SyntaxError.
    trainer = Trainer(
        accelerator="gpu",
        devices=1,
        max_epochs=EPOCHS,
        callbacks=[
            checkpoint_callback,
            EarlyStopping(monitor="val_acc", patience=PATIENCE, mode="max"),
        ],
    )
    trainer.fit(best_model, train_loader, val_loader)

    # Test stage: evaluate the checkpoint that was actually written (the
    # callback may add a version suffix if the file name is already taken).
    model = MLP.load_from_checkpoint(checkpoint_callback.best_model_path, **study.best_params)
    trainer.test(model, test_loader)
