### 데이터 분할을 위한 폴더 생성

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### 베이스라인 모델 학습을 위한 준비

In [6]:
import torch
import os

# MPS 지원 여부 확인
USE_MPS = torch.backends.mps.is_available()  # MPS 지원 여부
USE_CUDA = torch.cuda.is_available()        # CUDA 지원 여부

# 디바이스 선택 (MPS > CUDA > CPU 순으로 우선 선택)
if USE_MPS:
    DEVICE = torch.device("mps")
elif USE_CUDA:
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

BATCH_SIZE = 256
EPOCH = 30




### VisonTransformer를 위한 준비

In [16]:
import time
import copy
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

VIT_BATCH_SIZE = 64

vit_transform_base = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5],[0.5])
])

vit_train_dataset = ImageFolder(root = '/content/drive/MyDrive/Colab Notebooks/splitted/train',transform=vit_transform_base)
vit_val_dataset = ImageFolder(root = '/content/drive/MyDrive/Colab Notebooks/splitted/val',transform=vit_transform_base)

vit_train_loader = DataLoader(vit_train_dataset, batch_size=VIT_BATCH_SIZE, shuffle=True, num_workers=0)
vit_val_loader = DataLoader(vit_val_dataset, batch_size=VIT_BATCH_SIZE, shuffle=True, num_workers=0)

### VisonTransformer 모델 설계

In [17]:
from torchvision.models.vision_transformer import vit_b_16

class ViTClassifier(nn.Module):
    def __init__(self, num_classes = 33):
        super(ViTClassifier, self).__init__()
        self.vit = vit_b_16(weights = None)
        self.vit.heads = nn.Linear(768, num_classes)

    def forward(self,x):
        return self.vit(x)

model_vit = ViTClassifier(num_classes=len(vit_train_dataset.classes)).to(DEVICE)
optimizer = optim.Adam(model_vit.parameters(), lr = 0.001)

### VisonTransformer 학습 함수

In [18]:
def vit_train(model, train_loader, optimizer):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(DEVICE), target.to(DEVICE)
        optimizer.zero_grad()
        output = model(data)
        loss = nn.CrossEntropyLoss()(output, target)
        loss.backward()
        optimizer.step()

### VisonTransformer 평가 함수

In [19]:
def vit_evaluate(model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data,target in test_loader:
            data, target = data.to(DEVICE), target.to(DEVICE)
            output = model(data)
            test_loss += nn.CrossEntropyLoss()(output, target).item()
            pred = output.argmax(dim = 1, keepdim = True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    test_accuracy = 100 * correct / len(test_loader.dataset)
    return test_loss, test_accuracy

### VisonTransformer 모델 학습 하기

In [None]:
def vit_train_baseline(model, train_loader, val_loader, optimizer, num_epochs = EPOCH):
    best_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(1, num_epochs + 1):
        since = time.time()
        vit_train(model, train_loader, optimizer)
        train_loss, train_acc = vit_evaluate(model, train_loader)
        val_loss, val_acc = vit_evaluate(model, val_loader)

        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())

        time_elapsed = time.time() - since
        print(f'-------------- epoch {epoch} ----------------')
        print(f'train Loss: {train_loss:.4f}, Accuracy: {train_acc:.2f}%')
        print(f'val Loss: {val_loss:.4f}, Accuracy: {val_acc:.2f}%')
        print(f'Completed in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')

    model.load_state_dict(best_model_wts)
    return model

vit_model = vit_train_baseline(model_vit, vit_train_loader, vit_val_loader, optimizer, EPOCH)
torch.save(vit_model, 'vit_model.pt')

-------------- epoch 1 ----------------
train Loss: 0.0344, Accuracy: 34.90%
val Loss: 0.0346, Accuracy: 34.16%
Completed in 22m 35s


### VisionTransformer 모델 평가를 위한 전처리

In [None]:
test_dataset = ImageFolder(root='/content/drive/MyDrive/Colab Notebooks/splitted/test', transform=vit_transform_base)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)

### Vision Transformer 모델 성능 평가

In [None]:
vit_model = torch.load('vit_model.pt')
vit_model.eval()
test_loss, test_accuracy = vit_evaluate(vit_model, test_loader)
print('ViT test acc: ',test_accuracy)