In [1]:
import torch
import torch.nn as nn

class PatchEmbedding(nn.Module):
    def __init__(self, in_channels=3, patch_size=16, emb_size=768):
        super().__init__()
        self.patch_size = patch_size
        self.proj = nn.Conv2d(in_channels, emb_size, kernel_size=patch_size, stride=patch_size)

    def forward(self, x):
        x = self.proj(x)
        (B, C, H, W) = x.shape
        x = x.view(B, C, H * W)
        x = x.permute(0, 2, 1)
        return x

class VisionTransformer(nn.Module):
    def __init__(self, in_channels=3, patch_size=16, emb_size=768, img_size=224, num_classes=1000, num_layers=12):
        super().__init__()

        self.patch_embedding = PatchEmbedding(in_channels, patch_size, emb_size)
        num_patches = (img_size // patch_size) ** 2

        self.cls_token = nn.Parameter(torch.randn(1, 1, emb_size))
        self.position_embedding = nn.Parameter(torch.randn(1, num_patches + 1, emb_size))
        self.dropout = nn.Dropout(0.1)

        # batch_first=True 옵션 추가
        encoder_layer = nn.TransformerEncoderLayer(emb_size, 8, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers)
        
        self.mlp_head = nn.Sequential(
            nn.Linear(emb_size, emb_size),
            nn.ReLU(),
            nn.Linear(emb_size, num_classes)
        )

    def forward(self, x):
        x = self.patch_embedding(x)
        B, _, _ = x.shape

        cls_tokens = self.cls_token.expand(B, -1, -1)
        x = torch.cat([cls_tokens, x], dim=1)

        x = x + self.position_embedding
        x = self.dropout(x)

        x = self.transformer(x)

        x = x.mean(dim=1)
        x = self.mlp_head(x)

        return x

# Model 생성 및 예시 입력 통과
model = VisionTransformer()
x = torch.rand(1, 3, 224, 224)
print(model(x).shape)

torch.Size([1, 1000])


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


# GPU 사용 가능한지 확인
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 데이터 전처리
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# 데이터 로더 설정
train_dataset = datasets.ImageFolder(root='C:\\Users\\Jinho\\Downloads\\20231007\\CYH13\\BottomSideWall\\train', transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# VisionTransformer 모델 정의 (이전 코드에서 정의되었다고 가정)
model = VisionTransformer()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


Using device: cpu


In [3]:

# 학습 루프
num_epochs = 10
print_every = 10  # 10번의 미니배치마다 출력

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    correct = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)  # 데이터를 GPU로 이동

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        # 손실률과 정확도 계산
        total_loss += loss.item() * data.size(0)
        _, predicted = output.max(1)
        correct += predicted.eq(target).sum().item()

        # 실시간 정보 출력
        if batch_idx % print_every == 0:
            current_loss = loss.item()
            current_accuracy = 100. * correct / ((batch_idx + 1) * train_loader.batch_size)
            print(f"Epoch {epoch}/{num_epochs} | Batch {batch_idx}/{len(train_loader)} | Current Loss: {current_loss:.4f} | Cumulative Accuracy: {current_accuracy:.2f}%")

    # 에포크별 평균 손실과 정확도
    avg_loss = total_loss / len(train_loader.dataset)
    accuracy = 100. * correct / len(train_loader.dataset)

    print(f"Epoch {epoch}/{num_epochs} Finished. Average Loss: {avg_loss:.4f} | Overall Accuracy: {accuracy:.2f}%")
    print("="*50)

print("Training finished.")

Epoch 0/10 | Batch 0/26 | Current Loss: 6.6568 | Cumulative Accuracy: 0.00%
Epoch 0/10 | Batch 10/26 | Current Loss: 0.7721 | Cumulative Accuracy: 50.00%
Epoch 0/10 | Batch 20/26 | Current Loss: 0.8478 | Cumulative Accuracy: 52.23%
Epoch 0/10 Finished. Average Loss: 1.1739 | Overall Accuracy: 51.05%
Epoch 1/10 | Batch 0/26 | Current Loss: 0.7036 | Cumulative Accuracy: 62.50%
Epoch 1/10 | Batch 10/26 | Current Loss: 0.6967 | Cumulative Accuracy: 55.40%
Epoch 1/10 | Batch 20/26 | Current Loss: 0.7711 | Cumulative Accuracy: 53.27%
Epoch 1/10 Finished. Average Loss: 0.7074 | Overall Accuracy: 54.38%
Epoch 2/10 | Batch 0/26 | Current Loss: 0.6996 | Cumulative Accuracy: 43.75%
Epoch 2/10 | Batch 10/26 | Current Loss: 0.7604 | Cumulative Accuracy: 52.84%
Epoch 2/10 | Batch 20/26 | Current Loss: 0.6910 | Cumulative Accuracy: 53.57%
Epoch 2/10 Finished. Average Loss: 0.7160 | Overall Accuracy: 54.87%
Epoch 3/10 | Batch 0/26 | Current Loss: 0.6837 | Cumulative Accuracy: 59.38%
Epoch 3/10 | Batch