# dance - ballade 분류(RGG Ver)
- 프로젝트에 사용된 파일

In [95]:
# 모듈 로딩
import pandas as pd
import numpy as np

from PIL import Image
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, WeightedRandomSampler


import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.nn.init as init
import matplotlib.pyplot as plt

# 데이터 불러오기

In [96]:
from torchvision.datasets import ImageFolder 
from torchvision.transforms import transforms
from PIL import Image

# Dataset roots; ImageFolder treats each sub-folder as one class
train_root = "./data/train"
test_root = "./data/test"

# Preprocessing applied to every image, in this order
pipeline_steps = [
    transforms.Resize((100, 100)),  # bring every image to 100x100
    transforms.Grayscale(),         # convert to a grayscale image
    transforms.ToTensor(),          # PIL image -> tensor
]
preprocessing = transforms.Compose(pipeline_steps)

# Load the train and test datasets with the same preprocessing
trainDS, testDS = (
    ImageFolder(root=folder, transform=preprocessing)
    for folder in (train_root, test_root)
)

In [97]:
# Show the (file path, class index) pairs that ImageFolder collected
trainDS.samples

[('./data/train\\ballade\\balad1.png', 0),
 ('./data/train\\ballade\\balad10.png', 0),
 ('./data/train\\ballade\\balad100.png', 0),
 ('./data/train\\ballade\\balad11.png', 0),
 ('./data/train\\ballade\\balad12.png', 0),
 ('./data/train\\ballade\\balad13.png', 0),
 ('./data/train\\ballade\\balad14.png', 0),
 ('./data/train\\ballade\\balad15.png', 0),
 ('./data/train\\ballade\\balad16.png', 0),
 ('./data/train\\ballade\\balad17.png', 0),
 ('./data/train\\ballade\\balad18.png', 0),
 ('./data/train\\ballade\\balad2.png', 0),
 ('./data/train\\ballade\\balad20.png', 0),
 ('./data/train\\ballade\\balad21.png', 0),
 ('./data/train\\ballade\\balad22.png', 0),
 ('./data/train\\ballade\\balad23.png', 0),
 ('./data/train\\ballade\\balad25.png', 0),
 ('./data/train\\ballade\\balad26.png', 0),
 ('./data/train\\ballade\\balad28.png', 0),
 ('./data/train\\ballade\\balad29.png', 0),
 ('./data/train\\ballade\\balad3.png', 0),
 ('./data/train\\ballade\\balad30.png', 0),
 ('./data/train\\ballade\\balad32.

## 데이터 셋이 잘 읽혔는지 확인

In [98]:
# Class names of the training set and their name -> index mapping
trainDS.classes, trainDS.class_to_idx

(['ballade', 'dance'], {'ballade': 0, 'dance': 1})

In [99]:
# valDS.classes, valDS.class_to_idx

In [100]:
# Class names of the test set and their name -> index mapping
testDS.classes, testDS.class_to_idx

(['ballade', 'dance'], {'ballade': 0, 'dance': 1})

In [101]:
# Check the train/test split sizes
len(trainDS), len(testDS)

(160, 20)

In [102]:
# Inspect the preprocessed image tensor of the first training sample
trainDS[0][0]

tensor([[[0.7255, 0.8510, 0.8549,  ..., 0.8510, 0.8510, 0.8431],
         [0.7333, 0.8627, 0.8627,  ..., 0.8627, 0.8627, 0.8549],
         [0.7333, 0.8627, 0.8627,  ..., 0.8627, 0.8627, 0.8549],
         ...,
         [0.7216, 0.8510, 0.8510,  ..., 0.6549, 0.6588, 0.6431],
         [0.7216, 0.8510, 0.8510,  ..., 0.6549, 0.6588, 0.6431],
         [0.7137, 0.8392, 0.8431,  ..., 0.6471, 0.6510, 0.6353]]])

In [104]:
# Sampler intended to even out the class distribution of each batch.
# The class-aware variant is kept for reference:
# weights = make_weights(imgDS.targets, len(imgDS.classes))
# With all-ones weights every training sample is equally likely to be drawn.
n_train = len(trainDS.targets)
weights = torch.ones(n_train, dtype=torch.float)
sampler = WeightedRandomSampler(weights, num_samples=n_train)

# NOTE(review): `sampler` is not passed to either loader below; the training
# loader relies on shuffle=True instead.
BATCH = 20
train_DL = DataLoader(trainDS, batch_size=BATCH, shuffle=True)
test_DL = DataLoader(testDS, batch_size=BATCH)  # same batch size as the training loader


In [105]:
# max = 10
# for cnt, (_, label) in enumerate(val_DL):
#     print(f'batch별 target 분포\n{label.bincount()}\n')
#     if cnt > max:
#         break

In [106]:
len(train_DL), len(test_DL) # number of batches in each loader

(8, 1)

# 학습 준비

In [107]:
# Select the compute device: the GPU when CUDA is available, otherwise the CPU
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print('Using PyTorch version:', torch.__version__, ' Device:', DEVICE)

Using PyTorch version: 2.2.2  Device: cpu


In [120]:
# Input layout: each 100x100 image is reshaped to (sequence_length, input_size)
# before it is fed to the recurrent model, so the image rows become the time
# steps and the pixels within a row become the per-step features.
input_size = 100       # features per time step (pixels in one image row)
sequence_length = 100  # number of time steps (image rows)

# Model size
num_layers = 5
hidden_size = 128
num_classes = 2

# Optimisation settings
learning_rate = 0.001
num_epochs = 10

# RNN CLASS

In [124]:
# LSTM classifier definition
# Recurrent layer options: vanilla RNN (the most basic one according to the
# instructor), LSTM, GRU -- open question: how does behaviour differ by type?

class RNN(nn.Module):
    """Stacked unidirectional LSTM followed by a linear classification head.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # batch_first=True: inputs/outputs are laid out as (batch, seq, feature)
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Classify a batch of sequences.

        Args:
            x: Tensor of shape (batch, seq_len, input_size).

        Returns:
            Logits of shape (batch, num_classes), computed from the LSTM
            output at the last time step.
        """
        # Zero initial hidden and cell states, shaped
        # (num_layers, batch, hidden_size). They are created on the input's
        # own device and dtype so the module does not depend on a global
        # device variable and also works after .double()/.half() casts.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)

        # result: (batch, seq_len, hidden_size)
        result, _ = self.lstm(x, (h0, c0))
        # Only the last time step feeds the classifier -> logits
        result = self.fc(result[:, -1, :])
        return result

# 학습 및 평가

In [125]:
# Build the network and move it to the selected device
model = RNN(input_size, hidden_size, num_layers, num_classes).to(DEVICE)

# Classification loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training
total_step = len(train_DL)  # number of batches per epoch
for epoch in range(1, num_epochs + 1):
    for step, (images, labels) in enumerate(train_DL, start=1):
        # (BATCH, 1, 100, 100) image batch -> (BATCH, sequence_length, input_size)
        images = images.reshape(-1, sequence_length, input_size).to(DEVICE)
        labels = labels.to(DEVICE)

        # Forward pass
        logits = model(images)
        loss = criterion(logits, labels)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the loss after every batch
        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
            epoch, num_epochs, step, total_step, loss.item()))

Epoch [1/10], Step [1/8], Loss: 0.6842
Epoch [1/10], Step [2/8], Loss: 0.6881
Epoch [1/10], Step [3/8], Loss: 0.6884
Epoch [1/10], Step [4/8], Loss: 0.6749
Epoch [1/10], Step [5/8], Loss: 0.7238
Epoch [1/10], Step [6/8], Loss: 0.7476
Epoch [1/10], Step [7/8], Loss: 0.7239
Epoch [1/10], Step [8/8], Loss: 0.6976
Epoch [2/10], Step [1/8], Loss: 0.6811
Epoch [2/10], Step [2/8], Loss: 0.6889
Epoch [2/10], Step [3/8], Loss: 0.7000
Epoch [2/10], Step [4/8], Loss: 0.6985
Epoch [2/10], Step [5/8], Loss: 0.6937
Epoch [2/10], Step [6/8], Loss: 0.6934
Epoch [2/10], Step [7/8], Loss: 0.6954
Epoch [2/10], Step [8/8], Loss: 0.6936
Epoch [3/10], Step [1/8], Loss: 0.6905
Epoch [3/10], Step [2/8], Loss: 0.6878
Epoch [3/10], Step [3/8], Loss: 0.7052
Epoch [3/10], Step [4/8], Loss: 0.7003
Epoch [3/10], Step [5/8], Loss: 0.7097
Epoch [3/10], Step [6/8], Loss: 0.6825
Epoch [3/10], Step [7/8], Loss: 0.7041
Epoch [3/10], Step [8/8], Loss: 0.6881
Epoch [4/10], Step [1/8], Loss: 0.6957
Epoch [4/10], Step [2/8],

# 모델 평가

In [123]:
model.eval()  # evaluation mode: layers such as Dropout/BatchNorm stop their training behaviour
correct = 0
total = 0
with torch.no_grad():  # gradients are not needed for evaluation
    for images, labels in test_DL:
        images = images.reshape(-1, sequence_length, input_size).to(DEVICE)
        labels = labels.to(DEVICE)
        # Predicted class = index of the largest logit in each row
        predicted = model(images).argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy: {} %'.format(100 * correct / total))


Accuracy: 50.0 %


In [112]:
# Persist only the model's parameters (its state_dict) to disk
checkpoint = model.state_dict()
torch.save(checkpoint, 'RNN.pth')