In [None]:
import pandas as pd

In [None]:
# Load only the two columns the notebook uses. They are selected by name rather
# than by position so the load stays correct if the CSV's column order changes.
result = pd.read_csv('./padded_encoded_df.csv', usecols=['encoded_lyrics', 'genre'])
data = pd.DataFrame(result)
data.head()

# feature, label 분리

In [None]:
# Separate the model input from the target. The list-style column selectors
# keep both results as single-column DataFrames rather than Series.
feature_df = data.loc[:, ['encoded_lyrics']]
label_df = data.loc[:, ['genre']]

# train, test 분리 및 DataLoader 생성

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. The split is stratified on the label
# frame and uses a fixed seed so it is repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    feature_df,
    label_df,
    test_size=0.2,
    stratify=label_df,
    random_state=42,
)

In [None]:
# Put each split's label column back next to its feature column so that every
# row carries both values.
train_data = pd.concat(objs=[X_train, y_train], axis='columns')
test_data = pd.concat(objs=[X_test, y_test], axis='columns')
train_data.head()

In [None]:
# Display the (rows, columns) size of the training split.
train_data.shape

In [None]:
# Preview the first rows of the test split.
test_data.head()

# 텐서로 변환

In [None]:
import torch
from torch.utils.data import DataLoader, TensorDataset

BATCH = 20


def _parse_token_ids(cell):
    """Return one lyric's token ids as a list of ints.

    A list written to CSV is read back as text, so string cells are parsed;
    both "[1, 2, 3]" and numpy-style "[1 2 3]" layouts are accepted.
    Non-string cells are treated as an iterable of ids.
    """
    if isinstance(cell, str):
        return [int(tok) for tok in cell.strip('[]').replace(',', ' ').split()]
    return [int(tok) for tok in cell]


def _to_tensor_dataset(frame):
    """Convert a feature+label DataFrame into a TensorDataset of (token ids, genre).

    A DataFrame cannot be handed to DataLoader directly: integer indexing on a
    DataFrame is a column lookup, so it never produces (feature, label) samples.

    NOTE(review): assumes every 'encoded_lyrics' row has the same (padded)
    length and that 'genre' is already integer-encoded — confirm against the
    preprocessing step that produced the CSV.
    """
    features = torch.tensor(
        [_parse_token_ids(cell) for cell in frame['encoded_lyrics']],
        dtype=torch.long,
    )
    labels = torch.tensor(frame['genre'].to_numpy(), dtype=torch.long)
    return TensorDataset(features, labels)


train_loader = DataLoader(_to_tensor_dataset(train_data), batch_size=BATCH, shuffle=True)
# Evaluation does not benefit from shuffling; a fixed order keeps runs comparable.
test_loader = DataLoader(_to_tensor_dataset(test_data), batch_size=BATCH, shuffle=False)

# 모델 설계

In [None]:
import torch.nn as nn
class music_rnn(nn.Module):
    """Lyrics genre classifier: embedding -> RNN -> dropout -> linear layer.

    Args:
        VOCAB_SIZE: number of distinct token ids (rows of the embedding table).
        EMBED_DIM: width of each token embedding.
        HIDDEN_SIZE: width of the RNN hidden state.
        NUM_CLASS: number of output classes.
    """

    def __init__(self, VOCAB_SIZE, EMBED_DIM, HIDDEN_SIZE, NUM_CLASS):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=VOCAB_SIZE, embedding_dim=EMBED_DIM)
        # batch_first=True because DataLoader batches put the batch dimension first.
        self.rnn = nn.RNN(input_size=EMBED_DIM, hidden_size=HIDDEN_SIZE, batch_first=True)
        # Ideas to try later: swap the RNN for an LSTM, or make it bidirectional.

        # Regularisation against overfitting; only active in train() mode.
        self.dropout = nn.Dropout()
        # The linear layer emits raw logits. No softmax is applied here because
        # nn.CrossEntropyLoss expects unnormalised scores.
        self.fc = nn.Linear(HIDDEN_SIZE, NUM_CLASS)
        # Replace the default initialisation of the embedding and output layer.
        self.init_weights()

    def init_weights(self):
        """Initialise embedding and output weights uniformly in [-0.5, 0.5]; zero the bias."""
        bound = 0.5
        nn.init.uniform_(self.embedding.weight, -bound, bound)
        nn.init.uniform_(self.fc.weight, -bound, bound)
        nn.init.zeros_(self.fc.bias)

    def forward(self, text, offsets=None):
        """Compute class logits for a batch of token-id sequences.

        Args:
            text: integer tensor of token ids, expected as (batch, seq_len).
            offsets: accepted for backward compatibility and ignored.
                nn.Embedding takes a single index tensor; offsets belong to
                nn.EmbeddingBag.

        Returns:
            Tensor of shape (batch, NUM_CLASS) holding unnormalised logits.
        """
        embed = self.embedding(text)
        _, hidden = self.rnn(embed)
        # hidden is (num_layers, batch, hidden); the last layer's final state
        # summarises each sequence, giving one prediction per sample.
        return self.fc(self.dropout(hidden[-1]))

In [None]:
# 학습용 변수들 지정
from torch import optim
import torch
import torch.nn as nn
import torch.nn.functional as F
# Model hyperparameters.
VOCAB_SIZE = 13714  # NOTE(review): presumably the tokenizer's vocabulary size — confirm
EMBED_DIM = 50
HIDDEN_SIZE = 10
NUM_CLASS = 8

# Choose the device first so the model can be placed on it. The training loop
# moves every batch to `device`, so the model's parameters must live there too.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = music_rnn(VOCAB_SIZE, EMBED_DIM, HIDDEN_SIZE, NUM_CLASS).to(device)

loss_fn = nn.CrossEntropyLoss().to(device)
# The optimizer is created after .to(device) so it tracks the moved parameters.
optimizer = optim.Adam(params=model.parameters())
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer)

# 함수 지정 (train, test)

In [None]:
# !pip install torchmetrics

In [None]:
# 데이터 로더를 텐서화 시키기

In [None]:
# torch.Tensor() cannot be built from a DataLoader (it raises TypeError), so the
# training features are stacked straight from the DataFrame instead.
# NOTE(review): assumes each 'encoded_lyrics' cell holds one padded id sequence
# of equal length, either as a list or as its text form after the CSV
# round-trip — confirm against the preprocessing step.
train_tensor = torch.tensor(
    [
        [
            int(tok)
            for tok in (
                seq.strip('[]').replace(',', ' ').split() if isinstance(seq, str) else seq
            )
        ]
        for seq in train_data['encoded_lyrics']
    ],
    dtype=torch.long,
)

In [None]:
import torchmetrics.functional.classification as metrics
def training(DATALOADER):
    """Run one training pass over DATALOADER and collect per-batch metrics.

    Uses the module-level `model`, `loss_fn`, `optimizer` and `device`, plus the
    torchmetrics functional API imported as `metrics`.

    Args:
        DATALOADER: iterable yielding (feature, label) tensor pairs.

    Returns:
        A list of five lists with one entry per batch, in this order:
        loss, accuracy, precision, recall, f1 score. Entries are tensors
        detached from the autograd graph.
    """
    model.train()
    # loss, acc, precision, recall, f1_score
    output_list = [[], [], [], [], []]
    for feature, label in DATALOADER:
        train_feature = feature.to(device)
        train_label = label.to(device)
        pred = model(train_feature)

        # Compute the loss for this batch.
        loss = loss_fn(pred, train_label)

        # Metrics are bookkeeping only, so they use detached values. Storing the
        # raw loss would keep every batch's autograd graph alive.
        scores = pred.detach()
        # task='multiclass' requires num_classes; take it from the logit width.
        num_classes = scores.shape[-1]
        output_list[0].append(loss.detach())
        output_list[1].append(
            metrics.accuracy(scores, train_label, task='multiclass', num_classes=num_classes)
        )
        output_list[2].append(
            metrics.precision(scores, train_label, task='multiclass', num_classes=num_classes)
        )
        output_list[3].append(
            metrics.recall(scores, train_label, task='multiclass', num_classes=num_classes)
        )
        output_list[4].append(
            metrics.f1_score(scores, train_label, task='multiclass', num_classes=num_classes)
        )

        # Parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return output_list