In [1]:
import torch
import pandas as pd
from torch import nn
from torch import optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pickle

In [2]:
# One-hot encoding for the four label strings found in column 0 of the
# spreadsheet (the strings read roughly "alert", "caution", "normal", "low").
mapping = {
    "경계" : [1, 0, 0, 0],
    "주의": [0, 1, 0, 0],
    "보통": [0, 0, 1, 0],
    "낮음": [0, 0, 0, 1]
}

class CustomDataset(Dataset):
    """Spreadsheet-backed dataset of 5 standardized features and a one-hot label.

    Column 0 of the Excel file holds the label string (a key of ``mapping``);
    columns 2..6 hold the five numeric features. Each feature column is
    standardized as ``(value - mean) / std``.

    Args:
        file_path: Path handed to ``pandas.read_excel``.
        mean: Optional sequence of 5 per-column means. When given (together
            with ``std``), these are used instead of statistics computed from
            this file -- e.g. to standardize a held-out split with the
            statistics of the training split.
        std: Optional sequence of 5 per-column standard deviations; must be
            supplied together with ``mean``.

    Attributes:
        x: List of 5 standardized feature arrays, one per column.
        y: Object array of one-hot label lists, one per row.
        mean, std: The statistics actually used for standardization.
        length: Number of rows.

    Raises:
        ValueError: If only one of ``mean``/``std`` is given, if either does
            not have one entry per feature column, or if the label column
            contains a value that is not a key of ``mapping``.
    """

    def __init__(self, file_path, mean=None, std=None):
        df = pd.read_excel(file_path)

        # Feature columns are addressed by position: columns 2 through 6.
        feature_cols = [df.iloc[:, i].values for i in range(2, 6 + 1)]

        if (mean is None) != (std is None):
            raise ValueError("mean and std must be provided together")
        if mean is None:
            mean = [col.mean() for col in feature_cols]
            std = [col.std() for col in feature_cols]
        elif len(mean) != len(feature_cols) or len(std) != len(feature_cols):
            raise ValueError(
                f"mean and std must each have {len(feature_cols)} entries"
            )

        self.mean = list(mean)
        # A constant column has std == 0; dividing by it would fill the
        # feature with NaN/inf, so fall back to a unit scale instead.
        self.std = [s if s != 0 else 1.0 for s in std]
        self.x = [
            (col - m) / s
            for col, m, s in zip(feature_cols, self.mean, self.std)
        ]

        labels = df.iloc[:, 0]
        encoded = labels.map(mapping)
        # Series.map leaves NaN for labels missing from `mapping`; fail here
        # with a clear message rather than later inside __getitem__.
        unknown = encoded.isna()
        if unknown.any():
            bad = sorted(set(map(str, labels[unknown])))
            raise ValueError(f"Unknown label(s) in {file_path!r}: {bad}")

        self.y = encoded.values
        self.length = len(df)

    def __getitem__(self, index):
        """Return ``(features, one_hot_label)`` as float32 tensors of size 5 and 4."""
        x = torch.tensor([col[index] for col in self.x], dtype=torch.float32)
        y = torch.tensor(self.y[index], dtype=torch.float32)
        return x, y

    def __len__(self):
        """Return the number of rows read from the spreadsheet."""
        return self.length

In [3]:
class CustomModel(nn.Module):
    """Feed-forward classifier mapping 5 input features to 4 output logits.

    The network is two hidden stages (widths 32 and 16), each made of
    Linear -> BatchNorm1d -> ReLU -> Dropout(0.3), followed by a final
    Linear layer that emits 4 raw scores (no softmax is applied here).
    """

    def __init__(self):
        super().__init__()

        stages = []
        in_features = 5
        # Build the hidden stages in order so that sub-module indices (and
        # therefore state_dict keys) run layer.0 .. layer.8.
        for width in (32, 16):
            stages.extend([
                nn.Linear(in_features, width),
                nn.BatchNorm1d(width),
                nn.ReLU(),
                nn.Dropout(0.3),
            ])
            in_features = width
        stages.append(nn.Linear(in_features, 4))

        self.layer = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the 4 logits per row."""
        return self.layer(x)

In [4]:
# Load the training spreadsheet and serve it in shuffled mini-batches of 128.
# drop_last=True discards a final batch that has fewer than 128 rows.
train_dataset = CustomDataset("./dataset/dataset.xlsx")
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=128,
    shuffle=True,
    drop_last=True,
)

In [5]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Model and loss live on the selected device.
model = CustomModel()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

# AdamW with the library's default hyperparameters.
optimizer = optim.AdamW(model.parameters())

  _torch_pytree._register_pytree_node(


In [39]:
# Train for 50 epochs, reporting the mean batch loss every 10th epoch.
for epoch in range(50):
    # Explicitly (re-)enter training mode: if this cell is re-run after the
    # evaluation cell has called model.eval(), Dropout and BatchNorm would
    # otherwise stay in inference mode while the weights are being updated.
    model.train()
    cost = 0.0

    for x, y in train_dataloader:
        x = x.to(device)
        # The dataset yields one-hot rows; CrossEntropyLoss takes the class
        # index per sample, which is numerically the same loss for one-hot
        # targets.
        y_indices = torch.argmax(y, dim=1).to(device)

        output = model(x)
        loss = criterion(output, y_indices)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        cost += loss.item()

    # Average over the number of batches seen this epoch.
    cost = cost / len(train_dataloader)

    if (epoch + 1) % 10 == 0:
        print(f"Epoch : {epoch+1:4d}, Cost : {cost:.3f}")


Epoch :   10, Cost : 0.446
Epoch :   20, Cost : 0.440
Epoch :   30, Cost : 0.438
Epoch :   40, Cost : 0.436
Epoch :   50, Cost : 0.437


In [43]:
# NOTE(review): CustomDataset standardizes this file with its own per-column
# mean/std rather than the training statistics that are saved to mean_std.pkl,
# so the features seen here may be scaled differently from inference time.
test_dataset = CustomDataset("./dataset/test.xlsx")
# Evaluation must cover every sample exactly once: no shuffling, and keep the
# final partial batch (drop_last=True would silently skip up to 127 rows).
test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=False, drop_last=False)

correct = 0
total = 0
# Inference mode: Dropout is disabled and BatchNorm uses its running statistics.
model.eval()
with torch.no_grad():
    for x, y in test_dataloader:
        x = x.to(device)
        y = y.to(device)
        output = model(x)
        # Predicted class = index of the largest logit in each row.
        predicted = output.argmax(dim=1)
        total += y.size(0)
        # Labels are one-hot rows; compare against their argmax class index.
        correct += (predicted == y.argmax(dim=1)).sum().item()

# Calculate accuracy
accuracy = correct / total
print(f"Test Accuracy: {accuracy:.2f}, total: {total}, correct: {correct}")

Test Accuracy: 0.87, total: 5760, correct: 4991


In [49]:
# Persist the model's state_dict (parameters and buffers) to disk.
torch.save(
    obj=model.state_dict(),
    f='model_state_dict.pth',
)

In [6]:
# Store the training set's per-column (mean, std) lists as one pickled tuple.
with open('mean_std.pkl', 'wb') as stats_file:
    stats_file.write(pickle.dumps((train_dataset.mean, train_dataset.std)))