In [1]:
import torch
import pandas as pd
from torch import nn
from torch import optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pickle
from torch.utils.data import WeightedRandomSampler

In [4]:
mapping = {
    "경계": [1,0,0,0],
    "주의": [0,1,0,0],
    "보통": [0,0,1,0],
    "낮음": [0,0,0,1]
}

def calculate_weights_for_dataset(dataset):
    class_counts = [0] * 4
    for _, label in dataset:
        class_counts[label] += 1

    class_weights = [1.0 / count for count in class_counts]

    weights = [class_weights[label] for _, label in dataset]
    return weights

class CustomDataset(Dataset):
    def __init__(self, file_path):
        df = pd.read_excel(file_path)

        self.x = []
        self.mean = []
        self.std = []
        for i in range(2, 6+1):
            col = df.iloc[:, i].values
            self.mean.append(col.mean())
            self.std.append(col.std())
            self.x.append((col - col.mean())/col.std())
        
        self.y = df.iloc[:, 0].map(mapping).values
        self.length = len(df)

    def __getitem__(self, index):
        x = torch.FloatTensor([self.x[0][index], self.x[1][index], self.x[2][index], self.x[3][index], self.x[4][index]])
        y = torch.LongTensor([np.argmax(self.y[index])])
        #y = torch.FloatTensor(self.y[index])
        return x, y

    def __len__(self):
        return self.length

In [5]:
class CustomModel(nn.Module):
    def __init__(self):
        super(CustomModel, self).__init__()
        self.layer = nn.Sequential(
            nn.Linear(5, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, 32),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(32, 16),
            nn.BatchNorm1d(16),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(16, 4)
        )

    def forward(self, x):
        x = self.layer(x)
        return x

In [6]:
train_dataset = CustomDataset("./dataset/dataset.xlsx")
weights = calculate_weights_for_dataset(train_dataset)
sampler = WeightedRandomSampler(weights, len(train_dataset))
train_dataloader = DataLoader(train_dataset, batch_size=64, sampler=sampler)

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = CustomModel().to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
for epoch in range(500):
    cost = 0.0

    for x, y in train_dataloader:
        x = x.to(device)
        y = y.to(device)
        
        output = model(x)
        loss = criterion(output, y.squeeze())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        cost += loss.item()

    cost = cost / len(train_dataloader)

    if (epoch + 1) % 10 == 0:
        print(f"Epoch : {epoch+1:4d}, Cost : {cost:.3f}")

In [26]:
test_dataset = CustomDataset("./dataset/test.xlsx")
test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=True, drop_last=True)

correct = 0
total = 0
ans=[0,0,0,0]
wrong=[0,0,0,0]
with torch.no_grad():
    model.eval()
    for x, y in test_dataloader:
        x = x.to(device)
        y = y.to(device)
        output = model(x)
        _, predicted = torch.max(output.data, 1)
        total += y.size(0)
        correct += (predicted == y.squeeze()).sum().item()
        for i in range(4):
            ans[i] += ((predicted == i) & (y.squeeze() == i)).sum().item()
            wrong[i] += ((predicted != i) & (y.squeeze() == i)).sum().item()

# Calculate accuracy
accuracy = correct / total
print(f"Test Accuracy: {accuracy:.4f}, total: {total}, correct: {correct}")
for i in range(4):
    print(f"{i} Accuracy: {ans[i]/(ans[i]+wrong[i]):.4f}, total: {ans[i]+wrong[i]}, correct: {ans[i]}")

Test Accuracy: 0.8387, total: 5760, correct: 4831
0 Accuracy: 0.8025, total: 81, correct: 65
1 Accuracy: 0.7708, total: 144, correct: 111
2 Accuracy: 0.8603, total: 544, correct: 468
3 Accuracy: 0.8389, total: 4991, correct: 4187


In [23]:
torch.save(model.state_dict(), 'model_state_dict.pth')

In [24]:
with open('mean_std.pkl', 'wb') as f:
    pickle.dump((train_dataset.mean, train_dataset.std), f)