**라이브러리 호출**

In [36]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

**데이터셋 경로 지정 및 훈련과 테스트 용도로 분리**

In [37]:
df = pd.read_csv("../pytorch/data/diabetes.csv")
x = df[df.columns[:-1]]
y = df["Outcome"]

x = x.values
y = torch.tensor(y.values)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33)

**훈련과 테스트용 데이터를 정규화**

In [38]:
ms = MinMaxScaler()
ss = StandardScaler()

x_train = ss.fit_transform(x_train)
x_test = ss.fit_transform(x_test)
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)
y_train = ms.fit_transform(y_train)
y_test = ms.fit_transform(y_test)

**커스텀 데이터셋 생성**

In [39]:
class CustomDataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y
        self.len = len(self.x)
    
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return self.len

**데이터로더에 데이터 담기**

In [40]:
train_data = CustomDataset(torch.FloatTensor(x_train),
                           torch.FloatTensor(y_train))
test_data = CustomDataset(torch.FloatTensor(x_test),
                          torch.FloatTensor(y_test))

train_loader = DataLoader(dataset=train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=64, shuffle=False)

**네트워크 생성**

In [41]:
class BinaryClassification(nn.Module):
    def __init__(self):
        super(BinaryClassification, self).__init__()
        self.layer_1 = nn.Linear(8, 64, bias=True)
        self.layer_2 = nn.Linear(64, 64, bias=True)
        self.layer_out = nn.Linear(64, 1, bias=True)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.1)
        self.batchnorm1 = nn.BatchNorm1d(64)
        self.batchnorm2 = nn.BatchNorm1d(64)
        
    def forward(self, inputs):
        x = self.relu(self.layer_1(inputs))
        x = self.batchnorm1(x)
        x = self.relu(self.layer_2(x))
        x = self.batchnorm2(x)
        x = self.dropout(x)
        x = self.layer_out(x)
        return x

In [42]:
epoch_num = 100
print_epoch = 10
lr = 1e-2

model = BinaryClassification().to(device)
print(model)
BCE = nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

BinaryClassification(
  (layer_1): Linear(in_features=8, out_features=64, bias=True)
  (layer_2): Linear(in_features=64, out_features=64, bias=True)
  (layer_out): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.1, inplace=False)
  (batchnorm1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (batchnorm2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


In [43]:
def accuracy(y_pred, y_test):
    y_pred = torch.round(torch.sigmoid(y_pred))
    correct_results_sum = (y_pred == y_test).sum().float()
    acc = correct_results_sum / y_test.shape[0]
    acc = torch.round(acc * 100)
    return acc

In [44]:
for epoch in range(epoch_num):
    iteration_loss = 0
    iteration_acc = 0

    model.train()
    for i, data in enumerate(train_loader):
        X, y = data
        X, y = X.to(device), y.to(device)

        y_pred = model(X.float())
        loss = BCE(y_pred, y.reshape(-1, 1).float())

        iteration_loss += loss
        iteration_acc += accuracy(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if epoch % print_epoch == 0:
        print(f"Train: epoch: {epoch}", end="")
        print(
            f"  - loss: {iteration_loss / (i+1):.5f} - acc: {iteration_acc / (i+1):.3f}"
        )

    iteration_loss = 0
    iteration_acc = 0

    model.eval()
    for i, data in enumerate(test_loader):
        X, y = data
        X, y = X.to(device), y.to(device)

        y_pred = model(X.float())
        loss = BCE(y_pred, y.reshape(-1, 1).float())

        iteration_loss += loss
        iteration_acc += accuracy(y_pred, y)
    if epoch % print_epoch == 0:
        print(f"Test: epoch: {epoch}", end="")
        print(
            f"  - loss: {iteration_loss / (i+1):.5f} - acc: {iteration_acc / (i+1):.3f}"
        )

Train: epoch: 0  - loss: 0.69440 - acc: 50.667
Test: epoch: 0  - loss: 0.68268 - acc: 59.750
Train: epoch: 10  - loss: 0.59047 - acc: 70.444
Test: epoch: 10  - loss: 0.56403 - acc: 71.250
Train: epoch: 20  - loss: 0.48650 - acc: 80.111
Test: epoch: 20  - loss: 0.58176 - acc: 68.750
Train: epoch: 30  - loss: 0.47228 - acc: 76.778
Test: epoch: 30  - loss: 0.54246 - acc: 71.250
Train: epoch: 40  - loss: 0.46626 - acc: 76.333
Test: epoch: 40  - loss: 0.53605 - acc: 75.000
Train: epoch: 50  - loss: 0.44167 - acc: 77.000
Test: epoch: 50  - loss: 0.52236 - acc: 74.750
Train: epoch: 60  - loss: 0.41852 - acc: 82.333
Test: epoch: 60  - loss: 0.53753 - acc: 74.000
Train: epoch: 70  - loss: 0.42538 - acc: 76.667
Test: epoch: 70  - loss: 0.51109 - acc: 77.500
Train: epoch: 80  - loss: 0.49221 - acc: 77.222
Test: epoch: 80  - loss: 0.50122 - acc: 76.000
Train: epoch: 90  - loss: 0.46104 - acc: 76.889
Test: epoch: 90  - loss: 0.90118 - acc: 69.750
