In [1]:
from torch.utils.data import Dataset, DataLoader, TensorDataset
import numpy as np
import torch
import pandas as pd

In [2]:
def load_pulsar_dataset(adjust_ratio):
    df = pd.read_csv('data/chap02/pulsar_stars.csv')
    pulsars = np.asarray(df[df['target_class'] == 1])
    stars = np.asarray(df[df['target_class'] == 0])

    input_cnt, output_cnt = 8, 1

    star_cnt, pulsar_cnt = len(stars), len(pulsars)
    if adjust_ratio:
        data = np.zeros([2 * star_cnt, 9])
        data[0:star_cnt, :] = np.asarray(stars, dtype='float32')
        for n in range(star_cnt):
            data[star_cnt + n] = np.asarray(pulsars[n % pulsar_cnt], dtype='float32')
    else:
        data = np.zeros([star_cnt + pulsar_cnt, 9])
        data[0:star_cnt, :] = np.asarray(stars, dtype='float32')
        data[star_cnt:, :] = np.asarray(pulsars, dtype='float32')

    return data

In [21]:
data = load_pulsar_dataset(1)

In [22]:
data.shape

(32518, 9)

In [23]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    data[:,:-1], data[:,-1], test_size=0.2, random_state=42)

In [24]:
trn = TensorDataset(torch.Tensor(X_train), torch.Tensor(y_train))
train_loader = DataLoader(trn, batch_size = 512, shuffle=True)

trn = TensorDataset(torch.Tensor(X_test), torch.Tensor(y_test))
test_loader = DataLoader(trn, batch_size = 512, shuffle=True)

In [25]:
class NeuralNet(torch.nn.Module):
    def __init__(self, input_size, hidden_size):
        super(NeuralNet, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        
        self.linear_1 = torch.nn.Linear(self.input_size, self.hidden_size, bias=True)
        
    def forward(self, input_tensor):
        linear1 = self.linear_1(input_tensor)
        out = torch.sigmoid(linear1)
        
        return out
        

In [26]:
USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device("cuda" if USE_CUDA else "cpu")
print("Training on {}".format(DEVICE))

Training on cuda


In [27]:
model        = NeuralNet(8, 1).to(DEVICE)
optimizer    = torch.optim.SGD(model.parameters(), lr=0.01)
criterion    = torch.nn.BCELoss() 

In [28]:
def train(model, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(DEVICE), target.to(DEVICE)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output.squeeze(), target)
        loss.backward()
        optimizer.step()

        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))


# ## 테스트하기

def evaluate(model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(DEVICE), target.to(DEVICE)
            output = model(data)

            # 배치 오차를 합산
            test_loss += criterion(output.squeeze(), target).item()

            # 가장 높은 값을 가진 인덱스가 바로 예측값
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, test_accuracy

In [29]:
EPOCHS = 30

for epoch in range(1, EPOCHS + 1):
    train(model, train_loader, optimizer, epoch)
    test_loss, test_accuracy = evaluate(model, test_loader)
    
    print('[{}] Test Loss: {:.4f}, Accuracy: {:.2f}%'.format(
          epoch, test_loss, test_accuracy))

[1] Test Loss: 0.0037, Accuracy: 51.11%
[2] Test Loss: 0.0030, Accuracy: 51.11%
[3] Test Loss: 0.0014, Accuracy: 51.11%
[4] Test Loss: 0.0172, Accuracy: 51.11%
[5] Test Loss: 0.0164, Accuracy: 51.11%
[6] Test Loss: 0.0016, Accuracy: 51.11%
[7] Test Loss: 0.0025, Accuracy: 51.11%
[8] Test Loss: 0.0018, Accuracy: 51.11%
[9] Test Loss: 0.0025, Accuracy: 51.11%
[10] Test Loss: 0.0011, Accuracy: 51.11%
[11] Test Loss: 0.0027, Accuracy: 51.11%
[12] Test Loss: 0.0017, Accuracy: 51.11%
[13] Test Loss: 0.0057, Accuracy: 51.11%
[14] Test Loss: 0.0010, Accuracy: 51.11%
[15] Test Loss: 0.0063, Accuracy: 51.11%
[16] Test Loss: 0.0023, Accuracy: 51.11%
[17] Test Loss: 0.0010, Accuracy: 51.11%
[18] Test Loss: 0.0021, Accuracy: 51.11%
[19] Test Loss: 0.0009, Accuracy: 51.11%
[20] Test Loss: 0.0035, Accuracy: 51.11%
[21] Test Loss: 0.0020, Accuracy: 51.11%
[22] Test Loss: 0.0035, Accuracy: 51.11%
[23] Test Loss: 0.0073, Accuracy: 51.11%
[24] Test Loss: 0.0016, Accuracy: 51.11%
[25] Test Loss: 0.0009, A