In [46]:
import numpy as np
import csv

def load_pulsar_dataset():
  with open('/content/drive/My Drive/Colab Notebooks/pulsar_stars.csv') as csvfile:
    csvreader = csv.reader(csvfile)
    next(csvreader, None)
    rows = []
    for row in csvreader:
      rows.append(row)

  global data, input_cnt, output_cnt
  input_cnt, output_cnt = 8, 1
  data = np.zeros([len(rows), input_cnt+output_cnt])

  for n, row in enumerate(rows):
    #if row[0] == 'I': data[n, 0] = 1
    #if row[0] == 'M': data[n, 1] = 1
    #if row[0] == 'F': data[n, 2] = 1
    data[n, 1:] = row[1:]

def arrange_data(mb_size):
  global data, shuffle_map, test_begin_idx
  shuffle_map = np.arange(data.shape[0])
  np.random.shuffle(shuffle_map)
  step_count = int(data.shape[0] * 0.8) // mb_size
  test_begin_idx = step_count * mb_size
  return step_count

def get_test_data():
  global data, shuffle_map, test_begin_idx, output_cnt
  test_data = data[shuffle_map[test_begin_idx:]]
  return test_data[:, :-output_cnt], test_data[:, -output_cnt:]

def get_train_data(mb_size, nth):
  global data, shuffle_map, test_begin_idx, output_cnt
  if nth == 0:
    np.random.shuffle(shuffle_map[:test_begin_idx])
  train_data = data[shuffle_map[mb_size*nth:mb_size*(nth+1)]]
  return train_data[:, :-output_cnt], train_data[:, -output_cnt:]

def eval_accuracy(output, y):
  estimate = np.greater(output, 0.5)
  answer = np.greater(y, 0.5)
  correct = np.equal(estimate, answer)
  return np.mean(correct)
  #mdiff = np.mean(np.abs((output - y)/y))
  #return 1 - mdiff  #회귀분석에서의 정확도

In [47]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# Pytorch 형태의 네트워크 구조
class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    self.fc1 = nn.Linear(8, 1) #입력이 8 출력이 1인 퍼셉트론
    self.sigm = nn.Sigmoid()

  def forward(self, x):
    x = self.fc1(x)
    x = self.sigm(x)
    return x

In [48]:
# hyperparameters
LEARNING_RATE = 0.001
epoch_count = 50
mb_size = 10

# dataset load
load_pulsar_dataset()
step_count = arrange_data(mb_size)
test_x, test_y = get_test_data()

In [49]:
# gpu check
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print("device:", device)

# model set (gpu)
model = Net().to(device)
print(model)

# SGD optimizer
#optimizer = optim.SGD(model.parameters(), lr = LEARNING_RATE)
optimizer = optim.Adam(model.parameters())

# MSE loss
loss_fn = nn.BCELoss()

for epoch in range(epoch_count):
  losses, accs = [], []

  # set train mode
  model.train()

  for n in range(step_count):
    # optimizer init
    optimizer.zero_grad()

    # get train data
    train_x, train_y = get_train_data(mb_size, n)

    # dataset to torch
    x = torch.from_numpy(train_x).float()
    y = torch.from_numpy(train_y).float()
    x, y = x.to(device), y.to(device)

    # forward
    y_pred = model(x)
    acc = eval_accuracy(y_pred.to('cpu').detach().numpy(), train_y)
    accs.append(acc)

    # loss
    loss = loss_fn(y_pred, y)

    # backprop
    loss.backward()
    losses.append(loss.item())

    # weight, bias 업데이트
    optimizer.step()

  # run test & eval
  model.eval()
  x = torch.from_numpy(test_x).float()
  x = x.to(device)
  y_pred = model(x)
  acc = eval_accuracy(y_pred.to('cpu').detach().numpy(), test_y)

  print('Epoch {}: loss={:5.3f}, accuracy={:5.3f}/{:5.3f}'. \
        format(epoch+1, np.mean(losses), np.mean(accs), acc))

device: cuda
Net(
  (fc1): Linear(in_features=8, out_features=1, bias=True)
  (sigm): Sigmoid()
)
Epoch 1: loss=1.527, accuracy=0.901/0.971
Epoch 2: loss=0.103, accuracy=0.971/0.972
Epoch 3: loss=0.099, accuracy=0.972/0.974
Epoch 4: loss=0.097, accuracy=0.973/0.978
Epoch 5: loss=0.096, accuracy=0.974/0.977
Epoch 6: loss=0.095, accuracy=0.974/0.978
Epoch 7: loss=0.093, accuracy=0.974/0.977
Epoch 8: loss=0.092, accuracy=0.974/0.978
Epoch 9: loss=0.091, accuracy=0.976/0.979
Epoch 10: loss=0.090, accuracy=0.975/0.979
Epoch 11: loss=0.089, accuracy=0.976/0.978
Epoch 12: loss=0.088, accuracy=0.976/0.980
Epoch 13: loss=0.089, accuracy=0.975/0.980
Epoch 14: loss=0.087, accuracy=0.976/0.977
Epoch 15: loss=0.088, accuracy=0.975/0.978
Epoch 16: loss=0.087, accuracy=0.976/0.977
Epoch 17: loss=0.086, accuracy=0.975/0.980
Epoch 18: loss=0.087, accuracy=0.975/0.982
Epoch 19: loss=0.086, accuracy=0.976/0.979
Epoch 20: loss=0.085, accuracy=0.976/0.979
Epoch 21: loss=0.085, accuracy=0.976/0.980
Epoch 22