# 이진 판단: 천체의 펄서 여부 판정 신경망

In [11]:
# dataset : www.kaggle.com/pavanraj159/predicting-a-pulsar-star
# explanation : https://towering-payment-a36.notion.site/Chapter2-8d957bb3746d4bdd873eeca35f718303

In [12]:
# NOTE(review): the cells below use names that are never imported or defined in this
# notebook (csv, np, init_model, arrange_data, run_train, ...). Presumably they are
# brought into scope by running the Chapter 1 notebook here -- confirm.
%run Chapter_1.ipynb

Epoch 1: loss=33.875, accuracy=0.557/0.812
Epoch 2: loss=8.226, accuracy=0.820/0.814
Epoch 3: loss=7.582, accuracy=0.812/0.809
Epoch 4: loss=7.475, accuracy=0.808/0.811
Epoch 5: loss=7.395, accuracy=0.810/0.809
Epoch 6: loss=7.328, accuracy=0.808/0.810
Epoch 7: loss=7.269, accuracy=0.808/0.811
Epoch 8: loss=7.217, accuracy=0.808/0.812
Epoch 9: loss=7.175, accuracy=0.810/0.810
Epoch 10: loss=7.135, accuracy=0.809/0.810

Final Test: final accuracy = 0.810
[[1.02697603]
 [1.47450981]
 [1.66960135]
 [2.04468668]
 [1.62513525]
 [0.60292627]
 [2.39993815]
 [0.54107313]
 [0.46878034]
 [1.01969382]]
[4.16894769]
Epoch 20: loss=5.804, accuracy=0.825/0.831
Epoch 40: loss=5.259, accuracy=0.834/0.828
Epoch 60: loss=5.056, accuracy=0.837/0.838
Epoch 80: loss=4.950, accuracy=0.838/0.840
Epoch 100: loss=4.910, accuracy=0.840/0.826

Final Test: final accuracy = 0.826


## 구현 순서
- 메인 함수 정의
- 데이터 적재 함수 정의
- 이진 판단 후처리를 위한 순전파 및 역전파 함수 재정의
- 정확도 계산함수 재정의
- 시그모이드 관련 함수 정의

In [13]:
def pulsar_exec(epoch_count=10, mb_size=10, report=1):
    """Entry point for the pulsar experiment.

    Calls, in order, load_pulsar_dataset(), init_model() and
    train_and_test(epoch_count, mb_size, report).

    Args:
        epoch_count: Passed through to train_and_test unchanged.
        mb_size: Passed through to train_and_test unchanged.
        report: Passed through to train_and_test unchanged.
    """
    load_pulsar_dataset()
    init_model()  # not defined in this notebook; presumably provided by Chapter_1 -- confirm
    train_and_test(epoch_count, mb_size, report)
    
def load_pulsar_dataset(path='../jch/chap02/pulsar_stars.csv'):
    """Read the pulsar CSV file into the module-level ``data`` array.

    The first line of the file is treated as a header and skipped; every
    remaining row is converted to float32.

    Side effects: sets the globals ``data`` (one array row per CSV row),
    ``input_cnt`` (8) and ``output_cnt`` (1).

    Args:
        path: Location of the CSV file. Defaults to the location that used
            to be hard-coded in this function.
    """
    global data, input_cnt, output_cnt

    # newline='' is the mode the csv module documents for files given to csv.reader.
    with open(path, newline='') as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader, None)  # skip the header line (no error if the file is empty)
        rows = list(csvreader)

    input_cnt, output_cnt = 8, 1
    data = np.asarray(rows, dtype='float32')
    
def forward_postproc(output, y):
    """Turn the network output into a scalar loss.

    Evaluates sigmoid_cross_entropy_with_logits(y, output) and averages the
    result over all elements.

    Returns:
        A pair ``(loss, aux)`` where ``aux`` is the list ``[y, output, entropy]``
        that backprop_postproc unpacks.
    """
    per_element = sigmoid_cross_entropy_with_logits(y, output)
    aux = [y, output, per_element]
    return np.mean(per_element), aux

def backprop_postproc(G_loss, aux):
    """Propagate the loss gradient back to the network output.

    Args:
        G_loss: Gradient arriving at the scalar loss.
        aux: The ``[y, output, entropy]`` list returned by forward_postproc.

    Returns:
        The gradient with respect to ``output``.
    """
    y, output, entropy = aux

    # The loss is the mean over all entropy elements, so each element
    # receives 1/N of the incoming gradient.
    mean_share = 1.0 / np.prod(entropy.shape)
    upstream = mean_share * G_loss

    # Chain through the per-element entropy derivative.
    local_slope = sigmoid_cross_entropy_with_logits_derv(y, output)
    return local_slope * upstream

def eval_accuracy(output, y):
    """Return the fraction of elements whose predicted class matches the label.

    An output value counts as a positive prediction when it is greater
    than 0; a label counts as positive when it is greater than 0.5.
    """
    predicted_positive = np.greater(output, 0)
    labelled_positive = np.greater(y, 0.5)
    return np.mean(np.equal(predicted_positive, labelled_positive))

def relu(x):
    """Return the element-wise maximum of ``x`` and 0."""
    return np.maximum(x, 0)

def sigmoid(x):
    """Evaluate the logistic sigmoid of ``x``.

    Both exponentials below only ever receive non-positive arguments
    (-relu(-x) and -|x|), so np.exp is never asked for a large positive power.
    """
    numerator = np.exp(-relu(-x))
    denominator = 1.0 + np.exp(-np.abs(x))
    return numerator / denominator
        
def sigmoid_derv(x, y):
    """Return ``y * (1 - y)``.

    Only ``y`` is used; ``x`` is accepted but ignored.
    """
    complement = 1 - y
    return y * complement

def sigmoid_cross_entropy_with_logits(z, x):
    """Compute ``relu(x) - x*z + log(1 + exp(-|x|))`` element-wise.

    Args:
        z: Target values.
        x: Logits (pre-sigmoid values).
    """
    linear_part = relu(x) - x * z
    # exp only sees a non-positive argument here.
    log_part = np.log(1 + np.exp(-np.abs(x)))
    return linear_part + log_part

def sigmoid_cross_entropy_with_logits_derv(z, x):
    """Return ``sigmoid(x) - z``.

    backprop_postproc uses this as the derivative of the per-element
    entropy with respect to ``x``.
    """
    probability = sigmoid(x)
    return -z + probability

In [16]:
def pulsar_exec(epoch_count=10, mb_size=10, report=1, adjust_ratio=False):
    """Entry point for the pulsar experiment, with optional class balancing.

    Calls, in order, load_pulsar_dataset(adjust_ratio), init_model() and
    train_and_test(epoch_count, mb_size, report).

    Args:
        epoch_count: Passed through to train_and_test unchanged.
        mb_size: Passed through to train_and_test unchanged.
        report: Passed through to train_and_test unchanged.
        adjust_ratio: Passed through to load_pulsar_dataset unchanged.
    """
    load_pulsar_dataset(adjust_ratio)
    init_model()  # not defined in this notebook; presumably provided by Chapter_1 -- confirm
    train_and_test(epoch_count, mb_size, report)
    
def load_pulsar_dataset(adjust_ratio=False, path='../jch/chap02/pulsar_stars.csv'):
    """Read the pulsar CSV file into ``data``, optionally balancing the classes.

    The first line of the file is treated as a header and skipped. Rows whose
    ninth column is the string '1' are pulsars; all other rows are stars.
    Star rows are placed first in ``data`` and pulsar rows after them.

    Side effects: sets the globals ``data`` (float64 array with 9 columns),
    ``input_cnt`` (8) and ``output_cnt`` (1).

    Args:
        adjust_ratio: When true, pulsar rows are repeated cyclically until
            there are exactly as many pulsar rows as star rows.
        path: Location of the CSV file. Defaults to the location that used
            to be hard-coded in this function.

    Raises:
        ValueError: If ``adjust_ratio`` is true and the file contains star
            rows but no pulsar row to repeat.
    """
    global data, input_cnt, output_cnt

    pulsars, stars = [], []
    # newline='' is the mode the csv module documents for files given to csv.reader.
    with open(path, newline='') as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader, None)  # skip the header line
        for row in csvreader:
            if row[8] == '1':
                pulsars.append(row)
            else:
                stars.append(row)

    input_cnt, output_cnt = 8, 1
    column_cnt = input_cnt + output_cnt

    # reshape keeps an empty class as a (0, 9) array instead of a (0,) one,
    # so the assignments below also work when one class is missing.
    star_rows = np.asarray(stars, dtype='float32').reshape(-1, column_cnt)
    pulsar_rows = np.asarray(pulsars, dtype='float32').reshape(-1, column_cnt)
    star_cnt, pulsar_cnt = len(star_rows), len(pulsar_rows)

    if adjust_ratio:
        if star_cnt > 0 and pulsar_cnt == 0:
            raise ValueError(
                'cannot balance classes: the dataset contains no pulsar rows')
        # Row n of the balanced pulsar part is pulsar n % pulsar_cnt.
        # max(..., 1) only matters when both classes are empty.
        pulsar_rows = pulsar_rows[np.arange(star_cnt) % max(pulsar_cnt, 1)]

    data = np.zeros([star_cnt + len(pulsar_rows), column_cnt])
    data[:star_cnt, :] = star_rows
    data[star_cnt:, :] = pulsar_rows
        
def eval_accuracy(output, y):
    """Compute accuracy, precision, recall and F1 for binary predictions.

    An output value counts as a positive prediction when it is greater
    than 0; a label counts as positive when it is greater than 0.5.

    Returns:
        The list ``[accuracy, precision, recall, f1]``.
    """
    predicted_pos = np.greater(output, 0)
    actual_pos = np.greater(y, 0.5)
    predicted_neg = np.logical_not(predicted_pos)
    actual_neg = np.logical_not(actual_pos)

    def count(prediction_mask, answer_mask):
        # Number of elements where both masks hold.
        return np.sum(np.logical_and(prediction_mask, answer_mask))

    # Confusion-matrix cells.
    tp = count(predicted_pos, actual_pos)
    fp = count(predicted_pos, actual_neg)
    fn = count(predicted_neg, actual_pos)
    tn = count(predicted_neg, actual_neg)

    accuracy = safe_div(tp+tn, tp+tn+fp+fn)
    precision = safe_div(tp, tp+fp)
    recall = safe_div(tp, tp+fn)
    harmonic_half = safe_div(recall*precision, recall+precision)
    f1 = 2 * harmonic_half

    return [accuracy, precision, recall, f1]

def safe_div(p, q):
    """Divide ``p`` by ``q`` without raising on a (near-)zero divisor.

    Both arguments are converted to float first. When ``|q|`` is below
    1e-20 the sign of ``p`` (-1, 0 or 1) is returned instead of a quotient.
    """
    numerator = float(p)
    denominator = float(q)
    if np.abs(denominator) >= 1.0e-20:
        return numerator / denominator
    return np.sign(numerator)

def train_and_test(epoch_count, mb_size, report):
    """Train for ``epoch_count`` epochs and print the four evaluation metrics.

    Args:
        epoch_count: Number of passes over the training steps.
        mb_size: Passed to arrange_data and get_train_data.
        report: Print a progress line every ``report`` epochs; a value
            of 0 or less disables progress lines.

    The final test result is always printed after the last epoch.
    """
    def format_metrics(metrics):
        # Four comma-separated values, each rendered with '%5.3f'.
        return ','.join(['%5.3f']*4) % tuple(metrics)

    step_count = arrange_data(mb_size)
    test_x, test_y = get_test_data()

    for epoch_no in range(1, epoch_count + 1):
        losses = []

        for step in range(step_count):
            batch_x, batch_y = get_train_data(mb_size, step)
            loss, _ = run_train(batch_x, batch_y)
            losses.append(loss)

        # Skip reporting unless this epoch falls on the reporting interval.
        if report <= 0 or epoch_no % report != 0:
            continue

        acc_str = format_metrics(run_test(test_x, test_y))
        print('Epoch {}: loss={:5.3f}, result={}'. \
              format(epoch_no, np.mean(losses), acc_str))

    final_str = format_metrics(run_test(test_x, test_y))
    print('\nFinal Test: final result = {}'.format(final_str))

In [17]:
# Run with the defaults of the most recent pulsar_exec definition:
# epoch_count=10, mb_size=10, report=1, adjust_ratio=False.
# The four numbers printed per line are accuracy, precision, recall and F1.
pulsar_exec()

Epoch 1: loss=14.104, result=0.932,0.591,0.869,0.704
Epoch 2: loss=12.449, result=0.973,0.958,0.746,0.839
Epoch 3: loss=11.660, result=0.973,0.951,0.752,0.840
Epoch 4: loss=10.214, result=0.948,0.675,0.851,0.753
Epoch 5: loss=12.217, result=0.953,0.994,0.504,0.669
Epoch 6: loss=11.340, result=0.970,0.857,0.821,0.838
Epoch 7: loss=11.239, result=0.961,0.758,0.860,0.806
Epoch 8: loss=10.970, result=0.972,0.968,0.722,0.827
Epoch 9: loss=11.369, result=0.970,0.838,0.836,0.837
Epoch 10: loss=10.798, result=0.949,1.000,0.451,0.621

Final Test: final result = 0.949,1.000,0.451,0.621


In [18]:
# Same run, but with adjust_ratio=True: load_pulsar_dataset repeats the pulsar rows
# until there are as many pulsar rows as star rows.
pulsar_exec(adjust_ratio=True)

Epoch 1: loss=41.395, result=0.850,0.996,0.709,0.828
Epoch 2: loss=37.350, result=0.916,0.925,0.909,0.917
Epoch 3: loss=37.621, result=0.886,0.855,0.936,0.893
Epoch 4: loss=35.285, result=0.516,0.513,1.000,0.678
Epoch 5: loss=34.152, result=0.908,0.990,0.829,0.902
Epoch 6: loss=33.929, result=0.916,0.919,0.916,0.917
Epoch 7: loss=33.684, result=0.918,0.982,0.856,0.914
Epoch 8: loss=36.195, result=0.925,0.967,0.883,0.923
Epoch 9: loss=34.916, result=0.853,0.798,0.953,0.869
Epoch 10: loss=37.392, result=0.876,0.831,0.951,0.887

Final Test: final result = 0.876,0.831,0.951,0.887


In [19]:
# NOTE(review): abalone_exec is not defined in this notebook (presumably it comes from
# Chapter_1.ipynb -- confirm). The recorded output below shows an ever more negative loss
# and 1.000 for every metric, so these numbers look meaningless; presumably the call now
# runs with the binary-classification functions redefined above -- verify before relying on it.
abalone_exec()

Epoch 1: loss=-4005.318, result=1.000,1.000,1.000,1.000
Epoch 2: loss=-12038.183, result=1.000,1.000,1.000,1.000
Epoch 3: loss=-20070.984, result=1.000,1.000,1.000,1.000
Epoch 4: loss=-28103.800, result=1.000,1.000,1.000,1.000
Epoch 5: loss=-36136.732, result=1.000,1.000,1.000,1.000
Epoch 6: loss=-44169.581, result=1.000,1.000,1.000,1.000
Epoch 7: loss=-52202.471, result=1.000,1.000,1.000,1.000
Epoch 8: loss=-60235.356, result=1.000,1.000,1.000,1.000
Epoch 9: loss=-68268.149, result=1.000,1.000,1.000,1.000
Epoch 10: loss=-76301.046, result=1.000,1.000,1.000,1.000

Final Test: final result = 1.000,1.000,1.000,1.000
