# Import required Libraries

In [52]:
import numpy as np
import csv
import time 
# 난수 발생 패턴 고정
np.random.seed(1234)  
def randomize(): np.random(time.time())
    

# Set Hyperparameters

In [53]:
RND_MEAN = 0
RND_STD = 0.0030

learning_rate = 0.001

# Main Function

In [54]:
def abalone_exec(epoch_count=10, mb_size=10, report=1):
    # 아발로니 데이터셋 로드
    load_abalone_dataset() 
    # 모델 파라미터 초기화
    init_model() 
    # 학습 수행 
    train_and_test(epoch_count, mb_size, report) 

# Data Load Function

In [55]:
def load_abalone_dataset():
    with open('./data/abalone.csv') as csvfile:
        # csv는 엑셀처럼 생김
        csvreader = csv.reader(csvfile) 
        # 첫 행은 읽지 않고 넘김 
        next(csvreader, None) 
        rows = []
        # 행 단위로 데이터를 읽어옴
        for row in csvreader:  
            rows.append(row)
    # global 변수 선언 
    global data, input_cnt, output_cnt 
    # input의 크기와 output의 크기 지정(성별을 3개의 원핫벡터로 표현하므로 인풋 특성이 총 10개가 됨)
    input_cnt, output_cnt = 10, 1 
    data = np.zeros([len(rows), input_cnt+output_cnt])
    
    for n, row in enumerate(rows):
        # 성별을 원핫벡터로 표현 
        if row[0] == 'I': data[n, 0] = 1 
        if row[0] == 'M': data[n, 1] = 1
        if row[0] == 'F': data[n, 2] = 1
        # 매 행의 0, 1, 2인덱스는 성별을 나타냄, 3부터는 원래 있던 데이터 복붙 
        data[n, 3:] = row[1:]  
        

# Reset parameters Function

In [56]:
def init_model():
    global weight, bias, input_cnt, output_cnt
    weight = np.random.normal(RND_MEAN, RND_STD, [input_cnt, output_cnt])
    bias = np.zeros([output_cnt])

# Train Function

In [57]:
def train_and_test(epoch_count, mb_size, report):
    step_count = arrange_data(mb_size)
    test_x, test_y = get_test_data()
    
    for epoch in range(epoch_count):
        losses, accs = [], []
        
        for n in range(step_count):
            train_x, train_y = get_train_data(mb_size, n)
            loss, acc = run_train(train_x, train_y)
            losses.append(loss)
            accs.append(acc)
            
        if report > 0 and (epoch+1) % report == 0:
            acc = run_test(test_x, test_y)
            print('Epoch {}: loss={:5.3f}, accuracy={:5.3f}/{:5.3f}'.format(epoch+1, np.mean(loss), np.mean(accs), acc))
            
    final_acc = run_test(test_x, test_y)
    print('\n final test: final accuracy = {:5.3f}'.format(final_acc))

# Get data Function

In [58]:
# 테스트할 데이터와 학습할 데이터를 나누는 함수 
def arrange_data(mb_size):
    global data, shuffle_map, test_begin_idx 
    shuffle_map = np.arange(data.shape[0])
    np.random.shuffle(shuffle_map)
    step_count = int(data.shape[0] * 0.8) // mb_size
    test_begin_idx = step_count * mb_size
    return step_count

# 테스트 데이터를 받아옴 
def get_test_data():
    global data, shuffle_map, test_begin_idx, output_cnt
    test_data = data[shuffle_map[test_begin_idx:]] # 테스트를 시작할 데이터부터 가져옴 
    return test_data[:, :-output_cnt], test_data[:, -output_cnt]

# 학습할 데이터를 받아옴 
def get_train_data(mb_size, nth):
    global data, shuffle_map, test_begin_idx, output_cnt
    if nth == 0:
        np.random.shuffle(shuffle_map[:test_begin_idx])
    train_data = data[shuffle_map[mb_size*nth:mb_size*(nth+1)]] # mb_size 만큼의 데이터만 가져옴 
    return train_data[:, :-output_cnt], train_data[:, -output_cnt:]


# Run Function

In [59]:
def run_train(x, y):
    output, aux_nn = forward_neural(x)
    loss, aux_pp = forward_post(output, y)
    accuracy = eval_accuracy(output, y)
    
    G_loss = 1.0 
    G_output = backprop_post(G_loss, aux_pp)
    backprop_neural(G_output, aux_nn)
    
    return loss, accuracy

def run_test(x, y):
    output, _ = forward_neural(x)
    accuracy = eval_accuracy(output, y)
    return accuracy

# Forward & Backward Propagation Function

In [60]:
def forward_neural(x):
    global weight, bias
    output = np.matmul(x, weight) + bias
    return output, x

def backprop_neural(G_output, x):
    global weight, bias
    g_output_w = x.transpose()
    
    G_w = np.matmul(g_output_w, G_output)
    G_b = np.sum(G_output, axis=0)
    
    weight -= learning_rate * G_w
    bias -= learning_rate * G_b
    
def forward_post(output, y):
    diff = output - y
    square = np.square(diff)
    loss = np.mean(square)
    return loss, diff

def backprop_post(G_loss, diff):
    shape = diff.shape
    
    g_loss_square = np.ones(shape) / np.prod(shape)
    g_square_diff = 2 * diff
    g_diff_output = 1
    
    G_square = g_loss_square * G_loss
    G_diff = g_square_diff * G_square
    G_output = g_diff_output * G_diff
    
    return G_output

# Accuracy Function

In [61]:
def eval_accuracy(output, y):
    mdiff = np.mean(np.abs((output-y)/y))
    return 1 - mdiff

# Run

In [63]:
abalone_exec()
print(weight)
print(bias)

Epoch 1: loss=19.373, accuracy=0.555/0.683
Epoch 2: loss=14.653, accuracy=0.818/0.660
Epoch 3: loss=4.109, accuracy=0.810/0.658
Epoch 4: loss=4.825, accuracy=0.808/0.661
Epoch 5: loss=4.118, accuracy=0.808/0.663
Epoch 6: loss=2.944, accuracy=0.809/0.664
Epoch 7: loss=3.433, accuracy=0.808/0.666
Epoch 8: loss=13.314, accuracy=0.808/0.668
Epoch 9: loss=8.611, accuracy=0.807/0.671
Epoch 10: loss=6.699, accuracy=0.809/0.670

 final test: final accuracy = 0.670
[[1.01617685]
 [1.51142756]
 [1.63278597]
 [2.03414766]
 [1.62248277]
 [0.60801856]
 [2.42286624]
 [0.54287   ]
 [0.47943326]
 [1.05478035]]
[4.15802604]
