In [5]:
import numpy as np
import csv
import time

np.random.seed(1234)  # Fix NumPy's global seed so the random draws below are repeatable

def randomize():
    """Re-seed NumPy's global random generator from the current wall-clock time.

    Call this to undo the fixed seed set at the top of the notebook when
    non-repeatable random numbers are wanted.
    """
    # np.random.seed() only accepts integers in [0, 2**32); time.time() returns
    # a float, so truncate it and wrap it into the valid range.
    np.random.seed(int(time.time()) % (2 ** 32))
    
    
# Hyper-parameters
RND_MEAN = 0  # mean of the normal distribution used to initialise the weights in init_model()
RND_STD = 0.0030  # standard deviation of that normal distribution
LEARNING_RATE= 0.001  # step size applied to the gradients in backprop_neuralnet()


In [6]:
def abalone_exec_main(epoch_count=10, mb_size=10, report=1):
    """Run the whole experiment: load the data, initialise the model, train and test.

    Args:
        epoch_count: number of passes over the training portion of the data.
        mb_size: number of samples per mini-batch.
        report: print a progress line every `report` epochs.
    """
    # 1. Read the data set into the module-level `data` matrix.
    load_abalone_dataset()
    # 2. Create the initial weight / bias parameters.
    init_model()
    # 3. Train on mini-batches and evaluate on the held-out samples.
    train_and_test(epoch_count=epoch_count, mb_size=mb_size, report=report)
    

In [7]:
def load_abalone_dataset():
    """Read './abalone.csv' into the module-level `data` matrix.

    The first CSV line is skipped as a header. Each remaining row becomes one
    row of `data`, which has input_cnt + output_cnt (= 11) columns:

    * columns 0-2: one-hot encoding of the first CSV field
      ('I' -> column 0, 'M' -> column 1, 'F' -> column 2; any other value
      leaves all three columns at zero),
    * columns 3 onward: the remaining CSV fields, converted to float.

    Side effects: sets the globals `data`, `input_cnt` and `output_cnt`, and
    prints a status line followed by the loaded matrix.
    """
    global data, input_cnt, output_cnt

    # Pull every record into memory, then release the file handle.
    with open('./abalone.csv') as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader, None)  # drop the header line (no-op on an empty file)
        rows = list(csvreader)

    input_cnt, output_cnt = 10, 1  # input vector size, output vector size
    data = np.zeros([len(rows), input_cnt + output_cnt])

    # Column that receives the 1 for each sex code.
    sex_to_column = {'I': 0, 'M': 1, 'F': 2}
    for n, row in enumerate(rows):
        hot_column = sex_to_column.get(row[0])
        if hot_column is not None:
            data[n, hot_column] = 1
        # Everything after the sex field fills the rest of the row.
        data[n, 3:] = row[1:]

    print("오오 잘된다")
    print(data)
        
        
        

            
        

In [8]:
# Smoke-check the loader; it prints a status line and the parsed data matrix.
load_abalone_dataset()

오오 잘된다
[[ 0.      1.      0.     ...  0.101   0.15   15.    ]
 [ 0.      1.      0.     ...  0.0485  0.07    7.    ]
 [ 0.      0.      1.     ...  0.1415  0.21    9.    ]
 ...
 [ 0.      1.      0.     ...  0.2875  0.308   9.    ]
 [ 0.      0.      1.     ...  0.261   0.296  10.    ]
 [ 0.      1.      0.     ...  0.3765  0.495  12.    ]]


In [9]:
def init_model():
    """Create the model parameters as module-level globals.

    `weight` is an (input_cnt, output_cnt) matrix drawn from a normal
    distribution with mean RND_MEAN and standard deviation RND_STD; `bias` is
    a zero vector of length output_cnt. Both are printed after creation.

    Reads the globals `input_cnt` and `output_cnt`, so the data set must have
    been loaded first.
    """
    global weight, bias, input_cnt, output_cnt

    weight_shape = [input_cnt, output_cnt]
    weight = np.random.normal(RND_MEAN, RND_STD, weight_shape)
    bias = np.zeros(output_cnt)

    # Show the freshly initialised parameters.
    for parameter in (weight, bias):
        print(parameter)

In [10]:
# Smoke-check the initialiser; needs input_cnt / output_cnt from load_abalone_dataset().
init_model()

[[ 1.41430549e-03]
 [-3.57292708e-03]
 [ 4.29812091e-03]
 [-9.37955688e-04]
 [-2.16176620e-03]
 [ 2.66148882e-03]
 [ 2.57876524e-03]
 [-1.90957051e-03]
 [ 4.70891163e-05]
 [-6.72805486e-03]]
[0.]


In [11]:
#Run training and evaluation function 
def train_and_test(epoch_count, mb_size, report):
    """Train the model on mini-batches and report accuracy on the held-out data.

    Args:
        epoch_count: number of passes over the training portion of the data.
        mb_size: number of samples per mini-batch.
        report: print a progress line every `report` epochs; a value <= 0
            disables the per-epoch report.

    The final test accuracy is always printed once training has finished.
    """
    step_count = arrange_data(mb_size)   # mini-batches per epoch
    test_x, test_y = get_test_data()

    for epoch in range(epoch_count):
        losses, accs = [], []

        for n in range(step_count):
            train_x, train_y = get_train_data(mb_size, n)
            loss, acc = run_train(train_x, train_y)

            # Collect per-batch statistics for this epoch's report.
            losses.append(loss)
            accs.append(acc)  # was `acces`, an undefined name (NameError)

        if report > 0 and (epoch + 1) % report == 0:
            test_acc = run_test(test_x, test_y)
            # Width 5, three decimals: mean train loss, mean train accuracy / test accuracy.
            print('Epoch {}: loss={:5.3f}, accuracy={:5.3f}/{:5.3f}'.format(
                epoch + 1, np.mean(losses), np.mean(accs), test_acc))

    final_acc = run_test(test_x, test_y)
    print('')
    print('Final Test: final accuracy = {:5.3f}'.format(final_acc))

In [14]:
#shuffle and divide data set into two (training, and evaluation)

def arrange_data(mb_size):
    """Shuffle the sample order and fix the train/test boundary.

    Builds the global `shuffle_map` (a random permutation of the row indices
    of `data`) and the global `test_begin_idx` (the first position in
    `shuffle_map` that belongs to the test split). The index array is printed
    before and after shuffling.

    Args:
        mb_size: number of samples per mini-batch.

    Returns:
        The number of whole mini-batches that fit into 80% of the data.
    """
    global data, shuffle_map, test_begin_idx

    sample_total = data.shape[0]

    # Row indices 0 .. sample_total - 1, in their original order.
    shuffle_map = np.arange(sample_total)
    print(shuffle_map)

    # Randomise the order in place.
    np.random.shuffle(shuffle_map)
    print(shuffle_map)

    # Only whole mini-batches are used for training; the rest is test data.
    train_total = int(sample_total * 0.8)
    step_count = train_total // mb_size
    test_begin_idx = step_count * mb_size

    return step_count
    
    
    

In [15]:
# Smoke-check the split with mini-batches of 10; needs `data` from load_abalone_dataset().
arrange_data(10)

[   0    1    2 ... 4174 4175 4176]
[3108 2694 3185 ... 3126 3356 3900]
334
3340


334

In [16]:
def get_test_data():
    """Return the held-out samples as an (inputs, targets) pair.

    The test split consists of the rows of `data` addressed by `shuffle_map`
    from position `test_begin_idx` to the end. The selected rows are printed.

    Returns:
        A tuple (x, y): x holds every column except the last `output_cnt`
        ones, y holds the last `output_cnt` columns.
    """
    global data, shuffle_map, test_begin_idx, output_cnt

    held_out_rows = shuffle_map[test_begin_idx:]
    test_data = data[held_out_rows]
    print(test_data)

    target_start = -output_cnt  # targets occupy the trailing columns
    test_x = test_data[:, :target_start]
    test_y = test_data[:, target_start:]
    return test_x, test_y
    

In [18]:
def get_train_data(mb_size, nth):
    """Return the `nth` training mini-batch as an (inputs, targets) pair.

    When `nth` is 0 (the first mini-batch requested in an epoch) the training
    part of `shuffle_map` (positions before `test_begin_idx`) is reshuffled in
    place and printed, so each epoch visits the samples in a new order.

    Args:
        mb_size: number of samples per mini-batch.
        nth: zero-based index of the mini-batch within the epoch.

    Returns:
        A tuple (x, y): x holds every column except the last `output_cnt`
        ones, y holds the last `output_cnt` columns.
    """
    global data, shuffle_map, test_begin_idx, output_cnt

    if nth == 0:
        # Slicing yields a view, so shuffling it reorders shuffle_map itself.
        train_indices = shuffle_map[:test_begin_idx]
        np.random.shuffle(train_indices)
        print(shuffle_map[:test_begin_idx])

    batch_start = mb_size * nth
    batch_rows = shuffle_map[batch_start: batch_start + mb_size]
    train_data = data[batch_rows]

    target_start = -output_cnt  # targets occupy the trailing columns
    return train_data[:, :target_start], train_data[:, target_start:]

    

In [19]:
# Smoke-check: fetch the first mini-batch of 10 (nth=0 also reshuffles the training indices).
get_train_data(10,0)

[ 756  665 4144 ... 3445 3154 1864]


(array([[0.    , 1.    , 0.    , 0.615 , 0.525 , 0.155 , 1.1375, 0.367 ,
         0.236 , 0.37  ],
        [0.    , 0.    , 1.    , 0.395 , 0.295 , 0.095 , 0.2245, 0.078 ,
         0.054 , 0.08  ],
        [0.    , 1.    , 0.    , 0.67  , 0.535 , 0.19  , 1.669 , 0.7465,
         0.2935, 0.508 ],
        [1.    , 0.    , 0.    , 0.54  , 0.42  , 0.14  , 0.6275, 0.2505,
         0.1175, 0.235 ],
        [0.    , 0.    , 1.    , 0.615 , 0.515 , 0.135 , 1.1215, 0.545 ,
         0.2305, 0.29  ],
        [1.    , 0.    , 0.    , 0.37  , 0.275 , 0.14  , 0.2215, 0.097 ,
         0.0455, 0.0615],
        [0.    , 0.    , 1.    , 0.5   , 0.375 , 0.115 , 0.5945, 0.185 ,
         0.148 , 0.19  ],
        [0.    , 1.    , 0.    , 0.55  , 0.41  , 0.13  , 0.8705, 0.4455,
         0.2115, 0.213 ],
        [0.    , 1.    , 0.    , 0.5   , 0.375 , 0.15  , 0.636 , 0.2535,
         0.145 , 0.19  ],
        [0.    , 1.    , 0.    , 0.6   , 0.46  , 0.155 , 0.9595, 0.4455,
         0.189 , 0.295 ]]), array([[

In [20]:
def run_train(x,y):
    """Perform one training step on a mini-batch.

    Runs the forward pass, measures loss and accuracy, then back-propagates
    the loss gradient so the model parameters are updated.

    Args:
        x: mini-batch inputs.
        y: mini-batch targets.

    Returns:
        A tuple (loss, accuracy), both measured before the parameter update.
    """
    # Forward pass; each stage hands back the values its backward pass needs.
    prediction, nn_cache = forward_neuralnet(x)
    loss, postproc_cache = forward_postproc(prediction, y)

    accuracy = eval_accuracy(prediction, y)

    # Backward pass, seeded with d(loss)/d(loss) = 1.
    grad_prediction = backprop_postproc(1.0, postproc_cache)
    backprop_neuralnet(grad_prediction, nn_cache)

    return loss, accuracy
    

In [21]:
def run_test(x,y):
    """Evaluate the current model on (x, y) without updating any parameters.

    Returns:
        The accuracy reported by eval_accuracy for the model's predictions.
    """
    # Only the prediction matters here; the backprop cache is discarded.
    prediction, _ = forward_neuralnet(x)
    return eval_accuracy(prediction, y)

In [23]:
def forward_neuralnet(x):
    """Forward pass of the single linear layer: x * weight + bias.

    Args:
        x: input matrix, one sample per row.

    Returns:
        A tuple (output, x). The unchanged input is returned alongside the
        output because the backward pass needs it.
    """
    global weight, bias
    weighted_sum = np.matmul(x, weight)
    output = weighted_sum + bias
    return output, x

In [24]:
def forward_postproc(output, y):
    """Compute the mean-squared-error loss between predictions and targets.

    Args:
        output: model predictions.
        y: target values.

    Returns:
        A tuple (loss, diff): the mean of the squared differences, and the raw
        differences (output - y), which the backward pass reuses.
    """
    residual = output - y
    loss = np.mean(np.square(residual))  # MSE
    return loss, residual

In [None]:
def backprop_postproc(G_loss, diff):
    """Back-propagate through the mean-squared-error loss.

    Mirrors forward_postproc, where loss = mean(square(diff)) and
    diff = output - y, applying the chain rule one stage at a time.

    Args:
        G_loss: gradient of the final objective with respect to the loss
            (1.0 when the loss itself is the objective).
        diff: array of differences (output - y) saved by the forward pass.

    Returns:
        An array shaped like `diff` holding the gradient of the loss with
        respect to the network output.
    """
    shape = diff.shape  # tuple (n, m)

    # Local derivatives of each forward stage.
    g_loss_square = np.ones(shape) / np.prod(shape)  # mean: every element contributes 1/N
    g_square_diff = 2 * diff                         # d(diff^2) / d(diff)
    g_diff_output = 1                                # d(output - y) / d(output)

    # Chain them from the loss back to the output.
    G_square = g_loss_square * G_loss
    G_diff = g_square_diff * G_square  # was `Gsquare`, an undefined name (NameError)
    G_output = g_diff_output * G_diff

    return G_output


# Backward-compatible alias: the function was originally defined under this
# misspelled name, while run_train() calls `backprop_postproc`.
backpro_postproc = backprop_postproc
    

In [None]:
def backprop_neuralnet(G_output, x):
    """Back-propagate through the linear layer and apply a gradient-descent step.

    Args:
        G_output: gradient of the loss with respect to the layer output.
        x: the input that produced that output in the forward pass.

    Side effects:
        Updates the global `weight` and `bias` arrays in place, moving each
        against its gradient scaled by LEARNING_RATE.
    """
    global weight, bias

    # output = x * weight + bias, so d(output)/d(weight) is x transposed ...
    grad_weight = np.matmul(x.transpose(), G_output)
    # ... and the bias gradient is the output gradient summed over the batch.
    grad_bias = np.sum(G_output, axis=0)

    weight -= LEARNING_RATE * grad_weight
    bias -= LEARNING_RATE * grad_bias