# pre-processing

In [1]:
dic = {'곡류군': 0, '과일군': 1, '고지방 어육류군': 2, '중지방 어육류군': 3, '저지방 어육류군': 4, '우유군': 5, '저지방 우유군': 6,\
       '지방군': 7, '채소군': 8}

In [2]:
f = open("D:/Backup_data/jongkeun/Kangwon/samples/sample.csv", 'r', encoding='UTF8')
data = f.readlines()

# import

In [3]:
import time
import numpy as np
import tensorflow as tf
# warning 보이지 않게 설정
tf.logging.set_verbosity(tf.logging.ERROR)

# hyper parameter
- training_epochs: 학습 에폭, 1,000번 반복 학습하도록 설정
- batch_size: 배치 크기, 한 번에 20개의 데이터를 학습하도록 설정
- width: input의 가로 길이, 3
- height: input의 세로 길이, 1
- initial_rate: 초기 학습률, 0.0001
- label_len: 분류 레이블 크기(총 식품군의 종류), 9

In [4]:
training_epochs = 1000
batch_size = 20
width = 3
height = 1
initial_rate = 1e-3
label_len = 9

# dataset

- 탄수화물, 단백질, 지방 데이터를 정규화하여 배열로 저장 (shape=[1,3])
- 식품군에 따라 레이블 원핫으로 정의 (0~9)

In [None]:
_input = []
_label = []
label = '000000000'

"""
- csv 포맷으로 정의한 data를 line별로 읽어와 각 line을 [식품군, 탄수화물, 단백질, 지방]의 형태로 변환

 _class: 식품군
kcal: 에너지
carbo: 탄수화물
protein: 단백질
fat: 지방

- 탄수화물, 단백질, 지방 함량을 정규화 (세 가지의 총 합이 1이 되도록)

"""

for idx, line in enumerate(data) :
    
    info = line.split(',')
    
    _class = info[0]
    kcal = float(info[1])
    carbo = float(info[2])
    protein = float(info[3])
    fat = float(info[4][:-1])
    
    val = carbo+protein+fat
    _input.append([carbo/val, protein/val, fat/val])
    _label.append(list(label[:dic[_class]]+'1'+label[dic[_class]+1:]))
    

In [6]:
"""
- 정규화한 데이터를 합성곱 신경망에 맞게 shape 변환 => (총 데이터 수, 높이, 너비, 1)
- input으로 들어갈 땐, (1, 높이, 너비, 1)의 형태로 입력

- label 데이터도 위와 같은 포맷으로 변환 => (총 데이터 수, 레이블 길이(9))

"""

_input = np.reshape(_input, [-1, height*width])
_input = np.array(np.reshape(_input, [-1, height, width, 1]), dtype=np.float32)

_label = np.array(np.reshape(_label, [-1, label_len]), dtype=np.int32)

# feeding value
- 학습에 필요한 value 정의
- x: input data([탄수화물, 단백질, 지방])
- y_: label (식품군)
- lr: 학습률
- is_training (training과 evaluation 구분을 위한 boolean 값)

In [7]:
x = tf.placeholder(dtype=tf.float32, shape=[None, height, width, 1])
y_ = tf.placeholder(dtype=tf.float32, shape=[None, label_len])
lr = tf.placeholder(dtype=tf.float32)
is_training = tf.placeholder(dtype=tf.bool)

# layers
- 합성곱 신경망 구현한 부분
- 3개의 합성곱 계층과, 1개의 max pooling 계층, 1개의 FC 계층으로 구현(깊이와 구조는 임의로 설정)
- 활성화 함수는 ReLU를 사용
- dropout은 합성곱 계층에서 0.7, FC 계층에서 0.5로 설정
- 분류기는 softmax 사용
- optimizer는 Adam 사용

In [8]:
layer1 = tf.layers.conv2d(inputs=x, filters=8, kernel_size=[3,3], padding='SAME')
layer2 = tf.layers.conv2d(layer1, 128, [3,3], padding='SAME')
layer3 = tf.layers.conv2d(layer2, 128, [3,3], padding='SAME')
layer_ = tf.layers.max_pooling2d(inputs=layer3, pool_size=[3,3], strides=[1,1], padding='SAME')
layer_ = tf.layers.dropout(inputs=layer_, rate=0.7, training=is_training)

fc = tf.contrib.layers.flatten(inputs=layer_)
fc = tf.layers.dense(inputs=fc, units=256, activation=tf.nn.relu)
fc = tf.layers.dropout(fc, 0.5, is_training)

logits = tf.layers.dense(fc, label_len, activation=None)
pred = tf.nn.softmax(logits)
# reduce_mean: 배열 전체 원소의 합을 원소 개수로 나누어 계산
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=y_))
train = tf.train.AdamOptimizer(lr).minimize(loss)
correct = tf.equal(tf.argmax(pred, 1), tf.argmax(y_, 1))
acc = tf.reduce_mean(tf.cast(correct, tf.float32))
label_output = tf.argmax(pred, 1)
label_input = tf.argmax(y_, 1)

# session

In [9]:
config = tf.ConfigProto(log_device_placement = True)
config.gpu_options.allow_growth = True

with tf.Session(config=config) as sess :
    coord = tf.train.Coordinator()
    thread = tf.train.start_queue_runners(sess=sess, coord=coord)
    
    sess.run(tf.global_variables_initializer())
    
    total_batch = int(len(_input) / batch_size)
    correct_list = []
    incorrect_list = []
    cost_list = []
    cost_sum_list = []
    
    total_acc = 0
    cost_sum = 0
    cost_flag = 0
    
    start_time = time.time()
    
    print("")
    
    for epoch in range(training_epochs) :
        total_cost = 0
                
        if epoch%10 == 9 :
            midpoint = int(time.time() - start_time)
        
            print("")
            print("=======================================================================================")
            print('{:03d}:{:02d}:{:02d}'.format(midpoint//3600, (midpoint%3600//60), midpoint%60))
            print("=======================================================================================")
            print("")
        
        batch_index = np.random.choice(len(_input), total_batch, replace=False)
                
        for i in range(total_batch) :                
            _, _loss = sess.run([train, loss], feed_dict={x: [_input[batch_index[i]]], y_: [_label[batch_index[i]]], 
                                                          is_training: True, lr: initial_rate})
            
            total_cost += _loss

        avg_cost = total_cost/total_batch
        print('Epoch : ', '%4d' % (epoch + 1), '    Avg. cost = ', '{:.4f}'.format(avg_cost))            
        
#         train_writer.add_summary(_merge, epoch)
               
    print("")
    print("=======================================================================================")
    print("================================     Training done     ================================")
    print("=======================================================================================")
    print("")
    
    for test in range(len(_input)) :
        _acc, _label_input, _label_output = sess.run([acc, label_input, label_output], 
                             feed_dict={x: [_input[test]], y_: [_label[test]], is_training: False})

        total_acc += _acc
        
        print("n: ", test, "    label_input: ", _label_input, "    label_output: ", _label_output)
        
        if _label_input == _label_output :
            correct_list.append(test)   
        else :
            incorrect_list.append(test)
        
    print("")
    print("          TOTAL ACC    : ", '{:.5f}'.format(total_acc / len(_input)))
    print("")
    
    end_time = int(time.time() - start_time)
    
    print("")
    print("=======================================================================================")
    print('{:03d}:{:02d}:{:02d}'.format(end_time//3600, (end_time%3600//60), end_time%60))
    print("=======================================================================================")
    print("")
    
    coord.request_stop()
    coord.join(thread)


Epoch :     1     Avg. cost =  2.1813
Epoch :     2     Avg. cost =  1.9025
Epoch :     3     Avg. cost =  1.8159
Epoch :     4     Avg. cost =  1.3705
Epoch :     5     Avg. cost =  1.0244
Epoch :     6     Avg. cost =  0.9940
Epoch :     7     Avg. cost =  1.2223
Epoch :     8     Avg. cost =  1.1413
Epoch :     9     Avg. cost =  1.1780

000:00:02

Epoch :    10     Avg. cost =  1.0796
Epoch :    11     Avg. cost =  1.1959
Epoch :    12     Avg. cost =  1.4928
Epoch :    13     Avg. cost =  1.1390
Epoch :    14     Avg. cost =  1.3150
Epoch :    15     Avg. cost =  1.4345
Epoch :    16     Avg. cost =  0.9296
Epoch :    17     Avg. cost =  1.3454
Epoch :    18     Avg. cost =  1.1265
Epoch :    19     Avg. cost =  0.9800

000:00:03

Epoch :    20     Avg. cost =  0.9736
Epoch :    21     Avg. cost =  0.7681
Epoch :    22     Avg. cost =  0.6811
Epoch :    23     Avg. cost =  0.7230
Epoch :    24     Avg. cost =  1.1931
Epoch :    25     Avg. cost =  0.8663
Epoch :    26     Avg. co

Epoch :   152     Avg. cost =  0.8274
Epoch :   153     Avg. cost =  0.7408
Epoch :   154     Avg. cost =  0.7022
Epoch :   155     Avg. cost =  0.7228
Epoch :   156     Avg. cost =  0.9854
Epoch :   157     Avg. cost =  0.7301
Epoch :   158     Avg. cost =  0.7097
Epoch :   159     Avg. cost =  1.1473

000:00:08

Epoch :   160     Avg. cost =  0.6266
Epoch :   161     Avg. cost =  0.6561
Epoch :   162     Avg. cost =  0.8517
Epoch :   163     Avg. cost =  0.5202
Epoch :   164     Avg. cost =  0.7649
Epoch :   165     Avg. cost =  0.7733
Epoch :   166     Avg. cost =  0.6943
Epoch :   167     Avg. cost =  0.8569
Epoch :   168     Avg. cost =  0.7168
Epoch :   169     Avg. cost =  0.8257

000:00:09

Epoch :   170     Avg. cost =  0.7700
Epoch :   171     Avg. cost =  0.8196
Epoch :   172     Avg. cost =  0.4542
Epoch :   173     Avg. cost =  0.7699
Epoch :   174     Avg. cost =  0.7900
Epoch :   175     Avg. cost =  0.4314
Epoch :   176     Avg. cost =  1.2665
Epoch :   177     Avg. cos

Epoch :   441     Avg. cost =  0.7921
Epoch :   442     Avg. cost =  0.5441
Epoch :   443     Avg. cost =  0.5701
Epoch :   444     Avg. cost =  0.5185
Epoch :   445     Avg. cost =  0.8597
Epoch :   446     Avg. cost =  0.7210
Epoch :   447     Avg. cost =  0.7060
Epoch :   448     Avg. cost =  0.8377
Epoch :   449     Avg. cost =  0.5756

000:00:19

Epoch :   450     Avg. cost =  0.5809
Epoch :   451     Avg. cost =  0.9853
Epoch :   452     Avg. cost =  0.5567
Epoch :   453     Avg. cost =  1.0052
Epoch :   454     Avg. cost =  0.7191
Epoch :   455     Avg. cost =  0.4601
Epoch :   456     Avg. cost =  0.3354
Epoch :   457     Avg. cost =  0.6059
Epoch :   458     Avg. cost =  0.9174
Epoch :   459     Avg. cost =  0.4886

000:00:20

Epoch :   460     Avg. cost =  0.5468
Epoch :   461     Avg. cost =  1.0928
Epoch :   462     Avg. cost =  0.4310
Epoch :   463     Avg. cost =  0.5727
Epoch :   464     Avg. cost =  1.0541
Epoch :   465     Avg. cost =  0.7952
Epoch :   466     Avg. cos

Epoch :   590     Avg. cost =  0.7745
Epoch :   591     Avg. cost =  0.3775
Epoch :   592     Avg. cost =  0.9310
Epoch :   593     Avg. cost =  0.7997
Epoch :   594     Avg. cost =  1.0268
Epoch :   595     Avg. cost =  0.8324
Epoch :   596     Avg. cost =  1.1260
Epoch :   597     Avg. cost =  0.9134
Epoch :   598     Avg. cost =  0.3937
Epoch :   599     Avg. cost =  0.7841

000:00:25

Epoch :   600     Avg. cost =  0.5001
Epoch :   601     Avg. cost =  0.3831
Epoch :   602     Avg. cost =  0.7535
Epoch :   603     Avg. cost =  0.5455
Epoch :   604     Avg. cost =  0.4660
Epoch :   605     Avg. cost =  1.2849
Epoch :   606     Avg. cost =  1.0117
Epoch :   607     Avg. cost =  0.9877
Epoch :   608     Avg. cost =  0.8251
Epoch :   609     Avg. cost =  0.7617

000:00:25

Epoch :   610     Avg. cost =  0.9077
Epoch :   611     Avg. cost =  0.8602
Epoch :   612     Avg. cost =  0.5261
Epoch :   613     Avg. cost =  0.7772
Epoch :   614     Avg. cost =  0.7885
Epoch :   615     Avg. cos

Epoch :   739     Avg. cost =  0.5833

000:00:30

Epoch :   740     Avg. cost =  0.8683
Epoch :   741     Avg. cost =  0.5077
Epoch :   742     Avg. cost =  0.9682
Epoch :   743     Avg. cost =  0.8033
Epoch :   744     Avg. cost =  0.9601
Epoch :   745     Avg. cost =  0.6725
Epoch :   746     Avg. cost =  0.6389
Epoch :   747     Avg. cost =  0.6202
Epoch :   748     Avg. cost =  0.7805
Epoch :   749     Avg. cost =  0.6085

000:00:31

Epoch :   750     Avg. cost =  0.7766
Epoch :   751     Avg. cost =  0.9337
Epoch :   752     Avg. cost =  0.7133
Epoch :   753     Avg. cost =  0.7262
Epoch :   754     Avg. cost =  0.7554
Epoch :   755     Avg. cost =  0.5721
Epoch :   756     Avg. cost =  0.8163
Epoch :   757     Avg. cost =  1.1546
Epoch :   758     Avg. cost =  0.6525
Epoch :   759     Avg. cost =  0.5434

000:00:31

Epoch :   760     Avg. cost =  0.9323
Epoch :   761     Avg. cost =  0.6426
Epoch :   762     Avg. cost =  0.8559
Epoch :   763     Avg. cost =  0.9565
Epoch :   764 

Epoch :   885     Avg. cost =  0.6887
Epoch :   886     Avg. cost =  0.9711
Epoch :   887     Avg. cost =  0.9075
Epoch :   888     Avg. cost =  1.0609
Epoch :   889     Avg. cost =  0.9569

000:00:36

Epoch :   890     Avg. cost =  0.2897
Epoch :   891     Avg. cost =  0.7463
Epoch :   892     Avg. cost =  0.8665
Epoch :   893     Avg. cost =  0.4797
Epoch :   894     Avg. cost =  0.7864
Epoch :   895     Avg. cost =  1.0191
Epoch :   896     Avg. cost =  0.4608
Epoch :   897     Avg. cost =  0.5271
Epoch :   898     Avg. cost =  0.9208
Epoch :   899     Avg. cost =  0.4451

000:00:36

Epoch :   900     Avg. cost =  0.7642
Epoch :   901     Avg. cost =  0.6332
Epoch :   902     Avg. cost =  0.9730
Epoch :   903     Avg. cost =  0.7660
Epoch :   904     Avg. cost =  0.8496
Epoch :   905     Avg. cost =  0.3918
Epoch :   906     Avg. cost =  0.8981
Epoch :   907     Avg. cost =  0.7422
Epoch :   908     Avg. cost =  0.4388
Epoch :   909     Avg. cost =  0.4014

000:00:37

Epoch :   910 

n:  63     label_input:  [4]     label_output:  [4]
n:  64     label_input:  [4]     label_output:  [4]
n:  65     label_input:  [4]     label_output:  [4]
n:  66     label_input:  [4]     label_output:  [3]
n:  67     label_input:  [4]     label_output:  [4]
n:  68     label_input:  [4]     label_output:  [4]
n:  69     label_input:  [4]     label_output:  [4]
n:  70     label_input:  [4]     label_output:  [4]
n:  71     label_input:  [4]     label_output:  [4]
n:  72     label_input:  [4]     label_output:  [4]
n:  73     label_input:  [4]     label_output:  [4]
n:  74     label_input:  [4]     label_output:  [4]
n:  75     label_input:  [4]     label_output:  [4]
n:  76     label_input:  [4]     label_output:  [4]
n:  77     label_input:  [4]     label_output:  [4]
n:  78     label_input:  [4]     label_output:  [4]
n:  79     label_input:  [4]     label_output:  [8]
n:  80     label_input:  [4]     label_output:  [3]
n:  81     label_input:  [4]     label_output:  [4]
n:  82     l

n:  249     label_input:  [7]     label_output:  [7]
n:  250     label_input:  [7]     label_output:  [7]
n:  251     label_input:  [7]     label_output:  [7]
n:  252     label_input:  [7]     label_output:  [7]
n:  253     label_input:  [7]     label_output:  [7]
n:  254     label_input:  [7]     label_output:  [7]
n:  255     label_input:  [7]     label_output:  [7]
n:  256     label_input:  [7]     label_output:  [7]
n:  257     label_input:  [7]     label_output:  [7]
n:  258     label_input:  [7]     label_output:  [7]
n:  259     label_input:  [7]     label_output:  [7]
n:  260     label_input:  [7]     label_output:  [7]
n:  261     label_input:  [7]     label_output:  [7]
n:  262     label_input:  [7]     label_output:  [7]
n:  263     label_input:  [7]     label_output:  [7]
n:  264     label_input:  [7]     label_output:  [7]
n:  265     label_input:  [7]     label_output:  [7]
n:  266     label_input:  [5]     label_output:  [5]
n:  267     label_input:  [5]     label_output