In [11]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import random

tf.reset_default_graph()

# Fashion MNIST

### load data

train set과 test 셋을 불러오자

데이터 출처 : https://www.kaggle.com/zalando-research/fashionmnist

In [3]:
df_train=pd.read_csv('fashion-mnist_train.csv')
df_test =pd.read_csv('fashion-mnist_test.csv')

df_train.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


불러온 데이터의 모양을 확인해보자

In [12]:
print('train shape:',df_train.shape)
print('test shape:',df_test.shape)

train shape: (60000, 785)
test shape: (10000, 785)


### set data : train set

학습을 위한 train set을 정해주자

In [13]:
train_set=df_train.as_matrix(None)


x_train=train_set[:,1:]
y_train=train_set[:,[0]]

print('x_train shape:',x_train.shape)
print('y_train shape:',y_train.shape)

x_train shape: (60000, 784)
y_train shape: (60000, 1)


한번에 6만개의 데이터를 학습시키기에는 너무 많은 양이다.

따라서 batch로 나눠주자 (배치 size =100)

In [14]:
batch_size=100
train_x_batch, train_y_batch = tf.train.batch([train_set[:,1:],train_set[:,[0]]] , batch_size=100)

print(train_x_batch.shape)
print(train_y_batch.shape)

(100, 60000, 784)
(100, 60000, 1)


### set data : test set

test 세트를 설정하자

In [15]:
test_set=df_test.as_matrix(columns=None)

x_test=test_set[:,1:]
y_test=test_set[:,[0]]

print('x_test shape:',x_test.shape)
print('y_test shape:',y_test.shape)

x_test shape: (10000, 784)
y_test shape: (10000, 1)


test세트는 따로 배치 해줄 필요 없다.

## 모델 만들기

### 첫번째 레이어

X:우리가 입력할 사진은 (60000, 784)와 (10000,784)이므로 28X28로 바꿔줘야 한다.

Y:원핫 인코딩이 적용되므로 출력은 10개가 되야한다.

In [16]:
X= tf.placeholder(tf.float32, [None,784])
X_img=tf.reshape(X, [-1,28,28,1])


Y = tf.placeholder(tf.int32, [None, 1])  

Y_one_hot = tf.one_hot(Y, 10)  # one hot
print("one_hot", Y_one_hot)
Y_one_hot = tf.reshape(Y_one_hot, [-1, 10])
print("reshape", Y_one_hot)

one_hot Tensor("one_hot:0", shape=(?, 1, 10), dtype=float32)
reshape Tensor("Reshape_1:0", shape=(?, 10), dtype=float32)


이제 첫번째 필터를 만들어주자

첫번째 필터는 3X3 크기로 하고 색은 1개, 32개의 필터를 사용하자 -> (3,3,1,32)

In [17]:
W1=tf.Variable(tf.random_normal([3,3,1,32],stddev=0.01))
L1= tf.nn.conv2d(X_img,W1,strides=[1,1,1,1],padding='SAME')

print('conv2d:',L1.shape)

conv2d: (?, 28, 28, 32)


stride는 1X1로 하고 padding까지 적용해주면 모양은 (?,28,28,32)일 것이다.

통과시키고 렐루 함수도 적용시키자

In [18]:
L1=tf.nn.relu(L1)

print('Relu:',L1.shape)

Relu: (?, 28, 28, 32)


Max-pooling도 통과시켜주면 첫번째 레이어가 끝난다.

In [19]:
L1 = tf.nn.max_pool(L1, ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')

print(L1.shape)

(?, 14, 14, 32)


스트라이드 2X2 이므로 크기는 (?, 28, 28, 32)에서 절반으로 줄어든 (?, 14, 14, 32)가 될것이다

### 두번째 레이어

두번째 레이어를 통과시키자.

이때 들어오는 값의 크기는 Max-pooling을 통과하고 나온 (?, 14, 14, 32)가 될것이다

두번째 필터도 3X3 크기에 64개로 만들자. 단 이번에는 32개의 그림이 들어으므로 ([3,3,32,64])로 해줘야한다.

이하 과정은 첫번째 레이어와 같으므로 생략한다.

In [20]:
W2=tf.Variable(tf.random_normal([3,3,32,64],stddev=0.01))

L2=tf.nn.conv2d(L1,W2,strides=[1,1,1,1],padding='SAME')

L2=tf.nn.relu(L2)

L2= tf.nn.max_pool(L2,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')

print(L2.shape)

(?, 7, 7, 64)


두번째 레이어에 들어올땐 (?, 14, 14, 32) 였지만 나갈땐 (?, 7, 7, 64)이 됬다.

### FC layer

지금 두번째 레이어까지 통과한 그림은 입체적인 모양이므로 이것을 한줄로 나열해야 한다.

In [21]:
L2= tf.reshape(L2,[-1,7*7*64])

print(L2.shape)

(?, 3136)


받은 7X7X64를 10개(숫자들의 원핫인코딩: 0~9)으로 출력한다.

In [22]:
W3=tf.get_variable("W3",shape=[7*7*64,10],initializer=tf.contrib.layers.xavier_initializer())
b=tf.Variable(tf.random_normal([10]))

hypothesis = tf.matmul(L2,W3)+b

### loss/cost function
모델을 만들었으므로 loss 함수와 학습시킬 준비를 하자

In [23]:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y_one_hot))
optimizer =tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)

### Accuracy
추후 추가

### Graph Initialize


In [24]:
sess= tf.Session()
sess.run(tf.global_variables_initializer())

### Start populating

In [25]:
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

### Training

에포치는 15번하자

In [26]:
learning_rate = 0.001
training_epochs = 15
batch_size=100
total_batch= int(len(x_train)/batch_size)

In [27]:
print(train_x_batch.shape)

(100, 60000, 784)


학습 가즈ㅏㅏㅏㅏ

In [28]:
'''
for step in range(2001):
    x_batch, y_batch = sess.run([train_x_batch, train_y_batch])
    cost_val, hy_val, _ = sess.run(
        [cost, hypothesis, train], feed_dict={X: x_batch, Y: y_batch})
    if step % 10 == 0:
        print(step, "Cost: ", cost_val, "\nPrediction:\n", hy_val)
'''

for epoch in range(training_epochs):
    avg_cost=0
    for i in range(total_batch):
        x_batch, y_batch = sess.run([train_x_batch, train_y_batch])
        c, _ = sess.run([cost, optimizer],feed_dict={X:x_batch, Y:y_batch })
        avg_cost += c/ total_batch
    print('Epoch:','%04d'%(epoch+1),'cost=','{:.9f}'.format(avg_cost))

coord.request_stop()
coord.join(threads)    
print('Learning finish')    

ResourceExhaustedError: OOM when allocating tensor with shape[100,60000,784]
	 [[Node: batch = QueueDequeueManyV2[component_types=[DT_INT64, DT_INT64], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/device:CPU:0"](batch/fifo_queue, batch/n)]]

Caused by op 'batch', defined at:
  File "C:\ProgramData\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\ProgramData\Anaconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\ProgramData\Anaconda3\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "C:\ProgramData\Anaconda3\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "C:\ProgramData\Anaconda3\lib\site-packages\tornado\ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "C:\ProgramData\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\ProgramData\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2698, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2802, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-2c0895d8c5e0>", line 2, in <module>
    train_x_batch, train_y_batch = tf.train.batch([train_set[:,1:],train_set[:,[0]]] , batch_size=100)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\training\input.py", line 927, in batch
    name=name)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\training\input.py", line 722, in _batch
    dequeued = queue.dequeue_many(batch_size, name=name)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\data_flow_ops.py", line 464, in dequeue_many
    self._queue_ref, n=n, component_types=self._dtypes, name=name)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_data_flow_ops.py", line 2417, in _queue_dequeue_many_v2
    component_types=component_types, timeout_ms=timeout_ms, name=name)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 2956, in create_op
    op_def=op_def)
  File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[100,60000,784]
	 [[Node: batch = QueueDequeueManyV2[component_types=[DT_INT64, DT_INT64], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/device:CPU:0"](batch/fifo_queue, batch/n)]]


In [None]:
next_batch