## 딥러닝 모델 구현하기
* tensorflow 활용한 딥러닝 모델 구현
* tf version(1.15.0)
* 설치(로컬).CPU버전 : pip install tensorflow==1.15.0

### 학습내용
* 컬럼 한개를 이용한 모델 만들기
* 컬럼 두개를 이용한 모델 만들기
* 컬럼 두개, 행렬을 이용한 모델 만들기

In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np

In [2]:
# 버전확인
print(tf.__version__)
print(np.__version__)
print(pd.__version__)

1.15.0
1.16.4
0.24.2


In [3]:
# 데이터 불러오기
train = pd.read_csv("./bike/train.csv", parse_dates=['datetime'])
test = pd.read_csv("./bike/test.csv", parse_dates=['datetime'])

In [4]:
print(train.columns)
print(test.columns)

Index(['datetime', 'season', 'holiday', 'workingday', 'weather', 'temp',
       'atemp', 'humidity', 'windspeed', 'casual', 'registered', 'count'],
      dtype='object')
Index(['datetime', 'season', 'holiday', 'workingday', 'weather', 'temp',
       'atemp', 'humidity', 'windspeed'],
      dtype='object')


### 입력 데이터 선택

In [5]:
# input_col = [ 'temp', 'atemp']
input_col = ['temp']
labeled_col = ['count']

In [6]:
x_data = train[input_col]
y_data = train[labeled_col]

x_test = test[input_col]
print(type(x_data))

<class 'pandas.core.frame.DataFrame'>


### 신경망 이용을 위해 numpy 배열로 변환

In [7]:
x_data = np.array(x_data)
x_test = np.array(x_test)
y_data = np.array(y_data)
print(type(x_data))

<class 'numpy.ndarray'>


### tf.Variable를 이용한 임의의 W값과 b값

In [8]:
W1 = tf.Variable(tf.random_uniform([1], -1.0, 1.0))
b = tf.Variable(tf.random_uniform([1], -1.0, 1.0))

In [9]:
X1 = tf.placeholder(tf.float32, name="X1")
Y = tf.placeholder(tf.float32, name="Y")

In [10]:
# hypothesis = W1 * X1 + W2 * X2 + b
hypothesis = W1 * X1 + b

In [11]:
### 비용함수, 최적화 함수
cost = tf.reduce_mean(tf.square(hypothesis - Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
train_op = optimizer.minimize(cost)

In [12]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(100):
        _, cost_val = sess.run([train_op, cost], feed_dict={X1:x_data, Y:y_data})
        
        if step%10==0:
            print(step, cost_val, sess.run(W1), sess.run(b))
            
        
    pred = sess.run(hypothesis, feed_dict={X1:x_test})
    
print(pred.shape)
print(type(pred))
print(pred[0:5])
pred = pred.reshape(-1,)
pred.shape

0 73607.99 [8.817715] [0.8653382]
10 27708.64 [9.391976] [0.90336806]
20 27708.623 [9.391406] [0.9166129]
30 27708.602 [9.390837] [0.92982364]
40 27708.588 [9.390269] [0.94300026]
50 27708.57 [9.389704] [0.95614314]
60 27708.553 [9.389139] [0.9692522]
70 27708.535 [9.3885765] [0.98232734]
80 27708.514 [9.388015] [0.9953689]
90 27708.504 [9.387454] [1.0083768]
(6493, 1)
<class 'numpy.ndarray'>
[[101.08497]
 [101.08497]
 [101.08497]
 [101.08497]
 [101.08497]]


(6493,)

In [13]:
dat = pd.DataFrame({'count':pred})
sub = pd.read_csv("./bike/sampleSubmission.csv")
print(sub.columns)
sub['count'] = dat['count']
sub.to_csv("tf_pred.csv", index=False)

Index(['datetime', 'count'], dtype='object')


### 실습1. 변수 2개일 경우로 해보기

In [14]:
# input_col = [ 'temp', 'atemp']
input_col1 = ['temp']
input_col2 = ['temp']
labeled_col = ['count']

x_data1 = train['temp']
x_data2 = train['atemp']
y_data = train[labeled_col]

x_data1 = np.array(x_data1)
x_data2 = np.array(x_data1)
y_data = np.array(y_data)

In [15]:
W1 = tf.Variable(tf.random_uniform([1], -1.0, 1.0))
W2 = tf.Variable(tf.random_uniform([1], -1.0, 1.0))
b = tf.Variable(tf.random_uniform([1], -1.0, 1.0))

In [16]:
X1 = tf.placeholder(tf.float32, name="X1")
X2 = tf.placeholder(tf.float32, name="X2")
Y = tf.placeholder(tf.float32, name="Y")

In [17]:
hypothesis = W1 * X1 + W2 * X2 + b

In [18]:
### 비용함수, 최적화 함수
cost = tf.reduce_mean(tf.square(hypothesis - Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
train_op = optimizer.minimize(cost)

In [19]:
%%time

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(100):
        _, cost_val = sess.run([train_op, cost], feed_dict={X1:x_data1,X2:x_data2, Y:y_data})
        
        if step%10==0:
            print(step, cost_val, sess.run(W1), sess.run(W2), sess.run(b))

0 63460.0 [7.45807] [7.3372097] [-0.06780264]
10 39626.76 [5.1063576] [4.9854975] [0.32578617]
20 37682.8 [4.4310036] [4.3101435] [0.79026836]
30 37503.816 [4.2316656] [4.1108065] [1.2739711]
40 37467.23 [4.167514] [4.046654] [1.7622242]
50 37442.258 [4.141761] [4.020901] [2.2508652]
60 37418.33 [4.126933] [4.0060735] [2.7387145]
70 37394.59 [4.1152277] [3.9943683] [3.2254384]
80 37370.992 [4.1044307] [3.9835713] [3.7109451]
90 37347.504 [4.093907] [3.9730473] [4.1952114]
Wall time: 2min 45s


### 행렬로 연산해 보기

In [20]:
input_col = [ 'temp', 'atemp']
labeled_col = ['count']

x_data = train[input_col]
y_data = train[labeled_col]

x_test = test[input_col]

x_data = np.array(x_data)
y_data = np.array(y_data)
print(x_data.shape, y_data.shape)

(10886, 2) (10886, 1)


In [21]:
X1 = tf.placeholder(tf.float32, [None, 2], name="X1")
Y = tf.placeholder(tf.float32, [None, 1], name="Y")

W1 = tf.Variable(tf.random_uniform([2, 1], -1.0, 1.0))
b = tf.Variable(tf.random_uniform([1], -1.0, 1.0))

In [22]:
# X = tf.placeholder(tf.float32, [None, 2])   # [? * 2] X [2 * 1] => ? X 1 
# W = tf.Variable(tf.random_normal([2,1]))

hypothesis = tf.matmul(X1, W1) + b
hypothesis

<tf.Tensor 'add_3:0' shape=(?, 1) dtype=float32>

In [23]:
### 비용함수, 최적화 함수
cost = tf.reduce_mean(tf.square(hypothesis - Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.0001)
train_op = optimizer.minimize(cost)

In [24]:
%%time

sess = tf.Session()
sess.run(tf.global_variables_initializer())

for step in range(100):
    _, cost_val = sess.run([train_op, cost], feed_dict={X1:x_data, Y:y_data})
    
    if step%10==0:
        print(step, cost_val, sess.run(W1[0]), sess.run(W1[1]))

        
pred = sess.run(hypothesis, feed_dict={X1:x_test})
pred = pred.reshape(-1,)

sess.close()

0 57148.414 [1.3524553] [1.6722946]
10 27938.799 [3.7737215] [4.461745]
20 27736.857 [3.9830272] [4.686672]
30 27735.326 [4.008456] [4.698452]
40 27735.182 [4.018583] [4.6925306]
50 27735.049 [4.027419] [4.6851544]
60 27734.916 [4.03613] [4.677672]
70 27734.793 [4.0448127] [4.670196]
80 27734.652 [4.0534744] [4.6627364]
90 27734.53 [4.0621166] [4.6552944]
Wall time: 803 ms


In [None]:
dat = pd.DataFrame(pred, columns=['count'])
print(dat.columns)

sub = pd.read_csv("./bike/sampleSubmission.csv")
print(sub.columns)
sub['count'] = dat['count']
sub.to_csv("tf_pred2.csv", index=False)