## Fancy Softmax Classification
- Data load: read the CSV file with pandas
- One-hot encoding of the class labels

In [1]:
import tensorflow as tf
import pandas as pd

In [2]:
# Load the zoo dataset. header=None tells pandas the file has no header row,
# so the columns are labelled with integers instead of names.
df = pd.read_csv('zoo.txt', sep=',', header=None)

In [3]:
# Preview the first rows to sanity-check that the file parsed as expected.
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,0
1,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,0
2,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,3
3,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,0
4,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,0


In [4]:
# Feature matrix: every column except the last one.
x_data = df.iloc[:, :-1]

In [5]:
# Dimensions of the feature matrix as (rows, columns).
x_data.shape

(101, 16)

In [6]:
# Peek at the first feature rows (the last column of df is no longer present).
x_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1
1,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1
2,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0
3,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1
4,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1


In [7]:
# Target labels: the last column of the frame, kept as a 1-D Series.
y_data = df[df.columns[-1]]

In [8]:
# A Series is one-dimensional, so its shape is (rows,).
y_data.shape

(101,)

In [9]:
# Wrap the label Series in a single-column DataFrame so that it is 2-D and
# can be fed to the [None, 1] label placeholder.
y_data_df = pd.DataFrame(y_data)

In [10]:
# Shape after converting the Series to a DataFrame: (rows, 1).
y_data_df.shape

(101, 1)

In [11]:
# First few labels; they are integer class ids.
y_data.head()

0    0
1    0
2    3
3    0
4    0
Name: 16, dtype: int64

In [12]:
# Disabled experiment: preview of the labels cast to float32. Left commented out.
# y_data.astype('float32').head()

In [13]:
# Number of target classes; used as the one-hot depth and as the output width
# of the weight and bias variables.
# NOTE(review): assumes the labels are integer ids in [0, nb_classes) - confirm against the data.
nb_classes = 7

### graph 

In [14]:
# Graph inputs: a batch of 16-feature rows and the matching integer class ids.
# The leading None leaves the batch size unspecified.
X = tf.placeholder(dtype=tf.float32, shape=[None, 16])
Y = tf.placeholder(dtype=tf.int32, shape=[None, 1])

### One-Hot Encoding

In [15]:
# Static shape of the label placeholder before one-hot encoding.
Y.shape

TensorShape([Dimension(None), Dimension(1)])

In [16]:
# one_hot appends a new axis, so the [None, 1] labels gain one extra dimension.
Y_one_hot = tf.one_hot(indices=Y, depth=nb_classes)

In [17]:
# Shape right after one_hot: one more axis than the label placeholder.
Y_one_hot.shape

TensorShape([Dimension(None), Dimension(1), Dimension(7)])

In [18]:
# A reshape is needed to bring the labels back down to 2-D: [None, nb_classes].
Y_one_hot = tf.reshape(Y_one_hot, shape=[-1, nb_classes])

In [19]:
# Shape after the reshape.
Y_one_hot.shape

TensorShape([Dimension(None), Dimension(7)])

### Other Graph
- Weight , Bias, logits, hypothesis, cost_i, cost, optimizer

In [20]:
# Static shape of the feature placeholder; its last dimension has to match
# the first dimension of the weight matrix used in the matmul.
X.shape

TensorShape([Dimension(None), Dimension(16)])

In [21]:
# Fix the graph-level seed before the randomly initialised variables are
# created, so that a fresh "Restart & Run All" starts from the same weights.
# The seed value itself is arbitrary.
tf.set_random_seed(777)

# Softmax-regression parameters: a [16, nb_classes] weight matrix (one column
# per class) and one bias per class, both drawn from a normal distribution.
W = tf.Variable(tf.random_normal([16, nb_classes]), name='weight')
b = tf.Variable(tf.random_normal([nb_classes]), name='bias')

In [22]:
# Linear scores per class; the bias is broadcast across the batch.
logits = tf.add(tf.matmul(X, W), b)
# Hypothesis: softmax of the scores.
h = tf.nn.softmax(logits=logits)

In [23]:
# tf.nn.softmax_cross_entropy_with_logits is deprecated, so the _v2 variant is
# used. It takes the raw logits, not the softmax output.
cost_i = tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y_one_hot, logits=logits)
# Scalar loss: mean of the per-example cross-entropy values.
cost = tf.reduce_mean(cost_i)
# Plain gradient descent; `optimizer` is the training op that minimises the loss.
sgd = tf.train.GradientDescentOptimizer(learning_rate=0.1)
optimizer = sgd.minimize(cost)

### 예측 및 평가

In [24]:
# Predicted class: index of the largest softmax output in each row.
prediction = tf.argmax(h, axis=1)
# True class: recovered from the one-hot labels the same way.
true_label = tf.argmax(Y_one_hot, axis=1)
correct_prediction = tf.equal(prediction, true_label)
# Accuracy: mean of the per-row hit/miss flags after casting them to float.
acc = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))

### Launch graph

In [40]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # The same full-batch feed is used for every training step and every report.
    train_feed = {X: x_data, Y: y_data_df}

    for step in range(2001):
        sess.run(optimizer, feed_dict=train_feed)
        if step % 200 != 0:
            continue
        # Every 200 steps, report loss and accuracy on the training data.
        # The fetched values get their own names (loss_val, acc_val) so the
        # graph tensors `cost` and `acc` are not overwritten.
        loss_val, acc_val = sess.run([cost, acc], feed_dict=train_feed)
        print('step: {0}, \n loss : {1}, \n acc : {2}'.format(step, loss_val, acc_val))

    # Only X has to be fed to compute the predictions.
    pred = sess.run(prediction, feed_dict={X: x_data})
    # flatten would turn a 2-D array into a 1-D array, but y_data was loaded
    # with pandas as a Series, so it is already 1-D and needs no flattening.
    for p, y in zip(pred, y_data):
        print("[{0}] Prediction: {1} - True Y : {2}".format(p == int(y), p, int(y)))

step: 0, 
 loss : 5.272947788238525, 
 acc : 0.30693069100379944
step: 200, 
 loss : 0.41260263323783875, 
 acc : 0.8712871074676514
step: 400, 
 loss : 0.24978190660476685, 
 acc : 0.9405940771102905
step: 600, 
 loss : 0.18034476041793823, 
 acc : 0.9504950642585754
step: 800, 
 loss : 0.1397009938955307, 
 acc : 0.9603960514068604
step: 1000, 
 loss : 0.11296695470809937, 
 acc : 0.9801980257034302
step: 1200, 
 loss : 0.0943373441696167, 
 acc : 0.9900990128517151
step: 1400, 
 loss : 0.08080713450908661, 
 acc : 1.0
step: 1600, 
 loss : 0.07062531262636185, 
 acc : 1.0
step: 1800, 
 loss : 0.06272418051958084, 
 acc : 1.0
step: 2000, 
 loss : 0.05643046647310257, 
 acc : 1.0
[True] Prediction: 0 - True Y : 0
[True] Prediction: 0 - True Y : 0
[True] Prediction: 3 - True Y : 3
[True] Prediction: 0 - True Y : 0
[True] Prediction: 0 - True Y : 0
[True] Prediction: 0 - True Y : 0
[True] Prediction: 0 - True Y : 0
[True] Prediction: 3 - True Y : 3
[True] Prediction: 3 - True Y : 3
[True

### Issue 
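1. `TypeError: ... Can not convert a float32 into a Tensor or Operation.`
    - Raised when the result of `sess.run` is assigned to the same name as the tensor being fetched: after the first call the name holds a float32 value instead of the graph tensor, so the next `sess.run` fails.
    - Wrong: `loss_val, acc = sess.run([cost, acc], ...)` --> Fixed: `loss_val, acc_val = sess.run([cost, acc], ...)`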
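2. A pandas `Series` cannot be flattened (it has no `flatten` method)
    - `y_data.flatten()` is meant to turn a 2-D array into a 1-D array.
    - Here the data was loaded with pandas, so `y_data` is a `Series` and is already 1-D; no flatten is needed.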
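3. `IndexError` (tuple index out of range) from `str.format`
    - Positional placeholders are zero-based, so three arguments are referenced as `{0} {1} {2}`.
    - `print('{1} {2} {3}'.format(...))` --> `print('{0} {1} {2}'.format(...))`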