# Google Machine Learning Challenge
#### Submit을 위한 Test set Validation

In [1]:
import os
from glob import glob

import pandas as pd # Dataframe
import numpy as np #Linear Algebra
import matplotlib.pyplot as plt #Visualization
%matplotlib inline
from skimage import color, io 
from scipy.misc import imresize 
from PIL import Image 
from lime.lime_image import LimeImageExplainer 

import tflearn
from tflearn.data_utils import shuffle, to_categorical
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.estimator import regression
from tflearn.data_preprocessing import ImagePreprocessing
from tflearn.data_augmentation import ImageAugmentation
from tflearn.metrics import Accuracy

In [2]:
# Root folder of the challenge data; the glob pattern below targets the JPEG
# files inside its `test/` subfolder.
files_path = './data'
test_file_path = os.path.join(files_path, 'test/*.jpg')

# Sort the matches in place so the file order (and therefore the row order of
# everything derived from it) is the same on every run.
test_file = glob(test_file_path)
test_file.sort()
n_file = len(test_file)

In [3]:
# Pre-allocate one zero-filled 128 x 128 x 3 float64 slot per test file; the
# loading loop writes each resized image into its slot.
X_test = np.zeros(shape=(n_file, 128, 128, 3), dtype=np.float64)
count = 0  # not read anywhere in the visible cells

In [4]:
# Read every test image, resize it to the 128 x 128 network input size and
# store it in the row of X_test that matches its position in test_file.
#
# Loading stays best-effort: a file that cannot be read, resized or stored is
# skipped and its row keeps the all-zero placeholder. Unlike a bare `except:`,
# only ordinary errors are caught (so Ctrl-C still interrupts the loop) and
# every skipped file is recorded and reported, because the model will still
# emit a prediction for those all-zero rows.
failed_files = []  # (path, error) pairs for images that were skipped
for idx, f in enumerate(test_file):
    try:
        img = io.imread(f)
        new_img = imresize(img, (128, 128, 3))
        X_test[idx] = np.array(new_img)
    except Exception as err:
        failed_files.append((f, err))

if failed_files:
    print('Skipped {} of {} test images; their rows in X_test are all zeros:'.format(
        len(failed_files), len(test_file)))
    for skipped_path, skipped_err in failed_files:
        print('  {}: {!r}'.format(skipped_path, skipped_err))

In [5]:
# Augmentation pipeline: random left/right flips plus random rotations
# bounded by max_angle=25.
img_aug = ImageAugmentation()
img_aug.add_random_flip_leftright()
img_aug.add_random_rotation(max_angle=25.0)

# Preprocessing pipeline: feature-wise zero-centering followed by feature-wise
# std normalisation. No explicit mean/std is passed here.
# NOTE(review): presumably the statistics come from the restored checkpoint —
# TODO confirm.
img_prep = ImagePreprocessing()
img_prep.add_featurewise_zero_center()
img_prep.add_featurewise_stdnorm()

In [6]:
# NOTE(review): the layers are created in a fixed order; the checkpoint loaded
# later presumably has to match this exact architecture — keep it in sync with
# the training notebook (TODO confirm).

# Input: batches of 128 x 128 images with 3 channels, with the preprocessing
# and augmentation pipelines attached.
net = input_data(shape=[None, 128, 128, 3],
                 data_preprocessing=img_prep,
                 data_augmentation=img_aug)

# Convolutional block 1: one 32-filter conv (filter size 3), max-pool (kernel 2).
net = conv_2d(net, nb_filter=32, filter_size=3, activation='relu')
net = max_pool_2d(net, kernel_size=2)

# Convolutional block 2: two 64-filter convs (filter size 3), max-pool (kernel 2).
net = conv_2d(net, nb_filter=64, filter_size=3, activation='relu')
net = conv_2d(net, nb_filter=64, filter_size=3, activation='relu')
net = max_pool_2d(net, kernel_size=2)

# Classifier head: 512-unit dense layer, dropout with keep_prob 0.5, and a
# 2-way softmax output.
net = fully_connected(net, n_units=512, activation='relu')
net = dropout(net, keep_prob=0.5)
net = fully_connected(net, n_units=2, activation='softmax')

# Estimator: Adam on categorical cross-entropy, reporting accuracy.
acc = Accuracy(name="Accuracy")
network = regression(net,
                     optimizer='adam',
                     loss='categorical_crossentropy',
                     learning_rate=0.0005,
                     metric=acc)

In [7]:
# Wrap the graph defined above in a tflearn DNN model object; the trained
# parameters are loaded into it in the next cell.
model = tflearn.DNN(network)

In [9]:
# Restore the trained parameters from the saved checkpoint. The path is
# relative, so the notebook has to be run from the directory that contains
# the 'MLCrooftop_final.tflearn' checkpoint files.
model.load('MLCrooftop_final.tflearn')

INFO:tensorflow:Restoring parameters from /Users/mac/OneDrive/DSS/Kaggle/MLChallenge/MLCrooftop_final.tflearn


In [10]:
# `count` is reset here but is not read anywhere in the visible notebook.
count = 0
# Run the restored model over the whole test tensor in a single call. As the
# displayed output shows, the result has one row per image with two values
# (the outputs of the 2-unit softmax layer).
result = model.predict(X_test)

In [11]:
result

array([[  4.22610879e-01,   5.77389121e-01],
       [  9.57554996e-01,   4.24449816e-02],
       [  9.99999881e-01,   1.23994738e-07],
       ..., 
       [  8.86700630e-01,   1.13299362e-01],
       [  3.36064630e-07,   9.99999642e-01],
       [  8.74228954e-01,   1.25771031e-01]], dtype=float32)

In [12]:
# Turn the two-column prediction array into hard labels: keep column 1 and
# mark an image as 1 when that value is at least 0.5, otherwise 0.
# NOTE(review): which class column 1 stands for is not visible here — confirm
# against the training notebook.
result = pd.DataFrame(result)
result = (result[1] >= 0.5).astype('int64')

# Show how many images fall into each label.
result.value_counts()

1    997
0    965
Name: 1, dtype: int64

In [23]:
# The submission ids are the bare file names of the test images. Take them
# with os.path.basename rather than slicing off a fixed 12 characters, which
# only worked because the directory prefix './data/test/' happens to be 12
# characters long and would silently produce wrong ids if files_path changed.
files = [os.path.basename(path) for path in test_file]
df_test = pd.DataFrame(files)

In [46]:
# Build the two-column submission table: the file name as `Id` and the
# predicted 0/1 label as `Category`. Both inputs share the same row index, so
# they line up row by row; `columns=` pins the column order.
df = pd.DataFrame({'Id': df_test[0], 'Category': result},
                  columns=['Id', 'Category'])
df.head()

Unnamed: 0,Id,Category
0,100028593_af7d48448a_o.jpg,1
1,10003034604_c2beed00c5_o.jpg,0
2,1001057768_9a5a16f687_o.jpg,0
3,10013551155_28b798defc_o.jpg,0
4,10015407805_7eeee4d177_o.jpg,0


In [47]:
# Write the submission table to disk. index=False keeps the DataFrame index
# out of the file, so it contains only the Id and Category columns.
df.to_csv('data/submit.csv', index = False)

# Result
### Accuracy : 0.80836
### 27th on [Leaderboard](https://inclass.kaggle.com/c/kmlc-challenge-1-cats-vs-dogs/leaderboard/public)