https://www.youtube.com/user/sentdex

In [44]:
import cv2
import numpy as np
import os
from random import shuffle
from tqdm import tqdm

TRAIN_DIR = "C:/Users/Karam/Codes/Preprocessing_Sentdex/Kaggle_Data/train"
TEST_DIR ="C:/Users/Karam/Codes/Preprocessing_Sentdex/Kaggle_Data/test"
IMG_SIZE = 50
LR = 1e-3

MODEL_NAME = "dogsvscats-{}-{}.model".format(LR,"8conv-basic-video")

In [38]:
def label_img(img):
    word_label = img.split(".")[-3] # image name is dog.93.png so -3 is dog
    if word_label == "cat": return [1,0]
    elif word_label == "dog" : return [0,1]

In [39]:
def create_train_data():
    training_data = []
    for img in tqdm(os.listdir(TRAIN_DIR)):
        label = label_img(img)
        path = os.path.join(TRAIN_DIR,img)
        img = cv2.resize(cv2.imread(path, cv2.IMREAD_GRAYSCALE),(IMG_SIZE,IMG_SIZE),interpolation = cv2.INTER_CUBIC)
        training_data.append([np.array(img),np.array(label)])
        
    shuffle(training_data)
    np.save("train_data.npy",training_data)
    return training_data

In [40]:
def process_test_data():
    testing_data=[]
    for img in tqdm(os.listdir(TEST_DIR)):
        path = os.path.join(TEST_DIR,img)
        img_num = img.split(".")[0]
        img = cv2.resize(cv2.imread(path, cv2.IMREAD_GRAYSCALE),(IMG_SIZE,IMG_SIZE),interpolation = cv2.INTER_CUBIC)
        testing_data.append([np.array(img), img_num])
    np.save("test_data.npy",testing_data)
    return testing_data

In [41]:
#train_data = create_train_data()
# if you already have train data:
train_data = np.load('train_data.npy')

In [42]:
#testing_data = process_test_data()
test_data = np.load("test_data.npy")

In [45]:
import tflearn
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression

import tensorflow as tf
tf.reset_default_graph()

convnet = input_data(shape=[None, IMG_SIZE, IMG_SIZE, 1], name='input')

convnet = conv_2d(convnet, 32, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 64, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 32, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 64, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 32, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 64, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 32, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 64, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = fully_connected(convnet, 1024, activation='relu')
convnet = dropout(convnet, 0.8)

convnet = fully_connected(convnet, 1024, activation='relu')
convnet = dropout(convnet, 0.8)

convnet = fully_connected(convnet, 2, activation='softmax')
convnet = regression(convnet, optimizer='adam', learning_rate=LR, loss='categorical_crossentropy', name='targets')

model = tflearn.DNN(convnet, tensorboard_dir='log')

In [46]:
if os.path.exists("{}.meta".format(MODEL_NAME)):
    model.load(MODEL_NAME)
    print("model loaded!")

In [47]:
train = train_data[:-500] #the data except the last 500
test = train_data[-500:]

In [48]:
X = np.array([i[0] for i in train]).reshape(-1, IMG_SIZE,IMG_SIZE, 1) # features set, [0] is the image data
Y = [i[1] for i in train] # Labels set

test_x = np.array([i[0] for i in train]).reshape(-1, IMG_SIZE,IMG_SIZE, 1) # for testing accuracy
test_y = [i[1] for i in train]

In [50]:
# train the model
model.fit({'input': X}, {'targets': Y}, n_epoch=15, validation_set=({'input': test_x}, {'targets': test_y}), 
    snapshot_step=500, show_metric=True, run_id=MODEL_NAME)

Training Step: 5893  | total loss: 0.22807 | time: 25.304s
| Adam | epoch: 016 | loss: 0.22807 - acc: 0.9237 -- iter: 24448/24500
Training Step: 5894  | total loss: 0.21555 | time: 29.058s
| Adam | epoch: 016 | loss: 0.21555 - acc: 0.9298 | val_loss: 0.15795 - val_acc: 0.9404 -- iter: 24500/24500
--


In [18]:
#tensorboard --logdir=foo:C:\Users\Karam\Codes\Preprocessing_Sentdex\log

In [51]:
#to save the model
model.save(MODEL_NAME)

INFO:tensorflow:C:\Users\Karam\Codes\Preprocessing_Sentdex\dogsvscats-0.001-8conv-basic-video.model is not in all_model_checkpoint_paths. Manually adding it.


In [None]:
#to recall the model
model.load(MODEL_NAME)

# Test the data before Submission

In [52]:
import matplotlib.pyplot as plt

In [None]:
# if we don't have the data
#test_data = preprocess_test_data
# if we already have it
test_data = np.load("test_data.npy")

fig = plt.figure() 
for num,data in enumerate(test_data[:12]): # iterate the first 12 testing data
    # cat:[1,0] , dog: [0,1]
    
    img_num = data[1]
    img_data = data[0]
    
    y = fig.add_subplot(3,4,num+1)
    orig = img_data.reshape
    data = img_data.reshape(IMG_SIZE,IMG_SIZE,1)
    
    model_out = model.predict([data])[0]
    
    if np.argmax(model_out) == 1: str_label="Dog"
    else: str_label = "Cat"
        
    y.imshow(orig,cmap="gray")
    plt.title(str_label)
    y.axes.get_xaxis().set_visible(False)
    y.axes.get_yaxis().set_visible(False)
plt.show()

In [54]:
with open("submission-file.csv","w") as f:
    f.write("id,label\n")

In [56]:
with open("submission-file.csv","a") as f:
    for data in tqdm(test_data):
        img_num = data[1]
        img_data = data[0]
        orig = img_data
        data = img_data.reshape(IMG_SIZE,IMG_SIZE,1)
        model_out = model.predict([data])[0]
        f.write('{},{}\n'.format(img_num,model_out[1]))

100%|███████████████████████████████████████████████████████████████████████████| 12500/12500 [00:23<00:00, 543.13it/s]


In [58]:
print(model.predict([data]))

[[0.96610725 0.03389278]]
