# Dogs vs Cats Training

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import tensorflow as tf
# Ask TensorFlow which GPU devices it can see; an empty list means no GPU is visible to it.
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

## Prepare the DATA

In [2]:
# Root folder of the dataset, relative to the notebook's working directory.
DIR = '../../dogs_cats'
# Sub-folders holding the training and test images.
DIR_TRAIN, DIR_TEST = (os.path.join(DIR, split) for split in ('train', 'test'))
# Class names looked up in the file names when labels are built.
TAGS = ['cat', 'dog']
# Edge length, in pixels, of the square images used in this notebook.
IMG_SIZE = 100

**Skip ahead to "IMPORT THE DATA" unless this is the first run — the cells below only need to run once to build the pickle file.**

Take a look at the data

In [None]:
# Preview a single training image; slicing to one entry keeps this a no-op on an empty folder.
for f in os.listdir(DIR_TRAIN)[:1]:
    sample_path = os.path.join(DIR_TRAIN, f)
    print(sample_path)
    # Loaded as grayscale; img_array is reused by the resize step that follows.
    img_array = cv2.imread(sample_path, cv2.IMREAD_GRAYSCALE)
    plt.imshow(img_array, cmap='gray')
    plt.show()

Resize the sample image to `IMG_SIZE` × `IMG_SIZE`; the same resize is applied to every image later.

In [None]:
# Shrink the preview image to IMG_SIZE x IMG_SIZE pixels and display the result.
new_array = cv2.resize(img_array, dsize=(IMG_SIZE, IMG_SIZE))
print(new_array)
plt.imshow(new_array, cmap='gray')
plt.show()

Build the labeled data: each entry pairs a resized grayscale image with a `[cat, dog]` label derived from the file name.

In [None]:
%%time
def create_label_data(DIR_TRAIN, TAGS, IMG_SIZE=100):
    """Load every image in a folder as a resized grayscale array with a label.

    Args:
        DIR_TRAIN: Folder whose entries are read as images.
        TAGS: Class names; the label has one slot per tag, set to 1 when the
            tag is a substring of the file name and 0 otherwise.
        IMG_SIZE: Edge length of the square image each file is resized to.

    Returns:
        A list of ``[image_array, label]`` pairs in sorted file-name order.
        Files that OpenCV cannot decode are left out and reported with a
        single warning.
    """
    import warnings  # local import: only needed to report skipped files

    label_data = []
    skipped = []

    # Sorted so the result does not depend on the filesystem's listing order.
    for f in sorted(os.listdir(DIR_TRAIN)):
        img_array = cv2.imread(os.path.join(DIR_TRAIN, f), cv2.IMREAD_GRAYSCALE)
        if img_array is None:
            # cv2.imread signals an unreadable / non-image file by returning
            # None; passing that on to cv2.resize would raise.
            skipped.append(f)
            continue

        new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
        y = [1 if tag in f else 0 for tag in TAGS]
        label_data.append([new_array, y])

    if skipped:
        warnings.warn(
            f"create_label_data: skipped {len(skipped)} unreadable file(s), "
            f"e.g. {skipped[0]!r}"
        )

    return label_data
# Pass IMG_SIZE explicitly so the configured size is the one actually used;
# otherwise the function silently falls back to its own default.
label_data = create_label_data(DIR_TRAIN, TAGS, IMG_SIZE)

Check that the labels match the images

In [None]:
# Sanity check: show the first two samples, each followed by its label.
for idx in (0, 1):
    plt.imshow(label_data[idx][0], cmap='gray')
    plt.show()
    print(label_data[idx][1])

Save the labeled data as a pickle file so later runs can skip the preprocessing above

In [None]:
import pickle 

# Cache the labeled dataset inside the dataset folder so it can be reloaded later.
pickle_path = os.path.join(DIR, 'label_data.pickle')
with open(pickle_path, mode='wb') as fh:
    pickle.dump(label_data, fh)

**IMPORT THE DATA**

In [3]:
import pickle 
# Reload the cached labeled dataset. Unpickling can execute arbitrary code,
# so only load a file that this notebook produced.
pickle_path = os.path.join(DIR, 'label_data.pickle')
with open(pickle_path, mode='rb') as fh:
    label_data = pickle.load(fh)

Shuffle the samples so cats and dogs are mixed before the train/validation split

In [4]:
import random

# A dedicated, seeded generator makes the shuffle (and therefore which samples
# end up in the validation split) identical on every fresh run, without
# touching the global random state.
# Note: the shuffle is in place, so re-running this cell reshuffles the
# already-shuffled list.
SHUFFLE_SEED = 42
random.Random(SHUFFLE_SEED).shuffle(label_data)

In [5]:
def train_validation_data(label_data):
    """Separate ``(image, label)`` pairs into two parallel lists.

    Args:
        label_data: Iterable of two-element entries, image first, label second.

    Returns:
        A tuple ``(images, labels)`` of lists, in the input order.
    """
    # Unpack once so each entry is required to have exactly two elements.
    pairs = [(image, label) for image, label in label_data]
    images = [image for image, _ in pairs]
    labels = [label for _, label in pairs]
    return (images, labels)

# train_data is an (images, labels) tuple built from the shuffled label_data.
train_data = train_validation_data(label_data)

In [None]:
# X = np.array(x_train)
# print(X.shape)
# print(X[0].flatten()[:100])
# print(X.reshape(X.shape[0],-1)[0][:100])

## Train the Model

In [6]:
# train_data is the (images, labels) pair returned by train_validation_data.
x_train, y_train = train_data

In [7]:
# NOTE(review): with its default arguments, normalize() is understood to
# L2-normalize along the last axis (each image row), not rescale pixels to
# 0-1 — confirm this is the intended preprocessing.
x = tf.keras.utils.normalize(x_train)
# One row per sample, one column per class.
y = np.asarray(y_train).reshape(-1, 2)
print(y.shape, x.shape, sep='\n')

(25000, 2)
(25000, 100, 100)


In [8]:
# Fully connected baseline: flatten each image, one hidden ReLU layer of 128
# units, then a 2-unit softmax output (one unit per class column in y).
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation=tf.nn.relu),
    tf.keras.layers.Dense(2, activation=tf.nn.softmax),
])

In [9]:
# Plain SGD with a learning rate of 0.1; categorical cross-entropy pairs with
# the two-column labels and the softmax output layer.
sgd = tf.keras.optimizers.SGD(learning_rate=0.1)
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [13]:
# NOTE(review): set_floatx changes the default float type for Keras objects;
# the model is created in an earlier cell, so confirm this call has the
# intended effect here or move it ahead of the model definition.
tf.keras.backend.set_floatx('float64')
# Train for 10 epochs. validation_split=0.25 holds out the last 25% of (x, y)
# for validation, which is why label_data is shuffled beforehand.
# NOTE(review): re-running this cell continues training from the current
# weights rather than starting over — rebuild the model for a clean run.
history = model.fit(
    x,y, 
    epochs=10,
    batch_size=32, 
    validation_split = 0.25,
    verbose=2)

Epoch 1/10
586/586 - 1s - loss: 0.5863 - accuracy: 0.6859 - val_loss: 0.6911 - val_accuracy: 0.6064
Epoch 2/10
586/586 - 1s - loss: 0.5792 - accuracy: 0.6895 - val_loss: 0.7556 - val_accuracy: 0.5638
Epoch 3/10
586/586 - 1s - loss: 0.5773 - accuracy: 0.6889 - val_loss: 0.6741 - val_accuracy: 0.6080
Epoch 4/10
586/586 - 1s - loss: 0.5685 - accuracy: 0.6955 - val_loss: 0.7019 - val_accuracy: 0.5930
Epoch 5/10
586/586 - 1s - loss: 0.5619 - accuracy: 0.7033 - val_loss: 0.7034 - val_accuracy: 0.6061
Epoch 6/10
586/586 - 1s - loss: 0.5597 - accuracy: 0.7051 - val_loss: 0.6910 - val_accuracy: 0.6091
Epoch 7/10
586/586 - 1s - loss: 0.5513 - accuracy: 0.7124 - val_loss: 0.7119 - val_accuracy: 0.5958
Epoch 8/10
586/586 - 1s - loss: 0.5429 - accuracy: 0.7132 - val_loss: 0.7291 - val_accuracy: 0.5982
Epoch 9/10
586/586 - 1s - loss: 0.5353 - accuracy: 0.7237 - val_loss: 0.7263 - val_accuracy: 0.5926
Epoch 10/10
586/586 - 1s - loss: 0.5317 - accuracy: 0.7242 - val_loss: 0.7042 - val_accuracy: 0.6203

## Validate the Model

In [None]:
# No earlier cell defines a separate validation set, so rebuild the hold-out
# portion that model.fit(validation_split=0.25) used: Keras takes the
# validation samples from the END of the arrays it is given.
VALIDATION_SPLIT = 0.25  # must match the value passed to model.fit
split_at = int(len(x_train) * (1 - VALIDATION_SPLIT))
x_validate = x_train[split_at:]
y_validate = y_train[split_at:]

# NOTE: x and y are reused here, replacing the training arrays of the same
# name; re-run the training-data cell before calling model.fit again.
x = tf.keras.utils.normalize(x_validate)
y = np.array(y_validate).reshape(-1,2)

In [None]:
# Score the model on the x / y arrays prepared in the previous cell; the two
# values are the loss and the 'accuracy' metric the model was compiled with.
val_loss, val_acc = model.evaluate(x,y)

## Test

In [None]:
%%time
def create_test_data(DIR, IMG_SIZE=100):
    """Load every image in a folder as a resized grayscale array.

    Args:
        DIR: Folder whose entries are read as images.
        IMG_SIZE: Edge length of the square image each file is resized to.

    Returns:
        A list of one-element lists ``[image_array]`` in sorted file-name
        order. Files that OpenCV cannot decode are left out and reported with
        a single warning.
    """
    import warnings  # local import: only needed to report skipped files

    test_data = []
    skipped = []

    # Sorted so the i-th entry always corresponds to the same file, whatever
    # order the filesystem lists the folder in.
    for f in sorted(os.listdir(DIR)):
        img_array = cv2.imread(os.path.join(DIR, f), cv2.IMREAD_GRAYSCALE)
        if img_array is None:
            # cv2.imread signals an unreadable / non-image file by returning
            # None; passing that on to cv2.resize would raise.
            skipped.append(f)
            continue

        new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
        test_data.append([new_array])

    if skipped:
        warnings.warn(
            f"create_test_data: skipped {len(skipped)} unreadable file(s), "
            f"e.g. {skipped[0]!r}"
        )

    return test_data
# Pass IMG_SIZE explicitly so the test images are resized to the configured
# size instead of the function's own default.
test_data = create_test_data(DIR_TEST, IMG_SIZE)

In [None]:
# Cache the preprocessed test images inside the dataset folder.
test_pickle_path = os.path.join(DIR, 'test_data.pickle')
with open(test_pickle_path, mode='wb') as fh:
    pickle.dump(test_data, fh)

In [None]:
# Reload the cached test images. Unpickling can execute arbitrary code, so
# only load a file that this notebook produced.
test_pickle_path = os.path.join(DIR, 'test_data.pickle')
with open(test_pickle_path, mode='rb') as fh:
    test_data = pickle.load(fh)