# Dogs vs Cats Training

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import tensorflow as tf
# Ask TensorFlow which GPU devices it can see; the returned list is this cell's output.
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [2]:
# Log the device each op is placed on. This is what produces the
# "Executing op ... in device ..." lines in the outputs of later cells.
tf.debugging.set_log_device_placement(True)

## Prepare the DATA

In [3]:
# Dataset root and its two sub-folders.
DIR = 'dogs_cats'
DIR_TRAIN, DIR_TEST = (os.path.join(DIR, split) for split in ('train', 'test'))

# Class tags, in the order used for the label vectors.
TAGS = ['cat', 'dog']

# Side length, in pixels, that images are resized to.
IMG_SIZE = 100

Take a look at the data

In [None]:
# Preview only the first directory entry, read as a grayscale image.
for f in os.listdir(DIR_TRAIN)[:1]:
    sample_path = os.path.join(DIR_TRAIN, f)
    print(sample_path)
    img_array = cv2.imread(sample_path, cv2.IMREAD_GRAYSCALE)
    plt.imshow(img_array, cmap='gray')
    plt.show()

Resize the sample image to the size that will be used for all images

In [None]:
# Resize the sample image to a square of IMG_SIZE x IMG_SIZE pixels.
new_array = cv2.resize(img_array,(IMG_SIZE,IMG_SIZE))
# Print the resized pixel matrix, then draw it for a visual check.
print(new_array)
plt.imshow(new_array,cmap = 'gray')
plt.show()

Build the labeled data

In [23]:
%%time
def create_label_data(DIR_TRAIN, TAGS, IMG_SIZE=100):
    """Build the labeled data set from the image files in one folder.

    Every entry of ``DIR_TRAIN`` is read as a grayscale image, resized to
    ``IMG_SIZE`` x ``IMG_SIZE`` and paired with a label derived from its
    file name.

    Args:
        DIR_TRAIN: Path of the folder that holds the training images.
        TAGS: Tag strings, one per class. Element ``i`` of a label is 1 when
            ``TAGS[i]`` is a substring of the file name and 0 otherwise.
        IMG_SIZE: Width and height, in pixels, of the resized images.

    Returns:
        A list of ``[image, label]`` pairs in ``os.listdir`` order. Entries
        that OpenCV cannot decode are reported and left out.
    """
    label_data = []

    for file_name in os.listdir(DIR_TRAIN):
        file_path = os.path.join(DIR_TRAIN, file_name)
        img_array = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
        if img_array is None:
            # cv2.imread signals an unreadable or non-image entry by returning
            # None; passing that to cv2.resize would abort the whole run.
            print('Skipping unreadable file:', file_path)
            continue

        new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))

        # Substring match on the file name: a name containing several tags
        # (or none) yields a label that is not one-hot.
        y = [1 if tag in file_name else 0 for tag in TAGS]

        label_data.append([new_array, y])

    return label_data
# Pass the configured IMG_SIZE explicitly so the data set follows the config
# constant instead of silently relying on the function's own default.
label_data = create_label_data(DIR_TRAIN, TAGS, IMG_SIZE)

CPU times: user 13.2 s, sys: 313 ms, total: 13.5 s
Wall time: 13.5 s


Check that the labels are right

In [None]:
# Show the first two samples next to their labels for a visual sanity check.
for sample_idx in (0, 1):
    sample_img, sample_label = label_data[sample_idx]
    plt.imshow(sample_img, cmap='gray')
    plt.show()
    print(sample_label)

Save the data as a pickle file

In [25]:
import pickle

# Write the labeled images to disk so they can be reloaded by the next cell.
label_pickle_path = os.path.join(DIR, 'label_data.pickle')
with open(label_pickle_path, 'wb') as pickle_file:
    pickle.dump(label_data, pickle_file)

Load the data

In [5]:
import pickle

# Reload the labeled images written by the save cell.
# NOTE: pickle.load executes code embedded in the file; only load pickles
# that this notebook produced.
label_pickle_path = os.path.join(DIR, 'label_data.pickle')
with open(label_pickle_path, 'rb') as pickle_file:
    label_data = pickle.load(pickle_file)

Shuffle the data randomly

In [6]:
import random

# Shuffle in place with a dedicated, seeded generator so that a fresh
# "load the data" + shuffle always yields the same order, and therefore the
# same train/validation split. Re-running only this cell shuffles the
# already-shuffled list again.
SHUFFLE_SEED = 42
random.Random(SHUFFLE_SEED).shuffle(label_data)

Divide the data into training and validation sets

In [7]:
def train_validation_data(label_data, ratio):
    """Split labeled samples into a leading training part and a trailing validation part.

    Args:
        label_data: Iterable of ``(x, y)`` pairs (feature, label).
        ratio: Fraction of the samples, between 0 and 1 inclusive, that goes
            into the training part. The cut index is
            ``int(number_of_samples * ratio)``.

    Returns:
        ``((x_train, y_train), (x_validate, y_validate))`` where each element
        is a list and the original sample order is preserved.

    Raises:
        ValueError: If ``ratio`` is not within ``[0, 1]``.
    """
    # A ratio outside [0, 1] would otherwise slice with a negative or
    # oversized index and quietly return a meaningless split.
    if not 0 <= ratio <= 1:
        raise ValueError('ratio must be between 0 and 1, got {!r}'.format(ratio))

    features = []
    labels = []
    for x, y in label_data:
        features.append(x)
        labels.append(y)

    index = int(len(labels) * ratio)
    train_part = (features[:index], labels[:index])
    validation_part = (features[index:], labels[index:])
    return train_part, validation_part

# Keep the first 80% of the samples for training and the remaining 20% for validation.
train_data, validate_data = train_validation_data(label_data,0.8)

In [27]:
# X = np.array(x_train)
# print(X.shape)
# print(X[0].flatten()[:100])
# print(X.reshape(X.shape[0],-1)[0][:100])

## Train the Model

In [8]:
# train_data is the (images, labels) pair produced by train_validation_data.
x_train, y_train = train_data

In [9]:
# Normalize the images with the Keras helper using its default settings
# (per its documentation: L2 norm along the last axis).
x = tf.keras.utils.normalize(x_train)
# Show the first normalized image.
print(x[0])
# Stack the label lists into an array with one two-element row per sample.
y = np.array(y_train).reshape(-1,2)
print(y.shape)
print(x.shape)

[[0.05802693 0.05802693 0.05687788 ... 0.01091596 0.00919238 0.00861786]
 [0.05991238 0.05756288 0.05756288 ... 0.0099854  0.00939802 0.00822327]
 [0.05801943 0.05683536 0.05742739 ... 0.01065663 0.00947256 0.00828849]
 ...
 [0.14079574 0.15112937 0.0968778  ... 0.04004282 0.02970919 0.02454238]
 [0.09106778 0.06995062 0.13462194 ... 0.06995062 0.07918938 0.10558584]
 [0.12259229 0.12743146 0.15001425 ... 0.0887181  0.0371003  0.08549199]]
(20000, 2)
(20000, 100, 100)


In [10]:
# Fully connected classifier: flatten each image, pass it through two
# 128-unit ReLU layers, then a two-way softmax output.
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation=tf.nn.relu),
    tf.keras.layers.Dense(128, activation=tf.nn.relu),
    tf.keras.layers.Dense(2, activation=tf.nn.softmax),
])

In [11]:
%%time
# Configure training: Adam optimizer, categorical cross-entropy loss for the
# two-element label vectors, and accuracy as the reported metric.
model.compile(
    optimizer= 'Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
# Train for 10 epochs on the normalized training images; the returned History
# object is the value this cell displays.
model.fit(x,y, epochs=10)

Executing op Fill in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarIsInitializedOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op LogicalNot in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Assert in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op AssignVariableOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op RangeDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op RepeatDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op PrefetchDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op FlatMapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op TensorDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op RepeatDataset in 

<tensorflow.python.keras.callbacks.History at 0x7f859c61de10>

## Validate the Model

In [12]:
# validate_data is the held-out (images, labels) pair from train_validation_data.
x_validate, y_validate = validate_data

# Prepare the validation split exactly like the training split.
# NOTE: this rebinds the x / y names used by the training cell, so re-running
# the fit cell after this one would train on the validation data.
x = tf.keras.utils.normalize(x_validate)
y = np.array(y_validate).reshape(-1, 2)

In [13]:
# Loss and accuracy of the trained model on x, y (rebound to the validation split in the previous cell).
val_loss, val_acc = model.evaluate(x,y)

Executing op RangeDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op RepeatDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op PrefetchDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op FlatMapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op TensorDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op RepeatDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ZipDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ParallelMapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ModelDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op AnonymousIteratorV2 in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op __inference_test_function_13352 in device /job:localhost/replica:0/task:0/devic

## Test

In [None]:
%%time
def create_test_data(DIR, IMG_SIZE=100):
    """Load the unlabeled test images from one folder.

    Every entry of ``DIR`` is read as a grayscale image and resized to
    ``IMG_SIZE`` x ``IMG_SIZE``.

    Args:
        DIR: Path of the folder that holds the test images.
        IMG_SIZE: Width and height, in pixels, of the resized images.

    Returns:
        A list with one single-element list ``[image]`` per readable file, in
        ``os.listdir`` order. Entries that OpenCV cannot decode are reported
        and left out.
    """
    test_data = []

    for file_name in os.listdir(DIR):
        file_path = os.path.join(DIR, file_name)
        img_array = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
        if img_array is None:
            # cv2.imread signals an unreadable or non-image entry by returning
            # None; passing that to cv2.resize would abort the whole run.
            print('Skipping unreadable file:', file_path)
            continue

        new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))

        # Each sample stays wrapped in its own list, as callers receive it today.
        test_data.append([new_array])

    return test_data
# Pass the configured IMG_SIZE explicitly so the test images always match the
# size used for the training images.
test_data = create_test_data(DIR_TEST, IMG_SIZE)

In [None]:
# Write the resized test images to disk.
test_pickle_path = os.path.join(DIR, 'test_data.pickle')
with open(test_pickle_path, 'wb') as pickle_file:
    pickle.dump(test_data, pickle_file)

In [None]:
# Reload the test images written by the previous cell.
# NOTE: pickle.load executes code embedded in the file; only load pickles
# that this notebook produced.
test_pickle_path = os.path.join(DIR, 'test_data.pickle')
with open(test_pickle_path, 'rb') as pickle_file:
    test_data = pickle.load(pickle_file)