# Cat-Dog Classification

data received from [Kaggle Cats and Dogs Dataset](https://www.microsoft.com/en-us/download/details.aspx?id=54765)

# Imports

In [1]:
import os
from PIL import Image
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
print(tf.__version__)

2.19.0


# Take in the images and label them dog and cat

In [3]:
X = []
y = []

# load all dog data
for img_name in os.listdir("Data/Dog"):
    img_path = os.path.join("Data/Dog", img_name)

    #re-size images to 64 by 64
    img = Image.open(img_path).convert("RGB").resize((64, 64))
    img = np.array(img) / 255.0

    # add images to arrays
    X.append(img)
    y.append("Dog")

# load all cat data
for img_name in os.listdir("Data/Cat"):
    img_path = os.path.join("Data/Cat", img_name)

    #re-size images to 64 by 64
    img = Image.open(img_path).convert("RGB").resize((64, 64))
    img = np.array(img) / 255.0

    # add images to arrays
    X.append(img)
    y.append("cat")

X = np.array(X)
y = np.array(y)



# Encode and split the data into test and training

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# get validation data
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_val = label_encoder.fit_transform(y_val)

# Create CNN Model

In [5]:
# Specify the number of filters and their sizes
# Dropout randomly drops half the neurons form the dense call (prevents overfitting)
# Dense 1 is classification of cat or dog

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


# Train the model
- x_train = image data
- y_train = labels
- epochs = number of times trained

In [6]:
model = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))

Epoch 1/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 55ms/step - accuracy: 0.5638 - loss: 0.7012 - val_accuracy: 0.7048 - val_loss: 0.5792
Epoch 2/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 49ms/step - accuracy: 0.7061 - loss: 0.5740 - val_accuracy: 0.7310 - val_loss: 0.5500
Epoch 3/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 51ms/step - accuracy: 0.7547 - loss: 0.5067 - val_accuracy: 0.7765 - val_loss: 0.4766
Epoch 4/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 52ms/step - accuracy: 0.7862 - loss: 0.4564 - val_accuracy: 0.7900 - val_loss: 0.4563
Epoch 5/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 52ms/step - accuracy: 0.8130 - loss: 0.4076 - val_accuracy: 0.8060 - val_loss: 0.4367
Epoch 6/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 55ms/step - accuracy: 0.8350 - loss: 0.3675 - val_accuracy: 0.8125 - val_loss: 0.4330
Epoch 7/10
[1m5