# STEP 1 : Data Setup 

In [2]:
!pip install split-folders



In [3]:
import splitfolders
input_folder = "data/PetImages"
output_folder = "data/split_data"
splitfolders.ratio(input_folder, output=output_folder, seed=42, ratio=(.8, .1, .1))
# This will split the data into training, validation, and test sets with an 80-10-10 ratio.

In [15]:
from PIL import Image, UnidentifiedImageError
import os, shutil

root = './data/split_data/train'
quarantine = './data/split_data/corrupt_or_unreadable'
os.makedirs(quarantine, exist_ok=True)

moved = 0
for subdir, dirs, files in os.walk(root):
    for fname in files:
        path = os.path.join(subdir, fname)
        try:
            with Image.open(path) as im:
                if im.mode == 'LA' or im.mode == 'P':
                    # optional: move these too if you don't want conversion
                    dest = os.path.join(quarantine, os.path.relpath(path, start=root).replace(os.sep, '_'))
                    shutil.move(path, dest)
                    moved += 1
        except:
            dest = os.path.join(quarantine, os.path.relpath(path, start=root).replace(os.sep, '_'))
            shutil.move(path, dest)
            moved += 1

print(f'Moved {moved} files to {quarantine}')


Moved 0 files to ./data/split_data/corrupt_or_unreadable


# Step 2 : Data Preprocessing 

In [16]:
import tensorflow as tf 
from tensorflow import keras
from keras import Sequential 
from keras.layers import Conv2D, MaxPool2D, Flatten, Dense 

In [17]:
train_ds = keras.utils.image_dataset_from_directory(
    directory = './data/split_data/train',
    labels = 'inferred',
    label_mode = 'int',
    batch_size = 32,
    image_size = (256,256),
)
validation_ds = keras.utils.image_dataset_from_directory(
    directory = './data/split_data/val',
    labels = 'inferred',
    label_mode = 'int',
    batch_size = 32,
    image_size = (256,256),
)
test_ds = keras.utils.image_dataset_from_directory(
    directory = './data/split_data/test',
    labels = 'inferred',
    label_mode = 'int',
    batch_size = 32,
    image_size = (256,256),
)

Found 19949 files belonging to 2 classes.
Found 2500 files belonging to 2 classes.
Found 2502 files belonging to 2 classes.


In [18]:
# Normalization Layer 
def process(image, label):
    image = tf.cast(image/255.0,tf.float32)
    return image, label 

train_ds = train_ds.map(process)
validation_ds = validation_ds.map(process)
test_ds = test_ds.map(process)

# STEP3: CNN ARCHITECTURE

In [19]:
model = Sequential()
# CNN LAYERS 
model.add(Conv2D(filters=32, kernel_size=(3,3), activation='relu', input_shape=(256,256,3)))
model.add(MaxPool2D(pool_size=(2,2), strides=2))
model.add(Conv2D(filters=64, kernel_size=(3,3), activation='relu'))
model.add(MaxPool2D(pool_size=(2,2), strides=2))
model.add(Conv2D(filters=128, kernel_size=(3,3), activation='relu'))
model.add(MaxPool2D(pool_size=(2,2), strides=2))
model.add(Flatten())
# ANN LAYERS
model.add(Dense(units=128, activation='relu'))
model.add(Dense(units=64, activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [20]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
history = model.fit(train_ds, validation_data=validation_ds, epochs=10,validation_steps=0.25)

In [None]:
evaluation = model.evaluate(test_ds)

[1m27/79[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m5s[0m 96ms/step - accuracy: 0.6028 - loss: 0.6698

Corrupt JPEG data: 128 extraneous bytes before marker 0xd9


[1m43/79[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m3s[0m 96ms/step - accuracy: 0.5970 - loss: 0.6700

Corrupt JPEG data: 1403 extraneous bytes before marker 0xd9


[1m53/79[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m2s[0m 96ms/step - accuracy: 0.5970 - loss: 0.6693

InvalidArgumentError: Graph execution error:

Detected at node decode_image/DecodeImage defined at (most recent call last):
<stack traces unavailable>
Input size should match (header_size + row_size * abs_height) but they differ by 2
	 [[{{node decode_image/DecodeImage}}]]
	 [[IteratorGetNext]] [Op:__inference_multi_step_on_iterator_9945]

In [None]:
import matplotlib.pyplot as plt 

plt.plot(history.history['accuracy'], color='red',label='Train')
plt.plot(history.history['val_accuracy'], color='blue',label='Validation')
plt.legend()
plt.show()

In [None]:
plt.plot(history.history['loss'], color='red',label='Train')
plt.plot(history.history['val_loss'], color='blue',label='Validation')
plt.legend()
plt.show()

In [None]:
# to reduce overfitting we can 
# - add dropout layers
# - L1 and L2 regularization
# - batch normalization
# - reduce complexity of model
# --- IGNORE ---