DATA COLLECTION

In [1]:
import requests
import os

url = 'https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip'
downloads_folder = os.path.join(os.path.expanduser('~'), 'Downloads')
output_file = os.path.join(downloads_folder, 'cats-and-dogs.zip')

try:
    response = requests.get(url)
    
    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        with open(output_file, 'wb') as f:
            f.write(response.content)
        print(f"File downloaded to {output_file}")
    else:
        print(f"Failed to download file: HTTP status code {response.status_code}")

except requests.exceptions.RequestException as e:
    print(f"Error downloading file: {e}")

Failed to download file: HTTP status code 404


download manually from:

https://www.microsoft.com/en-us/download/confirmation.aspx?id=54765

In [2]:
import zipfile

local_zip = r"C:\Users\karat\Downloads\kagglecatsanddogs_5340.zip"
extract_dir = r"C:\Users\karat\Downloads\kagglecatsanddogs_5340"

os.makedirs(extract_dir, exist_ok=True)

with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

print(f"Contents extracted to: {extract_dir}")

Contents extracted to: C:\Users\karat\Downloads\kagglecatsanddogs_5340


In [3]:
cats = r"C:\Users\karat\Downloads\kagglecatsanddogs_5340\PetImages\Cat\\"
dogs = r"C:\Users\karat\Downloads\kagglecatsanddogs_5340\PetImages\Dog\\"
print(len(os.listdir(cats)))
print(len(os.listdir(dogs))) 

12501
12501


In [4]:
import random
from shutil import copyfile

dir = r"C:\Users\karat\Downloads\kagglecatsanddogs_5340"

try:
    os.makedirs(os.path.join(dir, 'cats-v-dogs', 'training', 'cats'))
    os.makedirs(os.path.join(dir, 'cats-v-dogs', 'testing', 'cats'))
    os.makedirs(os.path.join(dir, 'cats-v-dogs', 'training', 'dogs'))
    os.makedirs(os.path.join(dir, 'cats-v-dogs', 'testing', 'dogs'))
except OSError:
    pass

def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):
    files = []
    for filename in os.listdir(SOURCE):
        file = os.path.join(SOURCE, filename)
        if os.path.getsize(file) > 0:
            files.append(filename)
        else:
            print(filename + " is zero length, so ignoring.")

    training_length = int(len(files) * SPLIT_SIZE)
    testing_length = int(len(files) - training_length)
    shuffled_set = random.sample(files, len(files))
    training_set = shuffled_set[0:training_length]
    testing_set = shuffled_set[training_length:]

    for filename in training_set:
        this_file = os.path.join(SOURCE, filename)
        destination = os.path.join(TRAINING, filename)
        copyfile(this_file, destination)

    for filename in testing_set:
        this_file = os.path.join(SOURCE, filename)
        destination = os.path.join(TESTING, filename)
        copyfile(this_file, destination)

CAT_SOURCE_DIR = os.path.join(dir, "PetImages", "Cat")
TRAINING_CATS_DIR = os.path.join(dir, "cats-v-dogs", "training", "cats")
TESTING_CATS_DIR = os.path.join(dir, "cats-v-dogs", "testing", "cats")
DOG_SOURCE_DIR = os.path.join(dir, "PetImages", "Dog")
TRAINING_DOGS_DIR = os.path.join(dir, "cats-v-dogs", "training", "dogs")
TESTING_DOGS_DIR = os.path.join(dir, "cats-v-dogs", "testing", "dogs")

split_size = 0.9
split_data(CAT_SOURCE_DIR, TRAINING_CATS_DIR, TESTING_CATS_DIR, split_size)
split_data(DOG_SOURCE_DIR, TRAINING_DOGS_DIR, TESTING_DOGS_DIR, split_size)

666.jpg is zero length, so ignoring.
11702.jpg is zero length, so ignoring.


In [5]:
training_cats_dir = os.path.join(dir, 'cats-v-dogs', 'training', 'cats')
training_dogs_dir = os.path.join(dir, 'cats-v-dogs', 'training', 'dogs')
testing_cats_dir = os.path.join(dir, 'cats-v-dogs', 'testing', 'cats')
testing_dogs_dir = os.path.join(dir, 'cats-v-dogs', 'testing', 'dogs')

print(len(os.listdir(training_cats_dir)))
print(len(os.listdir(training_dogs_dir)))
print(len(os.listdir(testing_cats_dir)))
print(len(os.listdir(testing_dogs_dir)))


12377
12372
2377
2372


MODEL BUILDING

In [6]:
import tensorflow as tf
print(tf.__version__)

2.16.1


In [9]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np

In [39]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
]) 


model.compile(
            optimizer=Adam(0.00325),
            loss='binary_crossentropy', 
            metrics=['accuracy']
            )

In [40]:
class GrayscaleImageDataGenerator(ImageDataGenerator):
    def flow_from_directory(self, directory, *args, **kwargs):
        generator = super().flow_from_directory(directory, *args, **kwargs)
        
        for batch in generator:
            batch_x = batch[0]
            batch_x = np.dot(batch_x[..., :3], [0.2989, 0.5870, 0.1140])
            batch_x = np.expand_dims(batch_x, axis=-1) 
            yield batch_x, batch[1]


In [47]:
def brightness_and_contrast_adjustment(image):
    brightness_factor = np.random.uniform(0.8, 1.2)
    contrast_factor = np.random.uniform(0.8, 1.2)
    image = tf.image.adjust_brightness(image, delta=brightness_factor - 1.0)
    image = tf.image.adjust_contrast(image, contrast_factor)
    return image

In [48]:
training_loc = r"C:\Users\karat\Downloads\kagglecatsanddogs_5340\cats-v-dogs\training"

# image augmentation
train_datagen = ImageDataGenerator(rescale=1.0/255.,
                                    #rotation_range=20,
                                    width_shift_range=0.15,    
                                    height_shift_range=0.15, 
                                    #shear_range=0.1,    
                                    zoom_range=0.15,
                                    #horizontal_flip=True,           
                                    fill_mode='nearest',
                                    preprocessing_function=brightness_and_contrast_adjustment)

train_generator = train_datagen.flow_from_directory(training_loc,
                                                    batch_size=100,
                                                    class_mode='binary',
                                                    target_size=(150, 150))
 
valid_loc = r"C:\Users\karat\Downloads\kagglecatsanddogs_5340\cats-v-dogs\testing"
validation_datagen = ImageDataGenerator(rescale=1.0/255.)
validation_generator = validation_datagen.flow_from_directory(valid_loc,
                                                              batch_size=100,
                                                              class_mode='binary',
                                                              target_size=(150, 150))

Found 24747 images belonging to 2 classes.
Found 4749 images belonging to 2 classes.


In [49]:
history = model.fit(train_generator,
                    epochs=10,
                    verbose=1,
                    validation_data=validation_generator)

Epoch 1/10
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m569s[0m 2s/step - accuracy: 0.8891 - loss: 0.2610 - val_accuracy: 0.9303 - val_loss: 0.1659
Epoch 2/10
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m656s[0m 3s/step - accuracy: 0.8964 - loss: 0.2453 - val_accuracy: 0.8951 - val_loss: 0.2377
Epoch 3/10
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1126s[0m 4s/step - accuracy: 0.8912 - loss: 0.2576 - val_accuracy: 0.9231 - val_loss: 0.1783
Epoch 4/10
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m871s[0m 3s/step - accuracy: 0.8970 - loss: 0.2452 - val_accuracy: 0.9290 - val_loss: 0.1696
Epoch 5/10
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m742s[0m 3s/step - accuracy: 0.9024 - loss: 0.2389 - val_accuracy: 0.9147 - val_loss: 0.1943
Epoch 6/10
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m743s[0m 3s/step - accuracy: 0.9026 - loss: 0.2330 - val_accuracy: 0.9151 - val_loss: 0.1972
Epoch 7/10
[1m248/24

In [55]:
acc = history.history['accuracy'][-1]*100
print(f"accuracy: {acc:.2f}%")

accuracy: 90.43%


In [50]:
import numpy as np
from PIL import Image
from keras.preprocessing import image

directory = r"C:\Users\karat\Downloads\testing_images_for_model"

image_files = [file for file in os.listdir(directory) if file.endswith(('jpg', 'jpeg', 'png'))]

for file in image_files:
    img_path = os.path.join(directory, file)

    img = Image.open(img_path).convert("RGB")
    img = img.resize((150, 150))
    
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = x / 255.0
    
    classes = model.predict(x)
    
    if classes[0] > 0.5:
        print(f"{file} is a dog")
    else:
        print(f"{file} is a cat")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112ms/step
download_c.jpeg is a cat
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
download_c2.jpeg is a dog
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
download_c3.jpeg is a cat
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
download_d.jpeg is a dog
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
download_d2.jpeg is a dog
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
download_d3.jpeg is a dog
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
download_d4.jpeg is a dog
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
images_c.jpeg is a dog
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
images_c2.jpeg is a cat
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
images_d.jpeg is a dog
