In [1]:
import os
import random
import numpy as np
from io import BytesIO

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import tensorflow as tf
from ipywidgets import widgets


## Importing data and Feature Engineering

In [2]:
# Create the .kaggle directory
!mkdir -p ~/.kaggle

# create kaggle Move the kaggle.json file (assumes it is in the current working directory)
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets list

!kaggle competitions download -c dogs-vs-cats



ref                                                          title                                                     size  lastUpdated                 downloadCount  voteCount  usabilityRating  
-----------------------------------------------------------  --------------------------------------------------  ----------  --------------------------  -------------  ---------  ---------------  
jayaantanaath/student-habits-vs-academic-performance         Student Habits vs Academic Performance                   19512  2025-04-12 10:49:08.663000          18921        323  1.0              
adilshamim8/cost-of-international-education                  Cost of International Education                          18950  2025-05-07 15:41:53.213000           2383         40  1.0              
fatemehmohammadinia/heart-attack-dataset-tarik-a-rashid      Heart Attack Dataset                                     16250  2025-04-30 21:58:22.740000           3394         59  1.0              
glowstudygram/s

In [3]:
import zipfile

local_zip = '/content/dogs-vs-cats.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('/content/cat_vs_dog')
zip_ref.close()

In [4]:
base_dir = '/content/cat_vs_dog'
train_zip_path = os.path.join(base_dir, 'train.zip')
test_zip_path = os.path.join(base_dir, 'test1.zip')
train_extract_path = os.path.join(base_dir, 'train')
test_extract_path = os.path.join(base_dir, 'test')


# Extract training data
with zipfile.ZipFile(train_zip_path, 'r') as zip_ref:
    zip_ref.extractall(train_extract_path)

# Extract test data
with zipfile.ZipFile(test_zip_path, 'r') as zip_ref:
    zip_ref.extractall(test_extract_path)


In [5]:
# Check number of files in train and test directories
train_extract_path = os.path.join(train_extract_path, 'train')
test_extract_path = os.path.join(test_extract_path, 'test1')

print("Number of training images:", len(os.listdir(train_extract_path)))
print("Number of test images:", len(os.listdir(test_extract_path)))


Number of training images: 25000
Number of test images: 12500


In [6]:
# Create new directories for cats and dogs
train_cats_dir = os.path.join(train_extract_path, 'cats')
train_dogs_dir = os.path.join(train_extract_path, 'dogs')

os.makedirs(train_cats_dir, exist_ok=True)
os.makedirs(train_dogs_dir, exist_ok=True)



In [7]:
import shutil

In [8]:
base_train_dir = '/content/cat_vs_dog/train/train'
# Iterate through the files in the main train directory

for filename in os.listdir(base_train_dir):
    file_path = os.path.join(base_train_dir, filename)

    # Only consider files (not directories)
    if os.path.isfile(file_path):
        # Move to 'cats' folder
        if filename.startswith('cat'):
            shutil.move(file_path, os.path.join(train_cats_dir, filename))

        # Move to 'dogs' folder
        elif filename.startswith('dog'):
            shutil.move(file_path, os.path.join(train_dogs_dir, filename))

print("Files moved successfully!")

Files moved successfully!


In [10]:
## Validation folder creation
base_train_dir = '/content/cat_vs_dog'

# Define paths
val_dir = os.path.join(base_train_dir, 'val')
os.makedirs(os.path.join(val_dir, 'cats'), exist_ok=True)
os.makedirs(os.path.join(val_dir, 'dogs'), exist_ok=True)


In [21]:
print(train_cats_dir)


/content/cat_vs_dog/train/train/cats


In [11]:
from sklearn.model_selection import train_test_split

In [22]:
val_cats_dir = '/content/cat_vs_dog/val/cats'
os.makedirs(val_cats_dir, exist_ok=True)


# Split ratio
split_ratio = 0.8

## making the list from the files
cat_files = os.listdir(train_cats_dir)

# Split Cats
train_cats, val_cats = train_test_split(cat_files, train_size=split_ratio, random_state=42)

for filename in val_cats:

  ## creating the source and destination path directory
    src_path = os.path.join(train_cats_dir, filename)
    dest_path = os.path.join(val_cats_dir, filename)

    # Move only if it is a file
    if os.path.isfile(src_path):
        shutil.move(src_path, dest_path)


print("Files moved successfully!")

Files moved successfully!


In [24]:
val_dogs_dir = '/content/cat_vs_dog/val/dogs'
os.makedirs(val_dogs_dir, exist_ok=True)


# Split ratio
split_ratio = 0.8

## making the list from the files
dog_files = os.listdir(train_dogs_dir)

# Split Cats
train_dogs, val_dogs = train_test_split(dog_files, train_size=split_ratio, random_state=42)

for filename in val_dogs:

  ## creating the source and destination path directory
    src_path = os.path.join(train_dogs_dir, filename)
    dest_path = os.path.join(val_dogs_dir, filename)

    # Move only if it is a file
    if os.path.isfile(src_path):
        shutil.move(src_path, dest_path)


print("Files moved successfully!")

Files moved successfully!


In [25]:
print("Number of training Cat images:", len(os.listdir(train_cats_dir)))
print("Number of training Dog images:", len(os.listdir(train_dogs_dir)))

print("Number of validation Cat images:", len(os.listdir(val_cats_dir)))
print("Number of validation Dog images:", len(os.listdir(val_dogs_dir)))

Number of training Cat images: 10000
Number of training Dog images: 10000
Number of validation Cat images: 2500
Number of validation Dog images: 2500


## Model Building

In [27]:
model = tf.keras.models.Sequential([
    # Rescale the image. Note the input shape is the desired size of the image: 150x150 with 3 bytes for color

    tf.keras.Input(shape=(150, 150, 3)),
    tf.keras.layers.Rescaling(1./255),

    # Convolution and Pooling layers
    tf.keras.layers.Conv2D(16, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),

    # Flatten the results to feed into a DNN
    tf.keras.layers.Flatten(),

    # 512 neuron hidden layer

    tf.keras.layers.Dense(512, activation='relu'),
    # Only 1 output neuron. It will contain a value from 0-1 where 0 for one class ('cats') and 1 for the other ('dogs')

    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.summary()

In [28]:
model.compile(
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics = ['accuracy']
    )

In [37]:
train_dir ='/content/cat_vs_dog/train/train'
val_dir ='/content/cat_vs_dog/val'

# Instantiate the Dataset object for the training set
train_dataset = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    image_size=(150, 150),
    batch_size=50,
    label_mode='binary'
    )

# Instantiate the Dataset object for the validation set
validation_dataset = tf.keras.utils.image_dataset_from_directory(
    val_dir,
    image_size=(150, 150),
    batch_size=50,
    label_mode='binary'
    )

Found 20000 files belonging to 2 classes.
Found 5000 files belonging to 2 classes.


In [36]:
SHUFFLE_BUFFER_SIZE = 1000
PREFETCH_BUFFER_SIZE = tf.data.AUTOTUNE

train_dataset_final = train_dataset.cache().shuffle(SHUFFLE_BUFFER_SIZE).prefetch(PREFETCH_BUFFER_SIZE)
validation_dataset_final = validation_dataset.cache().prefetch(PREFETCH_BUFFER_SIZE)

In [35]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Restrict TensorFlow to only use the first GPU
        tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        print(e)

Num GPUs Available:  1
1 Physical GPUs, 1 Logical GPUs


In [38]:
history = model.fit(
    train_dataset_final,
    epochs=15,
    validation_data=validation_dataset_final,
    verbose=2
    )

Epoch 1/15
400/400 - 45s - 114ms/step - accuracy: 0.6392 - loss: 0.6384 - val_accuracy: 0.7170 - val_loss: 0.5477
Epoch 2/15
400/400 - 7s - 18ms/step - accuracy: 0.7471 - loss: 0.5110 - val_accuracy: 0.7658 - val_loss: 0.4849
Epoch 3/15
400/400 - 8s - 19ms/step - accuracy: 0.7926 - loss: 0.4411 - val_accuracy: 0.7858 - val_loss: 0.4548
Epoch 4/15
400/400 - 7s - 18ms/step - accuracy: 0.8270 - loss: 0.3822 - val_accuracy: 0.8000 - val_loss: 0.4643
Epoch 5/15
400/400 - 7s - 18ms/step - accuracy: 0.8553 - loss: 0.3250 - val_accuracy: 0.8134 - val_loss: 0.4024
Epoch 6/15
400/400 - 7s - 18ms/step - accuracy: 0.8928 - loss: 0.2530 - val_accuracy: 0.8134 - val_loss: 0.4271
Epoch 7/15
400/400 - 9s - 24ms/step - accuracy: 0.9335 - loss: 0.1655 - val_accuracy: 0.7980 - val_loss: 0.5786
Epoch 8/15
400/400 - 8s - 20ms/step - accuracy: 0.9636 - loss: 0.1024 - val_accuracy: 0.8166 - val_loss: 0.5471
Epoch 9/15
400/400 - 7s - 18ms/step - accuracy: 0.9821 - loss: 0.0530 - val_accuracy: 0.8068 - val_los

In [39]:
# Create the widget and take care of the display
uploader = widgets.FileUpload(accept="image/*", multiple=True)
display(uploader)
out = widgets.Output()
display(out)

def file_predict(filename, file, out):
    """ A function for creating the prediction and printing the output."""
    image = tf.keras.utils.load_img(file, target_size=(150, 150))
    image = tf.keras.utils.img_to_array(image)
    image = np.expand_dims(image, axis=0)

    prediction = model.predict(image, verbose=0)[0][0]

    with out:
        if prediction > 0.5:
            print(filename + " is a dog")
        else:
            print(filename + " is a cat")


def on_upload_change(change):
    """ A function for geting files from the widget and running the prediction."""
    # Get the newly uploaded file(s)

    items = change.new
    for item in items: # Loop if there is more than one file uploaded
        file_jpgdata = BytesIO(item.content)
        file_predict(item.name, file_jpgdata, out)


uploader.observe(on_upload_change, names='value')

FileUpload(value={}, accept='image/*', description='Upload', multiple=True)

Output()

AttributeError: 'str' object has no attribute 'content'