
# **BINARY IMAGE CLASSIFIER BY KRISHANG MAHESHWARI**

**IMPORTING LIBRARIES**

In [12]:
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

**DATA ACUISIITION**

In [13]:
!pip install kagglehub -q
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix
import kagglehub
import os

# Download latest version
path = kagglehub.dataset_download("bhavikjikadara/dog-and-cat-classification-dataset")

print("Path to dataset files:", path)
# List files to see the folder structure
print(os.listdir(path))

Using Colab cache for faster access to the 'dog-and-cat-classification-dataset' dataset.
Path to dataset files: /kaggle/input/dog-and-cat-classification-dataset
['PetImages']


**DATA LOADING**

In [15]:
import os

# 1. SET THE CORRECT PATH
# Since the only folder is 'PetImages', we point directly to it.
base_data_dir = os.path.join(path, 'PetImages')

# Verify the subfolders exist (should see ['Cat', 'Dog'] or similar)
if os.path.exists(base_data_dir):
    print(f"✅ Found data directory at: {base_data_dir}")
    print(f"Classes found: {os.listdir(base_data_dir)}")
else:
    # If it's nested even deeper, this find_dir will grab it
    def find_pet_images(root_path):
        for root, dirs, _ in os.walk(root_path):
            if 'PetImages' in dirs:
                return os.path.join(root, 'PetImages')
        return None
    base_data_dir = find_pet_images(path)
    print(f"✅ Found nested directory at: {base_data_dir}")

# 2. LOAD AND SPLIT DATA
IMG_SIZE = (160, 160)
BATCH_SIZE = 32

# Training Set (80%)
train_ds = tf.keras.utils.image_dataset_from_directory(
    base_data_dir,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE
)

# Validation/Test Set (20%)
val_ds = tf.keras.utils.image_dataset_from_directory(
    base_data_dir,
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE
)

# Since there is no separate 'test' folder, we use val_ds for evaluation.
test_ds = val_ds

✅ Found data directory at: /kaggle/input/dog-and-cat-classification-dataset/PetImages
Classes found: ['Dog', 'Cat']
Found 24998 files belonging to 2 classes.
Using 19999 files for training.
Found 24998 files belonging to 2 classes.
Using 4999 files for validation.


**MODEL BUILDING**

In [16]:
# Load MobileNetV2 without the top classification layer
base_model = tf.keras.applications.MobileNetV2(
    input_shape=(160, 160, 3),
    include_top=False,
    weights='imagenet'
)

# Freeze the pretrained weights
base_model.trainable = False



model = tf.keras.Sequential([
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_160_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


**TRAINING MODEL**

In [None]:
history = model.fit(train_ds, validation_data=val_ds, epochs=1)

[1m264/625[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m4:38[0m 770ms/step - accuracy: 0.6608 - loss: 0.6101

**EVALUATION**

In [None]:
# Collect true labels and predictions
y_true = tf.concat([y for x, y in test_ds], axis=0)
y_pred_probs = model.predict(test_ds)
y_pred = (y_pred_probs > 0.5).astype("int32")

# Print Metrics
print("\nClassification Report:")
print(classification_report(y_true, y_pred, target_names=['Cat', 'Dog']))

print("\nConfusion Matrix:")
print(confusion_matrix(y_true, y_pred))