In [1]:
import os
import shutil
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import confusion_matrix, classification_report, precision_recall_curve, ConfusionMatrixDisplay, PrecisionRecallDisplay
from tensorflow.keras import layers, models


ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# Define base directory.
# The dataset location is machine-specific, so it can be overridden with the
# CATS_DOGS_DATA_DIR environment variable; when the variable is unset the
# original hard-coded location is used.
data_dir = Path(os.environ.get("CATS_DOGS_DATA_DIR", "D:/datasets/cats_dogs"))

# Subset data
def create_subset(source_dir, target_dir, start_idx, end_idx):
    """Copy a slice of the flat cat/dog image folder into per-class folders.

    For each label in ('cat', 'dog') and each index ``i`` in
    ``range(start_idx, end_idx)``, the file ``<source_dir>/<label>.<i>.jpg``
    is copied to ``<target_dir>/<label>/<label>.<i>.jpg``. Indices with no
    matching source file are skipped silently.

    The files are copied rather than moved, so the source dataset stays
    intact and the function can be re-run (e.g. after Restart & Run All)
    with the same result.

    Args:
        source_dir: Directory holding the ``<label>.<i>.jpg`` files.
        target_dir: Root of the subset; created if missing, along with one
            sub-directory per label.
        start_idx: First image index to include (inclusive).
        end_idx: Index at which to stop (exclusive).
    """
    os.makedirs(target_dir, exist_ok=True)
    for label in ['cat', 'dog']:
        label_dir = os.path.join(target_dir, label)
        os.makedirs(label_dir, exist_ok=True)
        for i in range(start_idx, end_idx):
            file_name = f"{label}.{i}.jpg"
            src_file = os.path.join(source_dir, file_name)
            # Only regular files are copied; missing indices are not an error.
            if os.path.isfile(src_file):
                shutil.copyfile(src_file, os.path.join(label_dir, file_name))

# Root of the train/validation/test subset. Overridable through the
# CATS_DOGS_SMALL_DIR environment variable; defaults to the original path.
base_dir = Path(os.environ.get("CATS_DOGS_SMALL_DIR", "D:/datasets/cats_dogs_small"))

# Image index range [start, end) taken from the full dataset for each split.
SPLIT_RANGES = {
    "train": (0, 1000),
    "validation": (1000, 1500),
    "test": (1500, 2000),
}
for split_name, (split_start, split_end) in SPLIT_RANGES.items():
    create_subset(data_dir, base_dir / split_name, split_start, split_end)


In [None]:
train_dir = base_dir / "train"

# Count the .jpg files per class folder. Only sub-directories are considered:
# a stray file sitting in the split folder (e.g. Thumbs.db) would otherwise be
# reported as a class with zero images. Sorting keeps the bar order stable.
class_counts = {
    folder.name: sum(1 for _ in folder.glob("*.jpg"))
    for folder in sorted(train_dir.iterdir())
    if folder.is_dir()
}

fig, ax = plt.subplots()
sns.barplot(x=list(class_counts.keys()), y=list(class_counts.values()), ax=ax)
ax.set_title("Class Distribution")
ax.set_xlabel("Class")
ax.set_ylabel("Image Count")
plt.show()


In [None]:
# Load datasets
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32

# Training data keeps the default shuffling.
train_ds = tf.keras.utils.image_dataset_from_directory(
    base_dir / "train", image_size=IMAGE_SIZE, batch_size=BATCH_SIZE
)
# Validation and test data must NOT be shuffled: the evaluation code iterates
# test_ds once for the labels and again inside model.predict(), and a shuffled
# dataset yields a different order on each pass, so labels and predictions
# would no longer refer to the same images.
val_ds = tf.keras.utils.image_dataset_from_directory(
    base_dir / "validation", image_size=IMAGE_SIZE, batch_size=BATCH_SIZE, shuffle=False
)
test_ds = tf.keras.utils.image_dataset_from_directory(
    base_dir / "test", image_size=IMAGE_SIZE, batch_size=BATCH_SIZE, shuffle=False
)

# Normalize data: scale pixel values by 1/255; labels pass through unchanged.
normalization_layer = layers.Rescaling(1./255)
train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
val_ds = val_ds.map(lambda x, y: (normalization_layer(x), y))
test_ds = test_ds.map(lambda x, y: (normalization_layer(x), y))


In [None]:
# Define CNN: three Conv2D + MaxPooling2D stages followed by a dense head
# that ends in a single sigmoid unit.
cnn_model = models.Sequential()
cnn_model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
cnn_model.add(layers.MaxPooling2D((2, 2)))
cnn_model.add(layers.Conv2D(64, (3, 3), activation='relu'))
cnn_model.add(layers.MaxPooling2D((2, 2)))
cnn_model.add(layers.Conv2D(128, (3, 3), activation='relu'))
cnn_model.add(layers.MaxPooling2D((2, 2)))
cnn_model.add(layers.Flatten())
cnn_model.add(layers.Dropout(0.5))
cnn_model.add(layers.Dense(128, activation='relu'))
cnn_model.add(layers.Dense(1, activation='sigmoid'))

cnn_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Keep only the checkpoint with the best validation loss seen so far.
cnn_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    "cnn_best_model.h5", save_best_only=True, monitor="val_loss"
)
cnn_callbacks = [cnn_checkpoint]

# Train CNN
cnn_history = cnn_model.fit(
    train_ds,
    epochs=20,
    validation_data=val_ds,
    callbacks=cnn_callbacks,
)


In [None]:
# Load Pre-trained VGG16 (ImageNet weights, without its classifier head) and
# freeze it so only the new layers stacked on top are trained.
# NOTE(review): the datasets fed to this model are rescaled by 1/255 earlier in
# the notebook, whereas Keras documents `vgg16.preprocess_input` as the input
# preparation for these weights - confirm the rescaling is intentional.
vgg16_base = tf.keras.applications.VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
vgg16_base.trainable = False

# Add custom classifier
vgg16_model = models.Sequential()
vgg16_model.add(vgg16_base)
vgg16_model.add(layers.Flatten())
vgg16_model.add(layers.Dense(256, activation='relu'))
vgg16_model.add(layers.Dropout(0.5))
vgg16_model.add(layers.Dense(1, activation='sigmoid'))

vgg16_optimizer = tf.keras.optimizers.Adam(1e-4)
vgg16_model.compile(
    optimizer=vgg16_optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Keep only the checkpoint with the best validation loss seen so far.
vgg16_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    "vgg16_best_model.h5", save_best_only=True, monitor="val_loss"
)
vgg16_callbacks = [vgg16_checkpoint]

# Train VGG16
vgg16_history = vgg16_model.fit(
    train_ds,
    epochs=15,
    validation_data=val_ds,
    callbacks=vgg16_callbacks,
)


In [None]:
# Replace the in-memory models with the versions stored in the checkpoint
# files that the ModelCheckpoint callbacks wrote during training.
load_checkpoint = tf.keras.models.load_model
cnn_model = load_checkpoint("cnn_best_model.h5")
vgg16_model = load_checkpoint("vgg16_best_model.h5")


In [None]:
# Evaluate each model on the test set: accuracy, confusion matrix,
# classification report and precision-recall curve.
for model, name in [(cnn_model, "CNN"), (vgg16_model, "VGG16")]:
    test_loss, test_acc = model.evaluate(test_ds)
    print(f"{name} Test Accuracy: {test_acc:.2f}")

    # Predictions.
    # Labels and probabilities are gathered in ONE pass over test_ds. Taking
    # the labels from one iteration and calling model.predict(test_ds)
    # separately pairs each prediction with the wrong label whenever the
    # dataset is shuffled, because every iteration yields a different order.
    label_batches, prob_batches = [], []
    for batch_images, batch_labels in test_ds:
        label_batches.append(np.asarray(batch_labels).ravel())
        prob_batches.append(np.asarray(model.predict_on_batch(batch_images)).ravel())
    true_labels = np.concatenate(label_batches, axis=0)
    pred_probs = np.concatenate(prob_batches, axis=0)
    preds = (pred_probs > 0.5).astype(int)

    # Confusion Matrix
    # labels=[0, 1] keeps the matrix 2x2 (matching the two display labels)
    # even if one class is absent from the labels or the predictions.
    cm = confusion_matrix(true_labels, preds, labels=[0, 1])
    cm_display = ConfusionMatrixDisplay(cm, display_labels=["Cat", "Dog"]).plot(cmap="Blues")
    cm_display.ax_.set_title(f"{name} Confusion Matrix")
    plt.show()

    # Classification Report
    print(f"{name} Classification Report:")
    print(classification_report(true_labels, preds, labels=[0, 1], target_names=["Cat", "Dog"]))

    # Precision-Recall Curve
    precision, recall, _ = precision_recall_curve(true_labels, pred_probs)
    pr_display = PrecisionRecallDisplay(precision=precision, recall=recall).plot()
    pr_display.ax_.set_title(f"{name} Precision-Recall Curve")
    plt.show()


In [None]:
# Show a few misclassified test images for each model.
MAX_MISCLASSIFIED_SHOWN = 5  # Show up to 5 examples per model

for model, name in [(cnn_model, "CNN"), (vgg16_model, "VGG16")]:
    # Predictions are recomputed here for *this* model, batch by batch, so the
    # image, its true label and its prediction always belong together. Only
    # the first few misclassified images are kept to bound memory use.
    misclassified_count = 0
    examples = []  # (image, true_label, predicted_label)
    for batch_images, batch_labels in test_ds:
        images_np = np.asarray(batch_images)
        batch_true = np.asarray(batch_labels).ravel().astype(int)
        batch_probs = np.asarray(model.predict_on_batch(batch_images)).ravel()
        batch_preds = (batch_probs > 0.5).astype(int)

        wrong_idx = np.flatnonzero(batch_true != batch_preds)
        misclassified_count += len(wrong_idx)

        remaining = MAX_MISCLASSIFIED_SHOWN - len(examples)
        for j in wrong_idx[:remaining]:
            examples.append((images_np[j], batch_true[j], batch_preds[j]))

    print(f"{name} Misclassified Samples: {misclassified_count}")
    for image, true_label, pred_label in examples:
        # test_ds images were rescaled by 1/255, so they are floats in [0, 1];
        # casting them to uint8 would truncate every pixel to 0 (black image).
        plt.imshow(np.clip(image, 0.0, 1.0))
        plt.title(f"True: {'Dog' if true_label else 'Cat'}, Pred: {'Dog' if pred_label else 'Cat'}")
        plt.axis("off")
        plt.show()
