In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc, precision_recall_curve, average_precision_score

# Report the runtime environment: the TensorFlow build and any GPUs it can see.
tf_version = tf.__version__
print(f"TensorFlow version: {tf_version}")
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"GPU Available: {gpu_devices}")

In [None]:
# Global seed, also passed explicitly to the data generators further down.
SEED = 42

# Seed Python's `random`, NumPy and TensorFlow in one call so that no RNG
# source is accidentally left unseeded (the previous version skipped `random`).
keras.utils.set_random_seed(SEED)

In [None]:
# Root directory holding one sub-folder of images per class.
dataset_path = Path("processed_dataset")

classes = ['pen', 'pencil']

# List each class directory once. Sorting makes the counts and the preview
# deterministic, because glob order depends on the filesystem.
class_images = {
    class_name: sorted((dataset_path / class_name).glob('*.jpg'))
    for class_name in classes
}

for class_name, image_files in class_images.items():
    print(f"{class_name.capitalize()}: {len(image_files)} images")

# One row per class, up to five preview images per row. squeeze=False keeps
# `axes` two-dimensional even if `classes` is reduced to a single entry.
fig, axes = plt.subplots(len(classes), 5, figsize=(15, 6), squeeze=False)
fig.suptitle("Sample Images from Dataset", fontsize=16, fontweight='bold')

# Hide every axis up front so slots without an image (class with fewer than
# five files) render as blank space instead of empty framed plots.
for ax in axes.flat:
    ax.axis('off')

for i, class_name in enumerate(classes):
    for j, img_path in enumerate(class_images[class_name][:5]):
        img = load_img(img_path, target_size=(224, 224))
        axes[i, j].imshow(img)
        axes[i, j].set_title(f"{class_name}", fontsize=10)

plt.tight_layout()
plt.show()

In [None]:
# Training hyper-parameters.
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
EPOCHS = 20
LEARNING_RATE = 0.0001

# The ImageNet weights of MobileNetV2 were trained on inputs scaled to
# [-1, 1]. Keras ships the matching transform as `preprocess_input`; the
# previous `rescale=1./255` produced [0, 1] inputs, which do not match what the
# frozen backbone expects.
mobilenet_preprocess = tf.keras.applications.mobilenet_v2.preprocess_input

# Augmenting generator config; 20% of the files are reserved for the
# 'validation' subset.
train_datagen = ImageDataGenerator(
    preprocessing_function=mobilenet_preprocess,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2
)

# Evaluation generator config: same normalisation, no augmentation.
test_datagen = ImageDataGenerator(
    preprocessing_function=mobilenet_preprocess
)

In [None]:
# Augmented, shuffled stream over the 'training' subset.
train_generator = train_datagen.flow_from_directory(
    str(dataset_path),
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='training',
    shuffle=True,
    seed=SEED
)

# Evaluation must not see random augmentation, so build a separate generator
# config that copies only the pixel normalisation from `train_datagen`.
# `validation_split` has to equal the value used by `train_datagen` (0.2) so
# that the 'validation' subset here is exactly the complement of the
# 'training' subset above.
eval_datagen = ImageDataGenerator(
    rescale=train_datagen.rescale,
    preprocessing_function=train_datagen.preprocessing_function,
    validation_split=0.2
)

validation_generator = eval_datagen.flow_from_directory(
    str(dataset_path),
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation',
    shuffle=False
)

# The dataset has no dedicated test directory. Reading the whole directory
# here (as before) would score the model on the images it was trained on, so
# the test generator is restricted to the held-out split instead.
# NOTE(review): this split also drives checkpointing/early stopping, so the
# reported test metrics are still mildly optimistic; a separate test folder
# would be the cleaner long-term fix.
test_generator = eval_datagen.flow_from_directory(
    str(dataset_path),
    target_size=IMG_SIZE,
    batch_size=1,
    class_mode='binary',
    subset='validation',
    shuffle=False
)

print(f"\nClass indices: {train_generator.class_indices}")
print(f"Training samples: {train_generator.samples}")
print(f"Validation samples: {validation_generator.samples}")
print(f"Test samples: {test_generator.samples}")

In [None]:
# ImageNet-pretrained MobileNetV2 without its classification head, used as a
# frozen feature extractor.
backbone_input_shape = tuple(IMG_SIZE) + (3,)
base_model = MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=backbone_input_shape
)

# Freeze the backbone so only the layers added on top of it are trained.
base_model.trainable = False

num_base_layers = len(base_model.layers)
print(f"Base model: MobileNetV2")
print(f"Total layers in base model: {num_base_layers}")
print(f"Base model trainable: {base_model.trainable}")

In [None]:
# Classification head: pool the backbone feature map, pass it through one
# hidden layer with dropout, and emit a single sigmoid probability.
pooled_features = GlobalAveragePooling2D()(base_model.output)
hidden = Dense(128, activation='relu')(pooled_features)
hidden_dropped = Dropout(0.5)(hidden)
output = Dense(1, activation='sigmoid')(hidden_dropped)

model = Model(inputs=base_model.input, outputs=output)

# Binary cross-entropy objective, tracking accuracy and AUC during training.
optimizer = Adam(learning_rate=LEARNING_RATE)
auc_metric = tf.keras.metrics.AUC(name='auc')
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy', auc_metric]
)

print("\nModel Summary:")
model.summary()

In [None]:
# Persist the weights of the epoch with the highest validation accuracy.
checkpoint = ModelCheckpoint(
    filepath='best_model.keras',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1
)

# Stop after 5 epochs without a validation-loss improvement and roll back to
# the best weights seen.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1
)

# Halve the learning rate after 3 stagnant epochs, never going below 1e-7.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    patience=3,
    factor=0.5,
    min_lr=1e-7,
    verbose=1
)

callbacks = [
    checkpoint,
    early_stop,
    reduce_lr,
]

In [None]:
# Train the model on the training stream, validating after every epoch.
print("Starting training...")

fit_options = dict(
    validation_data=validation_generator,
    epochs=EPOCHS,
    callbacks=callbacks,
    verbose=1,
)
history = model.fit(train_generator, **fit_options)

print("\nTraining completed!")

In [None]:
# One panel per tracked metric, each showing the training and validation curve.
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# (history key, display name) for each panel, left to right.
panels = [
    ('accuracy', 'Accuracy'),
    ('loss', 'Loss'),
    ('auc', 'AUC'),
]

for panel_ax, (metric_key, metric_name) in zip(axes, panels):
    train_curve = history.history[metric_key]
    val_curve = history.history[f'val_{metric_key}']

    panel_ax.plot(train_curve, label=f'Training {metric_name}', marker='o')
    panel_ax.plot(val_curve, label=f'Validation {metric_name}', marker='s')
    panel_ax.set_title(f'Model {metric_name} Over Epochs', fontsize=14, fontweight='bold')
    panel_ax.set_xlabel('Epoch')
    panel_ax.set_ylabel(metric_name)
    panel_ax.legend()
    panel_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Score the model on the test generator; the result order follows the compile
# call: loss first, then the metrics (accuracy, AUC).
evaluation = model.evaluate(test_generator, verbose=1)
test_loss, test_accuracy, test_auc = evaluation

divider = '=' * 50
test_accuracy_pct = test_accuracy * 100

print(f"\n{divider}")
print(f"Test Results:")
print(f"{divider}")
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f} ({test_accuracy_pct:.2f}%)")
print(f"Test AUC: {test_auc:.4f}")
print(f"{divider}")

In [None]:
# Rewind the generator so predictions line up with `test_generator.classes`.
test_generator.reset()

# Raw sigmoid outputs, kept as returned by predict() for the later cells.
y_pred_proba = model.predict(test_generator, verbose=1)
y_true = test_generator.classes

# Hard labels: threshold the probabilities at 0.5 and flatten to 1-D ints.
y_pred = (y_pred_proba.flatten() > 0.5).astype(int)

class_names = list(test_generator.class_indices)
print(f"\nClass names: {class_names}")
print(f"Total predictions: {len(y_pred)}")
print(f"Predictions shape: {y_pred.shape}")
print(f"True labels shape: {y_true.shape}")

In [None]:
# Rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_true, y_pred)

cm_fig, cm_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    cbar_kws={'label': 'Count'},
    annot_kws={'size': 16},
    ax=cm_ax,
)
cm_ax.set_title('Confusion Matrix', fontsize=16, fontweight='bold', pad=20)
cm_ax.set_ylabel('True Label', fontsize=12)
cm_ax.set_xlabel('Predicted Label', fontsize=12)
cm_fig.tight_layout()
plt.show()

# Spell out each cell of the 2x2 matrix (class 0 = pen, class 1 = pencil).
breakdown = [
    ("True Negatives (Pen predicted as Pen)", cm[0][0]),
    ("False Positives (Pen predicted as Pencil)", cm[0][1]),
    ("False Negatives (Pencil predicted as Pen)", cm[1][0]),
    ("True Positives (Pencil predicted as Pencil)", cm[1][1]),
]

print("\nConfusion Matrix Breakdown:")
for description, count in breakdown:
    print(f"{description}: {count}")

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Per-class precision/recall/F1 table.
report = classification_report(y_true, y_pred, target_names=class_names, digits=4)

banner = "=" * 60
print("\n" + banner)
print("CLASSIFICATION REPORT")
print(banner)
print(report)

# Scalar scores for the positive class (label 1).
precision = precision_score(y_true, y_pred, average='binary')
recall = recall_score(y_true, y_pred, average='binary')
f1 = f1_score(y_true, y_pred, average='binary')

metrics = ['Precision', 'Recall', 'F1-Score', 'Accuracy']
values = [precision, recall, f1, test_accuracy]
bar_colors = ['#3498db', '#2ecc71', '#f39c12', '#e74c3c']

metrics_fig, metrics_ax = plt.subplots(figsize=(10, 6))
bars = metrics_ax.bar(metrics, values, color=bar_colors, alpha=0.8, edgecolor='black')
metrics_ax.set_ylim(0, 1.1)
metrics_ax.set_ylabel('Score', fontsize=12)
metrics_ax.set_title('Model Performance Metrics', fontsize=16, fontweight='bold', pad=20)
metrics_ax.grid(axis='y', alpha=0.3)

# Print each score just above its bar.
for bar, value in zip(bars, values):
    label_x = bar.get_x() + bar.get_width()/2.
    label_y = bar.get_height() + 0.02
    metrics_ax.text(label_x, label_y, f'{value:.4f}',
                    ha='center', va='bottom', fontweight='bold', fontsize=11)

metrics_fig.tight_layout()
plt.show()

In [None]:
# Show up to 16 randomly chosen test images in a 4x4 grid. Sampling without
# replacement raises ValueError when more items are requested than exist, so
# cap the request at the number of available images.
num_available = len(test_generator.filenames)
num_samples = min(16, num_available)
indices = np.random.choice(num_available, num_samples, replace=False)

fig, axes = plt.subplots(4, 4, figsize=(16, 16))
fig.suptitle('Sample Predictions with Confidence Scores', fontsize=18, fontweight='bold', y=0.995)

# Hide every axis first so grid cells without an image stay blank.
for ax in axes.flatten():
    ax.axis('off')

for idx, ax in zip(indices, axes.flatten()):
    # Generator filenames are relative to the dataset root.
    img_path = Path(dataset_path) / test_generator.filenames[idx]
    img = load_img(img_path, target_size=IMG_SIZE)

    true_label = class_names[y_true[idx]]
    pred_label = class_names[y_pred[idx]]
    # The sigmoid output is P(class 1); confidence is the probability of
    # whichever class was actually predicted.
    positive_proba = y_pred_proba[idx][0]
    confidence = positive_proba if y_pred[idx] == 1 else 1 - positive_proba

    ax.imshow(img)

    # Green title for a correct prediction, red for a mistake.
    color = 'green' if true_label == pred_label else 'red'
    title = f'True: {true_label}\nPred: {pred_label}\nConf: {confidence:.2%}'
    ax.set_title(title, fontsize=10, color=color, fontweight='bold')

plt.tight_layout()
plt.show()

In [None]:
# ROC curve from the raw probabilities, plus the area under it.
fpr, tpr, thresholds = roc_curve(y_true, y_pred_proba)
roc_auc = auc(fpr, tpr)

roc_fig, roc_ax = plt.subplots(figsize=(10, 8))
roc_ax.plot(fpr, tpr, color='darkorange', lw=3, label=f'ROC Curve (AUC = {roc_auc:.4f})')
# Diagonal reference line for a classifier that guesses at random.
roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Random Classifier (AUC = 0.5)')
roc_ax.set_xlim([0.0, 1.0])
roc_ax.set_ylim([0.0, 1.05])
roc_ax.set_xlabel('False Positive Rate', fontsize=12)
roc_ax.set_ylabel('True Positive Rate', fontsize=12)
roc_ax.set_title('Receiver Operating Characteristic (ROC) Curve', fontsize=16, fontweight='bold', pad=20)
roc_ax.legend(loc="lower right", fontsize=12)
roc_ax.grid(True, alpha=0.3)
roc_fig.tight_layout()
plt.show()

print(f"\nROC AUC Score: {roc_auc:.4f}")

In [None]:
# Precision-recall curve from the raw probabilities, plus average precision.
precision_curve, recall_curve, _ = precision_recall_curve(y_true, y_pred_proba)
avg_precision = average_precision_score(y_true, y_pred_proba)

pr_fig, pr_ax = plt.subplots(figsize=(10, 8))
pr_ax.plot(recall_curve, precision_curve, color='blue', lw=3, label=f'PR Curve (AP = {avg_precision:.4f})')
# Horizontal marker at the precision obtained with the 0.5 decision threshold.
pr_ax.axhline(y=precision, color='red', linestyle='--', lw=2, label=f'Current Precision = {precision:.4f}')
pr_ax.set_xlim([0.0, 1.0])
pr_ax.set_ylim([0.0, 1.05])
pr_ax.set_xlabel('Recall', fontsize=12)
pr_ax.set_ylabel('Precision', fontsize=12)
pr_ax.set_title('Precision-Recall Curve', fontsize=16, fontweight='bold', pad=20)
pr_ax.legend(loc="lower left", fontsize=12)
pr_ax.grid(True, alpha=0.3)
pr_fig.tight_layout()
plt.show()

print(f"\nAverage Precision Score: {avg_precision:.4f}")

In [None]:
# Left: histogram of predicted P(class 1) per true class.
# Right: box plot of the probability the model assigned to the true class.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
hist_ax, box_ax = axes[0], axes[1]

# Split the raw probabilities by ground-truth label once and reuse them.
proba_true_pen = y_pred_proba[y_true == 0]
proba_true_pencil = y_pred_proba[y_true == 1]

hist_ax.hist(proba_true_pen, bins=30, alpha=0.7, label='Pen (Class 0)', color='blue', edgecolor='black')
hist_ax.hist(proba_true_pencil, bins=30, alpha=0.7, label='Pencil (Class 1)', color='orange', edgecolor='black')
hist_ax.axvline(x=0.5, color='red', linestyle='--', linewidth=2, label='Decision Threshold')
hist_ax.set_xlabel('Predicted Probability', fontsize=12)
hist_ax.set_ylabel('Frequency', fontsize=12)
hist_ax.set_title('Distribution of Prediction Probabilities by Class', fontsize=14, fontweight='bold')
hist_ax.legend(fontsize=11)
hist_ax.grid(True, alpha=0.3)

confidence_pen = proba_true_pen.flatten()
confidence_pencil = proba_true_pencil.flatten()

# For true pens the probability of the correct class is 1 - P(class 1).
box_data = [1 - confidence_pen, confidence_pencil]
box_plot = box_ax.boxplot(box_data, labels=class_names, patch_artist=True,
                          showmeans=True, meanline=True)

colors = ['lightblue', 'lightcoral']
for patch, color in zip(box_plot['boxes'], colors):
    patch.set_facecolor(color)

box_ax.set_ylabel('Confidence Score', fontsize=12)
box_ax.set_title('Confidence Score Distribution by True Class', fontsize=14, fontweight='bold')
box_ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

mean_conf_pen = np.mean(1 - confidence_pen)
mean_conf_pencil = np.mean(confidence_pencil)
print(f"\nMean confidence for Pen predictions: {mean_conf_pen:.4f}")
print(f"Mean confidence for Pencil predictions: {mean_conf_pencil:.4f}")

In [None]:
# Render a text summary of the run (dataset sizes, architecture, metrics and
# confusion matrix) as a boxed monospace panel.
fig, ax = plt.subplots(figsize=(12, 8))
ax.axis('off')

# Number of characters between the left and right border of the box. Every
# row is padded to this width so the frame lines up regardless of how wide the
# formatted values are (the previous hand-padded template did not line up).
BOX_INNER_WIDTH = 76


def _box_row(text=""):
    """Return one body row of the box: `text` left-aligned and padded to width."""
    return f"║  {text:<{BOX_INNER_WIDTH - 2}}║"


# (section heading, entries) in display order.
summary_sections = [
    ("Dataset Information:", [
        f"Total Training Samples: {train_generator.samples}",
        f"Total Validation Samples: {validation_generator.samples}",
        f"Total Test Samples: {test_generator.samples}",
    ]),
    ("Model Architecture:", [
        "Base Model: MobileNetV2 (Transfer Learning)",
        "Input Size: 224x224x3",
        "Output: Binary Classification (Pen vs Pencil)",
    ]),
    ("Performance Metrics:", [
        f"Test Accuracy: {test_accuracy:.4f} ({test_accuracy*100:.2f}%)",
        f"Test Loss: {test_loss:.4f}",
        f"Precision: {precision:.4f}",
        f"Recall: {recall:.4f}",
        f"F1-Score: {f1:.4f}",
        f"ROC AUC: {roc_auc:.4f}",
        f"Average Precision: {avg_precision:.4f}",
    ]),
    ("Confusion Matrix:", [
        f"True Negatives (Pen → Pen): {cm[0][0]}",
        f"False Positives (Pen → Pencil): {cm[0][1]}",
        f"False Negatives (Pencil → Pen): {cm[1][0]}",
        f"True Positives (Pencil → Pencil): {cm[1][1]}",
    ]),
]

box_title = "BINARY CLASSIFICATION RESULTS SUMMARY"
horizontal_border = "═" * BOX_INNER_WIDTH

summary_lines = [
    f"╔{horizontal_border}╗",
    f"║{box_title:^{BOX_INNER_WIDTH}}║",
    f"╠{horizontal_border}╣",
    _box_row(),
]
for heading, entries in summary_sections:
    summary_lines.append(_box_row(heading))
    for position, entry in enumerate(entries):
        # The last entry of a section closes the tree with a corner glyph.
        branch = "└─" if position == len(entries) - 1 else "├─"
        summary_lines.append(_box_row(f"{branch} {entry}"))
    summary_lines.append(_box_row())
summary_lines.append(f"╚{horizontal_border}╝")

# Leading/trailing newline kept from the original template.
summary_text = "\n" + "\n".join(summary_lines) + "\n"

ax.text(0.5, 0.5, summary_text, 
        fontsize=11, 
        family='monospace',
        verticalalignment='center',
        horizontalalignment='center',
        bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.3))

plt.title('Model Evaluation Summary', fontsize=18, fontweight='bold', pad=20)
plt.tight_layout()
plt.show()

print("\n✓ Notebook execution completed successfully!")
print("✓ Model trained using transfer learning with MobileNetV2")
print("✓ All visualizations and metrics generated")