# # Machine Learning Pipeline: Image Classification

# This notebook demonstrates the end-to-end process for an image classification task,
# including data loading, preprocessing, model training, and evaluation.

In [1]:
# ## 1. Setup and Imports

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score
import numpy as np
import matplotlib.pyplot as plt
import os
import shutil

# Log the TensorFlow version so the recorded outputs can be tied to a specific release.
print(f"TensorFlow Version: {tf.__version__}")

2025-08-02 11:32:36.646097: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-02 11:32:36.655913: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-08-02 11:32:36.667492: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-08-02 11:32:36.696688: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754127156.735486   69988 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754127156.73

TensorFlow Version: 2.19.0


In [None]:
# ## 2. Data Acquisition and Preprocessing

# We'll use `ImageDataGenerator` for efficient loading and augmentation of image data.
# Ensure your `../data/train` and `../data/test` directories (see TRAIN_DIR / TEST_DIR below)
# contain one subdirectory per class (e.g., `../data/train/class_a`, `../data/train/class_b`).

# Input resolution, mini-batch size and number of target classes.
IMG_HEIGHT, IMG_WIDTH = 128, 128
BATCH_SIZE = 32
NUM_CLASSES = 2  # Adjust to your dataset: 2 for binary, >2 for multi-class

# Dataset locations (relative paths).
TRAIN_DIR, TEST_DIR = '../data/train', '../data/test'

# Create dummy data directories and files for demonstration if they don't exist.
# In a real scenario, you would have your actual images here.
def _create_dummy_dataset(root_dir, file_suffix='', images_per_class=4):
    """Populate ``root_dir`` with ``class_a``/``class_b`` folders of random-noise PNGs.

    The placeholders are real image files on purpose: `flow_from_directory`
    only picks up files with image extensions, so empty `.txt` stubs would
    leave the generators with zero samples.

    Args:
        root_dir: Directory that receives the two class subdirectories.
        file_suffix: Text inserted into each file name after the class tag
            (e.g. ``'_test'`` gives ``dummy_a_test1.png``).
        images_per_class: Number of placeholder images written per class.
    """
    for class_dir, tag in (('class_a', 'a'), ('class_b', 'b')):
        target_dir = os.path.join(root_dir, class_dir)
        os.makedirs(target_dir, exist_ok=True)
        for idx in range(1, images_per_class + 1):
            # Float RGB noise in [0, 1) at the resolution the model expects.
            noise = np.random.rand(IMG_HEIGHT, IMG_WIDTH, 3)
            file_name = f'dummy_{tag}{file_suffix}{idx}.png'
            plt.imsave(os.path.join(target_dir, file_name), noise)


# Existing directories are never touched, so real datasets are left as they are.
if not os.path.exists(TRAIN_DIR):
    _create_dummy_dataset(TRAIN_DIR)
    print(f"Created dummy train directories: {TRAIN_DIR}")

if not os.path.exists(TEST_DIR):
    _create_dummy_dataset(TEST_DIR, file_suffix='_test')
    print(f"Created dummy test directories: {TEST_DIR}")


# Data augmentation and normalization for training data
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Only normalization for test data
test_datagen = ImageDataGenerator(rescale=1./255)

# Label encoding follows NUM_CLASSES: 0/1 labels for the binary setup, integer
# class indices otherwise, so the generators stay in step with the
# `NUM_CLASSES > 2` handling used when predictions are decoded.
CLASS_MODE = 'binary' if NUM_CLASSES <= 2 else 'sparse'

# Settings shared by both directory iterators.
flow_kwargs = dict(
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode=CLASS_MODE,
)

# Load images from directories
train_generator = train_datagen.flow_from_directory(TRAIN_DIR, **flow_kwargs)

test_generator = test_datagen.flow_from_directory(
    TEST_DIR,
    shuffle=False,  # keep file order so predictions line up with `test_generator.classes`
    **flow_kwargs
)

# Fail early with a readable message instead of an obscure error during
# training when a split is empty or does not match the configured class count.
for split_name, split_generator in (('train', train_generator), ('test', test_generator)):
    if split_generator.samples == 0:
        raise ValueError(
            f"No images found for the {split_name} split; add image files "
            "(e.g. PNG or JPEG) to its class subdirectories."
        )
    if split_generator.num_classes != NUM_CLASSES:
        raise ValueError(
            f"The {split_name} split has {split_generator.num_classes} class folder(s) "
            f"but NUM_CLASSES is {NUM_CLASSES}."
        )

# Get class names (list position equals the class index assigned by the generator)
class_names = list(train_generator.class_indices.keys())
print(f"Class names: {class_names}")

Found 19998 images belonging to 2 classes.
Found 5000 images belonging to 2 classes.
Class names: ['cats', 'dogs']


In [3]:
# ## 3. Model Creation

# Define a simple Convolutional Neural Network (CNN) model.

def create_model(input_shape, num_classes):
    """Build and compile a small CNN image classifier.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
        num_classes: Number of target classes. Values up to 2 produce a single
            sigmoid unit trained with binary cross-entropy (the original
            behaviour); larger values produce a ``num_classes``-way softmax
            trained with sparse categorical cross-entropy, which expects
            integer class labels.

    Returns:
        The compiled Sequential model, tracking accuracy as its metric.
    """
    if num_classes > 2:
        output_layer = Dense(num_classes, activation='softmax')
        loss = 'sparse_categorical_crossentropy'
    else:
        output_layer = Dense(1, activation='sigmoid')
        loss = 'binary_crossentropy'

    model = Sequential([
        # Declare the input explicitly; passing `input_shape` to the first
        # layer makes recent Keras versions emit a UserWarning.
        tf.keras.Input(shape=input_shape),
        Conv2D(32, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        output_layer,
    ])

    model.compile(optimizer='adam',
                  loss=loss,
                  metrics=['accuracy'])
    return model

# Instantiate the classifier for 3-channel inputs at the configured resolution
# and print its layer-by-layer summary.
model = create_model(
    input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
    num_classes=NUM_CLASSES,
)
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-08-02 11:32:40.568129: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [4]:
# ## 4. Model Training

# Train the model using the `train_generator`.

# Number of full passes over the training data; a real dataset may need more.
EPOCHS = 10

# NOTE(review): the test generator doubles as validation data, so the
# per-epoch validation metrics are computed on the same images that are used
# for the final test scores.
history = model.fit(train_generator, validation_data=test_generator, epochs=EPOCHS)

  self._warn_if_super_not_called()


Epoch 1/10


ValueError: Arguments `target` and `output` must have the same rank (ndim). Received: target.shape=(None,), output.shape=(None, 2)

In [None]:
# ## 5. Model Testing and Evaluation

# Evaluate the model on the test set and display various metrics.

print("\n--- Model Evaluation ---")
loss, accuracy = model.evaluate(test_generator)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

# Get predictions for the test set (the test generator does not shuffle, so
# rows follow the order of `test_generator.classes`)
Y_pred = model.predict(test_generator)

# Convert predictions to class labels. The decision is based on the model's
# actual output width rather than NUM_CLASSES, so a multi-unit head is always
# decoded with argmax and a single sigmoid unit with a 0.5 threshold; this
# keeps one predicted label per sample in both cases.
if Y_pred.ndim > 1 and Y_pred.shape[-1] > 1:
    y_pred_classes = np.argmax(Y_pred, axis=-1)
else:
    y_pred_classes = (Y_pred > 0.5).astype(int).flatten()

# Get true labels (integer class indices in generator order)
y_true_classes = test_generator.classes
true_labels = y_true_classes

# Map integer indices to class names for the report
true_labels_mapped = [class_names[label] for label in true_labels]
predicted_labels_mapped = [class_names[label] for label in y_pred_classes]


# Classification Report
print("\n--- Classification Report ---")
# `labels` is pinned to `class_names` so the report rows always line up with
# `target_names`; without it scikit-learn infers the label set from the data
# and raises when a class is missing from both the truth and the predictions.
# `zero_division=0` keeps the default value for undefined metrics but
# silences the accompanying warning.
print(classification_report(
    true_labels_mapped,
    predicted_labels_mapped,
    labels=class_names,
    target_names=class_names,
    zero_division=0,
))

# Confusion Matrix (rows: true class, columns: predicted class, in `class_names` order)
print("\n--- Confusion Matrix ---")
cm = confusion_matrix(true_labels_mapped, predicted_labels_mapped, labels=class_names)
print(cm)

# Training curves: accuracy on the left panel, loss on the right panel.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

acc_ax.plot(history.history['accuracy'], label='Training Accuracy')
acc_ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
acc_ax.set_title('Training and Validation Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

plt.show()

In [None]:
# ## 6. Model Saving

# Save the trained model for later use in the API.
# The Keras version bundled with this TensorFlow release (2.19) only accepts
# `.keras` (native format) or `.h5` file paths in `model.save`; the previous
# `.tf` extension is rejected with a ValueError. Anything that loads the model
# later must use this same path.
MODEL_PATH = '../models/image_classifier_model.keras'
# Create the target directory first so saving works in a fresh checkout.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
model.save(MODEL_PATH)
print(f"\nModel saved to: {MODEL_PATH}")

In [5]:
# ## 7. Example Prediction (using the saved model)

# Load the model and make a prediction on a dummy image.
loaded_model = load_model(MODEL_PATH)
print(f"\nModel loaded from: {MODEL_PATH}")

# Create a dummy image for prediction: a batch of one random-noise image with
# values in [0, 1), matching the rescaled inputs used during training.
dummy_image = np.random.rand(1, IMG_HEIGHT, IMG_WIDTH, 3).astype(np.float32)
dummy_prediction = loaded_model.predict(dummy_image)

print(f"Dummy image prediction raw output: {dummy_prediction}")

# Decode according to the loaded model's actual output width instead of the
# NUM_CLASSES global: a multi-unit head is decoded with argmax, a single
# sigmoid unit with a 0.5 threshold.
if dummy_prediction.shape[-1] > 1:
    predicted_class_index = int(np.argmax(dummy_prediction, axis=-1)[0])
else:
    predicted_class_index = int(dummy_prediction[0][0] > 0.5)

predicted_class_name = class_names[predicted_class_index]
print(f"Predicted class for dummy image: {predicted_class_name}")

# Clean up dummy data directories (optional)
# WARNING: TRAIN_DIR / TEST_DIR are only filled with dummy data when they did
# not exist beforehand. If they hold a real dataset, these calls delete it.
# shutil.rmtree(TRAIN_DIR, ignore_errors=True)
# shutil.rmtree(TEST_DIR, ignore_errors=True)
# print("Cleaned up dummy data directories.")

NameError: name 'MODEL_PATH' is not defined