In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
import numpy as np
import matplotlib.pyplot as plt
import time
import os
import pathlib
import random
from sklearn.metrics import confusion_matrix, classification_report



2025-04-26 15:03:15.674448: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2025-04-26 15:03:15.674513: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


# SETTING HYPERPARAMETERS

In [2]:
# Set random seed for reproducibility
tf.random.set_seed(42)
np.random.seed(42)

# Configuration parameters
BATCH_SIZE = 32
IMG_SIZE = 224
NUM_CLASSES = 2  # Binary classification (human or no human)
EPOCHS = 15
LEARNING_RATE = 0.001
VALIDATION_SPLIT = 0.2
DATA_DIR = "./human detection dataset"  # Path to your dataset


# DATASET HELPER FUNCTIONS

In [14]:
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.2),
    layers.RandomZoom(0.2),
    layers.RandomTranslation(0.1, 0.1),
])

def preprocess_image(image_path, label):
    """Load and preprocess a single image."""
    # Read image file
    img = tf.io.read_file(image_path)
    img = tf.image.decode_image(img, channels=3, expand_animations=False)
    
    # Convert image to float and normalize to [0, 1]
    img = tf.cast(img, tf.float32) / 255.0
    
    # Resize to target size
    img = tf.image.resize(img, [IMG_SIZE, IMG_SIZE])
    
    # Ensure the image has 3 channels
    img = tf.ensure_shape(img, [IMG_SIZE, IMG_SIZE, 3])
    
    # One-hot encode the label
    label = tf.one_hot(label, NUM_CLASSES)
    
    return img, label

def create_dataset(image_paths, labels, is_training=False):
    """Create a TensorFlow dataset from image paths and labels."""
    # Create dataset from paths and labels
    ds = tf.data.Dataset.from_tensor_slices(image_paths, labels)
    
    # Load and preprocess images
    ds = ds.map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
    
    # Apply data augmentation if training
    if is_training:
        ds = ds.map(lambda x, y: (data_augmentation(x), y), num_parallel_calls=tf.data.AUTOTUNE)
    
    # Batch and prefetch
    ds = ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
    
    return ds



class_0_dir = os.path.join(DATA_DIR, "0" )
class_1_dir = os.path.join(DATA_DIR, "1" ) 

# Count files in each class
class_0_files = os.listdir(class_0_dir)
class_1_files = os.listdir(class_1_dir)

print(f"Found {len(class_0_files)} images without humans")
print(f"Found {len(class_1_files)} images with humans")

# Create image paths and labels lists
image_paths = []
labels = []

# Add class 0 (no human) images
for file_path in class_0_files:
    image_paths.append(str(os.path.join(DATA_DIR, "0", file_path)))
    labels.append(0)

# Add class 1 (human) images
for file_path in class_1_files:
    image_paths.append(str(os.path.join(DATA_DIR, "1", file_path)))
    labels.append(1)

# Shuffle the data
temp = list(zip(image_paths, labels))
random.shuffle(temp)
image_paths, labels = zip(*temp)

# Convert labels to numpy arrays
labels = np.array(labels)

# Split into train and validation sets
total_size = len(image_paths)
val_size = int(total_size * VALIDATION_SPLIT)
train_size = total_size - val_size

train_paths = image_paths[:train_size]
train_labels = labels[:train_size]
val_paths = image_paths[train_size:]
val_labels = labels[train_size:]

print(f"Training set: {len(train_paths)} images")
print(f"Validation set: {len(val_paths)} images")

print(train_paths)
print(train_labels)

# Create TensorFlow datasets
train_ds = create_dataset(train_paths, train_labels, is_training=True)
val_ds = create_dataset(val_paths, val_labels, is_training=False)




Found 362 images without humans
Found 559 images with humans
Training set: 737 images
Validation set: 184 images
('./human detection dataset/1/310.png', './human detection dataset/1/240.png', './human detection dataset/1/167.png', './human detection dataset/1/260.png', './human detection dataset/1/356.png', './human detection dataset/1/151.png', './human detection dataset/1/262.png', './human detection dataset/1/177.png', './human detection dataset/0/224.png', './human detection dataset/0/49.png', './human detection dataset/1/536.png', './human detection dataset/1/352.png', './human detection dataset/1/542.png', './human detection dataset/0/290.png', './human detection dataset/0/65.png', './human detection dataset/1/385.png', './human detection dataset/1/76.png', './human detection dataset/0/98.png', './human detection dataset/0/72.png', './human detection dataset/1/538.png', './human detection dataset/1/215.png', './human detection dataset/1/443.png', './human detection dataset/1/312.

ValueError: Unbatching a tensor is only supported for rank >= 1

# BUILD MOBILENET V2 MODEL

In [None]:
def build_mobilenetv2_model():
    """Build model using TensorFlow's pre-implemented MobileNetV2."""
    print("Building MobileNetV2 model for human detection...")
    
    # Create the base model from the pre-trained MobileNetV2
    base_model = MobileNetV2(
        input_shape=(IMG_SIZE, IMG_SIZE, 3),
        include_top=False,
        weights='imagenet'
    )
    
    # First, freeze the base model
    base_model.trainable = False
    
    # Create new model on top
    inputs = tf.keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    x = base_model(inputs, training=False)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.2)(x)
    outputs = layers.Dense(NUM_CLASSES, activation='softmax')(x)
    
    model = tf.keras.Model(inputs, outputs, name='MobileNetV2_HumanDetection')
    
    return model, base_model

def unfreeze_model(model, base_model):
    """Unfreeze the base model for fine-tuning."""
    # Unfreeze the base model
    base_model.trainable = True
    
    # Freeze the first 100 layers (early layers)
    for layer in base_model.layers[:100]:
        layer.trainable = False
    
    # Recompile the model with a lower learning rate
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    
    return model




# TRAIN THE MODEL

In [None]:
def train_model(model, train_ds, val_ds, base_model):
    """Train the model in two phases: feature extraction and fine-tuning."""
    print("Training the model - Phase 1: Feature Extraction...")
    
    # Compile the model
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    
    # Define callbacks for phase 1
    callbacks_phase1 = [
        tf.keras.callbacks.ModelCheckpoint(
            filepath='mobilenetv2_human_detection_phase1.h5',
            monitor='val_accuracy',
            save_best_only=True,
            verbose=1
        ),
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=3,
            restore_best_weights=True,
            verbose=1
        )
    ]
    
    # Phase 1: Train with frozen base model (feature extraction)
    history1 = model.fit(
        train_ds,
        epochs=5,  # Fewer epochs for initial phase
        validation_data=val_ds,
        callbacks=callbacks_phase1
    )
    
    print("\nTraining the model - Phase 2: Fine-tuning...")
    
    # Phase 2: Fine-tuning - unfreeze the model and train with a lower learning rate
    model = unfreeze_model(model, base_model)
    
    # Define callbacks for phase 2
    callbacks_phase2 = [
        tf.keras.callbacks.ModelCheckpoint(
            filepath='mobilenetv2_human_detection_best.h5',
            monitor='val_accuracy',
            save_best_only=True,
            verbose=1
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=3,
            min_lr=1e-7,
            verbose=1
        ),
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True,
            verbose=1
        )
    ]
    
    # Phase 2: Train with unfrozen base model (fine-tuning)
    history2 = model.fit(
        train_ds,
        epochs=EPOCHS-5,  # Remaining epochs for fine-tuning
        validation_data=val_ds,
        callbacks=callbacks_phase2
    )
    
    # Combine histories
    combined_history = {}
    for k in history1.history.keys():
        combined_history[k] = history1.history[k] + history2.history[k]
    
    return tf.keras.callbacks.History(), combined_history

In [None]:
def plot_training_history(history):
    """Plot the training and validation accuracy and loss."""
    print("Plotting training history...")
    
    plt.figure(figsize=(12, 4))
    
    # Plot training & validation accuracy
    plt.subplot(1, 2, 1)
    plt.plot(history['accuracy'])
    plt.plot(history['val_accuracy'])
    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='lower right')
    
    # Plot training & validation loss
    plt.subplot(1, 2, 2)
    plt.plot(history['loss'])
    plt.plot(history['val_loss'])
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper right')
    
    plt.tight_layout()
    plt.savefig('mobilenetv2_human_detection_training_history.png')
    plt.show()

def visualize_activations(model, val_ds):
    """Visualize intermediate activations to see what the model focuses on."""
    # Get an image from the validation set
    for images, labels in val_ds.take(1):
        sample_image = images[0]
        sample_label = np.argmax(labels[0])
        break
    
    # Create a modified model that outputs intermediate activations
    layer_outputs = [layer.output for layer in model.layers[1].layers[50:60]]  # Get some intermediate layer outputs
    activation_model = tf.keras.Model(inputs=model.layers[1].input, outputs=layer_outputs)
    
    # Get activations
    activations = activation_model(tf.expand_dims(sample_image, 0))
    
    # Display the original image
    plt.figure(figsize=(12, 12))
    plt.subplot(3, 4, 1)
    
    # Convert from preprocessing format back to display format
    display_image = sample_image.numpy()
    display_image = ((display_image * 0.5) + 0.5) * 255  # Reverse preprocessing
    display_image = np.clip(display_image, 0, 255).astype('uint8')
    
    plt.imshow(display_image)
    plt.title(f"Original Image (Class: {'Human' if sample_label == 1 else 'No Human'})")
    plt.axis('off')
    
    # Display some activation maps
    for i, activation in enumerate(activations[:9]):
        plt.subplot(3, 4, i+2)
        plt.imshow(activation[0, :, :, 0], cmap='viridis')
        plt.title(f"Activation {i+1}")
        plt.axis('off')
    
    plt.tight_layout()
    plt.savefig('mobilenetv2_human_detection_activations.png')
    plt.show()

def evaluate_model(model, val_ds):
    """Evaluate the model on validation data."""
    print("Evaluating the model...")
    
    # Evaluate on the validation set
    val_loss, val_accuracy = model.evaluate(val_ds)
    print(f"Validation accuracy: {val_accuracy:.4f}")
    print(f"Validation loss: {val_loss:.4f}")
    
    # Get a batch of validation data
    all_images = []
    all_labels = []
    all_predictions = []
    
    # Collect predictions on the validation set
    for images, labels in val_ds:
        predictions = model.predict(images)
        all_images.extend(images.numpy())
        all_labels.extend(np.argmax(labels.numpy(), axis=1))
        all_predictions.extend(np.argmax(predictions, axis=1))
    
    # Convert lists to numpy arrays
    all_images = np.array(all_images)
    all_labels = np.array(all_labels)
    all_predictions = np.array(all_predictions)
    
    # Plot some predictions
    plt.figure(figsize=(10, 10))
    for i in range(min(9, len(all_images))):
        plt.subplot(3, 3, i+1)
        
        # Convert from preprocessing format back to display format
        display_image = all_images[i]
        display_image = ((display_image * 0.5) + 0.5) * 255  # Reverse preprocessing
        display_image = np.clip(display_image, 0, 255).astype('uint8')
        
        plt.imshow(display_image)
        color = 'green' if all_predictions[i] == all_labels[i] else 'red'
        title = f"Pred: {'Human' if all_predictions[i] == 1 else 'No Human'}\nTrue: {'Human' if all_labels[i] == 1 else 'No Human'}"
        plt.title(title, color=color)
        plt.axis('off')
    
    plt.tight_layout()
    plt.savefig('mobilenetv2_human_detection_predictions.png')
    plt.show()
    
    # Calculate confusion matrix
    cm = confusion_matrix(all_labels, all_predictions)
    
    # Plot confusion matrix
    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title('Confusion Matrix')
    plt.colorbar()
    tick_marks = np.arange(NUM_CLASSES)
    plt.xticks(tick_marks, ['No Human', 'Human'], rotation=45)
    plt.yticks(tick_marks, ['No Human', 'Human'])
    
    # Add text annotations to confusion matrix
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            plt.text(j, i, format(cm[i, j], 'd'),
                    horizontalalignment="center",
                    color="white" if cm[i, j] > thresh else "black")
    
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.savefig('mobilenetv2_human_detection_confusion_matrix.png')
    plt.show()
    
    # Print classification report
    print("\nClassification Report:")
    print(classification_report(all_labels, all_predictions, target_names=['No Human', 'Human']))


In [None]:
def save_model_for_deployment(model):
    """Save the model for deployment."""
    print("Saving model for deployment...")
    
    # Save the entire model
    model.save('mobilenetv2_human_detection_model')
    
    # Save as TensorFlow Lite model
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    tflite_model = converter.convert()
    
    with open('mobilenetv2_human_detection_model.tflite', 'wb') as f:
        f.write(tflite_model)
    
    # Create TFLite model with quantization for smaller size
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.target_spec.supported_types = [tf.float16]
    tflite_fp16_model = converter.convert()
    
    with open('mobilenetv2_human_detection_model_quantized.tflite', 'wb') as f:
        f.write(tflite_fp16_model)
    
    print("Model saved for deployment.")

def create_inference_function(model):
    """Create a function for easy inference on new images."""
    def predict_human_in_image(image_path):
        # Read image file
        img = tf.io.read_file(image_path)
        img = tf.image.decode_image(img, channels=3, expand_animations=False)
        
        # Resize to target size
        img = tf.image.resize(img, [IMG_SIZE, IMG_SIZE])
        
        # Apply MobileNetV2 preprocessing
        img = preprocess_input(img)
        
        # Add batch dimension
        img = tf.expand_dims(img, 0)
        
        # Make prediction
        prediction = model.predict(img)
        predicted_class = np.argmax(prediction, axis=1)[0]
        probability = prediction[0][predicted_class] * 100
        
        # Create result dictionary
        result = {
            'prediction': 'Human' if predicted_class == 1 else 'No Human',
            'probability': float(probability),
            'is_human': bool(predicted_class == 1)
        }
        
        # Display the image with prediction
        plt.figure(figsize=(6, 6))
        
        # Convert from preprocessing format back to display format
        display_img = img[0].numpy()
        display_img = ((display_img * 0.5) + 0.5) * 255  # Reverse preprocessing
        display_img = np.clip(display_img, 0, 255).astype('uint8')
        
        plt.imshow(display_img)
        plt.title(f"Prediction: {result['prediction']} ({result['probability']:.2f}%)")
        plt.axis('off')
        plt.show()
        
        return result
    
    return predict_human_in_image

def main():
    """Main function to run the complete pipeline."""
    # Check for GPU
    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        print(f"Available GPUs: {len(gpus)}")
        # Limit memory growth to avoid OOM errors
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    else:
        print("No GPU found, running on CPU")
    
    # Load and prepare data
    train_ds, val_ds = load_and_prepare_data()
    
    # Build model
    model, base_model = build_mobilenetv2_model()
    model.summary()
    
    # Train model
    history_obj, history_dict = train_model(model, train_ds, val_ds, base_model)
    
    # Plot training history
    plot_training_history(history_dict)
    
    # Visualize activations
    visualize_activations(model, val_ds)
    
    # Evaluate model
    evaluate_model(model, val_ds)
    
    # Save model for deployment
    save_model_for_deployment(model)
    
    # Create and test inference function
    predict_human = create_inference_function(model)
    
    print("\nModel training and evaluation complete!")
    print("You can now use the model for inference on new images.")
    print("Example usage:")
    print("predict_human('path/to/your/image.jpg')")
    
    return predict_human

if __name__ == "__main__":
    predict_human = main()
    
    # Example of how to use the inference function (uncomment to test)
    # test_image = "./path/to/test/image.jpg"
    # if os.path.exists(test_image):
    #     result = predict_human(test_image)
    #     print(f"\nTest prediction on {test_image}:")
    #     print(f"Prediction: {result['prediction']}")
    #     print(f"Probability: {result['probability']:.2f}%")
    #     print(f"Is human present: {result['is_human']}")