# Prototype Drone Classification Model (ResNet50)
#### John D. Valencia & Max Blumenfeld - Senior Projects 2024

##### Importing Libraries

First, we'll import all the necessary libraries required for data handling, model building, training, and evaluation.

In [1]:
# Suppress TensorFlow warnings for cleaner output.
# TensorFlow's native (C++) log level is read from the environment, so it has to be
# set BEFORE `import tensorflow`; '2' filters INFO and WARNING messages.
# setdefault() keeps any value the user has already exported.
import os
os.environ.setdefault('TF_CPP_MIN_LOG_LEVEL', '2')

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
import datetime
import logging

# Set the Python-side TensorFlow logger to ERROR to minimize warnings
logging.getLogger('tensorflow').setLevel(logging.ERROR)

# Report how many GPUs TensorFlow can see (e.g. the Apple Silicon GPU);
# 0 means no GPU device is visible to TensorFlow.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


Num GPUs Available:  0


### Training Parameters


In [2]:
# Parameters
DATA_DIR = 'synthetic_data_split'  # Root folder holding the train/, val/ and test/ subfolders
IMAGE_SIZE = (224, 224)            # Size every image is resized to when loaded
BATCH_SIZE = 32
EPOCHS = 10
MODEL_NAME = 'ResNet50'
# Class names in label-index order. With labels='inferred' the class subfolders are
# sorted alphanumerically, so index 0 -> 'drone' and index 1 -> 'not_drone'
# (this is the order the data-loading cell prints for `class_names`).
CLASS_NAMES = ['drone', 'not_drone']

# Verify data directory exists
assert os.path.isdir(DATA_DIR), f"Data directory {DATA_DIR} does not exist."


### Loading and Preprocessing the Data

Load the training, validation, and test datasets using `image_dataset_from_directory`. We'll also apply caching, shuffling, and prefetching for an optimized data pipeline.

In [3]:
def load_datasets(data_dir, image_size=(224, 224), batch_size=32):
    """
    Build the training, validation, and test image datasets found under ``data_dir``.

    ``data_dir`` is expected to contain ``train``, ``val`` and ``test`` subfolders.
    Labels are inferred from the class subfolder names and encoded as binary labels.

    Args:
        data_dir: Root directory that holds the three split subfolders.
        image_size: Size images are resized to when read.
        batch_size: Number of images per batch.

    Returns:
        A ``(train_ds, val_ds, test_ds)`` tuple. The training and validation
        splits are shuffled with a fixed seed; the test split is not shuffled.
    """
    # (subfolder, progress message, shuffle?, shuffle seed)
    split_specs = (
        ('train', "Loading training dataset from:", True, 123),
        ('val', "Loading validation dataset from:", True, 123),
        ('test', "Loading test dataset from:", False, None),
    )

    loaded_splits = []
    for subfolder, message, do_shuffle, shuffle_seed in split_specs:
        split_dir = os.path.join(data_dir, subfolder)
        print(message, split_dir)
        loaded_splits.append(
            tf.keras.preprocessing.image_dataset_from_directory(
                split_dir,
                labels='inferred',
                label_mode='binary',
                batch_size=batch_size,
                image_size=image_size,
                shuffle=do_shuffle,
                seed=shuffle_seed,
            )
        )

    train_ds, val_ds, test_ds = loaded_splits
    return train_ds, val_ds, test_ds

def configure_datasets(train_ds, val_ds, test_ds, buffer_size=1000, AUTOTUNE=tf.data.AUTOTUNE):
    """
    Add caching and prefetching to each dataset; the training set is also shuffled.

    Args:
        train_ds, val_ds, test_ds: Datasets to wrap.
        buffer_size: Shuffle buffer size used for the training dataset only.
        AUTOTUNE: Prefetch buffer size passed to ``prefetch``.

    Returns:
        The ``(train_ds, val_ds, test_ds)`` tuple of wrapped datasets.
    """
    def _cache_then_prefetch(dataset, shuffle=False):
        pipeline = dataset.cache()
        if shuffle:
            # Shuffles dataset elements; if the input is already batched
            # (presumably the case for datasets from load_datasets), whole
            # batches are reordered rather than individual images.
            pipeline = pipeline.shuffle(buffer_size)
        return pipeline.prefetch(buffer_size=AUTOTUNE)

    return (
        _cache_then_prefetch(train_ds, shuffle=True),
        _cache_then_prefetch(val_ds),
        _cache_then_prefetch(test_ds),
    )

# Load the raw (unconfigured) datasets
raw_train_ds, raw_val_ds, raw_test_ds = load_datasets(
    DATA_DIR,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
)

# Retrieve class names from the raw training dataset, before the datasets are
# wrapped with cache/shuffle/prefetch.
class_names = raw_train_ds.class_names
print("Class names:", class_names)

# Configure the datasets for performance
train_ds, val_ds, test_ds = configure_datasets(raw_train_ds, raw_val_ds, raw_test_ds)


Loading training dataset from: synthetic_data_split/train
Found 1451 files belonging to 2 classes.
Loading validation dataset from: synthetic_data_split/val
Found 361 files belonging to 2 classes.
Loading test dataset from: synthetic_data_split/test
Found 366 files belonging to 2 classes.
Class names: ['drone', 'not_drone']


### Building the ResNet50 Model

Construct the ResNet50-based model for binary classification.

In [4]:
def build_resnet_model(input_shape=(224, 224, 3)):
    """
    Create and compile a binary classifier on top of a frozen ResNet50 backbone.

    Pipeline: input -> random augmentation -> ResNet50 preprocessing ->
    ResNet50 (ImageNet weights, no top, frozen) -> global average pooling ->
    dropout (0.4) -> single sigmoid unit.

    Args:
        input_shape: Shape of one input image, channels last.

    Returns:
        A compiled ``keras.Model`` using the Adam optimizer with default
        settings, binary cross-entropy loss, and the accuracy metric.
    """
    # Pretrained feature extractor; frozen so only the new head is trained.
    backbone = keras.applications.ResNet50(
        include_top=False,
        weights='imagenet',
        input_shape=input_shape,
    )
    backbone.trainable = False

    # Random image augmentations applied in front of the backbone.
    augmentation_layers = [
        layers.RandomFlip("horizontal_and_vertical"),
        layers.RandomRotation(0.2),
        layers.RandomZoom(0.2),
        layers.RandomContrast(0.2),
        layers.RandomBrightness(0.2),
    ]
    augment = keras.Sequential(augmentation_layers)

    image_input = keras.Input(shape=input_shape)
    features = augment(image_input)
    features = keras.applications.resnet50.preprocess_input(features)
    # training=False is passed explicitly when calling the frozen backbone.
    features = backbone(features, training=False)
    features = layers.GlobalAveragePooling2D()(features)
    features = layers.Dropout(0.4)(features)
    probability = layers.Dense(1, activation='sigmoid')(features)

    classifier = keras.Model(image_input, probability)
    classifier.compile(
        optimizer=keras.optimizers.Adam(),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# Build the ResNet50 model for (height, width, 3) inputs and print its layer summary
INPUT_SHAPE = (*IMAGE_SIZE, 3)
model = build_resnet_model(input_shape=INPUT_SHAPE)
model.summary()


### Define Training Callbacks

In [5]:
# Set up callbacks

# Stop training once val_loss has not improved for 3 epochs, and roll the model
# back to the weights from its best epoch.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Write the model to disk only when val_loss reaches a new minimum.
best_checkpoint = keras.callbacks.ModelCheckpoint(
    filepath='../models/resnet50_best_model.keras',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)

callbacks = [early_stopping, best_checkpoint]


### Training the Model

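Train the ResNet50 model on the training dataset and validate it on the validation dataset. Note: the cell below is currently commented out; training is actually performed inside `run_single_trial()` in the Trials section.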

In [6]:
# # Train the model
# history = model.fit(
#     train_ds,
#     validation_data=val_ds,
#     epochs=EPOCHS,
#     callbacks=callbacks
# )

### Visualizing Training History

In [7]:
# def plot_training_history(history, model_name, dataset_type):
#     """
#     Plots training and validation accuracy and loss.
#     """
#     acc = history.history['accuracy']
#     val_acc = history.history['val_accuracy']
    
#     loss = history.history['loss']
#     val_loss = history.history['val_loss']
    
#     epochs_range = range(len(acc))
    
#     plt.figure(figsize=(14, 6))
    
#     # Accuracy Plot
#     plt.subplot(1, 2, 1)
#     plt.plot(epochs_range, acc, label='Training Accuracy')
#     plt.plot(epochs_range, val_acc, label='Validation Accuracy')
#     plt.legend(loc='lower right')
#     plt.title(f'{model_name} - {dataset_type} - Training and Validation Accuracy')
    
#     # Loss Plot
#     plt.subplot(1, 2, 2)
#     plt.plot(epochs_range, loss, label='Training Loss')
#     plt.plot(epochs_range, val_loss, label='Validation Loss')
#     plt.legend(loc='upper right')
#     plt.title(f'{model_name} - {dataset_type} - Training and Validation Loss')
    
#     plt.show()

# # Plot training history
# plot_training_history(history, MODEL_NAME, 'Real_Data')


### Evaluating the Model

In [8]:
# def evaluate_model(model, test_ds, class_names):
#     """
#     Evaluates the model on the test dataset and prints classification metrics.
#     """
#     # Get predictions and true labels
#     y_true = []
#     y_pred = []
    
#     for images, labels in test_ds:
#         preds = model.predict(images)
#         y_true.extend(labels.numpy())
#         y_pred.extend((preds > 0.5).astype(int).flatten())
    
#     # Classification Report
#     print("Classification Report:")
#     print(classification_report(y_true, y_pred, target_names=class_names))
    
#     # Confusion Matrix
#     cm = confusion_matrix(y_true, y_pred)
#     print("Confusion Matrix:")
#     print(cm)
    
#     # Plot Confusion Matrix
#     plt.figure(figsize=(6, 6))
#     plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
#     plt.title("Confusion Matrix")
#     plt.colorbar()
#     tick_marks = np.arange(len(class_names))
#     plt.xticks(tick_marks, class_names, rotation=45)
#     plt.yticks(tick_marks, class_names)
    
#     # Normalize the confusion matrix
#     cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    
#     # Add text annotations
#     thresh = cm.max() / 2.
#     for i in range(cm.shape[0]):
#         for j in range(cm.shape[1]):
#             plt.text(j, i, f"{cm[i, j]} ({cm_normalized[i, j]:.2f})",
#                      horizontalalignment="center",
#                      color="white" if cm[i, j] > thresh else "black")
    
#     plt.tight_layout()
#     plt.ylabel('True label')
#     plt.xlabel('Predicted label')
#     plt.show()

# # Evaluate the model
# evaluate_model(model, test_ds, class_names)


# Repeat Trials

### Trials

In [9]:
def run_single_trial():
    """
    Run one full train/evaluate cycle and return its metrics.

    Reads the notebook-level settings ``DATA_DIR``, ``IMAGE_SIZE``, ``BATCH_SIZE``,
    ``EPOCHS`` and the ``callbacks`` list. A new model is built and trained on
    freshly loaded datasets, then scored on the unshuffled test split.

    Returns:
        dict with keys:
            'history'          - per-epoch metrics from ``model.fit``
            'confusion_matrix' - confusion matrix over the test split
            'accuracy'         - fraction of test images classified correctly
            'precision', 'recall', 'f1' - support-weighted averages
    """
    # Clear session and memory left over from earlier trials
    tf.keras.backend.clear_session()

    # Load fresh datasets. Keep the raw handles: class_names is read from the
    # raw training dataset, and the raw (unshuffled, uncached) test split is
    # what gets evaluated, so it does not have to be loaded a second time.
    raw_train_ds, raw_val_ds, raw_test_ds = load_datasets(
        DATA_DIR, image_size=IMAGE_SIZE, batch_size=BATCH_SIZE
    )
    trial_class_names = raw_train_ds.class_names
    train_ds, val_ds, _ = configure_datasets(raw_train_ds, raw_val_ds, raw_test_ds)

    # Build and train model
    # NOTE(review): `callbacks` is a shared notebook-level list, so the same
    # ModelCheckpoint/EarlyStopping instances are reused by every trial -
    # confirm that carrying checkpoint state across trials is intended.
    model = build_resnet_model(input_shape=IMAGE_SIZE + (3,))
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=EPOCHS,
        callbacks=callbacks
    )

    # The test split is loaded with shuffle=False, so iterating it for labels and
    # then predicting over it visits the images in the same order.
    y_true = np.concatenate(
        [labels.numpy() for _, labels in raw_test_ds]
    ).ravel().astype(int)
    # One predict() call over the whole dataset instead of one call per batch.
    y_prob = model.predict(raw_test_ds, verbose=0).ravel()
    y_pred = (y_prob > 0.5).astype(int)

    # Calculate metrics. Passing the label ids explicitly keeps the matrix and
    # report aligned with the class names even if a class never occurs in
    # y_true / y_pred.
    label_ids = list(range(len(trial_class_names)))
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)
    report = classification_report(
        y_true,
        y_pred,
        labels=label_ids,
        target_names=trial_class_names,
        output_dict=True
    )

    return {
        'history': history.history,
        'confusion_matrix': cm,
        'accuracy': float(np.mean(y_true == y_pred)),
        'precision': report['weighted avg']['precision'],
        'recall': report['weighted avg']['recall'],
        'f1': report['weighted avg']['f1-score']
    }

In [10]:
# Keep the metrics of this (multi-minute) run in a variable so later cells can
# reuse them without retraining; the bare name on the last line still displays them.
trial_results = run_single_trial()
trial_results

Loading training dataset from: synthetic_data_split/train
Found 1451 files belonging to 2 classes.
Loading validation dataset from: synthetic_data_split/val
Found 361 files belonging to 2 classes.
Loading test dataset from: synthetic_data_split/test
Found 366 files belonging to 2 classes.
Epoch 1/10
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 657ms/step - accuracy: 0.6619 - loss: 0.6712 - val_accuracy: 0.9474 - val_loss: 0.1683
Epoch 2/10
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 641ms/step - accuracy: 0.9179 - loss: 0.2121 - val_accuracy: 0.9917 - val_loss: 0.0853
Epoch 3/10
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 631ms/step - accuracy: 0.9498 - loss: 0.1508 - val_accuracy: 0.9806 - val_loss: 0.0860
Epoch 4/10
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 630ms/step - accuracy: 0.9453 - loss: 0.1447 - val_accuracy: 0.9945 - val_loss: 0.0554
Epoch 5/10
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

2024-11-29 12:03:10.272221: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


{'history': {'accuracy': [0.7780840992927551,
   0.9221226572990417,
   0.9510682225227356,
   0.9510682225227356,
   0.9600275754928589,
   0.9627842903137207,
   0.9634734392166138,
   0.9614059329032898,
   0.9641626477241516,
   0.9662302136421204],
  'loss': [0.4720219671726227,
   0.20292912423610687,
   0.14496254920959473,
   0.1320537030696869,
   0.1174304187297821,
   0.10140539705753326,
   0.09988231211900711,
   0.09737716615200043,
   0.0925494134426117,
   0.08694682270288467],
  'val_accuracy': [0.9473684430122375,
   0.9916897416114807,
   0.9806094169616699,
   0.9944598078727722,
   0.9944598078727722,
   0.9972299337387085,
   0.9972299337387085,
   0.9944598078727722,
   0.9944598078727722,
   0.9972299337387085],
  'val_loss': [0.1683012843132019,
   0.08534508943557739,
   0.08595381677150726,
   0.055442873388528824,
   0.04778416082262993,
   0.029907332733273506,
   0.02890375256538391,
   0.02695157565176487,
   0.04171917587518692,
   0.026214782148599625]}

### Saving the Model

In [None]:
# Save the trained model in the native Keras (.keras) format.
#
# The notebook-level `model` created in the build cell is never fitted: training
# happens on a separate model that is local to run_single_trial(). Saving `model`
# would therefore write a classifier with an untrained head. Instead, publish the
# best checkpoint that the ModelCheckpoint callback wrote during training.
BEST_CHECKPOINT_PATH = '../models/resnet50_best_model.keras'
FINAL_MODEL_PATH = '../models/resnet50_model_2.keras'

assert os.path.isfile(BEST_CHECKPOINT_PATH), (
    f"No checkpoint found at {BEST_CHECKPOINT_PATH}; run a training trial first."
)
tf.io.gfile.copy(BEST_CHECKPOINT_PATH, FINAL_MODEL_PATH, overwrite=True)
print(f"Model training and evaluation completed. Model saved to '{FINAL_MODEL_PATH}'.")