In [2]:
import os
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2
import numpy as np
import collections

### Step 1: Load Pretrained Model

In [2]:
# Load Pretrained Model
base_model = MobileNetV2(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
base_model.trainable = False  # Freeze the base model initially

# Add task-specific layers
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(4, activation='softmax')  # Replace 4 with the number of your classes
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

### Step 2: Prepare Your Dataset

In [3]:
# Set the base directory
base_dir = os.getcwd()  # Get the current working directory

# Construct the path to the dataset
dataset_dir = os.path.join(base_dir, 'dataset')

# Construct the path to the training directory
train_dir = os.path.join(dataset_dir, 'train')

# Construct the path to the validation directory
val_dir = os.path.join(dataset_dir, 'val')

# Data augmentation for training
data_augmentation = ImageDataGenerator(
    rescale=1.0 / 255.0,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest"
)

# No augmentation for validation, just rescaling
val_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

# Prepare generators
train_generator = data_augmentation.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode="categorical"
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode="categorical"
)

Found 3919 images belonging to 4 classes.
Found 395 images belonging to 4 classes.


### Step 3: Train the Model

In [6]:
# Train the model with early stopping
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the new layers initially using train_generator and val_generator
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=15,
    callbacks=[early_stopping],
    verbose=1
)

# Fine-tune the entire model
base_model.trainable = True  # Unfreeze the base layers

# Recompile with a lower learning rate
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train the entire model
history_fine = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=15,
    callbacks=[early_stopping],
    verbose=1
)

# Save the trained model
model.save('best_model.keras')


Epoch 1/15
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 454ms/step - accuracy: 0.8113 - loss: 0.5115 - val_accuracy: 0.8430 - val_loss: 0.4426
Epoch 2/15
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 480ms/step - accuracy: 0.8135 - loss: 0.4966 - val_accuracy: 0.8557 - val_loss: 0.4614
Epoch 3/15
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 538ms/step - accuracy: 0.8289 - loss: 0.4476 - val_accuracy: 0.8633 - val_loss: 0.4555
Epoch 4/15
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 503ms/step - accuracy: 0.8396 - loss: 0.4298 - val_accuracy: 0.8886 - val_loss: 0.4444
Epoch 5/15
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 487ms/step - accuracy: 0.8463 - loss: 0.4073 - val_accuracy: 0.8785 - val_loss: 0.4193
Epoch 6/15
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 497ms/step - accuracy: 0.8596 - loss: 0.3845 - val_accuracy: 0.8759 - val_loss: 0.4335
Epoch 7/15

### Step 4: Real-Time Webcam Prediction

In [4]:
# Load the saved model
model = tf.keras.models.load_model('best_fine_tuned_model.keras')

# Class labels for predictions
class_labels = ['Headtop', 'Helmet', 'Hoodie', 'No headwear']

# Open the webcam
cap = cv2.VideoCapture(0)  # 0 is the default camera

# Define the target size for the images
target_size = (224, 224)

# Initialize a buffer for smoothing predictions
predictions_buffer = collections.deque(maxlen=10)

print("Press 'q' to exit the video feed.")

try:
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            print("Failed to capture video frame. Exiting...")
            break

        # Preprocess the frame
        resized_frame = cv2.resize(frame, target_size)  # Resize to match model input
        img_array = np.expand_dims(resized_frame, axis=0) / 255.0  # Normalize and add batch dimension

        # Make prediction
        predictions = model.predict(img_array, verbose=0)
        class_index = np.argmax(predictions[0])
        prediction_label = class_labels[class_index]
        confidence = predictions[0][class_index] * 100

        # Add prediction to the buffer
        predictions_buffer.append(class_index)

        # Smooth predictions using majority voting
        smoothed_prediction = max(set(predictions_buffer), key=predictions_buffer.count)
        smoothed_label = class_labels[smoothed_prediction]

        # Display the prediction on the frame
        cv2.putText(frame, f"{smoothed_label} ({confidence:.2f}%)", 
                    (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (75, 75, 75), 2)

        # Show the frame
        cv2.imshow('Hat Detection', frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

except KeyboardInterrupt:
    print("\nProgram interrupted by the user. Exiting...")

finally:
    # Release the webcam and close windows
    cap.release()
    cv2.destroyAllWindows()
    print("Resources released, video window closed.")

Press 'q' to exit the video feed.
Resources released, video window closed.
