# In [None]:
import os
import shutil
import xml.etree.ElementTree as ET
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Paths
# Root of the extracted Pascal VOC download; every other location derives from it.
dataset_path = r'C:\Users\ACER\Downloads\pascal_voc_dataset'

# VOC2012 keeps the XML annotations and the JPEG images side by side.
voc2012_root = os.path.join(dataset_path, 'VOCdevkit', 'VOC2012')
voc2012_annotations = os.path.join(voc2012_root, 'Annotations')
voc2012_images = os.path.join(voc2012_root, 'JPEGImages')

# Output roots for the per-class image folders.
train_dir, val_dir = (os.path.join(dataset_path, split) for split in ('train', 'val'))

# Organize images into train and validation directories
def organize_images(annotation_dir, image_dir, output_dir):
    """Copy annotated images into one sub-directory per object class.

    Every ``.xml`` file in ``annotation_dir`` is parsed. The image named by
    its ``<filename>`` element is copied from ``image_dir`` into
    ``output_dir/<class>`` once for each distinct ``<object>/<name>`` value
    found in that annotation, so an image containing several classes ends up
    in several class directories.

    Annotations without a usable ``<filename>``, or whose image file does not
    exist, are skipped *before* any directory is created, so no empty class
    folders are left behind.

    Args:
        annotation_dir: Directory holding the XML annotation files.
        image_dir: Directory holding the images the annotations refer to.
        output_dir: Root directory that receives the per-class folders.

    Raises:
        FileNotFoundError: If ``annotation_dir`` does not exist.
        xml.etree.ElementTree.ParseError: If an annotation is not valid XML.
    """
    for entry in os.listdir(annotation_dir):
        if not entry.endswith('.xml'):
            continue

        root = ET.parse(os.path.join(annotation_dir, entry)).getroot()

        # findtext() returns None for a missing element; treat that, and an
        # empty element, as "no image" instead of crashing on ``.text``.
        image_filename = (root.findtext('filename') or '').strip()
        if not image_filename:
            continue

        image_path = os.path.join(image_dir, image_filename)
        if not os.path.isfile(image_path):
            continue

        # Collect each class once: an image with three dogs only needs to be
        # copied into ``dog`` a single time.
        class_names = set()
        for obj in root.findall('object'):
            class_name = (obj.findtext('name') or '').strip()
            if class_name:
                class_names.add(class_name)

        for class_name in class_names:
            class_dir = os.path.join(output_dir, class_name)
            os.makedirs(class_dir, exist_ok=True)
            shutil.copy(image_path, class_dir)

# Organize the images once, then split them at load time.
# Filling a second directory from the very same annotations would make the
# validation data identical to the training data, so val_loss / val_accuracy
# (and EarlyStopping, which monitors val_loss) would carry no information.
# Instead a fixed fraction of every class folder is held out for validation;
# val_dir is therefore no longer populated or read here.
# NOTE(review): an image that contains several classes is copied into several
# class folders, so it can still land in both subsets under different labels.
organize_images(voc2012_annotations, voc2012_images, train_dir)

# Data Generator
# Both generators must use the same validation_split so that their
# 'training' and 'validation' subsets are complementary.
validation_fraction = 0.2
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=validation_fraction,
)
# Validation images are only rescaled, never augmented.
val_datagen = ImageDataGenerator(rescale=1./255, validation_split=validation_fraction)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(64, 64),
    batch_size=128,
    class_mode='categorical',
    subset='training',
)
val_generator = val_datagen.flow_from_directory(
    train_dir,
    target_size=(64, 64),
    batch_size=128,
    class_mode='categorical',
    subset='validation',
)

# Model Architecture
# Three convolution -> batch-norm -> max-pool stages (32, 64, 128 filters)
# extract features from 64x64 RGB inputs.
feature_layers = [
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=0.25),

    Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
]

# Dense head ending in a 20-way softmax, one output unit per class.
classifier_layers = [
    Flatten(),
    Dense(units=256, activation='relu'),
    Dropout(rate=0.5),
    Dense(units=20, activation='softmax'),
]

model = Sequential([Input(shape=(64, 64, 3)), *feature_layers, *classifier_layers])

# Compile Model
adam = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])


# Callbacks
def _scheduled_learning_rate(epoch):
    """Return the learning rate for ``epoch``: 1e-3 scaled by 10 ** (epoch / 20)."""
    # NOTE(review): this grows with the epoch (1e-3 at epoch 0, ~1.58e-3 at
    # epoch 4); confirm that a rising rate, rather than a decay, is intended.
    return 1e-3 * 10 ** (epoch / 20)


# Stop once val_loss has failed to improve for 3 consecutive epochs.
# NOTE(review): restore_best_weights is not set, so the weights kept are those
# of the last epoch run - confirm that is what should be saved.
early_stopping = EarlyStopping(monitor='val_loss', patience=3)
lr_schedule = LearningRateScheduler(_scheduled_learning_rate)
training_callbacks = [early_stopping, lr_schedule]

# Train Model with EarlyStopping and 5 epochs
history = model.fit(
    train_generator,
    epochs=5,
    validation_data=val_generator,
    callbacks=training_callbacks,
)

# Evaluate Model
# One row per panel: (training key, validation key, panel title, y-axis label).
panels = (
    ('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model loss', 'Loss'),
)

plt.figure(figsize=(12, 4))
for position, (train_key, val_key, panel_title, y_label) in enumerate(panels, start=1):
    # Side-by-side panels: training curve first, validation curve second.
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(panel_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

# Save Model
# The same file is written here and read back for the webcam demo below.
model_path = 'optimized_pascal_voc_cnn_model.keras'
model.save(model_path)

# Real-Time Detection with Webcam
# Reload the trained model from disk so inference uses the saved artifact.
model = tf.keras.models.load_model(model_path)

# Class labels for Pascal VOC dataset
# The position in this list is the class index looked up from the model's
# argmax output; the 20 names are listed in alphabetical order.
class_labels = [
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor',
]

# Open the webcam
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("Error: Could not open webcam.")
    # exit() is the interactive helper injected by the site module; raising
    # SystemExit works in every context and reports a failing status.
    raise SystemExit(1)

frame_count = 0
fgbg = cv2.createBackgroundSubtractorMOG2()  # Background subtraction for motion detection
threshold = 0.2  # Reduced confidence threshold to increase detection

try:
    # Create and size the preview window once instead of on every frame, so
    # a manual resize by the user is not undone by the next iteration.
    cv2.namedWindow('Webcam', cv2.WINDOW_NORMAL)
    cv2.resizeWindow('Webcam', 640, 480)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame.")
            break

        # Apply background subtraction to get the foreground mask
        fgmask = fgbg.apply(frame)

        # Find contours to check for motion or objects
        contours, _ = cv2.findContours(fgmask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Only classify frames in which the foreground mask produced contours
        if contours:
            # OpenCV delivers frames as BGR, while the training images were
            # loaded by Keras as RGB; convert so the channel order matches.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Same preprocessing as training: 64x64, batch axis, scale to [0, 1]
            img = cv2.resize(rgb_frame, (64, 64))
            img = np.expand_dims(img, axis=0)
            img = img / 255.0

            # Get predictions from the model
            predictions = model.predict(img, verbose=0)

            # Print predictions for debugging
            print(predictions)

            # Only classify if the confidence is above the threshold
            if np.max(predictions) > threshold:
                class_idx = np.argmax(predictions, axis=1)[0]
                label = class_labels[class_idx]
            else:
                label = "No object detected"  # Show if confidence is below threshold

            # Display the label on the frame
            cv2.putText(frame, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

        # Show the frame (labelled only when a prediction was made for it)
        cv2.imshow('Webcam', frame)

        # Exit on pressing 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()
