In [1]:
import os
import numpy as np
import xml.etree.ElementTree as ET
from PIL import Image
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Input, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import VGG16
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import selectivesearch
import cv2

In [None]:
# Function for selective search
def selective_search(image):
    _, regions = selectivesearch.selective_search(image, scale=500, sigma=0.9, min_size=10)
    candidates = set()
    for r in regions:
        if r['rect'] in candidates:
            continue
        if r['size'] < 2000:
            continue
        x, y, w, h = r['rect']
        if w == 0 or h == 0:
            continue
        candidates.add(r['rect'])
    return candidates

# Custom Dataset Class
class FastRCNNDataset(tf.keras.utils.Sequence):
    def __init__(self, image_dir, annot_dir, batch_size=1, input_size=(224, 224), shuffle=True):
        self.image_dir = image_dir
        self.annot_dir = annot_dir
        self.batch_size = batch_size
        self.input_size = input_size
        self.shuffle = shuffle
        self.image_paths, self.labels = self.load_dataset()
        self.on_epoch_end()

    def load_dataset(self):
        image_paths = []
        labels = []
        for annot_file in os.listdir(self.annot_dir):
            if annot_file.endswith('.xml'):
                tree = ET.parse(os.path.join(self.annot_dir, annot_file))
                root = tree.getroot()
                filename = root.find('filename').text
                image_path = os.path.join(self.image_dir, filename)
                image_paths.append(image_path)
                for obj in root.findall('object'):
                    label = obj.find('name').text
                    bndbox = obj.find('bndbox')
                    xmin = round(float(bndbox.find('xmin').text))
                    ymin = round(float(bndbox.find('ymin').text))
                    xmax = round(float(bndbox.find('xmax').text))
                    ymax = round(float(bndbox.find('ymax').text))
                    labels.append(1 if label == 'Motorcycle' else 0)  # Binary labels
        return image_paths, labels

    def __len__(self):
        return int(np.ceil(len(self.image_paths) / self.batch_size))

    def __getitem__(self, index):
        image_paths = self.image_paths[index * self.batch_size:(index + 1) * self.batch_size]
        batch_images = []
        batch_labels = []
        for img_path, label in zip(image_paths, self.labels[index * self.batch_size:(index + 1) * self.batch_size]):
            image = np.array(Image.open(img_path).resize(self.input_size)) / 255.0
            batch_images.append(image)
            batch_labels.append(label)
        return np.array(batch_images), np.array(batch_labels)

    def on_epoch_end(self):
        if self.shuffle:
            temp = list(zip(self.image_paths, self.labels))
            np.random.shuffle(temp)
            self.image_paths, self.labels = zip(*temp)

# Build the Fast R-CNN Model
def build_fast_rcnn_model(input_shape):
    vgg = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
    for layer in vgg.layers:
        layer.trainable = False

    input_image = Input(shape=input_shape)
    features = vgg(input_image)
    x = Flatten()(features)
    x = Dense(4096, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(4096, activation='relu')(x)
    x = Dropout(0.5)(x)
    output = Dense(2, activation='softmax')(x)  # 2 classes: object/no-object

    model = Model(inputs=input_image, outputs=output)
    return model

input_shape = (224, 224, 3)
fast_rcnn_model = build_fast_rcnn_model(input_shape)
fast_rcnn_model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
fast_rcnn_model.summary()

# Directories
train_image_dir = 'D:/Users HP/Downloads/cnn/train/Images'
train_annot_dir = 'D:/Users HP/Downloads/cnn/train/Annotations'
val_image_dir = 'D:/Users HP/Downloads/cnn/val/Images'
val_annot_dir = 'D:/Users HP/Downloads/cnn/val/Annotations'
test_image_dir = 'D:/Users HP/Downloads/cnn/test/Images'
test_annot_dir = 'D:/Users HP/Downloads/cnn/test/Annotations'

# Data Generators
train_generator = FastRCNNDataset(train_image_dir, train_annot_dir, batch_size=1, input_size=(224, 224), shuffle=True)
val_generator = FastRCNNDataset(val_image_dir, val_annot_dir, batch_size=1, input_size=(224, 224), shuffle=False)
test_generator = FastRCNNDataset(test_image_dir, test_annot_dir, batch_size=1, input_size=(224, 224), shuffle=False)

# Train the Fast R-CNN Model
epochs = 10
history = fast_rcnn_model.fit(train_generator, validation_data=val_generator, epochs=epochs)

# Evaluate the Fast R-CNN Model
def evaluate_fast_rcnn_model(model, generator):
    predictions = model.predict(generator)
    y_true = np.concatenate([generator[i][1] for i in range(len(generator))])
    y_pred = np.argmax(predictions, axis=1)
    cm = confusion_matrix(y_true, y_pred)
    cr = classification_report(y_true, y_pred, target_names=['Car', 'Motorcycle'])
    return cm, cr

cm, cr = evaluate_fast_rcnn_model(fast_rcnn_model, test_generator)

print("Confusion Matrix:\n", cm)
print("Classification Report:\n", cr)

# Plot training & validation accuracy values
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')

# Plot training & validation loss values
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')

plt.show()

Epoch 1/10


  self._warn_if_super_not_called()


[1m698/698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m855s[0m 1s/step - accuracy: 0.4692 - loss: 34.0238 - val_accuracy: 0.5943 - val_loss: 0.6769
Epoch 2/10
[1m581/698[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m2:49[0m 1s/step - accuracy: 0.5957 - loss: 1.2649

In [None]:
# Function to preprocess image and regions
def preprocess_regions(image, regions, input_size):
    region_images = []
    region_coords = []
    for (x, y, w, h) in regions:
        region = image[y:y+h, x:x+w]
        region = cv2.resize(region, input_size)
        region = region / 255.0
        region_images.append(region)
        region_coords.append((x, y, w, h))
    return np.array(region_images), region_coords

# Function to annotate image
def annotate_image(image, regions, labels, scores):
    annotated_image = image.copy()
    for (x, y, w, h), label, score in zip(regions, labels, scores):
        if label == 1 and score > 0.5:  # Assuming 1 is the label for object of interest
            cv2.rectangle(annotated_image, (x, y), (x+w, y+h), (255, 0, 0), 2)
            cv2.putText(annotated_image, f'{label} {score:.2f}', (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)
    return annotated_image

# Load and preprocess input image
input_image_path = 'D:/Users HP/Downloads/cnn/JPEGImages/frame_01158.jpg'
input_image = np.array(Image.open(input_image_path))
regions = selective_search(input_image)
preprocessed_regions, region_coords = preprocess_regions(input_image, regions, (224, 224))

# Predict on regions
predictions = model.predict(preprocessed_regions)
labels = np.argmax(predictions, axis=1)
scores = np.max(predictions, axis=1)

# Annotate image
annotated_image = annotate_image(input_image, region_coords, labels, scores)

# Display the annotated image
plt.figure(figsize=(10, 10))
plt.imshow(annotated_image)
plt.axis('off')
plt.show()