In [4]:
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import numpy as np

def load_data(data_path):
    images = []
    annotations = []
    
    # Assuming annotations are stored in text files with the same name as the images
    for file_name in os.listdir(data_path):
        if file_name.endswith('.jpg') or file_name.endswith('.png'):
            # Load the image
            image_path = os.path.join(data_path, file_name)
            image = load_img(image_path, target_size=(224, 224))
            image = img_to_array(image)
            images.append(image)
            
            # Load the corresponding annotation
            annotation_path = image_path.replace('.jpg', '.txt').replace('.png', '.txt')
            with open(annotation_path, 'r') as f:
                annotation = f.read().strip()
            annotations.append(annotation)
    
    return np.array(images), np.array(annotations)

# Load your data
train_images, train_annotations = load_data('D:/Users HP/Downloads/cnn/train/Annotations')
val_images, val_annotations = load_data('D:/Users HP/Downloads/cnn/val/Annotations')
test_images, test_annotations = load_data('D:/Users HP/Downloads/cnn/test/Annotations')

In [5]:
import tensorflow as tf

class ROIPoolingLayer(tf.keras.layers.Layer):
    def __init__(self, pooled_height, pooled_width, **kwargs):
        super(ROIPoolingLayer, self).__init__(**kwargs)
        self.pooled_height = pooled_height
        self.pooled_width = pooled_width

    def call(self, inputs):
        feature_map, rois = inputs
        box_indices = tf.range(tf.shape(rois)[0])
        boxes = rois / tf.constant([tf.shape(feature_map)[2], tf.shape(feature_map)[1], tf.shape(feature_map)[2], tf.shape(feature_map)[1]], dtype=tf.float32)
        boxes = tf.reshape(boxes, [-1, 4])
        box_indices = tf.tile(tf.expand_dims(box_indices, axis=-1), [1, tf.shape(rois)[1]])

        cropped_features = tf.image.crop_and_resize(
            feature_map,
            boxes,
            box_indices,
            crop_size=(self.pooled_height, self.pooled_width)
        )
        return cropped_features

    def compute_output_shape(self, input_shape):
        return input_shape[0], input_shape[1], self.pooled_height, self.pooled_width

    def get_config(self):
        config = super(ROIPoolingLayer, self).get_config()
        config.update({
            'pooled_height': self.pooled_height,
            'pooled_width': self.pooled_width
        })
        return config

In [7]:
from tensorflow.keras import layers, models
def faster_rcnn_model(input_shape=(224, 224, 3), num_classes=21):
    input_image = layers.Input(shape=input_shape)
    feature_map = layers.Conv2D(512, (3, 3), padding='same', activation='relu')(input_image)
    rpn_conv = layers.Conv2D(512, (3, 3), padding='same', activation='relu')(feature_map)
    rpn_cls_score = layers.Conv2D(2, (1, 1), activation='softmax')(rpn_conv)
    rpn_reg = layers.Conv2D(4, (1, 1))(rpn_conv)

    # Use the ROIPoolingLayer with proper inputs
    rois = ROIPoolingLayer(pooled_height=7, pooled_width=7)([feature_map, rpn_cls_score])

    fc1 = layers.TimeDistributed(layers.Dense(4096, activation='relu'))(rois)
    fc2 = layers.TimeDistributed(layers.Dense(4096, activation='relu'))(fc1)
    cls_output = layers.TimeDistributed(layers.Dense(num_classes, activation='softmax'))(fc2)
    reg_output = layers.TimeDistributed(layers.Dense(num_classes * 4))(fc2)

    model = tf.keras.Model(inputs=input_image, outputs=[cls_output, reg_output])
    return model

# Instantiate the model
faster_rcnn_model = faster_rcnn_model()

# Define the optimizer and loss
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

@tf.function
def train_step(images, targets):
    with tf.GradientTape() as tape:
        cls_score, bbox_pred = faster_rcnn_model(images, training=True)
        cls_loss = loss_fn(targets['classes'], cls_score)
        reg_loss = loss_fn(targets['boxes'], bbox_pred)
        total_loss = cls_loss + reg_loss
    gradients = tape.gradient(total_loss, faster_rcnn_model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, faster_rcnn_model.trainable_variables))
    return total_loss

# Training loop
num_epochs = 10  # Set the number of epochs

for epoch in range(num_epochs):
    for batch, (images, targets) in enumerate(zip(train_images, train_annotations)):
        image_tensor = tf.convert_to_tensor(images, dtype=tf.float32)
        targets_tensor = {
            'boxes': tf.convert_to_tensor(targets['boxes'], dtype=tf.float32),
            'classes': tf.convert_to_tensor(targets['classes'], dtype=tf.float32)
        }

        loss = train_step(image_tensor, targets_tensor)
        print(f"Epoch {epoch}, Batch {batch}, Loss: {loss.numpy()}")

ValueError: Exception encountered when calling ROIPoolingLayer.call().

[1mCannot map function to unknown object 7[0m

Arguments received by ROIPoolingLayer.call():
  • args=(['<KerasTensor shape=(None, 224, 224, 512), dtype=float32, sparse=False, name=keras_tensor_1>', '<KerasTensor shape=(None, 224, 224, 2), dtype=float32, sparse=False, name=keras_tensor_3>'],)
  • kwargs=<class 'inspect._empty'>

In [2]:
# Evaluation
def evaluate_model(model, val_images, val_annotations):
    for image, gt_boxes, gt_classes in zip(val_images, val_annotations['boxes'], val_annotations['classes']):
        input_tensor = tf.convert_to_tensor(image, dtype=tf.float32)
        input_tensor = input_tensor[tf.newaxis, ...]
        
        detections = model(input_tensor)
        
        for i in range(len(detections[0])):
            box = detections[1][0][i]
            class_id = int(detections[0][0][i])
            score = detections[1][0][i][4]
            if score > 0.5:
                xmin, ymin, xmax, ymax = box
                cv2.rectangle(image, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (255, 0, 0), 2)
                cv2.putText(image, str(class_id), (int(xmin), int(ymin)-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)
        
        plt.figure(figsize=(12, 8))
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        plt.show()

evaluate_model(faster_rcnn_model, val_images, val_annotations)

NameError: name 'faster_rcnn_model' is not defined

In [8]:
import os
import xml.etree.ElementTree as ET
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score

# Define the ROIPoolingLayer
class ROIPoolingLayer(tf.keras.layers.Layer):
    def __init__(self, pooled_height, pooled_width, **kwargs):
        super(ROIPoolingLayer, self).__init__(**kwargs)
        self.pooled_height = pooled_height
        self.pooled_width = pooled_width

    def call(self, inputs):
        feature_map, rois = inputs
        batch_size = tf.shape(rois)[0]
        box_indices = tf.range(batch_size)
        boxes = rois / tf.constant([tf.shape(feature_map)[2], tf.shape(feature_map)[1], tf.shape(feature_map)[2], tf.shape(feature_map)[1]], dtype=tf.float32)
        boxes = tf.reshape(boxes, [-1, 4])
        box_indices = tf.tile(tf.expand_dims(box_indices, axis=-1), [1, tf.shape(rois)[1]])
        box_indices = tf.reshape(box_indices, [-1])

        cropped_features = tf.image.crop_and_resize(
            feature_map,
            boxes,
            box_indices,
            crop_size=(self.pooled_height, self.pooled_width)
        )
        return cropped_features

    def compute_output_shape(self, input_shape):
        return input_shape[0], input_shape[1], self.pooled_height, self.pooled_width

    def get_config(self):
        config = super(ROIPoolingLayer, self).get_config()
        config.update({
            'pooled_height': self.pooled_height,
            'pooled_width': self.pooled_width
        })
        return config

# Define the Faster R-CNN model
def get_faster_rcnn_model(input_shape=(224, 224, 3), num_classes=2):
    base_model = tf.keras.applications.VGG16(input_shape=input_shape, include_top=False, weights='imagenet')
    base_model.trainable = True
    
    feature_map = base_model.output
    
    rpn_conv = layers.Conv2D(512, (3, 3), padding='same', activation='relu')(feature_map)
    rpn_cls_score = layers.Conv2D(2, (1, 1), activation='softmax')(rpn_conv)
    rpn_reg = layers.Conv2D(4, (1, 1))(rpn_conv)
    
    rois = ROIPoolingLayer(pooled_height=7, pooled_width=7)([feature_map, rpn_cls_score])
    
    fc1 = layers.TimeDistributed(layers.Dense(4096, activation='relu'))(rois)
    fc2 = layers.TimeDistributed(layers.Dense(4096, activation='relu'))(fc1)
    
    cls_score = layers.TimeDistributed(layers.Dense(num_classes, activation='softmax'))(fc2)
    bbox_pred = layers.TimeDistributed(layers.Dense(num_classes * 4))(fc2)
    
    model = models.Model(inputs=base_model.input, outputs=[cls_score, bbox_pred])
    return model

# Data preparation
def load_data(path):
    images = []
    annotations = {'boxes': [], 'classes': []}
    for file in os.listdir(path):
        if file.endswith(".xml"):
            tree = ET.parse(os.path.join(path, file))
            root = tree.getroot()
            image_file = root.find('filename').text
            img_path = os.path.join(path, image_file)
            image = cv2.imread(img_path)
            images.append(image)
            boxes = []
            classes = []
            for obj in root.findall('object'):
                bbox = obj.find('bndbox')
                xmin = float(bbox.find('xmin').text)
                ymin = float(bbox.find('ymin').text)
                xmax = float(bbox.find('xmax').text)
                ymax = float(bbox.find('ymax').text)
                boxes.append([xmin, ymin, xmax, ymax])
                classes.append(1 if obj.find('name').text == 'vehicle' else 0)  # Assuming classes are 'vehicle' and 'motorcycle'
            annotations['boxes'].append(boxes)
            annotations['classes'].append(classes)
    return images, annotations

# Load your data
train_images, train_annotations = load_data('D:/Users HP/Downloads/cnn/train/Annotations')
val_images, val_annotations = load_data('D:/Users HP/Downloads/cnn/val/Annotations')
test_images, test_annotations = load_data('D:/Users HP/Downloads/cnn/test/Annotations')

# Instantiate the model
faster_rcnn_model = get_faster_rcnn_model()

# Define the optimizer and loss
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

@tf.function
def train_step(images, targets):
    with tf.GradientTape() as tape:
        cls_score, bbox_pred = faster_rcnn_model(images, training=True)
        cls_loss = loss_fn(targets['classes'], cls_score)
        reg_loss = loss_fn(targets['boxes'], bbox_pred)
        total_loss = cls_loss + reg_loss
    gradients = tape.gradient(total_loss, faster_rcnn_model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, faster_rcnn_model.trainable_variables))
    return total_loss

# Training loop
num_epochs = 10  # Set the number of epochs

for epoch in range(num_epochs):
    for batch, (images, targets) in enumerate(zip(train_images, train_annotations)):
        image_tensor = tf.convert_to_tensor(images, dtype=tf.float32)
        targets_tensor = {
            'boxes': tf.convert_to_tensor(targets['boxes'], dtype=tf.float32),
            'classes': tf.convert_to_tensor(targets['classes'], dtype=tf.float32)
        }

        loss = train_step(image_tensor, targets_tensor)
        print(f"Epoch {epoch}, Batch {batch}, Loss: {loss.numpy()}")

# Evaluation
def evaluate_model(model, val_images, val_annotations):
    for image, gt_boxes, gt_classes in zip(val_images, val_annotations['boxes'], val_annotations['classes']):
        input_tensor = tf.convert_to_tensor(image, dtype=tf.float32)
        input_tensor = input_tensor[tf.newaxis, ...]
        
        detections = model(input_tensor)
        
        for i in range(len(detections[0])):
            box = detections[1][0][i]
            class_id = int(detections[0][0][i])
            score = detections[1][0][i][4]
            if score > 0.5:
                xmin, ymin, xmax, ymax = box
                cv2.rectangle(image, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (255, 0, 0), 2)
                cv2.putText(image, str(class_id), (int(xmin), int(ymin)-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)
        
        plt.figure(figsize=(12, 8))
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        plt.show()

evaluate_model(faster_rcnn_model, val_images, val_annotations)


ValueError: Exception encountered when calling ROIPoolingLayer.call().

[1mCannot map function to unknown object 7[0m

Arguments received by ROIPoolingLayer.call():
  • args=(['<KerasTensor shape=(None, 7, 7, 512), dtype=float32, sparse=False, name=keras_tensor_25>', '<KerasTensor shape=(None, 7, 7, 2), dtype=float32, sparse=False, name=keras_tensor_27>'],)
  • kwargs=<class 'inspect._empty'>