In [None]:
import tensorflow as tf

class RCNN(tf.keras.Model):
    """Toy model that mimics the layout of an R-CNN style detector.

    A ResNet50 backbone produces a feature map that feeds two branches:

    * an RPN-style convolutional head that emits one objectness score and
      four box values per feature-map location, and
    * an image-level head (global average pooling standing in for real ROI
      pooling) that emits one class distribution and one box per image.

    No region proposals are generated or consumed, so this is a shape-level
    demonstration rather than a working detector.

    Args:
        num_classes: Width of the classification output.
        image_shape: ``(height, width, channels)`` the backbone is built for.
            Defaults to ``(224, 224, 3)``, the value that used to be
            hard-coded.
    """

    def __init__(self, num_classes, image_shape=(224, 224, 3)):
        super().__init__()

        # Feature extractor (typically uses a pre-trained CNN like ResNet or VGG).
        # `weights` is left at the Keras default.
        self.backbone = tf.keras.applications.ResNet50(
            include_top=False, input_shape=image_shape
        )

        # Region Proposal Network (RPN) head
        self.rpn_conv = tf.keras.layers.Conv2D(512, (3, 3), padding='same', activation='relu')
        self.rpn_class = tf.keras.layers.Conv2D(1, (1, 1), activation='sigmoid')  # Objectness score
        self.rpn_bbox = tf.keras.layers.Conv2D(4, (1, 1))  # Bounding box adjustments

        # ROI pooling (just a placeholder for demonstration, would need an actual
        # ROI pooling layer). This collapses the whole feature map to one vector
        # per image.
        self.roi_pooling = tf.keras.layers.GlobalAveragePooling2D()

        # Classification / box-regression head
        self.fc1 = tf.keras.layers.Dense(1024, activation='relu')
        self.fc2 = tf.keras.layers.Dense(1024, activation='relu')
        self.classifier = tf.keras.layers.Dense(num_classes, activation='softmax')  # Class prediction
        self.regressor = tf.keras.layers.Dense(4)

    def call(self, inputs, training=None):
        """Run the backbone and both heads on a batch of images.

        Args:
            inputs: Image batch accepted by the backbone, i.e.
                ``(batch, *image_shape)``.
            training: Optional flag forwarded to the backbone so its
                batch-normalisation layers can be put in training or inference
                mode explicitly. ``None`` lets Keras decide, as before.

        Returns:
            A 4-tuple ``(class_logits, bbox_logits, rpn_class_logits,
            rpn_bbox_logits)``. Despite the names, only the two box outputs are
            unactivated: ``class_logits`` has already been through softmax and
            ``rpn_class_logits`` through sigmoid. For a 224x224 input the shapes
            are ``(batch, num_classes)``, ``(batch, 4)``, ``(batch, 7, 7, 1)``
            and ``(batch, 7, 7, 4)``.
        """
        # Step 1: Backbone feature extraction
        feature_map = self.backbone(inputs, training=training)

        # Step 2: Region Proposal Network (RPN), one prediction per spatial cell
        rpn_output = self.rpn_conv(feature_map)
        rpn_class_logits = self.rpn_class(rpn_output)
        rpn_bbox_logits = self.rpn_bbox(rpn_output)

        # Step 3: ROI pooling (placeholder: global pooling over the full map,
        # independent of the RPN outputs above)
        roi_pooled = self.roi_pooling(feature_map)

        # Step 4: Classification and regression heads share the two FC layers
        x = self.fc1(roi_pooled)
        x = self.fc2(x)
        class_logits = self.classifier(x)
        bbox_logits = self.regressor(x)

        return class_logits, bbox_logits, rpn_class_logits, rpn_bbox_logits



Classification logits shape: (1, 21)
Bounding box logits shape: (1, 4)
RPN classification logits shape: (1, 7, 7, 1)
RPN bounding box logits shape: (1, 7, 7, 4)


In [11]:
# Seed Python, NumPy and TensorFlow so the randomly initialised head weights and
# the random dummy input give the same numbers on every Restart & Run All.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

num_classes = 21  # Example: 20 classes + 1 background
rcnn_model = RCNN(num_classes)

# Dummy input image of shape (batch_size, height, width, channels)
input_image = tf.random.normal([1, 224, 224, 3])

# Forward pass: only the output shapes are meaningful here, the heads are untrained.
class_logits, bbox_logits, rpn_class_logits, rpn_bbox_logits = rcnn_model(input_image)
print("Classification logits shape:", class_logits.shape)
print("Bounding box logits shape:", bbox_logits.shape)
print("RPN classification logits shape:", rpn_class_logits.shape)
print("RPN bounding box logits shape:", rpn_bbox_logits.shape)

Classification logits shape: (1, 21)
Bounding box logits shape: (1, 4)
RPN classification logits shape: (1, 7, 7, 1)
RPN bounding box logits shape: (1, 7, 7, 4)


In [2]:
# Inspect the box-regression output: four unconstrained values per image
# (the Dense(4) regressor has no activation).
bbox_logits

<tf.Tensor: shape=(1, 4), dtype=float32, numpy=
array([[-0.1180391 , -0.71070844,  0.73670226, -0.18917577]],
      dtype=float32)>

In [3]:
# Confirm the dummy batch has the (batch, 224, 224, 3) layout the backbone was built for.
input_image.shape

TensorShape([1, 224, 224, 3])

In [4]:
import cv2

In [5]:
# NOTE(review): absolute, machine-specific Windows path. The cells below only
# run where this exact file exists; point it at a local image before re-running.
PATH = r"C:\Abdul Saboor\High Imapact Program\Module 8_1\images.jpg"

def load_img(img_path, size=(224, 224)):
    """Read an image file and return it as a single-image RGB batch.

    Args:
        img_path: Path of the image file to read with OpenCV.
        size: ``(height, width)`` to resize to. Defaults to ``(224, 224)``,
            the value that used to be hard-coded.

    Returns:
        A tensor of shape ``(1, height, width, channels)`` produced by
        ``tf.image.resize``. Pixel values stay on the original 0-255 scale;
        no model-specific normalisation is applied here.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_path`` (missing file or
            unsupported/corrupt image data).
    """
    bgr_pixels = cv2.imread(img_path)
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface as an opaque error inside cvtColor.
    if bgr_pixels is None:
        raise FileNotFoundError(
            f"Could not read an image from {img_path!r} (file missing or not decodable)"
        )

    # OpenCV loads channels as BGR; reorder to RGB.
    rgb_pixels = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)

    sample_image_t = tf.constant(rgb_pixels, dtype=tf.uint8)
    sample_image_t = tf.image.resize(sample_image_t, size=size)
    # Add the leading batch axis expected by the model.
    sample_image_t = tf.expand_dims(sample_image_t, axis=0)
    return sample_image_t

# Despite the `_np` suffix, load_img returns a tf.Tensor (already batched), not a NumPy array.
image_np = load_img(PATH)

In [6]:
# Sanity check: the loaded image should be a single-image batch of 224x224 RGB pixels.
image_np.shape

TensorShape([1, 224, 224, 3])

In [7]:
# Rebuild the model from scratch, rebinding `rcnn_model`; the earlier instance is discarded.
# Depends on `num_classes` defined in the dummy-input cell above.
rcnn_model = RCNN(num_classes)

In [8]:
# The backbone is ResNet50 with its default ImageNet weights, which expect inputs
# prepared by the matching preprocess_input (RGB -> BGR plus per-channel mean
# subtraction). load_img returns raw 0-255 RGB pixels, so convert them first.
model_input = tf.keras.applications.resnet50.preprocess_input(image_np)

class_logits, bbox_logits, rpn_class_logits, rpn_bbox_logits = rcnn_model(model_input)
print("Classification logits shape:", class_logits.shape)
print("Bounding box logits shape:", bbox_logits.shape)
print("RPN classification logits shape:", rpn_class_logits.shape)
print("RPN bounding box logits shape:", rpn_bbox_logits.shape)

Classification logits shape: (1, 21)
Bounding box logits shape: (1, 4)
RPN classification logits shape: (1, 7, 7, 1)
RPN bounding box logits shape: (1, 7, 7, 4)


In [9]:
# Check which tensor type the model hands back when called directly.
type(bbox_logits)

tensorflow.python.framework.ops.EagerTensor

In [10]:
# Despite the name, these are softmax probabilities (the classifier layer uses
# activation='softmax'), so each row sums to 1. The head is untrained, so the
# values carry no meaning yet.
class_logits

<tf.Tensor: shape=(1, 21), dtype=float32, numpy=
array([[0.07997061, 0.02919159, 0.08195425, 0.04333873, 0.05308235,
        0.03456607, 0.03332048, 0.08424754, 0.02428325, 0.04586925,
        0.08789396, 0.0151935 , 0.02786977, 0.11036954, 0.04872915,
        0.03229164, 0.02892794, 0.05933132, 0.01798878, 0.02226914,
        0.03931108]], dtype=float32)>