In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
import numpy as np
import matplotlib.pyplot as plt
import os


2024-12-10 14:04:39.097401: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1733810679.109986    1488 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1733810679.113536    1488 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-10 14:04:39.127021: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

# Enable GPU for computation
tf.config.experimental.set_memory_growth(tf.config.list_physical_devices('GPU')[0], True)

print(tf.config.list_physical_devices('GPU'))

with tf.device('/GPU:0'):
    x = tf.constant([1.0, 2.0, 3.0])
    y = x * 2.0
    print("Results from GPU computation:", y.numpy())

Num GPUs Available:  1
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Results from GPU computation: [2. 4. 6.]


I0000 00:00:1733810682.608275    1488 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3586 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


In [3]:
def parse_tfrecord(example):
    feature_description = {
        'image/encoded': tf.io.FixedLenFeature([], tf.string),
        'image/height': tf.io.FixedLenFeature([], tf.int64),
        'image/width': tf.io.FixedLenFeature([], tf.int64),
        'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
        'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),
        'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
        'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32),
        'image/object/class/label': tf.io.VarLenFeature(tf.int64),
    }
    parsed_example = tf.io.parse_single_example(example, feature_description)

    # Decode image
    image = tf.image.decode_jpeg(parsed_example['image/encoded'])
    image = tf.image.resize(image, [512, 512]) / 255.0  # Normalize to [0, 1]

    # Decode bounding boxes and labels
    xmin = tf.sparse.to_dense(parsed_example['image/object/bbox/xmin'])
    xmax = tf.sparse.to_dense(parsed_example['image/object/bbox/xmax'])
    ymin = tf.sparse.to_dense(parsed_example['image/object/bbox/ymin'])
    ymax = tf.sparse.to_dense(parsed_example['image/object/bbox/ymax'])
    labels = tf.sparse.to_dense(parsed_example['image/object/class/label'])

    bboxes = tf.stack([ymin, xmin, ymax, xmax], axis=-1)  # Combine into (ymin, xmin, ymax, xmax) format
    return image, bboxes, labels


In [4]:
def load_tfrecord_dataset(tfrecord_path, batch_size=8):
    raw_dataset = tf.data.TFRecordDataset(tfrecord_path)
    parsed_dataset = raw_dataset.map(parse_tfrecord)
    dataset = parsed_dataset.shuffle(1000).batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return dataset


In [5]:
base_model = tf.keras.applications.ResNet50(
    include_top=False, input_shape=(512, 512, 3)
)

feature_extractor = models.Model(
    inputs=base_model.input,
    outputs=base_model.get_layer("conv4_block6_out").output
)


In [6]:
def build_rpn(feature_maps):
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(feature_maps)
    objectness_scores = layers.Conv2D(1, (1, 1), activation='sigmoid')(x)
    bbox_deltas = layers.Conv2D(4, (1, 1))(x)
    return objectness_scores, bbox_deltas


In [7]:
def roi_align(feature_maps, rois, output_size=(7, 7)):
    """
    Perform ROI Align on given feature maps using provided ROIs.

    Args:
        feature_maps: Tensor with shape [batch_size, height, width, channels].
        rois: A tensor of shape [num_rois, 4] representing normalized bounding boxes (in range [0, 1]).
        output_size: Tuple specifying the desired cropped size (default is (7, 7)).

    Returns:
        roi_features: Cropped features after applying ROI Align.
    """
    # Determine number of ROIs
    num_rois = tf.shape(rois)[0]
    
    # Ensure `rois` has the correct rank (remove any unexpected extra dimensions)
    rois = tf.reshape(rois, [num_rois, 4])  # Shape must be [num_rois, 4]

    # Prepare the batch indices corresponding to each ROI
    batch_size = tf.shape(feature_maps)[0]
    batch_indices = tf.random.uniform(
        [num_rois], minval=0, maxval=batch_size, dtype=tf.int32
    )  # Random valid indices for batch

    # Use crop_and_resize for ROI alignment
    roi_features = tf.image.crop_and_resize(
        feature_maps,  # Input feature map
        boxes=rois,  # Normalized bounding boxes
        box_indices=batch_indices,  # Indices indicating the batch dimension
        crop_size=output_size  # Desired size for ROI (e.g., 7x7)
    )
    
    return roi_features


In [8]:
def resize_images(images, target_size=(1024, 1024)):
    """
    Resize images to the expected input size of ResNet50.
    Args:
        images: Input images of shape (batch_size, h, w, 3).
        target_size: Desired target size, default is (1024, 1024).

    Returns:
        Resized images with the shape matching the target_size.
    """
    # Resize each image in the batch
    resized_images = tf.image.resize(images, target_size)
    return resized_images


In [9]:
class DetectionHead(tf.keras.layers.Layer):
    """
    A simple classification head for ROI features.
    """
    def __init__(self, num_classes):
        super(DetectionHead, self).__init__()
        self.num_classes = num_classes
        # Define the layers only once (weights will be initialized once)
        self.dense1 = layers.Dense(256, activation='relu')
        self.classifier = layers.Dense(num_classes, activation='softmax')
        self.bbox_regressor = layers.Dense(4, activation=None)

    def call(self, roi_features):
        """
        Forward pass through the detection head.
        
        Args:
            roi_features: Input features of shape [num_rois, 7, 7, channels].

        Returns:
            class_scores: The predicted class scores.
            bbox_deltas: The predicted bounding box deltas.
        """
        # Flatten the spatial dimensions (7x7) to make it compatible with Dense layers
        x = tf.keras.layers.Flatten()(roi_features)  # Flatten from [7,7,channels] to 1D
        x = self.dense1(x)
        
        # Class scores and bounding box predictions
        class_scores = self.classifier(x)  # Classification predictions
        bbox_deltas = self.bbox_regressor(x)  # Bounding box offsets
        
        return class_scores, bbox_deltas

In [10]:
def rpn_loss(objectness_pred, objectness_true, bbox_pred, bbox_true):
    obj_loss = tf.keras.losses.BinaryCrossentropy()(objectness_true, objectness_pred)
    bbox_loss = tf.keras.losses.Huber()(bbox_true, bbox_pred)
    return obj_loss + bbox_loss


In [11]:
def rpn_loss(objectness_pred, objectness_true, bbox_pred, bbox_true):
    obj_loss = tf.keras.losses.BinaryCrossentropy()(objectness_true, objectness_pred)
    bbox_loss = tf.keras.losses.Huber()(bbox_true, bbox_pred)
    return obj_loss + bbox_loss


In [12]:
# Example feature map tensor
feature_maps = tf.random.normal([8, 32, 32, 1024])  # [batch_size=8, height=32, width=32, channels=1024]

# Example normalized ROIs
rois = tf.random.uniform([10, 4], minval=0.0, maxval=1.0)  # Generate 10 random ROIs normalized in [0,1]

# Call roi_align
roi_features = roi_align(feature_maps, rois)
print("ROI Features Shape:", roi_features.shape)  # Expected: [10, 7, 7, 1024]


ROI Features Shape: (10, 7, 7, 1024)


In [13]:
def create_backbone():
    """
    Create the backbone feature extraction model using ResNet50 as an example.
    Set input shape to match the actual data dimensions.
    """
    # Use the actual image size (512, 512) as input
    base_model = ResNet50(include_top=False, weights="imagenet", input_shape=(1024, 1024, 3))

    # Optionally freeze some layers for transfer learning
    for layer in base_model.layers:
        layer.trainable = False  # Freeze the layers for transfer learning; set to True for fine-tuning
    
    return base_model


In [14]:
# Define the model globally
def create_model():
    # Example: A simple CNN model
    base_model = tf.keras.applications.ResNet50(
        include_top=False, input_shape=(512, 512, 3)
    )
    x = tf.keras.layers.GlobalAveragePooling2D()(base_model.output)
    x = tf.keras.layers.Dense(128, activation="relu")(x)
    outputs = tf.keras.layers.Dense(2, activation="softmax")(x)
    model = tf.keras.Model(inputs=base_model.input, outputs=outputs)

    # Compile the model to ensure compatibility
    model.compile(optimizer=tf.keras.optimizers.Adam(), loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    return model

In [15]:
model = create_model()
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)

@tf.function
def train_step(images, bboxes, labels):
    """
    A single training step.

    Args:
        images: Input images batch.
        bboxes: Bounding boxes.
        labels: Classification labels.

    Returns:
        Loss value for backpropagation.
    """
    with tf.GradientTape() as tape:
        # Resize images if necessary
        images_resized = resize_images(images)  # Resize images as shown before

        # Generate feature maps from the backbone
        feature_maps = backbone_model(images_resized)
        rois = roi_align(feature_maps, bboxes)  # Extract ROIs
        
        # Pass ROIs through detection head
        roi_features = tf.image.resize(rois, [7, 7])  # Resize for compatibility
        class_scores, bbox_deltas = head(roi_features)  # Pass through DetectionHead
        
        # Calculate classification loss
        classification_loss = tf.keras.losses.sparse_categorical_crossentropy(labels, class_scores)
        
        # Calculate bounding box loss
        bbox_loss_fn = tf.keras.losses.MeanAbsoluteError()
        bbox_loss = bbox_loss_fn(bboxes, bbox_deltas)  # Mean absolute error computation

        # Combine losses
        total_loss = tf.reduce_mean(classification_loss + bbox_loss)

    # Compute gradients
    gradients = tape.gradient(total_loss, head.trainable_variables + backbone_model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, head.trainable_variables + backbone_model.trainable_variables))

    return total_loss






In [26]:
print("Current Working Directory:", os.getcwd())

Current Working Directory: /mnt/c/Users/ACER/Downloads


In [28]:
# Navigate to Desktop and search for the directory
base_path = r"C:\Users\ACER\Desktop"
for root, dirs, files in os.walk(base_path):
    if 'train.record' in files:
        print(f"Found train.record in: {root}")
        break
else:
    print("train.record not found in Desktop or subdirectories.")

train.record not found in Desktop or subdirectories.


In [29]:
path = r"C:\Users\ACER\Desktop\IS"
if os.path.exists(path):
    print("Directories and files in 'IS':")
    print(os.listdir(path))
else:
    print(f"Path does not exist: {path}")

Path does not exist: C:\Users\ACER\Desktop\IS


In [25]:
train_file = r"C:\Users\ACER\Desktop\IS\Obj recognition Version 1\tfrecords\train.record"
valid_file = r"C:\Users\ACER\Desktop\IS\Obj recognition Version 1\tfrecords\valid.record"

# Verify that the files exist before proceeding
if not os.path.exists(train_file):
    raise FileNotFoundError(f"Train file not found at {train_file}")

if not os.path.exists(valid_file):
    raise FileNotFoundError(f"Validation file not found at {valid_file}")

# Function to load TFRecord dataset
def load_tfrecord_dataset(file_path, batch_size=8):
    raw_dataset = tf.data.TFRecordDataset(file_path)
    # Example parsing function (customize based on your data schema)
    def parse_function(proto):
        # Define your TFRecord parsing logic here
        return proto  # Replace this with actual parsing logic
    dataset = raw_dataset.map(parse_function).batch(batch_size)
    return dataset

# Load datasets
train_dataset = load_tfrecord_dataset(train_file, batch_size=8)
print("Train dataset loaded successfully.")

valid_dataset = load_tfrecord_dataset(valid_file, batch_size=8)
print("Validation dataset loaded successfully.")

FileNotFoundError: Train file not found at C:\Users\ACER\Desktop\IS\Obj recognition Version 1\tfrecords\train.record

In [24]:
for images, bboxes, labels in train_dataset.take(1):
    print("Images shape:", images.shape)
    print("Bounding boxes:", bboxes)
    print("Labels:", labels)


2024-12-10 14:13:57.221692: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: NOT_FOUND: C:\Users\ACER\Desktop\IS\Obj recognition Version 1\tfrecords\train.record; No such file or directory


NotFoundError: {{function_node __wrapped__IteratorGetNext_output_types_1_device_/job:localhost/replica:0/task:0/device:CPU:0}} C:\Users\ACER\Desktop\IS\Obj recognition Version 1\tfrecords\train.record; No such file or directory [Op:IteratorGetNext] name: 

In [None]:
backbone_model = create_backbone()

# Create detection head
num_classes = 19
head = DetectionHead(num_classes)

# Optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

In [None]:
epochs = 10
for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")
    for step, (images, bboxes, labels) in enumerate(train_dataset):
        loss = train_step(images, bboxes, labels)
        if step % 10 == 0:
            print(f"Step {step}, Loss: {loss:.4f}")


Epoch 1/10
Step 0, Loss: 9.5322


KeyboardInterrupt: 

In [None]:
model.save("Saved Models\my_trained_model.h5")
model.save_weights("Saved weights\my_model_weights.h5")

In [None]:
def evaluate_model(dataset):
    for images, bboxes, labels in dataset:
        class_scores, bbox_deltas = predict(images)  # Replace with the prediction function
        # Add evaluation logic such as IoU calculation or mAP


In [None]:
def predict(image):
    feature_maps = feature_extractor(image)
    rpn_scores, rpn_deltas = build_rpn(feature_maps)
    rois = tf.random.uniform((1, 10, 4), 0, 1)  # Replace with actual proposals
    roi_features = roi_align(feature_maps, rois, (7, 7))
    class_scores, bbox_deltas = build_head(roi_features, num_classes=2)
    return class_scores, bbox_deltas

test_image = next(iter(valid_dataset))[0]  # Example test image
class_scores, bbox_deltas = predict(test_image)
