In [4]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.optimizers import SGD


In [1]:
import os
from PIL import Image
import pandas as pd
# Define function to read annotations from text file
def read_annotations(annotation_file):
    """Parse one ground-truth text file into a list of annotation dicts.

    Every non-blank line is split on commas. All fields except the last are
    stripped of surrounding parentheses/whitespace and parsed as integer
    bounding-box coordinates; the last field is parsed as the class label.

    Args:
        annotation_file: Path of the annotation text file to read.

    Returns:
        A list with one ``{"bbox": [int, ...], "class": int or None}`` dict per
        non-blank line. ``"class"`` is ``None`` when the last field is empty.

    Raises:
        ValueError: If a coordinate or class field is not a valid integer.
    """
    annotations = []
    with open(annotation_file, 'r') as file:
        for line in file:
            line = line.strip()
            # Blank or whitespace-only lines (e.g. a trailing newline at the
            # end of the file) carry no annotation. Without this guard they
            # would be stored as {"bbox": [], "class": None}.
            if not line:
                continue
            values = [value.strip() for value in line.split(',')]
            # Coordinates arrive as "(x1", "y1)", ... so drop the parentheses
            # (and any padding inside them) before converting.
            bbox_coords = [int(coord.strip('() ')) for coord in values[:-1]]
            # The class label is optional: an empty last field maps to None.
            class_label = int(values[-1]) if values[-1] else None
            annotations.append({"bbox": bbox_coords, "class": class_label})
    return annotations


def create_dataframe(image_dir, annotation_dir):
    """Pair every ``.jpg`` image with the annotations of its ground-truth file.

    Despite its name, this function returns a plain list of dicts, not a
    pandas DataFrame. The image itself is not opened here; only its path is
    recorded.

    Args:
        image_dir: Directory that is scanned for files ending in ``.jpg``.
        annotation_dir: Directory holding ``<image stem>.txt`` annotation files.

    Returns:
        A list of ``{'Image_Path': str, 'Annotations': list}`` dicts, ordered by
        file name. ``Image_Path`` uses forward slashes. Images that have no
        matching annotation file are skipped.
    """
    data = []
    # os.listdir returns entries in arbitrary order; sorting makes the dataset
    # order reproducible from run to run.
    for filename in sorted(os.listdir(image_dir)):
        if not filename.endswith('.jpg'):
            continue
        stem = os.path.splitext(filename)[0]
        annotation_file = os.path.join(annotation_dir, stem + '.txt')
        # Only keep images that actually have a ground-truth file.
        if not os.path.exists(annotation_file):
            continue
        # Normalise Windows separators so stored paths look the same everywhere.
        image_path = os.path.join(image_dir, filename).replace("\\", "/")
        data.append({'Image_Path': image_path,
                     'Annotations': read_annotations(annotation_file)})
    return data
# Example usage
# Single place to configure where the dataset lives. Set the VHR10_ROOT
# environment variable to run the notebook on another machine; the fallback is
# the original local path.
DATASET_ROOT = os.environ.get(
    'VHR10_ROOT',
    'C:/Users/arson/OneDrive/Bureau/DL_project/vhr10_dataset',
)
image_dir = f'{DATASET_ROOT}/positive_image_set'
annotation_dir = f'{DATASET_ROOT}/ground_truth'

# Parse a single annotation file as a quick sanity check of the parser.
annotationz = read_annotations(f'{annotation_dir}/002.txt')
# Build the full list of {'Image_Path', 'Annotations'} entries.
dataset = create_dataframe(image_dir, annotation_dir)
print(type(annotationz))


<class 'list'>


In [47]:
# Dump the whole `dataset` list (image paths together with their parsed
# annotations) for a visual check.
# NOTE(review): this prints every entry; a slice such as `dataset[:3]` would
# keep the cell output readable.
print(dataset)

[{'Image_Path': 'C:/Users/arson/OneDrive/Bureau/DL_project/vhr10_dataset/positive_image_set/001.jpg', 'Annotations': [{'bbox': [563, 478, 630, 573], 'class': 1}]}, {'Image_Path': 'C:/Users/arson/OneDrive/Bureau/DL_project/vhr10_dataset/positive_image_set/002.jpg', 'Annotations': [{'bbox': [575, 114, 635, 162], 'class': 1}, {'bbox': [72, 305, 133, 369], 'class': 1}, {'bbox': [210, 317, 273, 384], 'class': 1}, {'bbox': [306, 374, 344, 420], 'class': 1}, {'bbox': [447, 531, 535, 632], 'class': 1}, {'bbox': [546, 605, 625, 707], 'class': 1}, {'bbox': [632, 680, 720, 790], 'class': 1}]}, {'Image_Path': 'C:/Users/arson/OneDrive/Bureau/DL_project/vhr10_dataset/positive_image_set/003.jpg', 'Annotations': [{'bbox': [100, 256, 170, 319], 'class': 1}, {'bbox': [575, 139, 668, 240], 'class': 1}, {'bbox': [753, 382, 843, 484], 'class': 1}, {'bbox': [313, 648, 415, 764], 'class': 1}, {'bbox': [434, 674, 527, 787], 'class': 1}]}, {'Image_Path': 'C:/Users/arson/OneDrive/Bureau/DL_project/vhr10_dataset

In [13]:
from PIL import Image
import numpy as np

def preprocess_dataset(dataset, input_size, rescale_bboxes=True):
    """Load, resize and normalise every image of ``dataset``.

    Args:
        dataset: Iterable of ``{'Image_Path': str, 'Annotations': list}`` dicts.
        input_size: Target ``(width, height)`` passed to ``Image.resize``.
        rescale_bboxes: When True (default), bounding boxes are scaled by the
            same factors as the image so they keep pointing at the same
            objects. Pass False to get the annotations through untouched.

    Returns:
        A list of ``{'Image': ndarray, 'Annotations': list}`` dicts. ``Image``
        holds RGB pixel values divided by 255.0. The input annotation dicts are
        never mutated; rescaled annotations are fresh dicts.
    """
    target_width, target_height = input_size
    processed_data = []
    for entry in dataset:
        # The context manager releases the file handle as soon as the pixels
        # have been read; a bare Image.open leaves it open.
        with Image.open(entry['Image_Path']) as source:
            original_width, original_height = source.size
            # Force three channels so greyscale/RGBA/CMYK files still produce
            # arrays with a trailing dimension of 3.
            resized = source.convert('RGB').resize(input_size)

        # Normalise pixel values to [0, 1].
        image = np.array(resized) / 255.0

        annotations = entry['Annotations']
        if rescale_bboxes:
            # Resizing moves every object, so the boxes must be scaled too.
            # Assumes 'bbox' is a flat [x1, y1, x2, y2, ...] list, i.e. x at
            # even and y at odd positions - TODO confirm against the
            # annotation format.
            scale = (target_width / original_width,
                     target_height / original_height)
            annotations = [
                {**annotation,
                 'bbox': [int(round(coord * scale[index % 2]))
                          for index, coord in enumerate(annotation['bbox'])]}
                for annotation in annotations
            ]

        processed_data.append({'Image': image, 'Annotations': annotations})

    return processed_data

# Example usage
# Target (width, height) for every image; this matches the
# input_shape=(1024, 1024, 3) used for the ResNet50 backbone in a later cell.
input_size = (1024, 1024)  # Example input size
# NOTE(review): every processed image is kept in memory as a full
# 1024x1024 array, so this list can get very large - confirm it fits in RAM.
processed_dataset = preprocess_dataset(dataset, input_size)

In [18]:
# Dump every processed entry (normalised image array plus its annotations).
# NOTE(review): this prints the whole list; inspecting a single entry, e.g.
# `processed_dataset[0]['Image'].shape`, would keep the output short.
print(processed_dataset)

[{'Image': array([[[0.30588235, 0.34901961, 0.19215686],
        [0.30588235, 0.34901961, 0.19215686],
        [0.30980392, 0.35294118, 0.19607843],
        ...,
        [0.37647059, 0.42745098, 0.30980392],
        [0.36862745, 0.41960784, 0.30980392],
        [0.36470588, 0.41568627, 0.30588235]],

       [[0.30196078, 0.34901961, 0.18823529],
        [0.30196078, 0.34509804, 0.18823529],
        [0.30980392, 0.35294118, 0.19607843],
        ...,
        [0.38039216, 0.43137255, 0.31372549],
        [0.37254902, 0.42352941, 0.31372549],
        [0.36862745, 0.41960784, 0.30980392]],

       [[0.29803922, 0.34117647, 0.18823529],
        [0.29803922, 0.34117647, 0.18823529],
        [0.30980392, 0.34901961, 0.2       ],
        ...,
        [0.38039216, 0.43137255, 0.31372549],
        [0.37254902, 0.42352941, 0.31372549],
        [0.36470588, 0.41568627, 0.30588235]],

       ...,

       [[0.44313725, 0.49411765, 0.46666667],
        [0.44313725, 0.49411765, 0.45882353],
        [0.

In [10]:
from tensorflow.keras import layers, models

# Number of output channels per head; presumably the 10 object categories of
# the VHR-10 dataset - TODO confirm.
NUM_CLASSES = 10

# Load the ResNet50 backbone: ImageNet weights, no classification top.
# pooling=None (the documented "no pooling" value) keeps the 4-D feature map;
# the previous string 'none' only worked because unknown values are ignored.
resnet_model = tf.keras.applications.ResNet50(include_top=False,
                                               weights='imagenet',
                                               input_shape=(1024, 1024, 3),
                                               pooling=None)

# Define additional layers for the Mask R-CNN style heads
# Nearest-neighbour upsampling by a factor of 32 in both spatial dimensions.
upsample_layer = layers.UpSampling2D(size=(32, 32), interpolation='nearest', name="upsample")

# Define input layer
input_layer = layers.Input(shape=(1024, 1024, 3))

# Pass input through ResNet50 backbone -> (None, 32, 32, 2048) feature map
resnet_output = resnet_model(input_layer)

# Upsample the 32x32 feature map by 32x, back to 1024x1024
# NOTE(review): the result is (None, 1024, 1024, 2048), i.e. about 2 billion
# activations per image. This is very likely to exhaust memory; consider
# running the heads on the 32x32 map (or reducing channels first) instead.
upsampled_output = upsample_layer(resnet_output)

# Shared 3x3 convolution feeding the classification and bbox heads
rpn_layer = layers.Conv2D(filters=256, kernel_size=(3, 3), padding="same", activation="relu", name="rpn_conv")(upsampled_output)
# Per-pixel class scores (softmax over NUM_CLASSES channels)
classification_layer = layers.Conv2D(filters=NUM_CLASSES, kernel_size=(1, 1), activation="softmax", name="classification")(rpn_layer)
# Per-pixel box regression: 4 values per class
bbox_regression_layer = layers.Conv2D(filters=NUM_CLASSES * 4, kernel_size=(1, 1), activation="linear", name="bbox_regression")(rpn_layer)
# Per-pixel, per-class mask probabilities (sigmoid), computed directly from the
# upsampled backbone features
mask_layer = layers.Conv2D(filters=NUM_CLASSES, kernel_size=(3, 3), padding="same", activation="sigmoid", name="mask")(upsampled_output)

# Define the model: one input, three named outputs
# ('classification', 'bbox_regression', 'mask')
model = models.Model(inputs=input_layer, outputs=[classification_layer, bbox_regression_layer, mask_layer])

# Progressive fine-tuning schedule: train the heads first, then unfreeze the
# ResNet50 backbone stage by stage (conv5 -> conv4 -> conv3) with decreasing
# learning rates.
# NOTE(review): `train_dataset` is not defined in any visible cell; it must be
# built (images plus targets for the three named outputs) before this cell
# can run.

# Loss and metric for each named model output. The classification head ends in
# a softmax, so it is paired with categorical cross-entropy (expects one-hot
# targets with the same shape as the output); the sigmoid mask head keeps
# binary cross-entropy.
HEAD_LOSSES = {'classification': 'categorical_crossentropy',
               'bbox_regression': 'mse',
               'mask': 'binary_crossentropy'}
HEAD_METRICS = {'classification': 'accuracy',
                'bbox_regression': 'mse',
                'mask': 'accuracy'}


def _compile_and_fit(optimizer, epochs):
    """Recompile `model` with `optimizer`, then train it for `epochs` epochs.

    Recompiling is required for changes to `layer.trainable` to take effect.
    """
    model.compile(optimizer=optimizer, loss=HEAD_LOSSES, metrics=HEAD_METRICS)
    return model.fit(train_dataset, epochs=epochs)


def _unfreeze_backbone_from(stage):
    """Make every ResNet50 layer of conv stage `stage` or later trainable.

    Relies on the Keras ResNet50 naming scheme `conv<stage>_block<k>_...`
    (stages 1-5). The backbone is nested inside `model` as a single layer, so
    its layers must be reached through `resnet_model`, not `model.layers`.
    """
    prefixes = tuple(f'conv{index}_' for index in range(stage, 6))
    for layer in resnet_model.layers:
        if layer.name.startswith(prefixes):
            layer.trainable = True


# Fine-tuning strategy 1: Train the head layers for 30 epochs while the whole
# backbone is frozen, with learning rate 0.1
for layer in resnet_model.layers:
    layer.trainable = False

for layer in model.layers:
    if any(tag in layer.name for tag in ('classification', 'bbox_regression', 'mask')):
        layer.trainable = True

# `learning_rate` is the supported keyword; `lr` is deprecated/removed.
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)
_compile_and_fit(adam_optimizer, epochs=30)

# Fine-tuning strategy 2: Train conv stage 5+ and then conv stage 4+ for
# 30 epochs each, using learning rates 0.01 and 0.001 respectively.
# Keras has no per-layer learning rate, so each phase gets its own optimizer.
learning_rates = [0.01, 0.001]
for stage, rate in zip((5, 4), learning_rates):
    _unfreeze_backbone_from(stage)
    sgd_optimizer = tf.keras.optimizers.SGD(learning_rate=rate)
    _compile_and_fit(sgd_optimizer, epochs=30)

# Fine-tuning strategy 3: Train conv stage 3+ with learning rate 0.001.
# NOTE(review): the original heading said 400 epochs while the code trained
# for 50; the coded value (50) is kept - confirm which one is intended.
_unfreeze_backbone_from(3)
sgd_optimizer = tf.keras.optimizers.SGD(learning_rate=0.001)
_compile_and_fit(sgd_optimizer, epochs=50)


Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_11 (InputLayer)          [(None, 1024, 1024,  0           []                               
                                 3)]                                                              
                                                                                                  
 resnet50 (Functional)          (None, 32, 32, 2048  23587712    ['input_11[0][0]']               
                                )                                                                 
                                                                                                  
 upsample (UpSampling2D)        (None, 1024, 1024,   0           ['resnet50[0][0]']               
                                2048)                                                       

In [None]:
# Attempt to load a pretrained Mask R-CNN and expose its sub-models.
# NOTE(review): `MaskRCNN` does not appear among the models listed in the
# tf.keras.applications documentation - confirm; if it is absent this call
# raises AttributeError and a dedicated Mask R-CNN implementation is needed.
mask_rcnn = tf.keras.applications.MaskRCNN(weights='mask_rcnn_resnet50_atrous_coco', 
                                           include_top=False)

# Fine-tune the model on your custom dataset
# Replace this with your dataset and training code
# Example:
# model.compile(optimizer='adam', loss=[...])
# model.fit(train_dataset, epochs=...)

# You can also access individual components of the model
# NOTE(review): the attribute names below (backbone, fpn, rpn_model,
# mask_submodel) depend on whichever Mask R-CNN implementation is eventually
# used - verify them against that library.
backbone_model = mask_rcnn.backbone
fpn_model = mask_rcnn.fpn
rpn_model = mask_rcnn.rpn_model
mrcnn_mask = mask_rcnn.mask_submodel