<a href="https://colab.research.google.com/github/Cralsic123/Siamese-network-object-detection/blob/main/Local_Matching.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# This cell runs before the notebook's main import cell, so it brings in the
# two stdlib modules it needs itself.
import os
import shutil

# Names of the triplet folders to copy out of Google Drive
folder_names = ["Anchor", "Positive", "Negative"]

# Destination path in Colab
destination_path = "/content"

# Copy each folder from Google Drive to the Colab destination path.
# NOTE(review): assumes Drive is already mounted at /content/drive - confirm.
for folder_name in folder_names:
    # Path to the folder in Google Drive
    folder_path = f"/content/drive/MyDrive/Places/{folder_name}"

    # dirs_exist_ok=True lets the cell be re-run without copytree raising
    # FileExistsError because the destination folder is already present.
    shutil.copytree(
        folder_path,
        os.path.join(destination_path, folder_name),
        dirs_exist_ok=True,
    )

# List the contents of the destination path to verify the folders have been copied
print("Contents of destination path:")
print(os.listdir(destination_path))


Contents of destination path:
['.config', 'Negative', 'Anchor', 'Positive', 'drive', 'sample_data']


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import random
import tensorflow as tf
from pathlib import Path
from keras import layers
from keras import losses
from keras import optimizers
from keras import metrics
from keras import Model
from keras.applications import ResNet50

# Size every image is resized to by preprocess_image; a trailing 3 (channels)
# is appended wherever this is used as a model input shape.
target_shape = (200, 200)

def preprocess_image(filename):
    """
    Read `filename`, decode it as a 3-channel JPEG, convert it to float32
    and resize it to `target_shape`.

    Returns the resized image tensor.
    """
    raw_bytes = tf.io.read_file(filename)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    # convert_image_dtype is applied before the resize, as in the original order
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    return tf.image.resize(as_float, target_shape)

def preprocess_triplets(anchor, positive, negative):
    """
    Load and preprocess the three images of one triplet.

    Each argument is the filename of the corresponding image; the result is
    the tuple (anchor image, positive image, negative image), each produced
    by `preprocess_image`.
    """
    anchor_image = preprocess_image(anchor)
    positive_image = preprocess_image(positive)
    negative_image = preprocess_image(negative)
    return anchor_image, positive_image, negative_image

# Load image paths from directories
anchor_images_path = "/content/Anchor"
positive_images_path = "/content/Positive"
negative_images_path = "/content/Negative"

# Sorted listings keep the i-th anchor lined up with the i-th positive.
# NOTE(review): assumes the two folders are index-aligned after sorting - confirm.
anchor_images = sorted(
    os.path.join(anchor_images_path, f) for f in os.listdir(anchor_images_path)
)
positive_images = sorted(
    os.path.join(positive_images_path, f) for f in os.listdir(positive_images_path)
)
negative_images = sorted(
    os.path.join(negative_images_path, f) for f in os.listdir(negative_images_path)
)

image_count = len(anchor_images)

# Create TensorFlow datasets of file paths
anchor_dataset = tf.data.Dataset.from_tensor_slices(anchor_images)
positive_dataset = tf.data.Dataset.from_tensor_slices(positive_images)
negative_dataset = tf.data.Dataset.from_tensor_slices(negative_images)

# Only the negatives are shuffled: shuffling anchors and positives
# independently would pair every anchor with an arbitrary positive.
# reshuffle_each_iteration=False keeps the order identical on every pass,
# which the train/validation split below depends on.
negative_dataset = negative_dataset.shuffle(
    buffer_size=len(negative_images), reshuffle_each_iteration=False
)

# Zip the paths into triplets and shuffle them ONCE. With the default
# reshuffle_each_iteration=True, take()/skip() would select different
# triplets on every epoch and validation samples would leak into training.
triplet_paths = tf.data.Dataset.zip((anchor_dataset, positive_dataset, negative_dataset))
triplet_paths = triplet_paths.shuffle(buffer_size=1024, reshuffle_each_iteration=False)

# Full preprocessed dataset (kept under its original name)
dataset = triplet_paths.map(preprocess_triplets)

# Split into train and validation sets. zip() stops at the shortest input,
# so the 80% split is taken over the number of triplets actually produced.
triplet_count = min(len(anchor_images), len(positive_images), len(negative_images))
train_size = round(triplet_count * 0.8)

# The split is done on file paths so the training part can still be
# re-shuffled every epoch cheaply, before any image is decoded.
train_dataset = (
    triplet_paths.take(train_size)
    .shuffle(buffer_size=1024)
    .map(preprocess_triplets)
)
val_dataset = triplet_paths.skip(train_size).map(preprocess_triplets)

train_dataset = train_dataset.batch(32, drop_remainder=False).prefetch(tf.data.AUTOTUNE)
val_dataset = val_dataset.batch(32, drop_remainder=False).prefetch(tf.data.AUTOTUNE)

def visualize(anchor, positive, negative, num_rows=3):
    """Visualize a few triplets from the supplied batches.

    Draws one row per triplet with the anchor, positive and negative image
    side by side, axes hidden.

    Args:
        anchor: Indexable batch of anchor images.
        positive: Indexable batch of positive images.
        negative: Indexable batch of negative images.
        num_rows: Maximum number of triplets to draw (default 3). Fewer rows
            are drawn when a batch holds fewer images, instead of raising
            IndexError.

    Returns:
        None. Nothing is drawn when there are no rows to show.
    """
    def show(ax, image):
        ax.imshow(image)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

    # Never index past the end of the smallest batch.
    rows = min(num_rows, len(anchor), len(positive), len(negative))
    if rows <= 0:
        return

    fig = plt.figure(figsize=(9, 9))
    # squeeze=False keeps axs two-dimensional even when rows == 1.
    axs = fig.subplots(rows, 3, squeeze=False)
    for i in range(rows):
        show(axs[i, 0], anchor[i])
        show(axs[i, 1], positive[i])
        show(axs[i, 2], negative[i])

# Preview the first training batch: take(1) yields one batch, which is
# unpacked into the (anchor, positive, negative) arguments of visualize.
visualize(*list(train_dataset.take(1).as_numpy_iterator())[0])

# NOTE(review): the string literal below is a disabled draft, not a docstring;
# it is evaluated and discarded. Its body reads `attention_map`, which is not
# defined in the code visible here, so it needs that input before re-enabling.
"""def visualize_attention(anchor, positive, negative):
    def show(ax, image):
        ax.imshow(image)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

    fig = plt.figure(figsize=(12, 12))
    axs = fig.subplots(3, 6)
    for i in range(3):
        show(axs[i, 0], anchor[i])
        show(axs[i, 1], positive[i])
        show(axs[i, 2], negative[i])
        # Show attention maps
        for j, img in enumerate([anchor[i], positive[i], negative[i]]):
            ax = axs[i, j+3]
            ax.imshow(img)
            ax.imshow(attention_map[i, j], cmap='jet', alpha=0.4)
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)"""

#visualize(*list(train_dataset.take(1).as_numpy_iterator())[0])

# Embedding model: an ImageNet-initialised ResNet50 backbone (no classifier
# head) followed by a small dense head that emits a 256-dimensional vector.
base_cnn = ResNet50(
    include_top=False,
    weights="imagenet",
    input_shape=target_shape + (3,),
)

# Dense head: Flatten -> Dense(512) -> BN -> Dense(256) -> BN -> Dense(256)
flatten = layers.Flatten()(base_cnn.output)

dense1 = layers.Dense(512, activation="relu")(flatten)
dense1 = layers.BatchNormalization()(dense1)

dense2 = layers.Dense(256, activation="relu")(dense1)
dense2 = layers.BatchNormalization()(dense2)

output = layers.Dense(256)(dense2)

embedding = Model(base_cnn.input, output, name="Embedding")

# Freeze the backbone up to (not including) "conv5_block1_out"; that layer
# and every layer after it in base_cnn.layers stay trainable.
trainable = False
for layer in base_cnn.layers:
    trainable = trainable or layer.name == "conv5_block1_out"
    layer.trainable = trainable

class DistanceLayer(layers.Layer):
    """
    Computes two squared distances for a batch of embedding triplets:
    anchor-to-positive and anchor-to-negative.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    @staticmethod
    def _squared_distance(first, second):
        # Sum of squared element-wise differences along axis 1.
        return tf.reduce_sum(tf.square(first - second), axis=1)

    def call(self, anchor, positive, negative):
        """Return the tuple (anchor-positive distance, anchor-negative distance)."""
        ap_distance = self._squared_distance(anchor, positive)
        an_distance = self._squared_distance(anchor, negative)
        return (ap_distance, an_distance)

# One image input per triplet role, all with the embedding model's input shape
anchor_input = layers.Input(name="anchor", shape=target_shape + (3,))
positive_input = layers.Input(name="positive", shape=target_shape + (3,))
negative_input = layers.Input(name="negative", shape=target_shape + (3,))

# The same embedding model (shared weights) is applied to all three inputs
anchor_embedding = embedding(anchor_input)
positive_embedding = embedding(positive_input)
negative_embedding = embedding(negative_input)

# (anchor-positive distance, anchor-negative distance)
distance_layer = DistanceLayer()
distances = distance_layer(anchor_embedding, positive_embedding, negative_embedding)

siamese_network = Model(
    inputs=[anchor_input, positive_input, negative_input],
    outputs=distances,
)

# Second Model built over the same inputs and outputs as siamese_network
siamese_model = Model(
    inputs=[anchor_input, positive_input, negative_input],
    outputs=distances,
)

class SiameseModelWithAttention(Model):
    """Keras model that trains a wrapped triplet network with a margin loss.

    The wrapped network is called on a batch of triplets and must return the
    pair (ap_distance, an_distance). The per-sample loss is
    max(ap_distance - an_distance + margin, 0), and its running mean is
    reported under the name "loss".
    """

    def __init__(self, siamese_network_with_attention, margin=0.5):
        super().__init__()
        self.siamese_network_with_attention = siamese_network_with_attention
        self.margin = margin
        self.loss_tracker = metrics.Mean(name="loss")

    def call(self, inputs):
        """Forward `inputs` straight to the wrapped network."""
        return self.siamese_network_with_attention(inputs)

    def train_step(self, data):
        """Run one optimisation step on `data` and return the mean loss."""
        network = self.siamese_network_with_attention
        with tf.GradientTape() as tape:
            loss = self._compute_loss(data)

        # Only the wrapped network's trainable weights are updated.
        weights = network.trainable_weights
        grads = tape.gradient(loss, weights)
        self.optimizer.apply_gradients(zip(grads, weights))
        return self._report(loss)

    def test_step(self, data):
        """Evaluate `data` without updating weights and return the mean loss."""
        return self._report(self._compute_loss(data))

    def _report(self, loss):
        # Fold this batch's loss into the running mean and expose it to Keras.
        self.loss_tracker.update_state(loss)
        return {"loss": self.loss_tracker.result()}

    def _compute_loss(self, data):
        ap_distance, an_distance = self.siamese_network_with_attention(data)
        margin_gap = ap_distance - an_distance + self.margin
        return tf.maximum(margin_gap, 0.0)

    @property
    def metrics(self):
        # The only metric exposed by this model is the loss tracker.
        return [self.loss_tracker]

# Wrap the triplet network in the custom training loop and compile it with Adam
siamese_model_with_attention = SiameseModelWithAttention(siamese_network)
siamese_model_with_attention.compile(
    optimizer=optimizers.Adam(learning_rate=0.0001)
)

# Train for 4 epochs, evaluating on the validation split
siamese_model_with_attention.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=4,
)

# Pull one training batch; the attention visualisation call stays disabled
sample = next(iter(train_dataset))
#visualize_attention(*sample)


In [None]:
# Step 3: Introduce an attention mechanism
# Define the attention mechanism
def compute_attention_map(model, img_array, last_conv_layer_name="last_conv_layer"):
    """
    Compute a Grad-CAM style attention heatmap for a batch of images.

    A helper model is built that returns both the chosen convolutional
    layer's feature maps and the model's first output, so the forward pass
    runs under the gradient tape. The target score of each sample is the
    maximum of its output along the last axis. The gradients of that score
    with respect to the feature maps are averaged per channel and used to
    weight the feature maps.

    Args:
        model: Keras functional model containing the convolutional layer.
        img_array: Batch of input images accepted by `model`.
            Assumes the layer output is (batch, height, width, channels) -
            TODO confirm for the model you pass in.
        last_conv_layer_name: Name of the layer to explain. Defaults to
            "last_conv_layer", the name that was previously hard-coded.

    Returns:
        Tensor of shape (batch, height, width) with values in [0, 1], each
        sample normalized by its own maximum.

    Raises:
        ValueError: from `model.get_layer` if no layer has the given name.
    """
    last_conv_layer = model.get_layer(last_conv_layer_name)

    # Gradients can only be taken with respect to tensors produced while the
    # tape is recording, not with respect to the symbolic `layer.output`.
    grad_model = Model(
        model.inputs, [last_conv_layer.output] + list(model.outputs)
    )

    inputs = tf.convert_to_tensor(img_array)
    with tf.GradientTape() as tape:
        # Watching the input guarantees the feature maps are recorded even
        # when every layer before them is frozen.
        if inputs.dtype.is_floating:
            tape.watch(inputs)
        conv_output, *model_outputs = grad_model(inputs)
        predictions = model_outputs[0]
        # Score of the strongest output unit for each sample.
        target_score = tf.reduce_max(predictions, axis=-1)

    # Computed after leaving the tape context so the gradient computation
    # itself is not recorded.
    grads = tape.gradient(target_score, conv_output)
    pooled_grads = tf.reduce_mean(grads, axis=(1, 2), keepdims=True)

    # Channel-weighted feature maps, keeping only positive evidence.
    heatmap = tf.reduce_mean(tf.multiply(pooled_grads, conv_output), axis=-1)
    heatmap = tf.maximum(heatmap, 0)

    # The small constant avoids dividing by zero for an all-zero heatmap.
    peak = tf.reduce_max(heatmap, axis=(1, 2), keepdims=True)
    return heatmap / (peak + 1e-8)

# Load the pre-trained model
from keras.applications import ResNet50

pretrained_model = ResNet50(weights='imagenet', include_top=False, input_shape=target_shape + (3,))
# Load your pre-trained model here

# Define the Siamese network with attention mechanism.
# The original call went through `resnet.ResNet50`, but no `resnet` module is
# imported in this notebook (NameError); ResNet50 is imported directly above.
base_cnn = ResNet50(
    weights="imagenet", input_shape=target_shape + (3,), include_top=False
)

# Dense head producing a 256-dimensional feature vector
flatten = layers.Flatten()(base_cnn.output)
dense1 = layers.Dense(512, activation="relu")(flatten)
dense1 = layers.BatchNormalization()(dense1)
dense2 = layers.Dense(256, activation="relu")(dense1)
dense2 = layers.BatchNormalization()(dense2)
output = layers.Dense(256)(dense2)

# Output layer for attention mechanism: a 1x1 convolution that reduces the
# backbone feature maps to a single channel
output_attention = layers.Conv2D(1, (1, 1), name="attention_output")(base_cnn.output)

# Define the Siamese network model
# NOTE(review): this model takes ONE image and returns [feature vector,
# attention map], while the training wrapper defined below unpacks the
# network output as (ap_distance, an_distance) on triplet batches. The two
# do not match as written - confirm the intended wiring before training.
siamese_network = Model(
    inputs=base_cnn.input,
    outputs=[output, output_attention],  # Output feature vector and attention map
    name="SiameseWithAttention"
)

# Define the Siamese model
class SiameseModelWithAttention(Model):
    """Keras model that trains a wrapped network with a triplet margin loss.

    Args:
        siamese_network: Network called on each batch. `_compute_loss`
            unpacks its result as (ap_distance, an_distance).
        margin: Value added to ap_distance - an_distance before clamping at
            zero. Defaults to 0.5.

    NOTE(review): the `siamese_network` built just above this class in the
    notebook returns [feature vector, attention map] for a single image, not
    a distance pair for a triplet - confirm which network is meant to be
    passed in.
    """

    def __init__(self, siamese_network, margin=0.5):
        super().__init__()
        self.siamese_network = siamese_network
        self.margin = margin
        # Running mean of the loss, reported as "loss"
        self.loss_tracker = metrics.Mean(name="loss")

    def call(self, inputs):
        """Forward `inputs` to the wrapped network."""
        return self.siamese_network(inputs)

    def train_step(self, data):
        """Run one optimisation step on `data` and return the mean loss."""
        with tf.GradientTape() as tape:
            loss = self._compute_loss(data)

        # Only the wrapped network's trainable weights are updated.
        gradients = tape.gradient(loss, self.siamese_network.trainable_weights)
        self.optimizer.apply_gradients(
            zip(gradients, self.siamese_network.trainable_weights)
        )

        self.loss_tracker.update_state(loss)
        return {"loss": self.loss_tracker.result()}

    def test_step(self, data):
        """Evaluate `data` without updating weights and return the mean loss."""
        loss = self._compute_loss(data)
        self.loss_tracker.update_state(loss)
        return {"loss": self.loss_tracker.result()}

    def _compute_loss(self, data):
        """Return max(ap_distance - an_distance + margin, 0) for the batch."""
        ap_distance, an_distance = self.siamese_network(data)
        loss = ap_distance - an_distance + self.margin
        loss = tf.maximum(loss, 0.0)
        return loss

    @property
    def metrics(self):
        # The only metric exposed by this model is the loss tracker.
        return [self.loss_tracker]

# Wrap the network defined above in the custom training loop
siamese_model_with_attention = SiameseModelWithAttention(siamese_network)

# Compile with Adam at a learning rate of 0.0001
siamese_model_with_attention.compile(
    optimizer=optimizers.Adam(learning_rate=0.0001)
)

# Train for 3 epochs, evaluating on the validation split
siamese_model_with_attention.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=3,
)



In [None]:
import torch

In [None]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.1.27-py3-none-any.whl (721 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m721.2/721.2 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
Collecting thop>=0.1.1 (from ultralytics)
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m44.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m823.6/823.6 kB[0m [31m70.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda

In [None]:
import torch

In [None]:
import ultralytics
ultralytics.__version__

'8.1.27'

In [None]:
!pip install ultralytics.vit

[31mERROR: Could not find a version that satisfies the requirement ultralytics.vit (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for ultralytics.vit[0m[31m
[0m

In [None]:
from ultralytics import SAM

In [None]:
# Image from the Negative folder used for this one-off prediction
sample_image_path = '/content/Negative/gsun_01c2a2bdc3dd21273daaa508828df0c5.jpg'

# Build the SAM model from the 'sam_b.pt' weights and run it on that image;
# as the last expression of the cell, the returned results are displayed.
model = SAM('sam_b.pt')
model.predict(sample_image_path)


image 1/1 /content/Negative/gsun_01c2a2bdc3dd21273daaa508828df0c5.jpg: 1024x1024 1 0, 1 1, 1 2, 1 3, 1 4, 1 5, 1 6, 1 7, 1 8, 1 9, 1 10, 1 11, 1 12, 1 13, 1 14, 1 15, 1 16, 1 17, 1 18, 1 19, 1 20, 1 21, 1 22, 1 23, 1 24, 1 25, 1 26, 1 27, 1 28, 1 29, 1 30, 1 31, 1 32, 1 33, 1 34, 1 35, 1 36, 1 37, 1 38, 1 39, 1 40, 1 41, 1 42, 1 43, 1 44, 1 45, 1 46, 1 47, 1 48, 1 49, 1 50, 1 51, 1 52, 1 53, 1 54, 1 55, 1 56, 1 57, 1 58, 1 59, 1 60, 1 61, 1 62, 1 63, 1 64, 1 65, 1 66, 1 67, 1 68, 1 69, 1 70, 1 71, 10279.7ms
Speed: 6.2ms preprocess, 10279.7ms inference, 3.0ms postprocess per image at shape (1, 3, 1024, 1024)


[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: ultralytics.engine.results.Masks object
 names: {0: '0', 1: '1', 2: '2', 3: '3', 4: '4', 5: '5', 6: '6', 7: '7', 8: '8', 9: '9', 10: '10', 11: '11', 12: '12', 13: '13', 14: '14', 15: '15', 16: '16', 17: '17', 18: '18', 19: '19', 20: '20', 21: '21', 22: '22', 23: '23', 24: '24', 25: '25', 26: '26', 27: '27', 28: '28', 29: '29', 30: '30', 31: '31', 32: '32', 33: '33', 34: '34', 35: '35', 36: '36', 37: '37', 38: '38', 39: '39', 40: '40', 41: '41', 42: '42', 43: '43', 44: '44', 45: '45', 46: '46', 47: '47', 48: '48', 49: '49', 50: '50', 51: '51', 52: '52', 53: '53', 54: '54', 55: '55', 56: '56', 57: '57', 58: '58', 59: '59', 60: '60', 61: '61', 62: '62', 63: '63', 64: '64', 65: '65', 66: '66', 67: '67', 68: '68', 69: '69', 70: '70', 71: '71'}
 obb: None
 orig_img: array([[[3, 5, 6],
         [3, 5, 6],
         [2, 4, 5],
         ...,
         [0, 2, 3],
 

In [None]:
import os
import cv2
import numpy as np

# Define the directory containing your images
image_dir = "/content/Positive"

# Create a directory to save images with bounding boxes
output_dir = "/content/output"
os.makedirs(output_dir, exist_ok=True)

# Lower and upper HSV bounds of the colour to segment (e.g., green).
# They do not change between images, so they are built once, outside the loop.
lower_bound = np.array([40, 40, 40])  # Adjust these values based on the color you want to detect
upper_bound = np.array([70, 255, 255])

# Loop through each image in the directory
for filename in os.listdir(image_dir):
    # Only JPG/PNG files are processed; the check ignores letter case so
    # names such as "photo.JPG" are not skipped.
    if not filename.lower().endswith((".jpg", ".png")):
        continue

    image_path = os.path.join(image_dir, filename)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # read; skip it rather than letting cvtColor fail on None.
        print(f"Skipping unreadable image: {image_path}")
        continue

    # Convert the image to the HSV color space
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # Create a mask using the specified color range
    mask = cv2.inRange(hsv_image, lower_bound, upper_bound)

    # Find the outer contours in the mask
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Draw a 2-pixel bounding box around each contour on the original image
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

    # Save the image with bounding boxes under the same file name
    output_path = os.path.join(output_dir, filename)
    cv2.imwrite(output_path, image)
