<a href="https://colab.research.google.com/github/MuhammadHasbiAshshiddieqy/image-similarity/blob/master/Image_Similarity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DINOv2

https://towardsai.net/p/machine-learning/vision-embedding-comparison-for-image-similarity-search-efficientnet-vs-vit-vs-vino-vs-clip-vs-blip2

In [None]:
import numpy as np
import torch
from PIL import Image
import matplotlib.pyplot as plt
import cv2
import os
from torchvision import transforms
from transformers import AutoModel

class BagSimilarityDetector:
    """Compare two bag images with DINOv2 embeddings and simple image statistics.

    Two signals are combined:

    * visual similarity: cosine similarity of the L2-normalized DINOv2 [CLS]
      embeddings of both images;
    * attribute similarity: heuristic scores in [0, 1] for each entry of
      ``bag_attributes``, derived from aspect ratio, edge density, saturation
      and intensity spread. These are rough approximations, not classifier
      outputs.
    """

    # Scales that map raw 8-bit image statistics onto [0, 1] before weighting.
    _MAX_SATURATION = 255.0   # 8-bit OpenCV HSV stores saturation in 0..255
    _MAX_GRAY_STD = 127.5     # largest possible std-dev for values in 0..255
    _COLOR_STD_SCALE = 80.0   # heuristic scale for the per-channel std-dev

    # (lower bound, label) pairs, checked from the highest bound down.
    _SIMILARITY_CATEGORIES = (
        (0.85, "Sangat mirip"),
        (0.7, "Mirip"),
        (0.5, "Agak mirip"),
    )
    _LOWEST_CATEGORY = "Tidak mirip"

    def __init__(self):
        """Load the DINOv2 model, pick a device and build the preprocessing pipeline."""
        print("Initializing DINOv2 model for bag similarity detection...")
        # Using DINOv2 model which excels at visual feature extraction
        self.model_name = "facebook/dinov2-base"
        self.model = AutoModel.from_pretrained(self.model_name)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model.to(self.device)
        # The model is only used for inference; make evaluation mode explicit.
        self.model.eval()

        # Set up image transformation pipeline (DINOv2 expects 224x224 images)
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

        print(f"Model loaded successfully. Using device: {self.device}")

        # List of bag attributes to test specifically
        self.bag_attributes = [
            "a designer handbag",
            "a tote bag",
            "a crossbody bag",
            "a clutch bag",
            "a backpack",
            "a bag with leather material",
            "a bag with canvas material",
            "a bag with pattern",
            "a bag with solid color",
            "a bag with logo print",
            "a structured bag",
            "a soft bag"
        ]

    @staticmethod
    def _read_rgb(img_path):
        """Read ``img_path`` with OpenCV and return it as an RGB array.

        Raises:
            ValueError: if OpenCV cannot read the file.
        """
        img = cv2.imread(img_path)
        if img is None:
            raise ValueError(f"Cannot read image at {img_path}")
        # OpenCV loads images as BGR; everything downstream works in RGB.
        return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    @staticmethod
    def _categorize_similarity(score):
        """Return the label for a weighted similarity ``score``."""
        for lower_bound, label in BagSimilarityDetector._SIMILARITY_CATEGORIES:
            if score >= lower_bound:
                return label
        return BagSimilarityDetector._LOWEST_CATEGORY

    @staticmethod
    def _fit_height(img, max_height):
        """Shrink ``img`` to ``max_height`` rows, keeping its aspect ratio.

        Images that are already small enough are returned unchanged.
        """
        if img.shape[0] <= max_height:
            return img
        scale = max_height / img.shape[0]
        # Keep at least one column so cv2.resize never gets a zero width.
        new_width = max(1, int(img.shape[1] * scale))
        return cv2.resize(img, (new_width, max_height))

    @staticmethod
    def _score_attributes(aspect_ratio, edge_density, saturation, texture_std, color_std):
        """Turn raw image statistics into heuristic attribute scores.

        Args:
            aspect_ratio: width / height of the original (un-resized) image.
            edge_density: fraction of pixels marked as edges, in [0, 1].
            saturation: mean HSV saturation on the 8-bit scale (0..255).
            texture_std: std-dev of the 8-bit grayscale image.
            color_std: mean per-channel std-dev of the 8-bit RGB image.

        Returns:
            dict mapping each attribute name to a float clamped to [0, 1].
        """
        # Bring 8-bit statistics onto [0, 1]; otherwise the 0.1-0.3 weights
        # below are swamped and the scores always hit the clamp.
        sat = saturation / BagSimilarityDetector._MAX_SATURATION
        tex = texture_std / BagSimilarityDetector._MAX_GRAY_STD
        col = color_std / BagSimilarityDetector._COLOR_STD_SCALE

        def shape_fit(target_ratio):
            # 1.0 when the image matches the target ratio, lower as it deviates.
            return 1 - abs(aspect_ratio - target_ratio)

        raw_scores = {
            # Bag type scores based on aspect ratio and edge density
            "a tote bag": 0.5 + 0.3 * shape_fit(0.9) - 0.2 * edge_density,
            "a crossbody bag": 0.5 + 0.3 * shape_fit(0.7) - 0.1 * edge_density,
            "a clutch bag": 0.5 + 0.3 * shape_fit(1.8) + 0.1 * edge_density,
            "a backpack": 0.5 + 0.3 * shape_fit(0.6) + 0.2 * edge_density,
            "a designer handbag": 0.5 + 0.1 * edge_density + 0.2 * sat,
            # Material scores based on texture and saturation
            "a bag with leather material": 0.5 + 0.2 * sat - 0.1 * tex,
            "a bag with canvas material": 0.5 - 0.1 * sat + 0.2 * tex,
            # Pattern scores based on color spread
            "a bag with pattern": 0.5 + 0.3 * col,
            "a bag with solid color": 0.5 + 0.3 * (1 - col),
            "a bag with logo print": 0.5 + 0.2 * edge_density + 0.1 * col,
            # Structure scores based on edge density
            "a structured bag": 0.5 + 0.4 * edge_density,
            "a soft bag": 0.5 + 0.4 * (1 - edge_density),
        }

        # Clamp to [0, 1] and return plain Python floats.
        return {name: float(min(1.0, max(0.0, value))) for name, value in raw_scores.items()}

    def preprocess_image(self, img_path):
        """Load an image and prepare it for DINOv2.

        Args:
            img_path: path of the image file.

        Returns:
            ``(tensor_img, pil_img)``: the transformed tensor with a leading
            batch dimension, and the RGB PIL image it was built from.

        Raises:
            ValueError: if the image cannot be read.
        """
        pil_img = Image.fromarray(self._read_rgb(img_path))

        # Apply transformations
        tensor_img = self.transform(pil_img).unsqueeze(0)  # Add batch dimension

        return tensor_img, pil_img

    def extract_visual_features(self, img_path):
        """Extract an L2-normalized DINOv2 embedding for one image.

        Args:
            img_path: path of the image file.

        Returns:
            A numpy array with one row (the normalized [CLS] token features),
            or ``None`` if anything fails; the error is printed.
        """
        try:
            tensor_img, _ = self.preprocess_image(img_path)
            tensor_img = tensor_img.to(self.device)

            # Inference only, so skip gradient tracking.
            with torch.no_grad():
                outputs = self.model(tensor_img)

            # Use the [CLS] token features which contains global image representation
            image_features = outputs.last_hidden_state[:, 0, :].detach().cpu().numpy()

            norms = np.linalg.norm(image_features, axis=1, keepdims=True)
            if np.any(norms == 0):
                # Dividing would produce NaNs that silently poison the similarity.
                raise ValueError("zero-norm embedding cannot be normalized")

            return image_features / norms

        except Exception as e:
            # Best effort: report the problem and let the caller handle None.
            print(f"Error extracting features: {e}")
            return None

    def analyze_specific_attributes(self, img_path):
        """Approximate bag attributes from simple image statistics.

        Args:
            img_path: path of the image file.

        Returns:
            dict mapping attribute names to scores in [0, 1], or an empty dict
            if the analysis fails; the error is printed.
        """
        try:
            img = self._read_rgb(img_path)

            # Measure the shape before resizing: the square resize below would
            # force the ratio to 1.0 for every image.
            height, width = img.shape[:2]
            aspect_ratio = width / height

            # Resize for consistency
            img = cv2.resize(img, (224, 224))

            # Color analysis (for solid color vs pattern)
            gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
            color_std = np.std(img, axis=(0, 1)).mean()
            texture_std = np.std(gray)

            # Edge detection (for structured vs soft bag)
            edges = cv2.Canny(gray, 50, 150)
            edge_density = np.count_nonzero(edges) / edges.size

            # Color distribution (for material approximation)
            hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
            saturation = np.mean(hsv[:, :, 1])

            return self._score_attributes(
                aspect_ratio, edge_density, saturation, texture_std, color_std
            )

        except Exception as e:
            print(f"Error analyzing attributes: {e}")
            return {}

    def compare_bags(self, img_path1, img_path2):
        """Compare two bag images.

        Args:
            img_path1: path of the first image.
            img_path2: path of the second image.

        Returns:
            ``(visual_similarity, attribute_similarities, weighted_similarity)``
            where ``attribute_similarities`` maps each attribute name to a
            similarity value, or ``(None, None, None)`` when feature extraction
            or attribute analysis fails for either image.
        """
        print(f"Comparing bags: {img_path1} and {img_path2}")

        # Extract visual features
        features1 = self.extract_visual_features(img_path1)
        features2 = self.extract_visual_features(img_path2)

        if features1 is None or features2 is None:
            print("Failed to extract features from one or both images")
            return None, None, None

        # Both vectors are unit length, so the dot product is the cosine similarity.
        visual_similarity = float(np.dot(features1, features2.T)[0][0])

        # Analyze specific attributes
        attributes1 = self.analyze_specific_attributes(img_path1)
        attributes2 = self.analyze_specific_attributes(img_path2)

        if not attributes1 or not attributes2:
            # An empty dict means the analysis failed. Defaulting missing scores
            # to 0 would report a perfect attribute match between two failures.
            print("Failed to analyze attributes of one or both images")
            return None, None, None

        # The closer the attribute values, the higher the similarity
        attribute_similarities = {
            attr: 1 - abs(attributes1.get(attr, 0) - attributes2.get(attr, 0))
            for attr in self.bag_attributes
        }

        # Calculate average attribute similarity
        if attribute_similarities:
            avg_attr_similarity = sum(attribute_similarities.values()) / len(attribute_similarities)
        else:
            avg_attr_similarity = 0.0

        # Weighted similarity (80% visual, 20% attribute)
        weighted_similarity = 0.8 * visual_similarity + 0.2 * avg_attr_similarity

        return visual_similarity, attribute_similarities, weighted_similarity

    def create_comparison_visualization(self, img_path1, img_path2, visual_similarity, attribute_similarities, weighted_similarity):
        """Plot both images with their similarity metrics and save the figure.

        Args:
            img_path1: path of the first image.
            img_path2: path of the second image.
            visual_similarity: embedding similarity of the two images.
            attribute_similarities: dict of attribute name -> similarity.
            weighted_similarity: combined similarity score.

        Returns:
            Path of the PNG written under ``comparison_results``.

        Raises:
            ValueError: if either image cannot be read.
        """
        # Prepare images, shrinking them if too large
        max_height = 400
        img1 = self._fit_height(self._read_rgb(img_path1), max_height)
        img2 = self._fit_height(self._read_rgb(img_path2), max_height)

        # Create plot
        fig = plt.figure(figsize=(14, 10))

        # Plot bag images
        for position, (image, title) in enumerate(((img1, "Tas 1"), (img2, "Tas 2")), start=1):
            plt.subplot(2, 2, position)
            plt.imshow(image)
            plt.title(title)
            plt.axis('off')

        # Plot overall similarity
        plt.subplot(2, 2, 3)
        if attribute_similarities:
            avg_attr_similarity = sum(attribute_similarities.values()) / len(attribute_similarities)
        else:
            # Nothing to average; avoid a ZeroDivisionError.
            avg_attr_similarity = 0.0
        similarity_labels = ['Visual Similarity', 'Attribute Similarity', 'Weighted Similarity']
        similarity_values = [visual_similarity, avg_attr_similarity, weighted_similarity]
        plt.bar(similarity_labels, similarity_values, color=['#3498db', '#2ecc71', '#e74c3c'])
        plt.ylim(0, 1)
        plt.title("Overall Similarity Metrics")
        plt.ylabel("Similarity Score")

        # Plot the 5 attributes with the highest similarity
        plt.subplot(2, 2, 4)
        top_attrs = sorted(attribute_similarities.items(), key=lambda x: x[1], reverse=True)[:5]

        # Use shorter labels for the plot
        top_labels = [attr.replace("a bag with ", "").replace("a ", "") for attr, _ in top_attrs]
        top_values = [value for _, value in top_attrs]

        plt.barh(top_labels, top_values, color='#2ecc71')
        plt.xlim(0, 1)
        plt.title("Top 5 Attribute Similarities")
        plt.xlabel("Similarity Score")

        # Add overall description
        similarity_category = self._categorize_similarity(weighted_similarity)
        plt.figtext(0.5, 0.01, f"Overall Similarity: {weighted_similarity:.4f} ({similarity_category})",
                   ha="center", fontsize=14, bbox={"facecolor":"orange", "alpha":0.5, "pad":5})

        plt.tight_layout()
        plt.subplots_adjust(bottom=0.1)

        # Save image
        result_dir = "comparison_results"
        os.makedirs(result_dir, exist_ok=True)
        result_path = os.path.join(result_dir, f"comparison_{os.path.basename(img_path1)}_{os.path.basename(img_path2)}.png")
        plt.savefig(result_path)
        print(f"Saved comparison visualization to {result_path}")

        plt.show()
        # Release the figure so repeated comparisons do not pile up open figures.
        plt.close(fig)

        return result_path

def main(img_1="tas1.jpeg", img_2="tas1.jpeg"):
    """Compare two bag images, print the results and save a visualization.

    Args:
        img_1: path of the first image.
        img_2: path of the second image. The default is the same file as
            ``img_1``, so calling ``main()`` with no arguments compares an
            image with itself.
    """
    # Initialize detector
    detector = BagSimilarityDetector()

    # Compare bags
    visual_similarity, attribute_similarities, weighted_similarity = detector.compare_bags(img_1, img_2)

    if visual_similarity is None:
        print("Comparison failed.")
        return

    # Print results
    print("\n===== HASIL PERBANDINGAN =====")
    print(f"Visual Similarity: {visual_similarity:.4f}")

    print("\nAttribute Similarities:")
    ranked = sorted(attribute_similarities.items(), key=lambda item: item[1], reverse=True)
    for attr, score in ranked:
        print(f"  - {attr}: {score:.4f}")

    print(f"\nWeighted Similarity: {weighted_similarity:.4f}")

    # Categorize similarity: the first lower bound the score reaches wins.
    verdicts = (
        (0.85, "Kesimpulan: Kedua tas sangat mirip"),
        (0.7, "Kesimpulan: Kedua tas mirip"),
        (0.5, "Kesimpulan: Kedua tas agak mirip"),
    )
    verdict = next(
        (text for lower_bound, text in verdicts if weighted_similarity >= lower_bound),
        "Kesimpulan: Kedua tas tidak mirip",
    )
    print(verdict)

    # Create visualization
    detector.create_comparison_visualization(
        img_1, img_2, visual_similarity, attribute_similarities, weighted_similarity
    )


if __name__ == "__main__":
    main()