<a href="https://colab.research.google.com/github/JaveyBae/exist2025/blob/main/Clip_Implement.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
!pip install -U sentence-transformers



In [28]:
import requests

# (filename, url) pairs: each image is fetched from `url` and written to
# `filename` in the current working directory.
img_paths = [
    ("dog.jpg", "https://images.unsplash.com/photo-1547494912-c69d3ad40e7f?ixlib=rb-4.1.0&q=85&fm=jpg&crop=entropy&cs=srgb&w=640"),
    ("cat.jpg", "https://images.unsplash.com/photo-1518791841217-8f162f1e1131?ixlib=rb-4.0.3&w=640&q=80"),
    ("beach.jpg", "https://images.unsplash.com/photo-1507525428034-b723cf961d3e?ixlib=rb-4.0.3&w=640&q=80"),
]

for filename, url in img_paths:
    # The timeout keeps a stalled connection from hanging the notebook, and
    # raise_for_status() prevents an HTTP error page from being saved as a
    # ".jpg" that would only fail later when the image is opened.
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    with open(filename, "wb") as f:
        f.write(r.content)
    print(f"✅ Saved {filename}")

✅ Saved dog.jpg
✅ Saved cat.jpg
✅ Saved beach.jpg


In [29]:
from sentence_transformers import SentenceTransformer, util
from PIL import Image
import torch
import os

# Load models: one checkpoint encodes the images, the other (multilingual)
# checkpoint encodes the text prompts; their embeddings are compared below.
img_model = SentenceTransformer('clip-ViT-B-32')
text_model = SentenceTransformer('sentence-transformers/clip-ViT-B-32-multilingual-v1')

# Resolve the image files against the current working directory (where the
# download cell writes them) instead of hard-coding Colab's "/content" prefix,
# so the cell also runs outside Colab.
img_paths = [os.path.abspath(name) for name in ("dog.jpg", "cat.jpg", "beach.jpg")]

# Load every image as RGB. The context manager closes each file handle as soon
# as the converted copy has been made.
images = []
try:
    for p in img_paths:
        with Image.open(p) as img:
            images.append(img.convert("RGB"))
except FileNotFoundError:
    print("Error: Please ensure the image files are uploaded and the paths above are correct!")
    # Discard any partially loaded set so the matching below is skipped entirely.
    images = []

if images:
    # --- Encode images and text ---
    img_embeddings = img_model.encode(images, convert_to_tensor=True, normalize_embeddings=True)

    texts = [
        "A dog in the snow",      # English
        "Eine Katze",             # German: A cat
        "Una playa con palmeras." # Spanish: A beach with palm trees
    ]
    text_embeddings = text_model.encode(texts, convert_to_tensor=True, normalize_embeddings=True)

    # --- Output semantic vectors ---
    print("--- Semantic Vector Output ---")

    # Print Image Embeddings
    print("\n🖼️  Image Embeddings:")
    print(f"Shape: {img_embeddings.shape}")
    print(img_embeddings)

    # Print Text Embeddings
    print("\n📝 Text Embeddings:")
    print(f"Shape: {text_embeddings.shape}")
    print(text_embeddings)

    print("\n--- Similarity Matching Results ---")

    # Compute cosine similarity: one row per text, one column per image.
    cos_sim = util.cos_sim(text_embeddings, img_embeddings)

    # For each text, report the image with the highest similarity score.
    for text, scores in zip(texts, cos_sim):
        # Convert the 0-dim tensor to a plain int before using it as a list index.
        best_idx = int(torch.argmax(scores))
        print(f"\n📝 Text: {text}")
        print(f"📈 Best match score: {scores[best_idx].item():.4f}")
        print(f"🖼️  Matched image: {img_paths[best_idx]}")

--- Semantic Vector Output ---

🖼️  Image Embeddings:
Shape: torch.Size([3, 512])
tensor([[ 0.0292,  0.0259, -0.0095,  ..., -0.0249, -0.0065,  0.0040],
        [-0.0188,  0.0002,  0.0189,  ...,  0.0100,  0.0011, -0.0014],
        [ 0.0051,  0.0061, -0.0170,  ...,  0.0406,  0.0036, -0.0241]])

📝 Text Embeddings:
Shape: torch.Size([3, 512])
tensor([[-6.4259e-05,  1.0246e-02, -2.9634e-02,  ..., -1.0483e-02,
          4.3085e-03, -7.4803e-02],
        [ 9.0206e-03, -9.9677e-03, -1.3959e-02,  ..., -3.5666e-02,
         -4.1808e-02,  3.4178e-03],
        [ 8.9918e-03,  4.3758e-02,  7.3397e-03,  ...,  3.5206e-02,
         -3.9791e-02,  3.1345e-02]])

--- Similarity Matching Results ---

📝 Text: A dog in the snow
📈 Best match score: 0.3132
🖼️  Matched image: /content/dog.jpg

📝 Text: Eine Katze
📈 Best match score: 0.2582
🖼️  Matched image: /content/cat.jpg

📝 Text: Una playa con palmeras.
📈 Best match score: 0.2690
🖼️  Matched image: /content/beach.jpg


In [36]:
from sentence_transformers import SentenceTransformer, util
from PIL import Image
import torch

# 1. Load the model
model = SentenceTransformer('clip-ViT-B-32')

# 2. Define the image paths and the candidate category prompts
image_paths = ["dog.jpg", "cat.jpg", "beach.jpg"]
categories = ["a photo of a dog", "a photo of a cat", "a photo of a car", "a photo of a bird" , "a photo of a beach"]

# Factor applied to the cosine scores before Softmax to amplify the score
# differences between categories.
scaling_factor = 50  # You can try different values, e.g., 20 or 100

# 3a. Encode the category prompts once: they are the same for every image,
#     so there is no need to re-encode them inside the loop.
text_embeddings = model.encode(categories, convert_to_tensor=True)

# Process each image
for image_path in image_paths:
    # Load the image; the context manager closes the file handle once the
    # RGB copy has been created.
    try:
        with Image.open(image_path) as img:
            image = img.convert("RGB")
    except FileNotFoundError:
        print(f"Error: Image not found at path '{image_path}'.")
        image = None

    if image is not None:
        # 3b. Encode the image
        image_embedding = model.encode(image, convert_to_tensor=True)

        # 4. Compute cosine similarity between the image and every category
        cosine_scores = util.cos_sim(image_embedding, text_embeddings)

        # 5. Scale the scores, then apply Softmax to turn them into probabilities
        scaled_scores = cosine_scores[0] * scaling_factor
        probs = torch.nn.functional.softmax(scaled_scores, dim=0)

        # Print probabilities for each category
        print(f"--- Classification Results for Image '{image_path}' (Scaled) ---")
        for category, prob in zip(categories, probs.tolist()):
            print(f"Category: '{category}', Probability: {prob:.4f}")

        # Find the category with the highest probability
        best_category_idx = torch.argmax(probs).item()
        best_category = categories[best_category_idx]

        print(f"\n✅ Final Prediction: This is most likely a photo of '{best_category}'.")
    # Separator for clarity between images (printed even when an image is missing)
    print("-" * 30)

--- Classification Results for Image 'dog.jpg' (Scaled) ---
Category: 'a photo of a dog', Probability: 0.9140
Category: 'a photo of a cat', Probability: 0.0359
Category: 'a photo of a car', Probability: 0.0104
Category: 'a photo of a bird', Probability: 0.0278
Category: 'a photo of a beach', Probability: 0.0119

✅ Final Prediction: This is most likely a photo of 'a photo of a dog'.
------------------------------
--- Classification Results for Image 'cat.jpg' (Scaled) ---
Category: 'a photo of a dog', Probability: 0.0720
Category: 'a photo of a cat', Probability: 0.8963
Category: 'a photo of a car', Probability: 0.0078
Category: 'a photo of a bird', Probability: 0.0205
Category: 'a photo of a beach', Probability: 0.0034

✅ Final Prediction: This is most likely a photo of 'a photo of a cat'.
------------------------------
--- Classification Results for Image 'beach.jpg' (Scaled) ---
Category: 'a photo of a dog', Probability: 0.0128
Category: 'a photo of a cat', Probability: 0.0107
Catego