In [60]:
import torch
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import os
import shutil


In [51]:
# Load CLIP model and processor
model_name = "openai/clip-vit-base-patch16"  # You can change this if you prefer another version
processor = CLIPProcessor.from_pretrained(model_name)
model = CLIPModel.from_pretrained(model_name)

# Ensure the model is in evaluation mode
model.eval()

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


CLIPModel(
  (text_model): CLIPTextTransformer(
    (embeddings): CLIPTextEmbeddings(
      (token_embedding): Embedding(49408, 512)
      (position_embedding): Embedding(77, 512)
    )
    (encoder): CLIPEncoder(
      (layers): ModuleList(
        (0-11): 12 x CLIPEncoderLayer(
          (self_attn): CLIPSdpaAttention(
            (k_proj): Linear(in_features=512, out_features=512, bias=True)
            (v_proj): Linear(in_features=512, out_features=512, bias=True)
            (q_proj): Linear(in_features=512, out_features=512, bias=True)
            (out_proj): Linear(in_features=512, out_features=512, bias=True)
          )
          (layer_norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          (mlp): CLIPMLP(
            (activation_fn): QuickGELUActivation()
            (fc1): Linear(in_features=512, out_features=2048, bias=True)
            (fc2): Linear(in_features=2048, out_features=512, bias=True)
          )
          (layer_norm2): LayerNorm((512,), eps=1e

In [73]:
# List of prompts for known authority figures
authority_figures = [
    'politician',
    'government official',
    'Norwegian politician',
    'political figure',
    'authority figure',
    'people in suits',
    'man in suit',
    'woman in suit',
    'minister',
    'diplomat',
    'legislator',
    'public official'
]


In [67]:
# Folder where images are stored
image_folder = 'D:\\Images\\Covid\\output_images_detected'
classified_folder = 'D:\\Images\\Covid\\classified_authority'
no_authority_folder = 'D:\\Images\\Covid\\no_authority'

In [75]:
# Get list of image files in the folder
image_files = [f for f in os.listdir(image_folder) if f.endswith(('png', 'jpg', 'jpeg'))]

# Function to compute image embedding and match with text prompts
def get_image_embeddings(image_path):
    # Open image
    image = Image.open(image_path)
    
    # Preprocess the image and text (prompt)
    inputs = processor(text=authority_figures, images=image, return_tensors="pt", padding=True)
    
    # Generate embeddings
    with torch.no_grad():
        outputs = model(**inputs)
        
    image_embeddings = outputs.image_embeds  # Embedding for the image
    text_embeddings = outputs.text_embeds    # Embeddings for the text prompts
    
    return image_embeddings, text_embeddings

# Function to compute cosine similarity between image and text embeddings
from torch.nn.functional import cosine_similarity

def find_matching_prompts(image_embeddings, text_embeddings):
    # Calculate cosine similarities between image and text embeddings
    similarities = cosine_similarity(image_embeddings, text_embeddings)
    
    # Get the best match (the text with highest similarity)
    best_match_idx = similarities.argmax().item()  # Use .item() to convert to scalar
    best_similarity = similarities[best_match_idx].item()  # Access the similarity directly without indexing [0]
    
    return authority_figures[best_match_idx], best_similarity

# Process all images and save matches or "no authority" images
matches = []

for image_file in image_files:
    image_path = os.path.join(image_folder, image_file)
    
    # Get embeddings for the image and the text prompts
    image_embeddings, text_embeddings = get_image_embeddings(image_path)
    
    # Find the best matching authority figure
    best_match, similarity = find_matching_prompts(image_embeddings, text_embeddings)
    
    # If the similarity is above threshold, save it to the "classified_authority" folder
    if similarity > 0.25:  # You can adjust this threshold based on your preference
        classified_image_path = os.path.join(classified_folder, image_file)
        shutil.copy(image_path, classified_image_path)  # Copy image to classified folder
        matches.append((image_file, best_match, similarity))
    else:
        # If no authority figure is detected, save it to the "no_authority" folder
        no_authority_image_path = os.path.join(no_authority_folder, image_file)
        shutil.copy(image_path, no_authority_image_path)  # Copy image to "no_authority" folder

# Output matches
for match in matches:
    print(f"Image: {match[0]} | Matched Authority: {match[1]} | Similarity: {match[2]:.2f}")

print(f"\nTotal classified images: {len(matches)}")
print(f"Images with no authority detected: {len(image_files) - len(matches)}")

Image: FI_eGqhVIAM2x9Y.jpg | Matched Authority: Norwegian politician | Similarity: 0.26
Image: FIwO-xuXIAAYbbR.jpg | Matched Authority: Norwegian politician | Similarity: 0.30
Image: FIvnj5sWQAsU0AJ.jpg | Matched Authority: Norwegian politician | Similarity: 0.30
Image: FIk5v3tWUAAFOAB.jpg | Matched Authority: Norwegian politician | Similarity: 0.25
Image: FH5JjL_X0AAsIYH.jpg | Matched Authority: Norwegian politician | Similarity: 0.25
Image: FHtLz9tWUAUN4zx.jpg | Matched Authority: Norwegian politician | Similarity: 0.27
Image: FHRylriWQAQ15Cg.png | Matched Authority: Norwegian politician | Similarity: 0.28
Image: FHHto0vXwAE-p29.jpg | Matched Authority: Norwegian politician | Similarity: 0.27
Image: FG58QYAWYAAB_ai.jpg | Matched Authority: Norwegian politician | Similarity: 0.25
Image: FGwD7bPWQAYpFWQ.jpg | Matched Authority: Norwegian politician | Similarity: 0.29
Image: FGfJ7jpXsAI-_an.jpg | Matched Authority: Norwegian politician | Similarity: 0.27
Image: FGZ9X2OXIAEfQcg.jpg | Mat