In [None]:
import torch
import numpy as np
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
from sklearn.metrics.pairwise import cosine_similarity
import os

# Load pre-trained CLIP model and processor
model_name = "openai/clip-vit-base-patch16"
model = CLIPModel.from_pretrained(model_name)
processor = CLIPProcessor.from_pretrained(model_name)

# Function to generate and save embeddings for the dataset using CLIP
def generate_and_save_embeddings_with_clip(image_dir, save_path, class_names):
    embeddings = {}
    labels = {}  # Dictionary to store the class labels corresponding to embeddings
    
    # Generate text embeddings for the classes (e.g., 'drone', 'bird')
    text_inputs = processor(text=class_names, return_tensors="pt", padding=True)
    text_embeddings = model.get_text_features(**text_inputs)
    
    for label in os.listdir(image_dir):  # Loop through each class folder (e.g., Bird, Drone)
        label_dir = os.path.join(image_dir, label)
        if os.path.isdir(label_dir):
            for img_name in os.listdir(label_dir):
                img_path = os.path.join(label_dir, img_name)
                img = Image.open(img_path).convert("RGB")
                img = img.resize((224, 224))  # Resize image to match CLIP's input size
                img_array = np.array(img) / 255.0  # Normalize the image to [0, 1]
                img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension

                # Extract image embedding using CLIP
                image_input = processor(images=img, return_tensors="pt")
                image_embedding = model.get_image_features(**image_input)

                # Normalize the image embedding
                image_embedding = image_embedding / image_embedding.norm(p=2, dim=-1, keepdim=True)

                # Store the embedding and label (class) for each image
                embeddings[img_name] = image_embedding.detach().numpy().flatten()
                labels[img_name] = label  # Store the label (class) for the image

    # Save the embeddings and labels to files
    np.save(save_path + '_embeddings.npy', embeddings)
    np.save(save_path + '_labels6.npy', labels)

# Function to load the embeddings and labels from the saved files
def load_embeddings_and_labels(file_path):
    embeddings = np.load(file_path + '_embeddings.npy', allow_pickle=True).item()
    labels = np.load(file_path + '_labels.npy', allow_pickle=True).item()
    return embeddings, labels

# Function to classify an image using Zero-Shot Learning (ZSL) with CLIP
def classify_using_zsl(features, embeddings, labels, threshold=0.7):
    similarities = {}
    for img_name, embedding in embeddings.items():
        similarity = cosine_similarity([features], [embedding])[0][0]
        similarities[img_name] = similarity
    
    # Find the image with the highest similarity score
    best_match = max(similarities, key=similarities.get)
    max_similarity = similarities[best_match]

    # If the similarity score is above the threshold, classify it as the best match; otherwise, return "Unknown"
    if max_similarity >= threshold:
        predicted_label = labels[best_match]
    else:
        predicted_label = "Unknown"  # If similarity is below threshold, return 'Unknown'
    
    return predicted_label

# Path to save embeddings (updated)
save_path = r'D:\capstone\Project\Clip_image_embeddings'  # Path to save the embeddings and labels file

# Define class names (this should match your dataset's classes)
class_names = ["drone", "bird"]  # Extend as per your dataset

# Generate and save embeddings for the dataset using CLIP
image_dir = r"D:\capstone\imageDataset\Dataset"  # Path to your dataset folder
generate_and_save_embeddings_with_clip(image_dir, save_path, class_names)

# Load the embeddings and labels from the saved file
embeddings, labels = load_embeddings_and_labels(save_path)

# Now, let's classify a new image
img_path = r"D:\capstone\imageDataset\split_dataset\train\drone\UAVS_618.jpg"  # Replace with actual image path
img = Image.open(img_path).convert("RGB")
img = img.resize((224, 224))  # Resize image to match CLIP's input size

# Extract image features using CLIP
image_input = processor(images=img, return_tensors="pt")
image_embedding = model.get_image_features(**image_input)

# Normalize the image embedding
image_embedding = image_embedding / image_embedding.norm(p=2, dim=-1, keepdim=True)

# Classify the image using the embeddings and labels
predicted_label = classify_using_zsl(image_embedding.detach().numpy().flatten(), embeddings, labels)

print(f"Predicted label for the new image (using ZSL with CLIP): {predicted_label}")


Predicted label for the new image (using ZSL with CLIP): drone


In [1]:
import os
import numpy as np
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

# Load pre-trained CLIP model and processor
model_name = "openai/clip-vit-base-patch16"
model = CLIPModel.from_pretrained(model_name)
processor = CLIPProcessor.from_pretrained(model_name)

# Function to generate and save embeddings for the training dataset
def generate_and_save_embeddings(image_dir, embeddings_path, labels_path):
    embeddings = {}
    labels = {}
    
    for label in os.listdir(image_dir):  # Loop through each class folder (e.g., 'drone', 'bird')
        label_dir = os.path.join(image_dir, label)
        if os.path.isdir(label_dir):
            for img_name in os.listdir(label_dir):
                img_path = os.path.join(label_dir, img_name)
                img = Image.open(img_path).convert("RGB")
                img_input = processor(images=img, return_tensors="pt")
                img_embedding = model.get_image_features(**img_input)
                img_embedding = img_embedding / img_embedding.norm(p=2, dim=-1, keepdim=True)
                
                # Store embedding and corresponding label
                embeddings[img_name] = img_embedding.detach().numpy().flatten()
                labels[img_name] = label  # Store class label
    
    # Save embeddings and labels as .npy files
    np.save(embeddings_path, embeddings)
    np.save(labels_path, labels)
    print(f"Embeddings and labels saved to {embeddings_path} and {labels_path}")

# Function to load embeddings and labels from saved .npy files
def load_embeddings_and_labels(embeddings_path, labels_path):
    embeddings = np.load(embeddings_path, allow_pickle=True).item()
    labels = np.load(labels_path, allow_pickle=True).item()
    return embeddings, labels

# Function to classify validation dataset using generated embeddings and labels
def classify_and_evaluate(valid_dir, train_embeddings, train_labels, threshold=0.2):
    y_true = []
    y_pred = []
    
    for label in os.listdir(valid_dir):  # Loop through validation class folders
        label_dir = os.path.join(valid_dir, label)
        if os.path.isdir(label_dir):
            for img_name in os.listdir(label_dir):
                img_path = os.path.join(label_dir, img_name)
                img = Image.open(img_path).convert("RGB")
                img_input = processor(images=img, return_tensors="pt")
                img_embedding = model.get_image_features(**img_input)
                img_embedding = img_embedding / img_embedding.norm(p=2, dim=-1, keepdim=True)
                
                similarities = cosine_similarity(
                    img_embedding.detach().numpy(), 
                    np.array(list(train_embeddings.values()))
                )
                
                best_match_idx = np.argmax(similarities)
                best_match_similarity = similarities[0][best_match_idx]
                
                if best_match_similarity >= threshold:
                    best_match_image_name = list(train_embeddings.keys())[best_match_idx]
                    predicted_label = train_labels[best_match_image_name]
                else:
                    predicted_label = "Unknown"
                
                y_true.append(label)
                y_pred.append(predicted_label)
    
    # Calculate metrics
    accuracy = accuracy_score(y_true, y_pred)
    conf_matrix = confusion_matrix(y_true, y_pred, labels=list(set(y_true + y_pred)))
    report = classification_report(y_true, y_pred)
    
    return accuracy, conf_matrix, report

# Paths for train and validation datasets
train_dir = r"D:\capstone\imageDataset\split_dataset\train"  # Path to training dataset
valid_dir = r"D:\capstone\imageDataset\split_dataset\valid"  # Path to validation dataset

# Paths to save embeddings and labels
embeddings_path = r"D:\capstone\Project\train_embeddings.npy"
labels_path = r"D:\capstone\Project\train_labels.npy"

# Step 1: Generate embeddings and labels for the training dataset
generate_and_save_embeddings(train_dir, embeddings_path, labels_path)

# Step 2: Load generated embeddings and labels
train_embeddings, train_labels = load_embeddings_and_labels(embeddings_path, labels_path)

# Step 3: Classify validation dataset and evaluate
accuracy, conf_matrix, report = classify_and_evaluate(valid_dir, train_embeddings, train_labels)

# Step 4: Print the evaluation metrics
print(f"Accuracy: {accuracy:.2f}")
print("Confusion Matrix:")
print(conf_matrix)
print("Classification Report:")
print(report)


  from .autonotebook import tqdm as notebook_tqdm


Embeddings and labels saved to D:\capstone\Project\train_embeddings.npy and D:\capstone\Project\train_labels.npy
Accuracy: 1.00
Confusion Matrix:
[[116   0]
 [  0  79]]
Classification Report:
              precision    recall  f1-score   support

        bird       1.00      1.00      1.00        79
       drone       1.00      1.00      1.00       116

    accuracy                           1.00       195
   macro avg       1.00      1.00      1.00       195
weighted avg       1.00      1.00      1.00       195

