## Fine-tuning an Object Detection Model

In [None]:
import torch

# Load pretrained model weights (for example, object detection model)
# NOTE(review): the output recorded for this cell shows the first torch.load call
# failing with "UnpicklingError: invalid load key, '\x12'". The file
# model/object.params is therefore presumably not something written by
# torch.save (possibly another framework's parameter format) — confirm the
# file format and load it with the matching library.
# NOTE(review): torch.load unpickles the file it reads, so only point it at
# checkpoints from a trusted source.
object_model = torch.load("model/object.params")
scene_model = torch.load("model/scene.params")
sentiment_model = torch.load("model/Sentiment.params")

# If these models are in a specific architecture like CNN, you can modify them as needed
# The .eval() calls below assume torch.load returned complete model objects.
# NOTE(review): if the files only contain parameter dictionaries, .eval() will
# not be available and the architecture must be built first — confirm.
object_model.eval()
scene_model.eval()
sentiment_model.eval()

# Example for image feature extraction
def extract_features(image):
    """Feed one image through the object, scene and sentiment models.

    The three models are read from the notebook's global namespace
    (`object_model`, `scene_model`, `sentiment_model`) and are called in
    that order with the same `image` argument.

    Args:
        image: Model input; the notebook assumes an already preprocessed
            PyTorch tensor.

    Returns:
        A 3-tuple `(object_features, scene_features, sentiment_features)`
        holding whatever each model returns for `image`.
    """
    # Order matters: callers unpack the result positionally.
    backbones = (object_model, scene_model, sentiment_model)
    return tuple(backbone(image) for backbone in backbones)

  object_model = torch.load("model/object.params")


UnpicklingError: invalid load key, '\x12'.

In [None]:
from transformers import AutoModel, AutoFeatureExtractor
from PIL import Image
import torch

# Load a pretrained vision backbone. The default checkpoint is a Vision
# Transformer (ViT); other checkpoints that AutoModel can load (e.g. a ResNet)
# may be substituted by changing model_name.
model_name = "google/vit-base-patch16-224-in21k"
model = AutoModel.from_pretrained(model_name)
feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)

# Load your image and force three colour channels so that grayscale, palette
# or RGBA files are preprocessed the same way as ordinary RGB photos.
image = Image.open("../images/00000000.jpg").convert("RGB")
# Preprocess the image for the model
inputs = feature_extractor(images=image, return_tensors="pt")

# Get the feature embeddings from the model.
# Run the model's full forward pass: calling model.get_input_embeddings() on
# the pixels would only apply the input (patch-projection) layer and skip the
# encoder, so the result would not be the model's learned image features.
with torch.no_grad():
    outputs = model(**inputs)

# Final-layer hidden states returned by the model's forward pass.
# NOTE(review): for ViT this is presumably (batch, tokens, hidden) with the
# [CLS] token first; use outputs.pooler_output if a single vector per image
# is needed — confirm against the chosen checkpoint.
features = outputs.last_hidden_state



In [None]:
from PIL import Image
import torch
from transformers import CLIPProcessor, CLIPModel
import requests

def get_conceptnet_objects(query, limit=50):
    """Return distinct English concept labels that ConceptNet links to `query`.

    Each ConceptNet edge connects a `start` node and an `end` node, and the
    queried concept can sit on either side. For every edge this function
    picks the node that is NOT the queried concept (preferring `end`), keeps
    it only if it is an English concept (`/c/en/...`), and drops
    case-insensitive duplicates. Always taking the `end` label, as a naive
    implementation would, mostly yields the query word itself over and over.

    Args:
        query: Concept to look up, e.g. "nature" or "hot dog". It is
            lower-cased and whitespace is replaced by underscores to form
            the ConceptNet URI.
        limit: Maximum number of edges to request and of labels to return.
            `None` means "whatever the API returns by default".

    Returns:
        A list of at most `limit` label strings, in the order the API
        returned them. Empty if the query is blank, `limit` is not
        positive, or the API does not answer with HTTP 200.

    Raises:
        requests.RequestException: On connection problems or when the
            request exceeds the timeout.
    """
    from urllib.parse import quote

    term = "_".join(query.strip().lower().split())
    if not term or (limit is not None and limit <= 0):
        return []

    concept_uri = f"/c/en/{term}"
    url = f"http://api.conceptnet.io{quote(concept_uri)}"
    # The API pages its results, so the limit must be sent with the request;
    # slicing the default page afterwards can never yield more than one page.
    params = {} if limit is None else {"limit": limit}
    # A timeout keeps the notebook from hanging forever on a stalled server.
    response = requests.get(url, params=params, timeout=10)
    if response.status_code != 200:
        return []

    def is_query_node(node_id):
        # Node ids may carry a sense suffix, e.g. "/c/en/nature/n".
        return node_id == concept_uri or node_id.startswith(concept_uri + "/")

    labels = []
    seen = set()
    for edge in response.json().get("edges", []):
        for side in ("end", "start"):
            node = edge.get(side) or {}
            node_id = node.get("term") or node.get("@id") or ""
            label = node.get("label")
            if not label or not node_id.startswith("/c/en/") or is_query_node(node_id):
                continue
            key = label.lower()
            if key not in seen:
                seen.add(key)
                labels.append(label)
            break  # at most one label per edge
        if limit is not None and len(labels) >= limit:
            break
    return labels


# Load the CLIP model and processor
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")

# Load an image
image_path = "../images/00000010_(3).jpg"
image = Image.open(image_path)

# Candidate labels come from two ConceptNet lookups. The two result lists can
# overlap (and each may itself contain repeats), so de-duplicate while keeping
# the original order; otherwise the same label can fill several top-k slots.
possible_objects = list(
    dict.fromkeys(
        get_conceptnet_objects("landscape") + get_conceptnet_objects("nature")
    )
)
if not possible_objects:
    # Without labels there is nothing to rank; fail with a clear message
    # instead of an obscure error from inside the processor.
    raise RuntimeError(
        "ConceptNet returned no candidate labels; cannot rank objects for the image."
    )

# Preprocess the image and text
inputs = processor(text=possible_objects, images=image, return_tensors="pt", padding=True)

# Get image and text features
with torch.no_grad():
    outputs = model(**inputs)

# Compute cosine similarities between the image and each text label (object)
image_features = outputs.image_embeds
text_features = outputs.text_embeds

# Normalize the features so the dot product below is a cosine similarity
image_features = image_features / image_features.norm(p=2, dim=-1, keepdim=True)
text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)

# One similarity score per candidate label for the single input image
similarities = (image_features @ text_features.T).squeeze(0)

# Number of labels to report; never ask topk for more entries than exist
top_k = min(2, len(possible_objects))

# Indices of the top_k highest-scoring labels, best first
top_k_indices = similarities.topk(top_k).indices

# Print the top N objects. Note this is a ranking of candidate text labels by
# image-text similarity, not a localisation of objects in the image.
top_objects = [possible_objects[i] for i in top_k_indices.tolist()]
print(f"Objects detected in the image: {top_objects}")


Objects detected in the image: ['nature', 'nature']
