# Emotion-Based Image Captioning using  Open Source Models from HuggingFace and Langchain.

In [1]:
from transformers import BlipProcessor, BlipForConditionalGeneration
from diffusers import StableDiffusionPipeline

from transformers import AutoFeatureExtractor, AutoModelForImageClassification
from PIL import Image
import torch

In [2]:
# Load the image captioning model ( load model directly from Hugging Face)
caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

In [3]:
# Load the emotion detection model (pre-trained emotion detection model from Hugging Face)
emotion_extractor = AutoFeatureExtractor.from_pretrained("microsoft/resnet-50")
emotion_model = AutoModelForImageClassification.from_pretrained("microsoft/resnet-50")




In [4]:
# Function to generate an emotional caption for an image

def generate_emotional_caption(image_path):
    image = Image.open(image_path)

    # Generate the base caption
    inputs = caption_processor(images=image, return_tensors="pt")
    outputs = caption_model.generate(**inputs)
    base_caption = caption_processor.decode(outputs[0], skip_special_tokens=True)

    # Detect emotion
    inputs = emotion_extractor(images=image, return_tensors="pt")
    with torch.no_grad():
        logits = emotion_model(**inputs).logits
    predicted_class = logits.argmax(-1).item()
    emotion = emotion_model.config.id2label[predicted_class]

    # Enhance the caption with emotion
    emotional_caption = f"{base_caption}. The scene evokes a sense of {emotion.lower()}."

    return emotional_caption



In [6]:
from transformers import ViTFeatureExtractor, ViTForImageClassification
from PIL import Image
import torch

In [7]:
# Example usage
image_path = "Image.jpg"
print(generate_emotional_caption(image_path))



a woman with long brown hair and red lipstick. The scene evokes a sense of lipstick, lip rouge.
