In [1]:
pip install ollama

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install spacy python -m spacy download en_core_web_sm


Usage:   
  /Users/snehshah/anaconda3/bin/python -m pip install [options] <requirement specifier> [package-index-options] ...
  /Users/snehshah/anaconda3/bin/python -m pip install [options] -r <requirements file> [package-index-options] ...
  /Users/snehshah/anaconda3/bin/python -m pip install [options] [-e] <vcs project url> ...
  /Users/snehshah/anaconda3/bin/python -m pip install [options] [-e] <local project path> ...
  /Users/snehshah/anaconda3/bin/python -m pip install [options] <archive url/path> ...

no such option: -m
Note: you may need to restart the kernel to use updated packages.


In [19]:
import base64
import os
import uuid
import random
from io import BytesIO
from PIL import Image
import numpy as np
from pymongo import MongoClient
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
import spacy
import ollama

# Load spaCy model for Named Entity Recognition (NER) and noun phrase extraction
nlp = spacy.load("en_core_web_sm")

# Load BLIP model for Image Captioning
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# Predefined list of moods that can be inferred from hand drawings
valid_moods = [
    "happy", "sad", "angry", "neutral", "surprised", 
    "confused", "excited", "calm"
]

# Define mood inference from dominant colors
def infer_mood_from_color(image):
    # Convert image to RGB and then to numpy array
    image_rgb = image.convert("RGB")
    image_np = np.array(image_rgb)
    
    # Calculate the average color of the image (simple approach)
    avg_color = np.mean(image_np, axis=(0, 1))  # Average color in RGB channels

    # Calculate the dominant color (R, G, B) dominance
    r, g, b = avg_color

    # Based on RGB value dominance, classify mood
    if r > g and r > b:
        return "happy"  # Red dominance, warm color (e.g., reds, oranges)
    elif g > r and g > b:
        return "calm"   # Green dominance, cool color (e.g., greens)
    elif b > r and b > g:
        return "neutral"  # Blue dominance, cool color (e.g., blues)
    else:
        return "neutral"  # Default if unclear

# Load Ollama model for mood detection
def detect_mood_from_caption(caption, image):
    # Create an Ollama prompt to detect mood
    ollama_prompt = f"""
    You are a mood detection model. Based on the following caption, classify the mood of the image.
    The mood should be one of the following options: {', '.join(valid_moods)}.

    Caption: "{caption}"

    Your output should only be one of these moods: happy, sad, angry, neutral, surprised, confused, excited, calm.
    """

    try:
        # Call Ollama's model
        response = ollama.chat(model="llama3:latest", messages=[{"role": "user", "content": ollama_prompt}])
        
        # Access the 'content' field from the response's 'message' object
        mood_description = response.message.content.strip()  # Get the detailed mood description
        
        # Check if mood is in the valid moods list
        detected_mood = "neutral"  # Default mood to "neutral"
        for mood in valid_moods:
            if mood.lower() in mood_description.lower():
                detected_mood = mood
                break
        
        if detected_mood == "neutral":
            print(f"Detected mood is neutral. Inferring from image color...")
            inferred_mood = infer_mood_from_color(image)
            print(f"Inferred Mood from color: {inferred_mood}")
            return inferred_mood

        print(f"Detected Mood: {detected_mood}")
        return detected_mood
    
    except Exception as e:
        print(f"An error occurred: {e}")
        return "neutral"  # Return neutral if there's an error

def generate_caption_from_pil(image):
    inputs = processor(image, return_tensors="pt")
    out = model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)
    return caption

def extract_keywords_from_caption(caption):
    # Use spaCy to process the caption and extract keywords
    doc = nlp(caption)
    keywords = [chunk.text for chunk in doc.noun_chunks]  # Extracting noun phrases
    return list(set(keywords))  # Removing duplicates

# ---------- Encode & Decode Helpers ----------
def encode_image_to_base64(filepath):
    with open(filepath, 'rb') as image_file:
        encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
    return encoded_string

def base64_to_image(base64_str):
    image_data = base64.b64decode(base64_str)
    return Image.open(BytesIO(image_data)).convert('RGB')

# ---------- MongoDB Insert ----------
def insert_image_into_db(user_id, image_base64, image_name, db_collection):
    image_pil = base64_to_image(image_base64)
    caption = generate_caption_from_pil(image_pil)
    print(f"Caption for image {image_name}: {caption}")

    # Extract keywords and mood from the caption
    keywords = extract_keywords_from_caption(caption)
    mood = detect_mood_from_caption(caption, image_pil)

    # Print keywords
    print(f"Keywords for image {image_name}: {keywords}")

    # Insert image, caption, keywords, and mood into the MongoDB document
    document = {
        'user_id': user_id,
        'image': image_base64,
        'image_name': image_name,
        'caption': caption,
        'keywords': keywords,
        'mood': mood
    }
    return db_collection.insert_one(document)

# ---------- Directory Processor ----------
def process_directory(directory, db_collection, max_images=200):
    image_paths = []
    for subdir, dirs, files in os.walk(directory):
        for file in sorted(files):
            if file.endswith(".png"):
                filepath = os.path.join(subdir, file)
                image_paths.append(filepath)
    
    random.shuffle(image_paths)
    image_paths = image_paths[:max_images]  

    for filepath in image_paths:
        folder_name = os.path.basename(os.path.dirname(filepath))
        image_base64 = encode_image_to_base64(filepath)
        user_id = str(uuid.uuid4())  
        image_name = f"{folder_name}_{os.path.splitext(os.path.basename(filepath))[0]}" 
        result = insert_image_into_db(user_id, image_base64, image_name, db_collection)
        print(f'Image {image_name} from {filepath} inserted with ID: {result.inserted_id}')

# ---------- Main Entry ----------
def main():
    client = MongoClient('mongodb+srv://dev:CkVPtXuiNweaYZ8t@doodle-dj.c3vk0.mongodb.net/doodle-dj-db')
    db = client['doodle-dj-db']  
    users = db['users'] 

    image_directory = 'png'  # Make sure this folder exists and contains your .png images

    process_directory(image_directory, users)

if __name__ == '__main__':
    main()


Caption for image speed-boat_15914: a boat with a line drawing on it
Detected mood is neutral. Inferring from image color...
Inferred Mood from color: neutral
Keywords for image speed-boat_15914: ['it', 'a line', 'a boat']
Image speed-boat_15914 from png/speed-boat/15914.png inserted with ID: 67fd9a33557e8365a53df207
Caption for image pizza_12754: a piece of pizza on a white background
Detected mood is neutral. Inferring from image color...
Inferred Mood from color: neutral
Keywords for image pizza_12754: ['a white background', 'a piece', 'pizza']
Image pizza_12754 from png/pizza/12754.png inserted with ID: 67fd9a44557e8365a53df208
Caption for image snail_15413: a line drawing of a man's body
Detected mood is neutral. Inferring from image color...
Inferred Mood from color: neutral
Keywords for image snail_15413: ['a line drawing', "a man's body"]
Image snail_15413 from png/snail/15413.png inserted with ID: 67fd9a63557e8365a53df209


KeyboardInterrupt: 