In [None]:
import os
import json
import logging
from pathlib import Path

import requests  
from PIL import Image
from tqdm import tqdm
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch

# Send INFO-level (and above) records from this script to the root handler.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Read the OpenAI key from the OPENAI_API_KEY environment variable so the
# secret never has to be written into the source. When the variable is unset
# the value is an empty string, and API requests will be rejected by the server.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

# Closed vocabulary of scent categories that is interpolated into the labeling
# prompt. "no_scent" is the fallback category the prompt asks for when a scene
# carries no scent-related information.
SCENT_LABELS = [
    "muddy",
    "earthy",
    "woody",
    "grassy",
    "floral",
    "rotten",
    "bloody",
    "musty",
    "sulfuric",
    "burnt",
    "chemical",
    "metallic",
    "clean",
    "oily",
    "dusty",
    "damp",
    "smoky",
    "salty",
    "sweet",
    "arcane",
    "no_scent",
]

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

# Load the BLIP captioning checkpoint once at import time; both objects are
# shared by every call to generate_caption().
processor = BlipProcessor.from_pretrained(
    "Salesforce/blip-image-captioning-large",
    use_fast=True,
)
model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-large"
).to(device)
# Switch the model to evaluation mode.
model.eval()

def generate_caption(image_path: Path) -> str:
    """
    Caption a single image with the module-level BLIP processor and model.

    The image is opened, converted to RGB, preprocessed into tensors that are
    moved to the module-level `device`, and passed to `model.generate` with a
    caption length between 30 and 500 tokens.

    Returns the decoded caption, or an empty string when any step raises
    (the error is logged rather than propagated).
    """
    try:
        # convert() returns a new image, so it stays usable after the file
        # handle opened by Image.open() has been closed.
        with Image.open(image_path) as source_image:
            rgb_image = source_image.convert("RGB")

        model_inputs = processor(rgb_image, return_tensors="pt").to(device)
        token_ids = model.generate(max_length=500, min_length=30, **model_inputs)
        return processor.decode(token_ids[0], skip_special_tokens=True)
    except Exception as e:
        logger.error(f"Error generating caption for {image_path}: {e}")
        return ""

def build_prompt(caption: str, label_list: list[str]) -> str:
    """
    Assemble the scent-labeling prompt for one scene caption.

    `label_list` is rendered as a comma-separated list inside square brackets
    and `caption` is embedded in double quotes as the scene description. The
    prompt asks for a JSON object mapping each label to a float in [0, 1]
    whose values sum to 1.
    """
    categories = ", ".join(label_list)
    segments = [
        "You are an expert in immersive game design and olfactory simulation.\n",
        "Given a game scene description, predict the intensity of different scent categories that would be experienced in that scene.\n",
        f"Only use the following scent categories: [{categories}].\n\n",
        f"Scene: \"{caption}\"\n\n",
        "Return a valid and complete JSON object with each scent label as a key and a float from 0 to 1 as its value, where the sum of all values is 1.\n",
        "If the scene does not contain any scent-related information, set 'no_scent' to 1 and all other categories to 0.\n",
        "Do not include any additional text or markdown formatting.",
    ]
    return "".join(segments)

def extract_json(text: str) -> dict:
    """
    Extract a JSON object (a dict) from model output text.

    The whole text is parsed first. If that fails, or if it parses to
    something other than an object (a list, number, string, ...), the span
    from the first '{' to the last '}' is parsed instead.

    Returns the parsed dict, or an empty dict when no object can be
    recovered (the failure is logged together with the raw text).
    """
    try:
        parsed = json.loads(text.strip())
    except json.JSONDecodeError:
        parsed = None
    # Only a JSON object satisfies the declared return type; any other valid
    # JSON value falls through to the brace-based extraction below.
    if isinstance(parsed, dict):
        return parsed

    first_brace = text.find("{")
    last_brace = text.rfind("}")
    if first_brace == -1 or last_brace <= first_brace:
        logger.error("Could not find JSON in output. Raw output: %s", text)
        return {}

    try:
        # The candidate starts with '{', so a successful parse is always a dict.
        return json.loads(text[first_brace:last_brace + 1])
    except json.JSONDecodeError as e:
        logger.error("Failed to extract JSON: %s. Raw output: %s", e, text)
        return {}

def get_scent_labels(caption: str, label_list: list[str], timeout: float = 60.0) -> dict:
    """
    Ask the Chat Completions API for a scent distribution for one caption.

    Args:
        caption: Scene description to label.
        label_list: Scent categories the model is allowed to use.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the whole batch.

    Returns:
        The dict parsed from the model's reply, or an empty dict when the
        request fails or the reply contains no parseable JSON object (the
        error is logged, not raised).
    """
    prompt = build_prompt(caption, label_list)
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}",
    }
    data = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3,
        "max_tokens": 250,
    }
    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=data,
            timeout=timeout,
        )
        response.raise_for_status()
        output = response.json()["choices"][0]["message"]["content"]

        # A reply wrapped in a markdown fence splits into
        # [before, fenced body, after]. Keep the fenced body: the text after
        # the closing fence is normally empty. A leading language tag such as
        # "json" is tolerated by extract_json's brace search.
        fence_parts = output.split("```")
        if len(fence_parts) >= 3:
            output = fence_parts[1]

        return extract_json(output)
    except Exception as e:
        # Deliberately broad: one bad response must not abort a long batch.
        logger.error(f"Error processing GPT response: {e}")
        return {}

def process_images(image_folder: Path, output_file: Path) -> None:
    """
    Caption and scent-label every supported image directly inside a folder.

    Entries of `image_folder` whose suffix is .jpg, .jpeg, .png or .webp
    (case-insensitive) are captioned with `generate_caption` and labeled with
    `get_scent_labels`. Images for which either step returns an empty result
    are left out. The collected results, keyed by file name, are written to
    `output_file` as indented JSON once all images have been processed.
    """
    supported_suffixes = {".jpg", ".jpeg", ".png", ".webp"}
    candidates = []
    for entry in image_folder.iterdir():
        if entry.suffix.lower() in supported_suffixes:
            candidates.append(entry)

    labeled = {}
    for picture in tqdm(candidates, desc="Processing images"):
        description = generate_caption(picture)
        if description:
            distribution = get_scent_labels(description, SCENT_LABELS)
            if distribution:
                labeled[picture.name] = {
                    "caption": description,
                    "scent_distribution": distribution,
                }

    with output_file.open("w", encoding="utf-8") as handle:
        json.dump(labeled, handle, indent=4)
    logger.info(f"Captioning and labeling complete! Saved to {output_file}")

def main(
    image_folder: Path = Path(r"C:\Users\crazycyt\Desktop\1470\filtered_dataset2"),
    output_file: Path = Path("scent_labels_with_captions2.json"),
) -> None:
    """
    Run the captioning and scent-labeling pipeline over one image folder.

    Args:
        image_folder: Directory containing the images to process. Defaults to
            the dataset location originally hard-coded in this script.
        output_file: Path of the JSON file the results are written to.

    Logs an error and returns without processing when `image_folder` is not
    an existing directory.
    """
    # is_dir() also rejects a path that exists but is a regular file, which
    # would otherwise make iterdir() raise inside process_images().
    if not image_folder.is_dir():
        logger.error(f"Image folder {image_folder} does not exist or is not a directory.")
        return

    process_images(image_folder, output_file)


if __name__ == "__main__":
    main()


  from .autonotebook import tqdm as notebook_tqdm


cuda


Processing images: 100%|█████████████████████████████████████████████████████████| 1289/1289 [2:14:02<00:00,  6.24s/it]
INFO:__main__:Captioning and labeling complete! Saved to scent_labels_with_captions2.json
