In [1]:
import os
import re
import torch
import glob
import random

from PIL import Image, ImageOps
from transformers import pipeline


from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

from huggingface_hub import hf_hub_download

from tqdm.auto import tqdm


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from diffusers import (
    StableDiffusionPipeline,
    StableDiffusionImg2ImgPipeline,
    DPMSolverMultistepScheduler,
    EulerAncestralDiscreteScheduler
)


In [3]:

# --- Configuration ---
# Default reference image directory
DEFAULT_REFERENCE_IMAGES_DIR = r"D:\college\imp-doc\sem6\GENAI\project\new\all_images"

# Model configurations
SD_MODEL_ID = "SG161222/Realistic_Vision_V5.1_noVAE"  # Known for photorealism
FALLBACK_MODEL_ID = "stabilityai/stable-diffusion-2-1"  # Original model as fallback

# Load required models
def load_models(device="cuda", use_img2img=True):
    """Loads the text processing and image generation models."""
    print("Loading models...")
    # Load text processing model for enhancing architectural descriptions
    text_processor = pipeline("text-generation", model="gpt2-large")
    
    # Image generation models
    models = {"text_processor": text_processor}
    
    try:
        print(f"Loading SD model: {SD_MODEL_ID}")
        torch_dtype = torch.float16 if device == "cuda" else torch.float32
        
        # Load base text-to-image model
        text_to_image = StableDiffusionPipeline.from_pretrained(
            SD_MODEL_ID,
            torch_dtype=torch_dtype,
            safety_checker=None
        )
        
        # Use DPMSolver for faster, quality sampling
        text_to_image.scheduler = DPMSolverMultistepScheduler.from_config(
            text_to_image.scheduler.config,
            algorithm_type="dpmsolver++",
            use_karras_sigmas=True
        )
        
        text_to_image = text_to_image.to(device)
        models["text_to_image"] = text_to_image
        
        # Load image-to-image model if requested
        if use_img2img:
            print("Loading img2img model...")
            img2img = StableDiffusionImg2ImgPipeline(
                vae=text_to_image.vae,
                text_encoder=text_to_image.text_encoder,
                tokenizer=text_to_image.tokenizer,
                unet=text_to_image.unet,
                scheduler=text_to_image.scheduler,
                safety_checker=None,
                feature_extractor=None,
                requires_safety_checker=False
            )
            img2img = img2img.to(device)
            models["img2img"] = img2img
            
        print("Models loaded successfully")
    except Exception as e:
        print(f"Error loading primary model: {e}")
        print(f"Falling back to {FALLBACK_MODEL_ID}")
        
        # Fall back to original model
        text_to_image = StableDiffusionPipeline.from_pretrained(
            FALLBACK_MODEL_ID,
            torch_dtype=torch_dtype,
            safety_checker=None
        )
        text_to_image.scheduler = EulerAncestralDiscreteScheduler.from_config(
            text_to_image.scheduler.config
        )
        text_to_image = text_to_image.to(device)
        models["text_to_image"] = text_to_image
        
        if use_img2img:
            img2img = StableDiffusionImg2ImgPipeline(
                vae=text_to_image.vae,
                text_encoder=text_to_image.text_encoder,
                tokenizer=text_to_image.tokenizer,
                unet=text_to_image.unet,
                scheduler=text_to_image.scheduler,
                safety_checker=None,
                feature_extractor=None,
                requires_safety_checker=False
            )
            img2img = img2img.to(device)
            models["img2img"] = img2img
    
    return models

# Utility function to load reference images
def load_reference_images(directory=DEFAULT_REFERENCE_IMAGES_DIR, limit=None):
    """
    Load reference images from the specified directory.
    
    Args:
        directory: Path to reference images
        limit: Maximum number of images to load (None for all)
    
    Returns:
        List of PIL Image objects
    """
    print(f"Loading reference images from {directory}...")
    image_paths = []
    
    # Get all image files with common extensions
    for ext in ['*.jpg', '*.jpeg', '*.png']:
        image_paths.extend(glob.glob(os.path.join(directory, ext)))
    
    if not image_paths:
        raise ValueError(f"No images found in {directory}")
    
    print(f"Found {len(image_paths)} images")
    
    # Limit the number of images if specified
    if limit and len(image_paths) > limit:
        image_paths = random.sample(image_paths, limit)
        print(f"Randomly selected {limit} images")
    
    # Load images
    images = []
    for path in tqdm(image_paths, desc="Loading images"):
        try:
            img = Image.open(path).convert("RGB")
            images.append({"path": path, "image": img})
        except Exception as e:
            print(f"Error loading {path}: {e}")
    
    print(f"Successfully loaded {len(images)} reference images")
    return images

# Preprocess reference image for the model
def preprocess_reference_image(image, target_size=(768, 768)):
    """
    Preprocess a reference image for use with img2img.
    
    Args:
        image: PIL Image
        target_size: Target size as (width, height)
    
    Returns:
        Preprocessed PIL Image
    """
    # Resize while maintaining aspect ratio
    image = ImageOps.contain(image, target_size)
    
    # Create a blank canvas of the target size
    canvas = Image.new('RGB', target_size, (255, 255, 255))
    
    # Paste the resized image centered on the canvas
    offset = ((target_size[0] - image.width) // 2, 
              (target_size[1] - image.height) // 2)
    canvas.paste(image, offset)
    
    return canvas

# Extract architectural details from Q&A pairs
def extract_architectural_details(qa_text):
    """
    Parse Q&A text to extract key architectural details
    that can be used in image generation prompts
    """
    # Extract answer portion from Q&A text
    answer_match = re.search(r'Answer: (.*?)(?=$|\n\n)', qa_text, re.DOTALL)
    if not answer_match:
        return None
        
    answer_text = answer_match.group(1).strip()
    
    # Extract key visual elements using simple heuristics
    # Look for sentences with visual descriptors
    visual_sentences = []
    
    # Keywords that suggest visual elements - expanded list
    visual_keywords = [
        'feature', 'design', 'decorated', 'carved', 'ornate', 'structure', 
        'shape', 'pattern', 'motif', 'pillar', 'column', 'arch', 'dome',
        'appearance', 'visible', 'style', 'height', 'proportion', 'material',
        'stone', 'granite', 'sandstone', 'marble', 'sculpture', 'statue',
        'relief', 'facade', 'exterior', 'interior', 'wall', 'ceiling',
        'floor', 'entrance', 'gateway', 'courtyard', 'hall', 'chamber',
        'shrine', 'sanctum', 'tower', 'spire', 'roof', 'terrace', 'platform',
        'steps', 'stairway', 'balcony', 'window', 'door', 'panel',
        'frieze', 'cornice', 'capital', 'base', 'plinth', 'pedestal'
    ]
    
    # Extract sentences that contain visual elements
    sentences = re.split(r'(?<=[.!?])\s+', answer_text)
    for sentence in sentences:
        if any(keyword in sentence.lower() for keyword in visual_keywords):
            visual_sentences.append(sentence)
    
    # Join the visual elements together
    if visual_sentences:
        return " ".join(visual_sentences)
    else:
        # If no specific visual elements found, use the first 3 sentences
        return " ".join(sentences[:min(3, len(sentences))])

# Format architectural details into optimized image generation prompts
def format_image_prompt(architectural_details, monument_name=None):
    """
    Create a well-structured prompt for image generation
    that emphasizes realism and architectural accuracy
    """
    # Hampi-specific architectural components
    hampi_components = [
        "Vijayanagara Empire architecture",
        "16th century Hindu temple",
        "ancient granite stonework",
        "intricate stone carvings",
        "archaeological UNESCO World Heritage site",
        "South Indian architecture",
        "Dravidian architectural style",
        "monolithic stone sculptures",
        "historic ruins",
        "ancient temple complex"
    ]
    
    # Photography style components for realism
    photo_style = [
        "professional photography",
        "8k resolution",
        "photorealistic",
        "golden hour lighting",
        "clear details",
        "architectural photography",
        "detailed texture",
        "sharp focus",
        "wide angle lens",
        "HDR photography",
        "documentary style",
        "National Geographic quality"
    ]
    
    # Start with base prompt parts
    prompt_parts = []
    
    # Add monument name if provided
    if monument_name:
        prompt_parts.append(f"The {monument_name} at Hampi, India")
    else:
        prompt_parts.append("A monument at Hampi, India")
    
    # Add architectural details
    prompt_parts.append(architectural_details)
    
    # Add 3-4 random Hampi components
    prompt_parts.extend(random.sample(hampi_components, k=min(4, len(hampi_components))))
    
    # Add 3-4 random photo style components
    prompt_parts.extend(random.sample(photo_style, k=min(4, len(photo_style))))
    
    # Combine into final prompt
    final_prompt = ", ".join(prompt_parts)
    
    # Enhanced negative prompt
    negative_prompt = "cartoon, painting, illustration, 3d render, sketch, drawing, anime, blur, grainy, text, watermark, signature, low quality, deformed, disfigured, distorted architecture, unrealistic, fantasy elements, artificial, digital art style, oversaturated, low resolution"
    
    return final_prompt, negative_prompt

# Generate images from reference images
def generate_from_reference(
    models, 
    prompt, 
    reference_image, 
    negative_prompt=None,
    strength=0.7,
    guidance_scale=7.5,
    num_inference_steps=50,
    seed=None
):
    """
    Generate an image based on a reference image and prompt.
    
    Args:
        models: Dictionary of loaded models
        prompt: Text prompt for generation
        reference_image: PIL Image to use as reference
        negative_prompt: Negative prompt
        strength: Strength parameter for img2img (how much to transform)
        guidance_scale: Guidance scale for generation
        num_inference_steps: Number of inference steps
        seed: Random seed for reproducibility
    
    Returns:
        Generated PIL Image
    """
    img2img = models.get("img2img")
    if img2img is None:
        raise ValueError("Image-to-image model not loaded")
    
    # Set seed for reproducibility if provided
    if seed is not None:
        generator = torch.Generator(device=img2img.device).manual_seed(seed)
    else:
        generator = None
    
    # Generate image
    result = img2img(
        prompt=prompt,
        image=reference_image,
        negative_prompt=negative_prompt,
        strength=strength,  # How much to change the reference (0-1)
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        generator=generator
    )
    
    return result.images[0]

# Generate images based on architectural descriptions and reference images
def generate_images(prompt, negative_prompt, models, num_images=1, output_dir="hampi_images", 
                   reference_dir=DEFAULT_REFERENCE_IMAGES_DIR, reference_count=5, monument_name=None):
    """
    Generate realistic images of Hampi architecture based on prompts and reference images
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    
    # Load reference images
    reference_images = load_reference_images(reference_dir, limit=reference_count)
    if not reference_images:
        print(f"Warning: No reference images found in {reference_dir}. Falling back to direct generation.")
        # Fallback to direct generation if no reference images are available
        # This would use the original code's approach
        images = models["text_to_image"](
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_images_per_prompt=num_images,
            num_inference_steps=50,
            guidance_scale=7.5
        ).images
        
        # Save images
        image_paths = []
        for i, image in enumerate(images):
            short_desc = prompt.split('.')[0][:30].replace(" ", "_")
            filename = f"{short_desc}_{i}.png"
            save_path = os.path.join(output_dir, filename)
            image.save(save_path)
            image_paths.append(save_path)
            print(f"Saved image to {save_path}")
        
        return image_paths
    
    # Generate images using references
    image_paths = []
    for i in range(num_images):
        # Select a random reference image
        ref_image_data = random.choice(reference_images)
        ref_image = ref_image_data["image"]
        ref_path = ref_image_data["path"]
        
        print(f"Using reference image: {os.path.basename(ref_path)}")
        
        # Preprocess the reference image
        processed_ref = preprocess_reference_image(ref_image)
        
        # Generate with a unique seed for variety
        seed = random.randint(0, 2147483647)
        strength = random.uniform(0.6, 0.85)  # Vary strength for diversity
        
        try:
            # Generate the image
            generated_img = generate_from_reference(
                models=models,
                prompt=prompt,
                reference_image=processed_ref,
                negative_prompt=negative_prompt,
                strength=strength,
                guidance_scale=7.5,
                num_inference_steps=50,
                seed=seed
            )
            
            # Create filename based on shortened prompt and monument
            mon_text = monument_name or "hampi"
            filename = f"{mon_text.replace(' ', '_')}_{i+1}_seed{seed}.png"
            save_path = os.path.join(output_dir, filename)
            
            # Save the image
            generated_img.save(save_path)
            image_paths.append(save_path)
            print(f"Saved image to {save_path}")
            
            # Save a comparison image
            comparison = Image.new('RGB', (processed_ref.width * 2, processed_ref.height))
            comparison.paste(processed_ref, (0, 0))
            comparison.paste(generated_img, (processed_ref.width, 0))
            
            comparison_path = os.path.join(output_dir, f"{mon_text.replace(' ', '_')}_{i+1}_comparison.png")
            comparison.save(comparison_path)
            print(f"Saved comparison to {comparison_path}")
            
        except Exception as e:
            print(f"Error generating image {i+1}: {e}")
    
    return image_paths

# Extract Q&A pairs from vector store or text file
def get_qa_pairs(source_type="file", file_path="Hampi_Architecture_QA.txt", vector_store_path=None, query=None):
    """
    Extract Q&A pairs from either:
    1. Text file generated by store.ipynb
    2. Vector store created by store.ipynb
    """
    qa_pairs = []
    
    if source_type == "file":
        # Parse the text file format
        if os.path.exists(file_path):
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
            
            # Extract Q&A blocks
            blocks = re.split(r'-{40,}', content)
            for block in blocks:
                if not block.strip():
                    continue
                
                # Extract question and answer
                q_match = re.search(r'Q\d+: (.*?)(?=\n|$)', block)
                a_match = re.search(r'A\d+: (.*?)(?=\n|$)', block, re.DOTALL)
                
                if q_match and a_match:
                    qa_pairs.append({
                        "question": q_match.group(1).strip(),
                        "answer": a_match.group(1).strip(),
                    })
        else:
            print(f"File not found: {file_path}")
    
    elif source_type == "vector_store" and vector_store_path and query:
        # Load the vector store
        embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        vector_store = Chroma(persist_directory=vector_store_path, embedding_function=embedding_model)
        
        # Search for relevant Q&A pairs
        results = vector_store.similarity_search(query, k=5)
        
        for doc in results:
            # Extract question and answer from the document content
            content = doc.page_content
            q_match = re.search(r'Question: (.*?)(?=\nAnswer:|$)', content)
            a_match = re.search(r'Answer: (.*?)(?=$)', content, re.DOTALL)
            
            if q_match and a_match:
                qa_pairs.append({
                    "question": q_match.group(1).strip(),
                    "answer": a_match.group(1).strip(),
                })
    
    return qa_pairs

# Main pipeline function
def qa_to_images_pipeline(source_type="file", file_path="Hampi_Architecture_QA.txt", 
                         vector_store_path="./chroma_architecture_qa_db", query=None,
                         num_images=1, output_dir="hampi_images", monument_filters=None,
                         reference_dir=DEFAULT_REFERENCE_IMAGES_DIR, reference_count=5,
                         device="cuda"):
    """
    Complete pipeline to convert architectural Q&A data to realistic images
    using reference-based image generation
    
    Args:
        source_type: 'file' or 'vector_store'
        file_path: Path to the QA text file (if source_type is 'file')
        vector_store_path: Path to the vector store (if source_type is 'vector_store')
        query: Search query for the vector store (if source_type is 'vector_store')
        num_images: Number of images to generate per QA pair
        output_dir: Directory to save the generated images
        monument_filters: List of specific monuments to include (e.g., ['Vitthala Temple'])
        reference_dir: Directory containing reference images
        reference_count: Number of reference images to use
        device: Device to use for generation
    """
    print("Loading models...")
    models = load_models(device=device)
    
    print("Retrieving Q&A pairs...")
    qa_pairs = get_qa_pairs(source_type, file_path, vector_store_path, query)
    
    if not qa_pairs:
        print("No Q&A pairs found!")
        return []
    
    print(f"Found {len(qa_pairs)} Q&A pairs.")
    
    # Filter by monument if specified
    if monument_filters:
        filtered_pairs = []
        for pair in qa_pairs:
            if any(monument.lower() in pair["question"].lower() for monument in monument_filters):
                filtered_pairs.append(pair)
        qa_pairs = filtered_pairs
        print(f"Filtered to {len(qa_pairs)} Q&A pairs related to specified monuments.")
    
    generated_image_paths = []
    
    for i, pair in enumerate(qa_pairs):
        print(f"\nProcessing Q&A pair {i+1}/{len(qa_pairs)}")
        print(f"Question: {pair['question']}")
        
        # Extract monument name from question if possible
        monument_match = re.search(r'(Vitthala|Virupaksha|Krishna|Hazara Rama|Lotus Mahal|Elephant Stables)', pair['question'])
        monument_name = monument_match.group(1) if monument_match else None
        
        # Extract architectural details
        full_qa_text = f"Question: {pair['question']}\nAnswer: {pair['answer']}"
        architectural_details = extract_architectural_details(full_qa_text)
        
        if not architectural_details:
            print("Could not extract architectural details, skipping...")
            continue
        
        print(f"Extracted details: {architectural_details[:100]}...")
        
        # Format image generation prompt
        prompt, negative_prompt = format_image_prompt(architectural_details, monument_name)
        print(f"Generated prompt: {prompt[:100]}...")
        
        # Generate images using reference-based approach
        image_paths = generate_images(
            prompt=prompt, 
            negative_prompt=negative_prompt, 
            models=models, 
            num_images=num_images, 
            output_dir=output_dir,
            reference_dir=reference_dir,
            reference_count=reference_count,
            monument_name=monument_name
        )
        
        generated_image_paths.extend(image_paths)
    
    print(f"\nGeneration complete. Created {len(generated_image_paths)} images in {output_dir}.")
    return generated_image_paths



In [4]:
import os
import time
import uuid
import re
import random # Added import for random.choice
# Updated imports for LangChain components
from langchain_community.vectorstores import Chroma # Changed from langchain.vectorstores
from langchain_huggingface import HuggingFaceEmbeddings # Changed from langchain_community.embeddings
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from langchain.schema import HumanMessage, SystemMessage
import traceback # Add traceback for detailed error printing

# --- Configuration ---
# Define default paths (can be overridden by arguments if needed)
DEFAULT_SOURCE_DB_PATH = r"D:\college\imp-doc\sem6\GENAI\project\3D-Reconstruction-of-Monuments\chroma1_db" # Assuming this is the source text DB
DEFAULT_ARCH_QA_DB_PATH = r"D:\college\imp-doc\sem6\GENAI\project\3D-Reconstruction-of-Monuments\chroma_architecture_qa_db"
DEFAULT_QA_TXT_FILE = r"C:\Users\Rishi S Etagi\Downloads\Hampi Architectural Q&A.txt"
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL = "llama3-8b-8192"
# Ensure you have your Groq API key set as an environment variable or replace the placeholder
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "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") # Replace with your key if not using env var

# List of architectural topics (from store.ipynb)
hampi_architectural_topics = [
    "Vijayanagara architectural style", "Temple architecture", "Gopurams",
    "Mandapas (pillared halls)", "Pillars (musical pillars, ornate pillars)",
    "Islamic influences on Vijayanagara architecture", "Royal Enclosure structures",
    "Water structures (stepwells, tanks, aqueducts)", "Fortifications and gateways",
    "Bas-reliefs and carvings", "Materials used in construction (granite)",
    "Comparison with other South Indian styles (Dravidian, Chalukya)",
    "Specific monument features (e.g., Stone Chariot, Lotus Mahal design)"
]

# --- Helper Functions ---

def initialize_components(source_db_path=DEFAULT_SOURCE_DB_PATH, arch_qa_db_path=DEFAULT_ARCH_QA_DB_PATH):
    """Initializes embedding model, LLMs, and vector stores."""
    print("Initializing models and vector stores...")
    embedding_model, llm, question_generator_llm, source_vectorstore, architecture_qa_vectorstore, qa_chain = None, None, None, None, None, None # Initialize to None
    try:
        print("--> Initializing Embedding Model...")
        embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
        print("    Embedding Model Initialized.")

        # Source vector store (for context)
        print(f"--> Attempting to load source vector store from: {os.path.abspath(source_db_path)}")
        if not os.path.exists(source_db_path):
            print(f"    Error: Source DB directory not found: {os.path.abspath(source_db_path)}")
            raise FileNotFoundError(f"Directory not found: {source_db_path}")
        source_vectorstore = Chroma(persist_directory=source_db_path, embedding_function=embedding_model)
        print("    Source vector store loaded.")

        # Architecture Q&A vector store (for storing generated pairs)
        print(f"--> Attempting to load/create architecture Q&A vector store at: {os.path.abspath(arch_qa_db_path)}")
        architecture_qa_vectorstore = Chroma(persist_directory=arch_qa_db_path, embedding_function=embedding_model)
        print("    Architecture Q&A vector store loaded/initialized.")

        # LLMs
        print("--> Initializing LLMs...")
        # Define the placeholder key used as default
        placeholder_key = "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"
        # Modify the check: Error if key is missing OR if it's exactly the placeholder
        if not GROQ_API_KEY or GROQ_API_KEY == placeholder_key:
             print("    Error: GROQ_API_KEY is missing or is the default placeholder.")
             raise ValueError("Invalid or missing Groq API Key provided. Ensure it's set via environment variable.")
        llm = ChatGroq(model=LLM_MODEL, groq_api_key=GROQ_API_KEY)
        question_generator_llm = ChatGroq(model=LLM_MODEL, groq_api_key=GROQ_API_KEY) # Separate instance if needed
        print("    LLMs initialized.")

        # RAG chain for answering questions based on source documents
        print("--> Initializing RAG chain...")
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=source_vectorstore.as_retriever()
        )
        print("    RAG chain initialized.")

        print("--> Initialization complete (try block finished).")
        # Explicitly return the initialized components here
        return embedding_model, llm, question_generator_llm, source_vectorstore, architecture_qa_vectorstore, qa_chain

    except Exception as e:
        # Print the specific error that occurred during initialization
        print(f"\n!!! Error during initialization: {e} !!!\n")
        # Print the full traceback to see where the error originated
        traceback.print_exc()
        print("\n!!! Please ensure ChromaDB directories exist and are valid, required models are accessible, and API keys are valid. !!!\n")
        # Return None for all components to signal failure
        return None, None, None, None, None, None

def generate_architectural_question(question_generator_llm, topic=None):
    """Generates a specific architectural question about Hampi."""
    if not question_generator_llm:
        return "Error: Question generator LLM not initialized."

    # Use random.choice if topic is not provided
    selected_topic = topic if topic else random.choice(hampi_architectural_topics)

    prompt = f"""Generate a specific, detailed question about the architectural features of Hampi, focusing on the topic: '{selected_topic}'.
    Examples:
    - What are the typical dimensions and decorative motifs found on the pillars of the Vitthala Temple's main mandapa?
    - Describe the construction techniques used for the corbelled arches seen in the Lotus Mahal.
    - How does the design of the gopuram at the Virupaksha Temple incorporate elements from earlier Dravidian styles?

    Focus specifically on construction methods, design elements, structural innovations, or artistic aspects of the architecture.
    The question should concentrate on architectural style only, not history or cultural significance.
    Provide ONLY the question with no additional text."""

    messages = [
        SystemMessage(content="You are an architectural historian specializing in Hampi and Vijayanagara architecture."),
        HumanMessage(content=prompt)
    ]

    try:
        response = question_generator_llm.invoke(messages)
        # Basic cleaning: remove potential quotes or prefixes
        question = response.content.strip().strip('"').strip("'")
        if not question.endswith("?"):
             question += "?" # Ensure it's a question
        return question
    except Exception as e:
        print(f"Error generating question: {e}")
        return f"Could you detail the architectural features of {selected_topic} in Hampi?" # Fallback

def save_qa_to_txt(qa_pairs, filename=DEFAULT_QA_TXT_FILE):
    """Saves generated Q&A pairs to a text file."""
    output_dir = os.path.dirname(filename)
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    try:
        with open(filename, "w", encoding="utf-8") as file:
            file.write("Hampi Architectural Q&A\n")
            file.write("=" * 40 + "\n\n")

            for i, qa in enumerate(qa_pairs):
                file.write(f"Q{i+1}: {qa['question']}\n")
                file.write(f"A{i+1}: {qa['answer']}\n")
                file.write("-" * 40 + "\n\n")

        print(f"Q&A pairs saved to {filename}")
    except Exception as e:
        print(f"Error saving Q&A to file {filename}: {e}")


# --- Main Functions for incorporated.py ---

def process_architectural_qa_batch_and_save(num_questions=10, arch_qa_db_path=DEFAULT_ARCH_QA_DB_PATH, qa_txt_file=DEFAULT_QA_TXT_FILE, topic=None):
    """Generates and stores architecture-focused Q&A pairs."""
    embedding_model, llm, question_generator_llm, source_vectorstore, architecture_qa_vectorstore, qa_chain = initialize_components(arch_qa_db_path=arch_qa_db_path)

    # --- Existing Debugging ---
    print("\n--- Debugging component values after initialization ---")
    print(f"  embedding_model: {type(embedding_model)}, Is None: {embedding_model is None}")
    print(f"  llm: {type(llm)}, Is None: {llm is None}")
    print(f"  question_generator_llm: {type(question_generator_llm)}, Is None: {question_generator_llm is None}")
    print(f"  source_vectorstore: {type(source_vectorstore)}, Is None: {source_vectorstore is None}")
    print(f"  architecture_qa_vectorstore: {type(architecture_qa_vectorstore)}, Is None: {architecture_qa_vectorstore is None}")
    print(f"  qa_chain: {type(qa_chain)}, Is None: {qa_chain is None}")
    # Keep the all() print for comparison, but don't use it for the check
    print(f"  Result of all([...]): {all([embedding_model, llm, question_generator_llm, source_vectorstore, architecture_qa_vectorstore, qa_chain])}")
    print("--- End Debugging ---\n")
    # --- End Existing Debugging ---

    # --- Modified Check: Check each component individually ---
    initialization_failed = False
    if embedding_model is None:
        print("Initialization Check Failed: embedding_model is None")
        initialization_failed = True
    if llm is None:
        print("Initialization Check Failed: llm is None")
        initialization_failed = True
    if question_generator_llm is None:
        print("Initialization Check Failed: question_generator_llm is None")
        initialization_failed = True
    if source_vectorstore is None:
        print("Initialization Check Failed: source_vectorstore is None")
        initialization_failed = True
    if architecture_qa_vectorstore is None:
        print("Initialization Check Failed: architecture_qa_vectorstore is None")
        initialization_failed = True
    if qa_chain is None:
        print("Initialization Check Failed: qa_chain is None")
        initialization_failed = True

    if initialization_failed:
        print("Aborting Q&A generation due to initialization failure (individual check).")
        return
    # --- End Modified Check ---

    # Original check (commented out)
    # if not all([embedding_model, llm, question_generator_llm, source_vectorstore, architecture_qa_vectorstore, qa_chain]):
    #     print("Aborting Q&A generation due to initialization failure.")
    #     return

    print(f"Generating {num_questions} synthetic architecture-focused Q&A pairs about Hampi...")
    qa_pairs = []

    for i in range(num_questions):
        print(f"\n--- Generating Pair {i+1}/{num_questions} ---")
        # 1. Generate Question
        question = generate_architectural_question(question_generator_llm, topic=topic)
        print(f"Generated Question: {question}")

        if "Error:" in question:
            continue # Skip if question generation failed

        # 2. Generate Answer using RAG
        try:
            print("Generating answer using RAG...")
            # Use invoke instead of deprecated run
            answer_result = qa_chain.invoke({"query": question})
            answer = answer_result.get("result", "Could not retrieve an answer.") # Adjust based on actual output structure
            print(f"Generated Answer: {answer[:150]}...")

            qa_pairs.append({"question": question, "answer": answer})

            # 3. Store in Architecture Vector Store
            qa_text = f"Question: {question}\nAnswer: {answer}"
            topic_keywords = [t for t in hampi_architectural_topics if any(word in question.lower() for word in t.lower().split())]
            metadata_topic = topic_keywords[0] if topic_keywords else "General Hampi architecture"

            architecture_qa_vectorstore.add_texts(
                [qa_text],
                metadatas=[{
                    "question": question,
                    "topic": metadata_topic,
                    "content_type": "architecture"
                }],
                ids=[str(uuid.uuid4())] # Ensure unique IDs
            )
            print(f"Stored Q&A pair in {arch_qa_db_path}")

            # Sleep briefly to avoid potential API rate limits
            time.sleep(1.5)

        except Exception as e:
            print(f"Error processing Q&A pair for question '{question}': {e}")
            time.sleep(1) # Wait a bit longer after an error

    # 4. Save results to TXT file
    save_qa_to_txt(qa_pairs, filename=qa_txt_file)

    # 5. Persist the vector store changes
    try:
        print("Persisting architecture Q&A vector store...")
        architecture_qa_vectorstore.persist() # Ensure data is saved
        print(f"Completed architectural Q&A generation. Stored {len(qa_pairs)} pairs in {arch_qa_db_path}")
    except Exception as e:
        print(f"Error persisting vector store {arch_qa_db_path}: {e}")


def search_architecture_qa_database(query, k=3, arch_qa_db_path=DEFAULT_ARCH_QA_DB_PATH):
    """Searches the architecture-specific Q&A vector store."""
    try:
        print(f"Searching architecture Q&A database at {arch_qa_db_path} for: '{query}'")
        embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
        vector_store = Chroma(persist_directory=arch_qa_db_path, embedding_function=embedding_model)
        results = vector_store.similarity_search(query, k=k)
        print(f"Found {len(results)} results.")
        return results
    except Exception as e:
        print(f"Error searching vector store {arch_qa_db_path}: {e}")
        return []

# Example of how to run directly (optional)
if __name__ == "__main__":
    print("Running store_utils.py directly for testing...")
    # Test Q&A generation
    # process_architectural_qa_batch_and_save(num_questions=2, qa_txt_file="test_hampi_qa.txt", arch_qa_db_path="./test_chroma_arch_qa")

    # Test search (assuming the DB was created and populated)
    # test_query = "Tell me about the pillars in Hampi"
    # search_results = search_architecture_qa_database(test_query, arch_qa_db_path="./test_chroma_arch_qa")
    # if search_results:
    #     for doc in search_results:
    #         print("\n-- Result --")
    #         print(doc.page_content)
    # else:
    #     print("No results found for test query.")
    pass

# --- Add this line for debugging ---
# print(f"DEBUG: GROQ_API_KEY from environment: {os.environ.get('GROQ_API_KEY')}")
# --- End of added line ---

# Existing line where ChatGroq is initialized (This line seems misplaced outside the function, consider removing it if it's redundant)
# llm = ChatGroq(model_name="mixtral-8x7b-32768", groq_api_key=os.environ.get("GROQ_API_KEY")) # Or similar

Running store_utils.py directly for testing...


In [5]:
import os
import torch
import numpy as np
from PIL import Image
from tqdm import tqdm
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
from controlnet_aux import HEDdetector, MLSDdetector, PidiNetDetector, NormalBaeDetector, LineartDetector, LineartAnimeDetector, CannyDetector, ContentShuffleDetector, ZoeDetector, OpenposeDetector
from diffusers.utils import load_image
from huggingface_hub import hf_hub_download 

# --- Configuration ---
# Choose the appropriate ControlNet model based on desired view generation method
# Depth seems suitable for generating different views from a single image
CONTROLNET_MODEL_ID = "lllyasviel/control_v11f1p_sd15_depth"
STABLE_DIFFUSION_MODEL_ID = "runwayml/stable-diffusion-v1-5" # Base model for ControlNet

# --- Helper Functions ---

def load_controlnet_pipeline(device='cuda'):
    """Loads the ControlNet pipeline for depth-controlled image generation."""
    print("Loading ControlNet pipeline...")
    try:
        controlnet = ControlNetModel.from_pretrained(CONTROLNET_MODEL_ID, torch_dtype=torch.float16)
        pipe = StableDiffusionControlNetPipeline.from_pretrained(
            STABLE_DIFFUSION_MODEL_ID, controlnet=controlnet, torch_dtype=torch.float16
        )
        pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
        # Remove following line if using CPU or have enough VRAM
        # pipe.enable_model_cpu_offload() # Offload parts to CPU if VRAM is limited
        pipe.to(device)
        pipe.enable_xformers_memory_efficient_attention() # Use if xformers is installed
        print("ControlNet pipeline loaded.")
        return pipe
    except Exception as e:
        print(f"Error loading ControlNet pipeline: {e}")
        print("Ensure you have the necessary libraries installed and model IDs are correct.")
        return None

def get_depth_map(image, device='cuda'):
    """Generates a depth map for the input image."""
    print("Generating depth map...")
    try:
        # Initialize the MiDaS depth estimator
        depth_estimator = MiDaSDetector.from_pretrained("Intel/dpt-hybrid-midas")
        depth_map_image = depth_estimator(image, detect_resolution=384, image_resolution=512) # Adjust resolutions as needed
        print("Depth map generated.")
        return depth_map_image
    except Exception as e:
        print(f"Error generating depth map: {e}")
        return None

# --- Main Function for incorporated.py ---

def generate_multiple_views(input_image_path, num_views=4, output_dir="hampi_output/views", device='cuda', prompt_prefix="Another view of the Hampi monument"):
    """Generates multiple views of an object from a single input image using ControlNet."""
    if not os.path.exists(input_image_path):
        print(f"Error: Input image not found at {input_image_path}")
        return []

    # Load models
    controlnet_pipe = load_controlnet_pipeline(device=device)
    if not controlnet_pipe:
        return []

    # Prepare output directory
    view_output_dir = os.path.join(output_dir, os.path.splitext(os.path.basename(input_image_path))[0] + "_views")
    os.makedirs(view_output_dir, exist_ok=True)
    print(f"Saving generated views to: {view_output_dir}")

    # Load input image
    try:
        input_image = Image.open(input_image_path).convert("RGB")
        # Optional: Resize image if needed for consistency or performance
        # input_image = input_image.resize((512, 512))
    except Exception as e:
        print(f"Error loading input image {input_image_path}: {e}")
        return []

    # Get depth map (Control Image)
    control_image = get_depth_map(input_image, device=device)
    if not control_image:
        return []

    # Define base prompt and negative prompt
    # You might want to extract details from the filename or use a fixed prompt
    base_prompt = f"{prompt_prefix}, realistic photo, ancient stone architecture, Hampi, India, detailed stonework, clear daylight, high resolution"
    negative_prompt = "cartoon, illustration, anime, 3d render, painting, sketch, drawing, blur, distortion, low quality, poor lighting, oversaturated, fantasy elements, text, words, signature, watermark"

    generated_image_paths = []

    print(f"Generating {num_views} different views...")
    for i in tqdm(range(num_views)):
        # Generate image with ControlNet
        # You can slightly vary the prompt or seed for different views
        # For more distinct views, you might need more sophisticated techniques
        # like manipulating the depth map or using different ControlNets (e.g., Normal maps, Canny edges)
        # or dedicated multi-view models if available.
        # Here, we rely on the inherent randomness of the diffusion process with a fixed control.
        try:
            generator = torch.Generator(device=device).manual_seed(i * 1234 + 5678) # Vary seed for variation
            output_image = controlnet_pipe(
                prompt=base_prompt,
                negative_prompt=negative_prompt,
                image=control_image, # Provide the depth map as the control
                num_inference_steps=30, # Adjust steps (20-50 typical)
                guidance_scale=7.5,     # Adjust guidance
                generator=generator
            ).images[0]

            # Save the generated image
            filename = f"view_{i+1}.png"
            save_path = os.path.join(view_output_dir, filename)
            output_image.save(save_path)
            generated_image_paths.append(save_path)
            print(f"Saved view {i+1} to {save_path}")

        except Exception as e:
            print(f"Error generating view {i+1}: {e}")

    print(f"Generated {len(generated_image_paths)} views.")
    return generated_image_paths


# Example usage (optional)
if __name__ == "__main__":
    print("Running view_generation.py directly for testing...")
    # Create a dummy input image file for testing if needed
    # dummy_image_path = "dummy_input.png"
    # if not os.path.exists(dummy_image_path):
    #     Image.new('RGB', (512, 512), color = 'red').save(dummy_image_path)

    # test_input_image = dummy_image_path # Replace with a real image path
    # if os.path.exists(test_input_image):
    #     generate_multiple_views(
    #         input_image_path=test_input_image,
    #         num_views=2,
    #         output_dir="test_hampi_output/views",
    #         device='cuda' if torch.cuda.is_available() else 'cpu'
    #     )
    # else:
    #     print(f"Test input image '{test_input_image}' not found.")
    pass



Running view_generation.py directly for testing...


In [6]:
import os
import sys
import random
from datetime import datetime
import torch
import re


DEFAULT_REFERENCE_IMAGES_DIR = "D:\\college\\imp-doc\\sem6\\GENAI\\project\\new\\chariot\\images"

# try:
#     from store_utils import process_architectural_qa_batch_and_save, search_architecture_qa_database, DEFAULT_QA_TXT_FILE, DEFAULT_ARCH_QA_DB_PATH
#     print("- Successfully imported: Q&A Generation & Search Utilities")
# except ImportError as e:
#     print(f"Warning: Could not import from store_utils.py: {e}")
#     process_architectural_qa_batch_and_save = None
#     search_architecture_qa_database = None
#     # Define defaults here if import fails, to avoid NameError later
#     DEFAULT_QA_TXT_FILE = r"C:\Users\Rishi S Etagi\Downloads\Hampi Architectural Q&A.txt"
#     DEFAULT_ARCH_QA_DB_PATH = r"C:\Users\Rishi S Etagi\Desktop\gen ai\bruh\3D-Reconstruction-of-Monuments\chroma_architecture_qa_db"


# try:
#     from view_generation import generate_multiple_views
#     print("- Successfully imported: 3D View Generation Utilities")
# except ImportError as e:
#     print(f"Warning: Could not import from view_generation.py: {e}")
#     generate_multiple_views = None

# print("Imports complete.")

## Configuration

Define default parameters for the pipeline.

In [7]:
# Default paths and settings
REFERENCE_DIR = DEFAULT_REFERENCE_IMAGES_DIR
OUTPUT_DIR = "hampi_output"
os.makedirs(OUTPUT_DIR, exist_ok=True)


# Device configuration
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")

Using device: cuda


## Define Operation Modes

Here we define different operation modes for the pipeline.

In [8]:
# Operation mode: "generate_qa", "search_qa", "generate_images", "generate_views", "full_pipeline"
def run_generate_qa_mode(num_questions=10, qa_topic=None, output_file=DEFAULT_QA_TXT_FILE, qa_db_path=DEFAULT_ARCH_QA_DB_PATH):
    """Generate Q&A pairs about Hampi architecture"""
    if process_architectural_qa_batch_and_save:
        print(f"Generating {num_questions} architectural Q&A pairs...")
        qa_output_filepath = os.path.join(OUTPUT_DIR, os.path.basename(output_file))
        process_architectural_qa_batch_and_save(
            num_questions=num_questions,
            arch_qa_db_path=qa_db_path,
            qa_txt_file=qa_output_filepath,
            topic=qa_topic
        )
        print(f"Q&A pairs saved to {qa_output_filepath}")
        return qa_output_filepath
    else:
        print("Error: Q&A generation function not available. Please check imports and store_utils.py.")
        return None

def run_search_qa_mode(query, qa_db_path=DEFAULT_ARCH_QA_DB_PATH):
    """Search the Q&A database for architectural information"""
    if search_architecture_qa_database and query:
        print(f"Searching architectural Q&A database for: '{query}'")
        results = search_architecture_qa_database(
            query=query,
            arch_qa_db_path=qa_db_path
        )
        print("\nSearch Results:")
        if results:
            for i, doc in enumerate(results):
                print(f"\n--- Result {i+1} ---")
                # Extract Q&A from the stored text
                content = doc.page_content
                q_match = re.search(r'Question: (.*?)(?=\nAnswer:|$)', content)
                a_match = re.search(r'Answer: (.*?)(?=$)', content, re.DOTALL)
                
                if q_match: print(f"Q: {q_match.group(1).strip()}")
                if a_match: print(f"A: {a_match.group(1).strip()}")
                print(f"(Similarity Score: {doc.metadata.get('_distance', 'N/A')})")
                print("----------")
        else:
            print("No relevant results found in the architectural Q&A database.")
        return results
    elif not query:
         print("Error: Please provide a query for search_qa mode.")
    else:
        print("Error: Q&A search function not available. Please check imports and store_utils.py.")
    return None

def run_generate_images_mode(source_type="file", input_file=DEFAULT_QA_TXT_FILE, 
                          qa_db_path=DEFAULT_ARCH_QA_DB_PATH, query=None,
                          num_images=2, monuments=None,
                          reference_dir=REFERENCE_DIR, reference_count=5):
    """Generate images from Q&A data using reference images"""
    if qa_to_images_pipeline:
        print("Starting image generation pipeline...")
        # Construct full path for input file if using file source
        qa_input_filepath = os.path.join(OUTPUT_DIR, os.path.basename(input_file)) if source_type == 'file' else input_file

        # Pass reference image directory and count to the pipeline
        generated_image_paths = qa_to_images_pipeline(
            source_type=source_type,
            file_path=qa_input_filepath,
            vector_store_path=qa_db_path,
            query=query,
            num_images=num_images,
            output_dir=OUTPUT_DIR,
            monument_filters=monuments,
            reference_dir=reference_dir,
            reference_count=reference_count,
            device=DEVICE
        )
        return generated_image_paths
    else:
        print("Error: Image generation function not available. Please check imports and qa_to_image_pipeline.py.")
        return []

def run_generate_views_mode(input_image, num_views=4):
    """Generate multiple views of an input image"""
    if generate_multiple_views and input_image:
        print(f"Generating {num_views} views for image: {input_image}")
        if not os.path.exists(input_image):
             print(f"Error: Input image not found at {input_image}")
             return []
        else:
            # Views will be saved in a subdirectory within output_dir by the function
            view_paths = generate_multiple_views(
                input_image_path=input_image,
                num_views=num_views,
                output_dir=OUTPUT_DIR,
                device=DEVICE
            )
            return view_paths
    elif not input_image:
         print("Error: Please provide an input image for generate_views mode.")
    else:
        print("Error: 3D view generation function not available. Please check imports and view_generation.py.")
    return []

def run_full_pipeline(num_questions=10, qa_topic=None, num_images=2, monuments=None, 
                     num_views=4, reference_dir=REFERENCE_DIR, reference_count=5):
    """Run the complete pipeline: generate Q&A, images, and views"""
    start_time = datetime.now()
    print("Running full pipeline...")
    qa_file_for_images = os.path.join(OUTPUT_DIR, os.path.basename(DEFAULT_QA_TXT_FILE))

    # Step 1: Generate Q&A
    if process_architectural_qa_batch_and_save:
        print(f"\n--- Step 1: Generating {num_questions} Q&A pairs ---")
        qa_file_path = run_generate_qa_mode(
            num_questions=num_questions,
            qa_topic=qa_topic,
            output_file=DEFAULT_QA_TXT_FILE
        )
        print("--- Q&A Generation Complete ---")
        if not qa_file_path:
            print("Error in Q&A generation, cannot proceed with full pipeline")
            return None
    else:
        print("\nWarning: Q&A generation function not available. Checking for existing file...")
        # Check if the specified input file exists if we skip generation
        if not os.path.exists(qa_file_for_images):
             print(f"Error: Q&A input file '{qa_file_for_images}' not found and generation was skipped. Cannot proceed.")
             return None
        print(f"Found existing file: {qa_file_for_images}")

    # Step 2: Generate Images from Q&A
    if qa_to_images_pipeline:
        print(f"\n--- Step 2: Generating {num_images} images per Q&A pair ---")
        generated_image_paths = run_generate_images_mode(
            source_type="file",
            input_file=DEFAULT_QA_TXT_FILE,
            num_images=num_images,
            monuments=monuments,
            reference_dir=reference_dir,
            reference_count=reference_count
        )
        print("--- Image Generation Complete ---")
    else:
        print("\nWarning: Image generation function not available. Skipping Step 2.")
        generated_image_paths = []

    # Step 3: Generate Views for a random generated image
    if generate_multiple_views and generated_image_paths:
        # Select one of the generated images randomly
        image_to_process = random.choice(generated_image_paths)
        print(f"\n--- Step 3: Generating {num_views} views for a generated image: {os.path.basename(image_to_process)} ---")
        view_paths = run_generate_views_mode(
            input_image=image_to_process,
            num_views=num_views
        )
        print("--- View Generation Complete ---")
    elif not generated_image_paths:
         print("\nSkipping view generation (Step 3) as no images were generated in Step 2.")
    else:
        print("\nWarning: 3D view generation function not available. Skipping Step 3.")

    end_time = datetime.now()
    print(f"\n{'='*15} Pipeline Finished {'='*15}")
    print(f"Total execution time: {end_time - start_time}")
    print(f"{'='*50}\n")
    
    return generated_image_paths

## Example Usage

Below are examples of how to use each mode of the pipeline.

### 1. Generate Q&A Pairs

In [9]:
# Generate 5 Q&A pairs about Hampi architecture
qa_file_path = run_generate_qa_mode(num_questions=5, qa_topic="Stone chariot")

Generating 5 architectural Q&A pairs...
Initializing models and vector stores...
--> Initializing Embedding Model...

!!! Error during initialization: Failed to import transformers.models.timm_wrapper.configuration_timm_wrapper because of the following error (look up to see its traceback):
cannot import name 'ImageNetInfo' from 'timm.data' (d:\college\imp-doc\sem6\GENAI\project\.venv\lib\site-packages\timm\data\__init__.py) !!!


!!! Please ensure ChromaDB directories exist and are valid, required models are accessible, and API keys are valid. !!!


--- Debugging component values after initialization ---
  embedding_model: <class 'NoneType'>, Is None: True
  llm: <class 'NoneType'>, Is None: True
  question_generator_llm: <class 'NoneType'>, Is None: True
  source_vectorstore: <class 'NoneType'>, Is None: True
  architecture_qa_vectorstore: <class 'NoneType'>, Is None: True
  qa_chain: <class 'NoneType'>, Is None: True
  Result of all([...]): False
--- End Debugging ---

Initialization

Traceback (most recent call last):
  File "d:\college\imp-doc\sem6\GENAI\project\.venv\lib\site-packages\transformers\utils\import_utils.py", line 1967, in _get_module
    return importlib.import_module("." + module_name, self.__name__)
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.9_3.9.3568.0_x64__qbz5n2kfra8p0\lib\importlib\__init__.py", line 127, in import_module
    return _bootstrap._gcd_import(name[level:], package, level)
  File "<frozen importlib._bootstrap>", line 1030, in _gcd_import
  File "<frozen importlib._bootstrap>", line 1007, in _find_and_load
  File "<frozen importlib._bootstrap>", line 986, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 680, in _load_unlocked
  File "<frozen importlib._bootstrap_external>", line 850, in exec_module
  File "<frozen importlib._bootstrap>", line 228, in _call_with_frames_removed
  File "d:\college\imp-doc\sem6\GENAI\project\.venv\lib\site-packages\transformers\models\timm_wrapper\confi

### 3. Generate Images

In [10]:

from PIL import Image as PILImage
img = PILImage.open(DEFAULT_REFERENCE_IMAGES_DIR + "\\2000.png")
img.show()
# Generate images from Q&A pairs
image_paths = run_generate_images_mode(
    source_type="file",
    input_file=r"D:\college\imp-doc\sem6\GENAI\project\3D-Reconstruction-of-Monuments\hampi_output\Hampi_Architecture_QA.txt",
    num_images=1,
    monuments=["Stone Chariot"],
    reference_dir=REFERENCE_DIR,
    reference_count=1
)

Starting image generation pipeline...
Loading models...
Loading models...


Device set to use cuda:0
Cannot initialize model with low cpu memory usage because `accelerate` was not found in the environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install `accelerate` for faster and less memory-intense model loading. You can do so with: 
```
pip install accelerate
```
.


Loading SD model: SG161222/Realistic_Vision_V5.1_noVAE


Loading pipeline components...: 100%|██████████| 5/5 [00:03<00:00,  1.55it/s]


Loading img2img model...
Models loaded successfully
Retrieving Q&A pairs...
Found 16 Q&A pairs.
Filtered to 1 Q&A pairs related to specified monuments.

Processing Q&A pair 1/1
Question: How does the Stone Chariot at Hampi demonstrate the advanced engineering and architectural skills of the Vijayanagara craftsmen, particularly in terms of creating the illusion of a monolithic structure while actually using multiple granite blocks?
Extracted details: The Stone Chariot at Hampi stands as a remarkable testament to the engineering prowess of Vijayanaga...
Generated prompt: A monument at Hampi, India, The Stone Chariot at Hampi stands as a remarkable testament to the engin...
Loading reference images from D:\college\imp-doc\sem6\GENAI\project\new\chariot\images...
Found 1 images


Loading images: 100%|██████████| 1/1 [00:00<00:00, 87.78it/s]

Successfully loaded 1 reference images
Using reference image: 2000.png



100%|██████████| 34/34 [00:08<00:00,  3.90it/s]


Saved image to hampi_output\hampi_1_seed640907784.png
Saved comparison to hampi_output\hampi_1_comparison.png

Generation complete. Created 1 images in hampi_output.


### 4. Generate Views of Existing Image

In [11]:
# Generate multiple views of an existing image
# If you already have generated images, provide the path to one of them
# existing_image = "hampi_output/vittala_temple_0.png"  # Replace with actual path
# views = run_generate_views_mode(input_image=existing_image, num_views=3)

### 5. Run Full Pipeline

In [12]:
# Run the complete pipeline
# results = run_full_pipeline(
#     num_questions=10,
#     qa_topic="Hampi architectural features",
#     num_images=2,
#     monuments=["Vittala Temple"],
#     num_views=3,
#     reference_dir=REFERENCE_DIR,
#     reference_count=5
# )

## Display Generated Images

You can use the following cell to display generated images.

In [13]:
# Display a generated image
from IPython.display import Image, display

def display_images(image_paths, max_images=5):
    """Display images in the notebook"""
    if not image_paths:
        print("No images to display")
        return
    
    for i, path in enumerate(image_paths[:max_images]):
        if os.path.exists(path):
            print(f"Image {i+1}: {os.path.basename(path)}")
            display(Image(path, width=800))
        else:
            print(f"Image file not found: {path}")
    
    if len(image_paths) > max_images:
        print(f"... and {len(image_paths) - max_images} more images")

# Uncomment to display images from a previous run
# image_dir = "hampi_output"
# all_images = [os.path.join(image_dir, f) for f in os.listdir(image_dir) if f.endswith(".png") and not "comparison" in f]
# display_images(all_images)