In [1]:
# Installation of dependencies
!pip install llama-index-multi-modal-llms-gemini
!pip install llama-index-vector-stores-qdrant
!pip install llama-index-embeddings-gemini
!pip install llama-index-llms-gemini
!pip install llama-index 'google-generativeai>=0.3.0' matplotlib qdrant_client
!pip install nest_asyncio



In [2]:
# Import necessary dependencies
import os
from google.colab import userdata
from llama_index.llms.gemini import Gemini
from llama_index.core.llms import ChatMessage, ImageBlock
from PIL import Image
import requests
from io import BytesIO
import matplotlib.pyplot as plt
import time
import asyncio
import nest_asyncio  # Fix for Jupyter event loop issue
from pathlib import Path
from pydantic import BaseModel
from llama_index.multi_modal_llms.gemini import GeminiMultiModal
from llama_index.core.program import MultiModalLLMCompletionProgram
from llama_index.core.output_parsers import PydanticOutputParser
from llama_index.core import SimpleDirectoryReader
from llama_index.core.schema import TextNode
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.embeddings.gemini import GeminiEmbedding
from llama_index.llms.gemini import Gemini
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import Settings
from llama_index.core import StorageContext
import qdrant_client
import asyncio
import aiohttp  # For handling image URLs asynchronously

# Apply the nest_asyncio patch
nest_asyncio.apply()

In [3]:
# Configure API key and initialize model
if "GOOGLE_API_KEY" not in os.environ:
    os.environ["GOOGLE_API_KEY"] = userdata.get('api_key')

os.environ['GOOGLE_API_KEY']=userdata.get('api_key')
model = Gemini(model_name="models/gemini-1.5-pro")

In [4]:
# Core system
async def chatbot_response(user_input=None, image_url=None):
    try:
        print(f"DEBUG: Received input: {user_input} | Image URL: {image_url}")

        if image_url:
            print("DEBUG: Processing Image Input...")

            # Step 1: Create message with image URL
            msg = ChatMessage("Describe the content of this image in a detailed caption.")
            msg.blocks.append(ImageBlock(url=image_url))

            # Step 2: Generate caption for the image
            caption_response = await asyncio.to_thread(model.chat, messages=[msg])
            print(f"DEBUG: Generated Caption: {caption_response.message.content}")
            image_caption = caption_response.message.content

            # Step 3: Use the caption to generate a conversation response
            chat_prompt = ChatMessage(f"Continue the conversation based on this image description: {image_caption}")
            chat_response = await asyncio.to_thread(model.chat, messages=[chat_prompt])
            print(f"DEBUG: Chatbot Response: {chat_response.message.content}")
            return chat_response.message.content

        elif user_input:
            print("DEBUG: Processing Text Input...")
            response = await asyncio.to_thread(model.chat, messages=[ChatMessage(user_input)])
            print(f"DEBUG: Chatbot Response: {response.message.content}")
            return response.message.content

        else:
            print("DEBUG: No valid input provided")
            return "Please provide an input (text or image URL)."

    except Exception as e:
        print(f"ERROR: {e}")
        return "An error occurred. Please try again."

def run_async_task(task):
    """Runs async functions safely in both normal Python and Jupyter/Colab."""
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    return loop.run_until_complete(task)

def main():
    """
    Command-line chatbot interface.
    """
    print("Welcome to the Chatbot! Type 'exit' to quit.")

    while True:
        user_input = input("\nEnter text message (or type 'img' to provide an image URL): ").strip()

        if user_input.lower() == "exit":
            print("Goodbye!")
            break

        elif user_input.lower() == "img":
            image_url = input("Enter the image URL: ").strip()
            response = run_async_task(chatbot_response(image_url=image_url))
            print("\nBot:", response)

        else:
            response = run_async_task(chatbot_response(user_input=user_input))
            print("\nBot:", response)

if __name__ == "__main__":
    main()

Welcome to the Chatbot! Type 'exit' to quit.

Enter text message (or type 'img' to provide an image URL): https://storage.googleapis.com/generativeai-downloads/data/scene.jpg
DEBUG: Received input: https://storage.googleapis.com/generativeai-downloads/data/scene.jpg | Image URL: None
DEBUG: Processing Text Input...
DEBUG: Chatbot Response: The image at the provided URL depicts a vibrant street scene, likely in a Latin American or Caribbean country, based on the architecture and vegetation.  Here's a breakdown of the scene:

* **Architecture:**  Colorful buildings dominate the scene. They are painted in various pastel shades, including pink, yellow, blue, and green.  The architectural style suggests a mix of colonial influences and more modern construction.  Balconies with decorative ironwork are a prominent feature.
* **Vegetation:**  Lush green plants, including trees and potted plants, add a tropical feel to the scene.  Some of the plants are overflowing from balconies and window box