In [1]:
import os
from getpass import getpass

In [2]:
GOOGLE_API_KEY=getpass("enter your api key:")
os.environ['GOOGLE_API_KEY']=GOOGLE_API_KEY


enter your api key: ········


In [4]:
import google.generativeai as genai 

In [5]:
for model in genai.list_models():
    if 'generateContent' in model.supported_generation_methods:
        print(model.name)

models/gemini-1.0-pro-latest
models/gemini-1.0-pro
models/gemini-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-pro-exp-0801
models/gemini-1.5-pro-exp-0827
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-exp-0827
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/gemini-2.0-flash-exp
models/gemini-exp-1206
models/gemini-exp-1121
models/gemini-exp-1114
models/gemini-2.0-flash-thinking-exp
models/gemini-2.0-flash-thinking-exp-1219
models/learnlm-1.5-pro-experimental


In [9]:
## data loading and data extraction 
from llama_index.core import SimpleDirectoryReader


In [28]:
document_readers = SimpleDirectoryReader("data")

In [30]:
documents = document_reader.load_data()

In [34]:
for i ,document in enumerate(documents):
    print(f" document {i+1}:")
    print(f"Text: {document.text[:200]}")
    print(f"metadata: {document.metadata}")
    print("_" * 50)

 document 1:
Text: Canada
Flag
Coat of arms
Motto: A mari usque ad mare (Latin)
"From Sea to Sea"
Anthem: "O Canada"
Royal anthem: "God Save the King"[1]
Capital Ottawa
45°24′N 75°40′W
Largest city Toronto
Official lang
metadata: {'page_label': '1', 'file_name': 'Canada.pdf', 'file_path': 'C:\\Users\\Dell\\OneDrive - City Community Education Consultancy Pvt. Ltd\\Desktop\\bot\\data\\Canada.pdf', 'file_type': 'application/pdf', 'file_size': 2556585, 'creation_date': '2025-01-19', 'last_modified_date': '2025-01-19'}
__________________________________________________
 document 2:
Text: • Prime Minister Justin Trudeau
Legislature Parliament
• Upper house Senate
• Lower house House of Commons
Independence from the United Kingdom
• Confederation July 1, 1867
• Statute of
Westminster,
1
metadata: {'page_label': '2', 'file_name': 'Canada.pdf', 'file_path': 'C:\\Users\\Dell\\OneDrive - City Community Education Consultancy Pvt. Ltd\\Desktop\\bot\\data\\Canada.pdf', 'file_type': 'application/pdf'

In [98]:
type(documents)
print(dir(documents))

['__add__', '__class__', '__class_getitem__', '__contains__', '__delattr__', '__delitem__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__iadd__', '__imul__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__mul__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__reversed__', '__rmul__', '__setattr__', '__setitem__', '__sizeof__', '__str__', '__subclasshook__', 'append', 'clear', 'copy', 'count', 'extend', 'index', 'insert', 'pop', 'remove', 'reverse', 'sort']


In [183]:
## data extraction part-pydantic base Model
from llama_index.multi_modal_llms.gemini import GeminiMultiModal
from llama_index.core.program.multi_modal_llm_program import MultiModalLLMCompletionProgram
from llama_index.core.output_parsers.pydantic import PydanticOutputParser
from llama_index.core.schema import TextNode
import asyncio

In [184]:
from pydantic import BaseModel
from typing import List, Optional, Type
import json

In [185]:
class PopulationData(BaseModel):
    city_name: str 
    population: int  
    province: str  #
    famous_foods: List[str]  
    languages: List[str]  
    growth_rate: Optional[float] = None  
    history: str  
    description: str  
    tourist_places: List[str] 

    
    area: Optional[float] = None  
    climate: Optional[str] = None  
    transportation: Optional[str] = None  


In [189]:
prompt_template = """
You are an AI assistant with the role of summarizing tables and text from the provided CONTEXT FOR retrieval.
Think of yourself as a knowledgeable tourist guide for Canada, offering insights into the country's key features.

Your task is to:
1. Extract and summarize important details about Canada's cities, provinces, population, famous landmarks, languages, and culture.
2. Provide information in an engaging, concise, and informative manner as if you were explaining it to a tourist visiting Canada for the first time.
3. Be descriptive and include facts such as popular cities, local cuisine, cultural highlights, languages spoken, and interesting geographical features.
4. Focus on clarity and be ready to give useful and relevant details that will help someone exploring Canada, whether for tourism, study, or cultural exploration.

Use the context provided to retrieve and structure the information as a comprehensive guide to Canada.
"""


In [192]:
def pydantic_gemini(model_name: str, output_class: Type[BaseModel], prompt_templates: str, data: dict) -> BaseModel:
    gemini_llm = GeminiMultiModal(model_name=model_name)
    prompt = prompt_templates.format(**data)
    llm_program = MultiModalLLMCompletionProgram.from_defaults(
        output_parser=PydanticOutputParser(output_cls=output_class),
        multi_modal_llm=gemini_llm,
        prompt_template_str=prompt,
        verbose=True
    )
    gemini_response = llm_program
    return gemini_response

In [193]:
results = []
for datas in documents:
    print("Document Text: ", datas.text)
    print("Document Metadata: ", datas.metadata)

    data = {
        'city_name': datas.metadata.get('city_name', 'Unknown'),
        'population': datas.metadata.get('population', 'N/A'),
        'province': datas.metadata.get('province', 'N/A'),
        'famous_foods': datas.metadata.get('famous_foods', 'N/A'),
        'languages': datas.metadata.get('languages', 'N/A'),
        'tourist_places': datas.metadata.get('tourist_places', 'N/A'),
    }

    result = pydantic_gemini(
        model_name='models/gemini-pro-vision',
        output_class=PopulationData,
        prompt_templates=prompt_template,
        data=data
    )

    results.append(result)


Document Text:  Canada
Flag
Coat of arms
Motto: A mari usque ad mare (Latin)
"From Sea to Sea"
Anthem: "O Canada"
Royal anthem: "God Save the King"[1]
Capital Ottawa
45°24′N 75°40′W
Largest city Toronto
Official languages English · French
Demonym(s) Canadian
Government Federal parliamentary
constitutional
monarchy
• Monarch Charles III
• Governor General Mary Simon
Canada
Canada is a country in North America. Its ten
provinces and three territories extend from the Atlantic
Ocean to the Pacific Ocean and northward into the
Arctic Ocean, making it the world's second-largest
country by total area, with the world's longest
coastline. Its border with the United States is the
world's longest international land border. The country
is characterized by a wide range of both meteorologic
and geological regions. With a population of just over
41 million people, it has widely varying population
densities, with the majority residing in urban areas and
large areas of the country being sparsely popula

In [204]:
nodes = []
for res in results:
    metadata = {}
    text_node = TextNode()
    
    # Assuming res has a method or attributes to retrieve the needed data
    # Check if `res` has an attribute 'description' (replace with actual attribute names)
    if hasattr(res, 'description'):
        text_node.text = res.description  # Replace with actual attribute name
    
    # Check for other metadata, assuming `res` has a method `get_metadata()`
    if hasattr(res, 'metadata'):
        metadata = res.metadata  # Adjust based on the actual structure
    
    text_node.metadata = metadata
    nodes.append(text_node)


In [206]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import Settings
from llama_index.core import StorageContext
import qdrant_client

In [208]:
client=qdrant_client.QdrantClient(path="qdrant_gemini")

In [209]:
vector_store=QdrantVectorStore(client=client,collection_name="gemini")

In [211]:
##llm to generate response
from llama_index.embeddings.gemini import GeminiEmbedding
from llama_index.llms.gemini import Gemini

In [212]:
Settings.embed_model=GeminiEmbedding(model_name="models/embedding-001",api_key=GOOGLE_API_KEY)
Settings.llm=Gemini(api_key=GOOGLE_API_KEY)


In [213]:
storage_context=StorageContext.from_defaults(vector_store=vector_store)

In [216]:
from llama_index.core import VectorStoreIndex
index=VectorStoreIndex(
    nodes=nodes,
    storage_context=storage_context
)

Some nodes are missing content, skipping them...


In [220]:
query_engine=index.as_query_engine(similarity_top_k=1)

In [223]:
query=" what is the capital of the canada"

In [224]:
response=query_engine.query(query)
print(response)

ValueError: Collection gemini not found