<a href="https://colab.research.google.com/github/InduwaraGayashan001/Generative-AI/blob/main/LlamaIndex.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup

In [None]:
!pip install -q llama-index
!pip install pypdf
!pip install docx2txt
!pip install google-generativeai
!pip install transformers

In [None]:
# Remove the currently installed google-generativeai (-y skips the prompt),
# then install it again, asking pip for the newest available release.
!pip uninstall -y google-generativeai
!pip install google-generativeai --upgrade

In [None]:
# Packages for the embedding model configured later in the notebook.
# NOTE(review): presumably sentence-transformers backs HuggingFaceEmbeddings and
# llama-index-embeddings-langchain lets LlamaIndex accept a LangChain embedding
# object as Settings.embed_model — confirm against the package docs.
!pip install sentence-transformers
!pip install langchain-community
!pip install llama-index-embeddings-langchain

In [3]:
from llama_index.core.readers import SimpleDirectoryReader
from llama_index.core.indices.vector_store import VectorStoreIndex
from llama_index.core import ServiceContext, StorageContext, load_index_from_storage
from llama_index.llms.gemini import Gemini
import os


# Load Data

In [None]:
!mkdir data

In [6]:
# Point a reader at the data/ folder, then load its files into `documents`.
reader = SimpleDirectoryReader(input_dir="data")
documents = reader.load_data()

In [7]:
# Sanity check: show the first loaded Document (metadata plus extracted text).
documents[0]

Document(id_='368ee306-e0d9-4045-9a76-a8d83bcddaa8', embedding=None, metadata={'page_label': '1', 'file_name': 'YOLO.pdf', 'file_path': '/content/data/YOLO.pdf', 'file_type': 'application/pdf', 'file_size': 2491132, 'creation_date': '2025-06-28', 'last_modified_date': '2025-06-28'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='YOLO:\n You Only Look Once\n Unified Real-Time Object Detection\nPresenter: Liyang Zhong  Quan Zou', path=None, url=None, mimetype=None), image_resource=None, audio_resource=None, video_resource=None, text_template='{metadata_str}\n\n{content}')

# Load the model

In [22]:
from google.colab import userdata
# NOTE(review): the `palm` alias is not referenced in the visible cells —
# confirm whether this import is still needed.
import google.generativeai as palm

# Read the API key from the Colab secret named GOOGLE_API_KEY so the key itself
# never appears in the notebook source.
api_key = userdata.get('GOOGLE_API_KEY')

In [23]:
# Gemini LLM client for the rest of the notebook, using the API key read from
# the Colab secret.
# NOTE(review): the saved output echoes this line, which looks like the tail of
# a warning — presumably a deprecation notice for this Gemini wrapper; confirm.
llm = Gemini(model="models/gemini-2.0-flash", api_key=api_key)

  llm = Gemini(model="models/gemini-2.0-flash", api_key=api_key)


# Embeddings, Chunking, and Indexing

In [24]:
# Import from langchain-community (installed in the setup cells) rather than
# the legacy `langchain.embeddings.huggingface` path, which newer langchain
# releases deprecate or no longer provide.
from langchain_community.embeddings import HuggingFaceEmbeddings

# Embedding model used to vectorise document chunks and queries.
embed_model = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en")

In [25]:
from llama_index.core.settings import Settings

# Chunking parameters, named once so they are easy to find and tune.
# NOTE(review): presumably measured in tokens — confirm against the LlamaIndex docs.
chunk_size, chunk_overlap = 1000, 200

# Register the LLM and the embedding model on the global Settings object.
Settings.llm = llm
Settings.embed_model = embed_model

# Apply the chunking parameters (size first, then overlap).
Settings.chunk_size = chunk_size
Settings.chunk_overlap = chunk_overlap


In [26]:
# Build a vector store index over the loaded documents.
# NOTE(review): presumably picks up the LLM, embedding model and chunking values
# configured on Settings — confirm.
index = VectorStoreIndex.from_documents(documents)

# Storing and Loading the Index

In [27]:
# Save the index to disk. The directory is passed explicitly (it is also the
# library default) so it visibly matches the persist_dir used when loading.
index.storage_context.persist(persist_dir="./storage")

In [28]:
# Rebuild the index from the files previously written to the storage folder.
persist_dir = "./storage"
storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
index = load_index_from_storage(storage_context=storage_context)

Loading llama_index.core.storage.kvstore.simple_kvstore from ./storage/docstore.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from ./storage/index_store.json.


# Q/A

In [29]:
# Wrap the index in a query engine with default options.
query_engine = index.as_query_engine()

In [30]:
# Ask a question about the indexed documents and keep the full response object.
question = "What is YOLO?"
response = query_engine.query(question)

In [31]:
# Show the raw Response object: the answer text plus the source nodes with their metadata.
response

Response(response='YOLO is extremely fast, reasons globally on the entire image, and learns generalizable representations. However, its performance is lower than state-of-the-art and it makes more localization errors.\n', source_nodes=[NodeWithScore(node=TextNode(id_='882aa484-113b-4e34-b784-6fef4be4d79c', embedding=None, metadata={'page_label': '58', 'file_name': 'YOLO.pdf', 'file_path': '/content/data/YOLO.pdf', 'file_type': 'application/pdf', 'file_size': 2491132, 'creation_date': '2025-06-28', 'last_modified_date': '2025-06-28'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='015a7239-f4ab-4718-ba4c-4afca11bfec6', node_type='4', metadata={'page_label': '58', 'file_name': 'YOLO.pdf', 'file_path': '/cont

In [32]:
from IPython.display import Markdown, display

# Wrap the answer text in bold tags and render it as Markdown output.
bold_answer = "<b>{}</b>".format(response)
display(Markdown(bold_answer))

<b>YOLO is extremely fast, reasons globally on the entire image, and learns generalizable representations. However, its performance is lower than state-of-the-art and it makes more localization errors.
</b>