In [1]:
!pip install llama-index==0.9.39
!pip install pypdf
!pip install docx2txt
!pip install transformers



In [2]:
!pip install google-generativeai



In [3]:
from llama_index import SimpleDirectoryReader, VectorStoreIndex
from llama_index.llms.palm import PaLM
from llama_index import ServiceContext
from llama_index import StorageContext, load_index_from_storage
import os

## Load data

In [4]:
!mkdir data

mkdir: cannot create directory ‘data’: File exists


In [5]:
documents = SimpleDirectoryReader("data").load_data()

In [6]:
documents

[Document(id_='5681431b-2d0a-4304-a7e4-c7160b1de261', embedding=None, metadata={'page_label': '1', 'file_name': 'transformer.pdf', 'file_path': 'data/transformer.pdf', 'file_type': 'application/pdf', 'file_size': 779655, 'creation_date': '2024-11-07', 'last_modified_date': '2024-11-07', 'last_accessed_date': '2024-11-07'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text="Transformer  \nelectrical transformer is a static electrical machine which transforms electrical power from one circuit to \nanother circuit, without changing the frequency. Transformer can increase or decrease the voltage with \ncorresponding decrease or increase in current. \nWorking principle of transformer \n \n The basic principle behind working of a transformer is the phenomenon of 

## Split the Text into Small Chunks

In [7]:
!pip install sentence_transformers



In [8]:
!pip install langchain-community



In [9]:
from langchain.embeddings.huggingface import HuggingFaceBgeEmbeddings

In [25]:
os.environ['GOOGLE_API_KEY'] = ''

In [32]:
from llama_index.llms import Gemini
llm = Gemini(model="models/gemini-ultra")

In [33]:
embed_model = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-base-en")

In [34]:
# creating Chunks
service_context = ServiceContext.from_defaults(llm = llm, embed_model=embed_model, chunk_size = 800, chunk_overlap=20)

In [35]:
# Convert to vector format
index = VectorStoreIndex.from_documents(documents, service_context= service_context)

## Storing and Loading the Index

In [36]:
index.storage_context.persist() # Check storage folder

In [None]:
# Loading the index
# storage_context = StorageContext.from_defaults(persist_dir = './storage')
# index = load_index_from_storage(storage_context=storage_context)

## Q/A

In [37]:
query_engine = index.as_query_engine()

In [39]:
response = query_engine.query("What is transformer?")
response

Response(response='Transformer is a static electrical machine which transforms electrical power from one circuit to another circuit, without changing the frequency.', source_nodes=[NodeWithScore(node=TextNode(id_='8fb40f9e-f7ce-4b32-a7a6-cc968dd9c6a6', embedding=None, metadata={'page_label': '1', 'file_name': 'transformer.pdf', 'file_path': 'data/transformer.pdf', 'file_type': 'application/pdf', 'file_size': 779655, 'creation_date': '2024-11-07', 'last_modified_date': '2024-11-07', 'last_accessed_date': '2024-11-07'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='5681431b-2d0a-4304-a7e4-c7160b1de261', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '1', 'file_name': 'transformer.pdf', 'file_

In [40]:
from IPython.display import Markdown, display

In [41]:
display(Markdown(f"<b>{response}</b>"))

<b>Transformer is a static electrical machine which transforms electrical power from one circuit to another circuit, without changing the frequency.</b>