In [2]:
## Retrival augmented generation

import os
from dotenv import load_dotenv

load_dotenv()

True

In [5]:
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')

In [12]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(documents)

In [13]:
print(len(documents))
index

1226


<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x1f3bc079ae0>

In [14]:
query_engine = index.as_query_engine()

In [16]:
response = query_engine.query("What is flip flop?")
print(response)

A flip-flop is a digital circuit element that can store one bit of information. It can change its output based on the input it receives and a clock signal. Flip-flops are commonly used in digital electronics for storing data and synchronizing signals.


In [23]:
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [18]:
index.storage_context.persist()

DEBUG:fsspec.local:open file: d:/Machine learning/Generative AI/Llama Index/storage/docstore.json
open file: d:/Machine learning/Generative AI/Llama Index/storage/docstore.json
DEBUG:fsspec.local:open file: d:/Machine learning/Generative AI/Llama Index/storage/index_store.json
open file: d:/Machine learning/Generative AI/Llama Index/storage/index_store.json
DEBUG:fsspec.local:open file: d:/Machine learning/Generative AI/Llama Index/storage/graph_store.json
open file: d:/Machine learning/Generative AI/Llama Index/storage/graph_store.json
DEBUG:fsspec.local:open file: d:/Machine learning/Generative AI/Llama Index/storage/default__vector_store.json
open file: d:/Machine learning/Generative AI/Llama Index/storage/default__vector_store.json
DEBUG:fsspec.local:open file: d:/Machine learning/Generative AI/Llama Index/storage/image__vector_store.json
open file: d:/Machine learning/Generative AI/Llama Index/storage/image__vector_store.json


In [21]:
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.indices.postprocessor import SimilarityPostprocessor


retriever = VectorIndexRetriever(index=index, similarity_top_k=5)
postprocessor = SimilarityPostprocessor(similarity_cutoff=0.80)
query_engine = RetrieverQueryEngine(retriever=retriever, node_postprocessors=[postprocessor])

In [25]:
response = query_engine.query("What is flip flop?")
response

DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/embeddings', 'files': None, 'post_parser': <function Embeddings.create.<locals>.parser at 0x000001F3BEB0AE60>, 'json_data': {'input': ['What is flip flop?'], 'model': 'text-embedding-ada-002', 'encoding_format': 'base64'}}
Request options: {'method': 'post', 'url': '/embeddings', 'files': None, 'post_parser': <function Embeddings.create.<locals>.parser at 0x000001F3BEB0AE60>, 'json_data': {'input': ['What is flip flop?'], 'model': 'text-embedding-ada-002', 'encoding_format': 'base64'}}
Request options: {'method': 'post', 'url': '/embeddings', 'files': None, 'post_parser': <function Embeddings.create.<locals>.parser at 0x000001F3BEB0AE60>, 'json_data': {'input': ['What is flip flop?'], 'model': 'text-embedding-ada-002', 'encoding_format': 'base64'}}
DEBUG:openai._base_client:Sending HTTP Request: POST https://api.openai.com/v1/embeddings
Sending HTTP Request: POST https://api.openai.com/v1/embeddings
Sending HTTP Req

Response(response='A flip-flop is a binary cell that stores 1-bit of information. It is a sequential circuit that can change its state when a clock pulse occurs, typically triggered by the transition of the clock signal from 0 to 1 (rising edge) or from 1 to 0 (falling edge). Unlike a latch, which changes state when the clock is exactly at 1, a flip-flop is edge-triggered and changes state based on specific clock transitions.', source_nodes=[NodeWithScore(node=TextNode(id_='bc5e481e-82ce-47e2-8978-4b3218178817', embedding=None, metadata={'page_label': '87', 'file_name': 'Block-1_merged.pdf', 'file_path': 'd:\\Machine learning\\Generative AI\\Llama Index\\data\\Block-1_merged.pdf', 'file_type': 'application/pdf', 'file_size': 35858591, 'creation_date': '2024-06-13', 'last_modified_date': '2024-05-02'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'f

In [26]:
from llama_index.core.response.pprint_utils import pprint_response

pprint_response(response, show_source=True)
print(response)

Final Response: A flip-flop is a binary cell that stores 1-bit of
information. It is a sequential circuit that can change its state when
a clock pulse occurs, typically triggered by the transition of the
clock signal from 0 to 1 (rising edge) or from 1 to 0 (falling edge).
Unlike a latch, which changes state when the clock is exactly at 1, a
flip-flop is edge-triggered and changes state based on specific clock
transitions.
______________________________________________________________________
Source Node 1/5
Node ID: bc5e481e-82ce-47e2-8978-4b3218178817
Similarity: 0.8302208891766474
Text: D flip-flop is also referred as Delay flip-flop because it
delays the 0 or 1 applied to its Principles of Logic  I input by a
single clock pulse. Circuits 11  I I J-K flip-flopa I  I The J-K flip-
flop is also a modification of SR flip-flop, it has 2 inputs like S &
R and  all possible inputs combinations are valid in J K flip-flop.
Figure. 4.6 ...
____________________________________________________

In [28]:
import os.path
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)

# check if storage already exists
PERSIST_DIR = "./storage"
if not os.path.exists(PERSIST_DIR):
    # load the documents and create the index
    documents = SimpleDirectoryReader("data").load_data()
    index = VectorStoreIndex.from_documents(documents)
    # store it for later
    index.storage_context.persist(persist_dir=PERSIST_DIR)
else:
    # load the existing index
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)

# either way we can now query the index
query_engine = index.as_query_engine()
response = query_engine.query("What are transformers?")
print(response)

DEBUG:llama_index.core.storage.kvstore.simple_kvstore:Loading llama_index.core.storage.kvstore.simple_kvstore from ./storage\docstore.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from ./storage\docstore.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from ./storage\docstore.json.
DEBUG:fsspec.local:open file: d:/Machine learning/Generative AI/Llama Index/storage/docstore.json
open file: d:/Machine learning/Generative AI/Llama Index/storage/docstore.json
open file: d:/Machine learning/Generative AI/Llama Index/storage/docstore.json
DEBUG:llama_index.core.storage.kvstore.simple_kvstore:Loading llama_index.core.storage.kvstore.simple_kvstore from ./storage\index_store.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from ./storage\index_store.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from ./storage\index_store.json.
DEBUG:fsspec.local:open file: d:/Machine learning/Generative AI/Llama Index/storage/index_store.json
open file: