In [1]:
from dotenv import load_dotenv

load_dotenv('../../../.env')

True

# Query Engine

In [2]:
from llama_index.core.readers import SimpleDirectoryReader

# load documents
documents = SimpleDirectoryReader("data/").load_data()

# Divide to chunks

In [16]:
from llama_index.llms.huggingface import HuggingFaceInferenceAPI
from llama_index.core import set_global_tokenizer
from transformers import AutoTokenizer

llm = HuggingFaceInferenceAPI(
    model_name='mistralai/Mistral-7B-Instruct-v0.2',
    generate_kwargs={'temperature':0.5,"max_length": 64,"max_new_tokens":512}
)

embed = set_global_tokenizer(
    AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-alpha").encode
)


In [4]:
from llama_index.core import ServiceContext

service_context = ServiceContext.from_defaults(chunk_size=512, chunk_overlap=64,llm=None, embed_model='local')
node_parser = service_context.node_parser

nodes = node_parser.get_nodes_from_documents(documents)

  service_context = ServiceContext.from_defaults(chunk_size=512, chunk_overlap=64,llm=None, embed_model='local')


LLM is explicitly disabled. Using MockLLM.


# Put in dataset

In [5]:
from llama_index.vector_stores.deeplake import DeepLakeVectorStore

my_activeloop_org_id = "thapabibek1129"
my_activeloop_dataset_name = "LlamaIndex_paulgraham_essays"
dataset_path = f"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}"

# Create an index over the documnts
vector_store = DeepLakeVectorStore(dataset_path=dataset_path, overwrite=False)



Deep Lake Dataset in hub://thapabibek1129/LlamaIndex_paulgraham_essays already exists, loading from the storage


In [11]:
from llama_index.core.storage.storage_context import StorageContext
from llama_index.core import VectorStoreIndex

storage_context = StorageContext.from_defaults(vector_store=vector_store)
storage_context.docstore.add_documents(nodes)
vector_index = VectorStoreIndex(nodes, storage_context=storage_context, embed_model='local')

Uploading data to deeplake dataset.


100%|██████████| 45/45 [00:00<00:00, 430.81it/s]
/

Dataset(path='hub://thapabibek1129/LlamaIndex_paulgraham_essays', tensors=['embedding', 'id', 'metadata', 'text'])

  tensor      htype      shape      dtype  compression
  -------    -------    -------    -------  ------- 
 embedding  embedding  (180, 384)  float32   None   
    id        text      (180, 1)     str     None   
 metadata     json      (180, 1)     str     None   
   text       text      (180, 1)     str     None   


 

In [19]:
from langchain_community.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name = 'sentence-transformers/all-MiniLM-L6-v2',
                                       model_kwargs = {'device':'cpu'} )

In [20]:
query_engine = vector_index.as_query_engine(streaming=True, similarity_top_k=10, llm=llm, embed_model = embeddings)

client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False})
  (2): Normalize()
) model_name='sentence-transformers/all-MiniLM-L6-v2' cache_folder=None model_kwargs={'device': 'cpu'} encode_kwargs={} multi_process=False show_progress=False


# Run

In [21]:
streaming_response = query_engine.query(
    "What does Paul Graham do?"
)
streaming_response.print_response_stream()

AttributeError: 'HuggingFaceEmbeddings' object has no attribute 'get_agg_embedding_from_queries'

# Sub Question Query Engine

In [23]:
query_engine = vector_index.as_query_engine(similarity_top_k=10, llm=llm)

None


In [26]:
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine import SubQuestionQueryEngine

query_engine_tools = [
    QueryEngineTool(
        query_engine=query_engine,
        metadata=ToolMetadata(
            name="pg_essay",
            description="Paul Graham essay on What I Worked On",
        ),
    ),
]

query_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=query_engine_tools,
    service_context=service_context,
    use_async=True,
)

In [27]:
response = query_engine.query(
    "How was Paul Grahams life different before, during, and after YC?"
)
print( ">>> The final response:\n", response )

ValidationError: 1 validation error for SubQuestion
__root__
  SubQuestion expected dict not str (type=type_error)

# Custom Retriever Engine

In [None]:
import cohere
import os

os.environ['COHERE_API_KEY'] = "<YOUR_COHERE_API_KEY>"

# Get your cohere API key on: www.cohere.com
co = cohere.Client(os.environ['COHERE_API_KEY'])

# Example query and passages
query = "What is the capital of the United States?"
documents = [
   "Carson City is the capital city of the American state of Nevada. At the  2010 United States Census, Carson City had a population of 55,274.",
   "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan.",
   "Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas.",
   "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. ",
   "Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.",
   "North Dakota is a state in the United States. 672,591 people lived in North Dakota in the year 2010. The capital and seat of government is Bismarck."
   ]

In [None]:
results = co.rerank(query=query, documents=docs, top_n=3, model='rerank-english-v2.0') # Change top_n to change the number of results returned. If top_n is not passed, all results will be returned.
for idx, r in enumerate(results):
  print(f"Document Rank: {idx + 1}, Document Index: {r.index}")
  print(f"Document: {r.document['text']}")
  print(f"Relevance Score: {r.relevance_score:.2f}")
  print("\n")

In [None]:
"""

Document Rank: 1, Document Index: 3
Document: Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America.
Relevance Score: 0.99

Document Rank: 2, Document Index: 1
Document: The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan.
Relevance Score: 0.30

Document Rank: 3, Document Index: 5
Document: Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment.
Relevance Score: 0.27

"""

In [None]:
import os
from llama_index.postprocessor.cohere_rerank import CohereRerank

cohere_rerank = CohereRerank(api_key=os.environ['COHERE_API_KEY'], top_n=2)

In [None]:
query_engine = vector_index.as_query_engine(
    similarity_top_k=10,
    node_postprocessors=[cohere_rerank],
)

response = query_engine.query(
    "What did Sam Altman do in this essay?",
)
print(response)