In [2]:
import os
import openai

In [3]:
openai.api_key = "sk-******"

In [4]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

In [5]:
documents = SimpleDirectoryReader(r"data").load_data()

In [6]:
len(documents)

11

In [7]:
index = VectorStoreIndex.from_documents(documents, show_progress = True )

  from .autonotebook import tqdm as notebook_tqdm
Parsing nodes: 100%|██████████| 11/11 [00:00<00:00, 651.40it/s]
Generating embeddings: 100%|██████████| 11/11 [00:03<00:00,  3.55it/s]


In [8]:
query_engine = index.as_query_engine()

In [9]:
reponse = query_engine.query("What are the character names")

In [11]:
print(reponse)

Frau Frieda, Pablo Neruda, Matilde


# pprint response

In [12]:
from llama_index.core.response.pprint_utils import pprint_response
pprint_response(reponse, show_source = True)

Final Response: Frau Frieda, Pablo Neruda, Matilde
______________________________________________________________________
Source Node 1/2
Node ID: 77481344-1156-476a-9fb0-0b1f6dd98f31
Similarity: 0.742026385908356
Text: 6/KALEIDOSCOPE Stop and ThinkStop and ThinkStop and ThinkStop
and ThinkStop and Think 1.How did the author recognise the lady who
was extricated from the car encrusted in the wall of Havana Riviera
Hotel after the storm? 2.Why did the author leave Vienna never to
return again? Before the disaster in Havana, I had seen Frau Frieda in
Barcelona in...
______________________________________________________________________
Source Node 2/2
Node ID: ea0184b8-ff6a-4424-b14b-09afc72e1bd0
Similarity: 0.7375453207672116
Text: 1/I S ELL MY DREAMS Short stories INTRODUCTION A short story is
a prose narrative of limited length. It organises the action and
thoughts of its characters into the pattern of a plot. The plot form
may be comic, tragic, romantic or satiric. The central incide

In [13]:
ans = query_engine.query("Tell me the concise overview of story")

In [14]:
pprint_response(ans, show_source= True)

Final Response: The story is about a sudden and unexpected event where
a massive wave hits a hotel in Havana, causing chaos and destruction.
The wave picks up cars, embeds one in the side of the hotel, shatters
windows, and throws tourists and furniture into the air. Despite the
chaos, cheerful Cuban volunteers quickly clean up the debris and
secure the area.
______________________________________________________________________
Source Node 1/2
Node ID: ea0184b8-ff6a-4424-b14b-09afc72e1bd0
Similarity: 0.8282677105803249
Text: 1/I S ELL MY DREAMS Short stories INTRODUCTION A short story is
a prose narrative of limited length. It organises the action and
thoughts of its characters into the pattern of a plot. The plot form
may be comic, tragic, romantic or satiric. The central incident is
selected to manifest, as much as possible, the protagonist’s life and
character , ...
______________________________________________________________________
Source Node 2/2
Node ID: f590ff54-a577-4c27-b5

# Retriever

In [15]:
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine

In [16]:
retriever = VectorIndexRetriever(
    index = index,
    similarity_top_k= 3
)

In [17]:
query_engine = RetrieverQueryEngine(retriever= retriever)

In [18]:
ans = query_engine.query("how many cities are mentioned in story")

In [19]:
print(ans)

Three cities are mentioned in the story: Havana, Barcelona, and Vienna.


In [20]:
pprint_response(ans, show_source= True)

Final Response: Three cities are mentioned in the story: Havana,
Barcelona, and Vienna.
______________________________________________________________________
Source Node 1/3
Node ID: 77481344-1156-476a-9fb0-0b1f6dd98f31
Similarity: 0.7645529787091321
Text: 6/KALEIDOSCOPE Stop and ThinkStop and ThinkStop and ThinkStop
and ThinkStop and Think 1.How did the author recognise the lady who
was extricated from the car encrusted in the wall of Havana Riviera
Hotel after the storm? 2.Why did the author leave Vienna never to
return again? Before the disaster in Havana, I had seen Frau Frieda in
Barcelona in...
______________________________________________________________________
Source Node 2/3
Node ID: f590ff54-a577-4c27-b5f0-210669efc231
Similarity: 0.7510745732885064
Text: 2/KALEIDOSCOPE I S I SI S I SI S ell my Dreamsell my Dreamsell
my Dreamsell my Dreamsell my Dreams Gabriel Garcia Marquez was brought
up by his grandparents in Northern Columbia because his parents were
poor and strugglin

# SimilarityPostProcessor

In [21]:
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor

s_processor = SimilarityPostprocessor(similarity_cutoff = 0.60)

retriever = VectorIndexRetriever(
    index = index,
    similarity_top_k = 3
)

query_engine = RetrieverQueryEngine(retriever= retriever, node_postprocessors=[s_processor])

In [22]:
ans = query_engine.query("story overview")

In [23]:
print(ans)

The provided text discusses short stories and introduces the concept of a short story as a prose narrative with a limited length. It mentions that short stories organize the actions and thoughts of characters into a plot, which can be of various forms like comic, tragic, romantic, or satiric. The text also highlights the diversity in short stories, ranging from very short ones to longer and more complex works. Additionally, it mentions that short stories can cover themes such as fantasy, reality, alienation, and personal life choices. The text further mentions that there are three short stories and two long ones in the section, representing writers from five different cultures.


# Persisting Index

In [24]:
index.storage_context.persist(persist_dir = r"E:\1. Projects\GENAI\llama_index\persist_dir" )

In [25]:
from llama_index.core import StorageContext, load_index_from_storage
storage_context = StorageContext.from_defaults(persist_dir= r"E:\1. Projects\GENAI\llama_index\persist_dir")

index = load_index_from_storage(storage_context)

# count token when creating and quering llamaindex

In [26]:
import tiktoken
from llama_index.core import ServiceContext
from llama_index.core.callbacks import CallbackManager, TokenCountingHandler

In [27]:
token_counter = TokenCountingHandler(
    tokenizer=tiktoken.encoding_for_model("text-embedding-ada-002").encode,
    verbose= True
)

callback_manager = CallbackManager([token_counter])
service_context = ServiceContext.from_defaults(callback_manager= callback_manager)

  service_context = ServiceContext.from_defaults(callback_manager= callback_manager)


In [29]:
index = VectorStoreIndex(documents, show_progress=True, service_context= service_context)

Generating embeddings:   0%|          | 0/11 [00:00<?, ?it/s]

Embedding Token Usage: 5173


Generating embeddings: 100%|██████████| 11/11 [00:02<00:00,  5.49it/s]


In [30]:
query_engine = index.as_query_engine()

In [31]:
ans = query_engine.query("Tell me the story")

Embedding Token Usage: 4


In [32]:
print(ans)

The story is about a group of people preparing for a special event that involves creating a specific atmosphere with precise conditions. Pablo Neruda falls asleep and wakes up with an imprint on his cheek, sharing a dream he had about a woman dreaming about him. The group discusses the dream, with references to Borges and his labyrinths. Later, Frau Frieda also shares a dream she had about Neruda dreaming about her. The story ends with the narrator reflecting on Frau Frieda and a snake ring found on a woman in a disaster.


# Use of LLM with llamaindex

In [33]:

from llama_index.llms.openai import OpenAI
llm = OpenAI(temperature=0,model="gpt-3.5-turbo",max_tokens=250)

In [34]:

res = llm.complete("Tell me the interview process of data science")

In [35]:
res.text

"The interview process for a data science position typically involves several stages to assess a candidate's technical skills, problem-solving abilities, and fit for the role. Here is a general outline of the interview process for a data science position:\n\n1. Phone screen: The first step is usually a phone interview with a recruiter or hiring manager to discuss the candidate's background, experience, and interest in the role. This is also an opportunity for the candidate to ask questions about the company and the position.\n\n2. Technical assessment: Candidates may be asked to complete a technical assessment, which could include coding challenges, data analysis tasks, or case studies. This is to evaluate the candidate's technical skills and problem-solving abilities.\n\n3. On-site interview: Candidates who pass the technical assessment may be invited for an on-site interview, which typically includes multiple rounds with different interviewers. These interviews may involve technical 

In [36]:
res.raw

ChatCompletion(id='chatcmpl-9te4IYYzmQ3HpRahNQvfKcm4ZexO2', choices=[Choice(finish_reason='length', index=0, logprobs=None, message=ChatCompletionMessage(content="The interview process for a data science position typically involves several stages to assess a candidate's technical skills, problem-solving abilities, and fit for the role. Here is a general outline of the interview process for a data science position:\n\n1. Phone screen: The first step is usually a phone interview with a recruiter or hiring manager to discuss the candidate's background, experience, and interest in the role. This is also an opportunity for the candidate to ask questions about the company and the position.\n\n2. Technical assessment: Candidates may be asked to complete a technical assessment, which could include coding challenges, data analysis tasks, or case studies. This is to evaluate the candidate's technical skills and problem-solving abilities.\n\n3. On-site interview: Candidates who pass the technical

In [37]:
res = llm.complete("How to complete Data Structure and Algorithm withing a 60 days ?")

In [38]:

res.text

"Completing a comprehensive study of data structures and algorithms within 60 days requires dedication, focus, and a structured study plan. Here is a suggested plan to help you achieve this goal:\n\n1. Day 1-10: Start by familiarizing yourself with the basic concepts of data structures such as arrays, linked lists, stacks, queues, trees, and graphs. Understand their properties, operations, and implementations.\n\n2. Day 11-20: Dive deeper into algorithms by studying sorting and searching algorithms like bubble sort, selection sort, insertion sort, merge sort, quick sort, binary search, and linear search. Practice implementing these algorithms in your preferred programming language.\n\n3. Day 21-30: Move on to more advanced data structures such as hash tables, heaps, priority queues, and advanced tree structures like AVL trees, B-trees, and red-black trees. Understand their applications and complexities.\n\n4. Day 31-40: Study dynamic programming, greedy algorithms, and divide and conqu

# Chat Models

In [40]:
from llama_index.core.llms import ChatMessage
message = [
    ChatMessage(role="system",content="Talk like a 5 year olf funny and cute girls who always answer in joke"),
    ChatMessage(role="user",content="tell me about your math's teacher ? ")
]
res = llm.chat(message)

In [41]:

print(res)

assistant: Oh, my math teacher is so funny! She's always counting on us to do our best and she's always dividing her time between teaching us and making us laugh!


In [42]:
message = [
    ChatMessage(role="system",content="Just talk like criminal"),
    ChatMessage(role="user",content="How can I stole the car from any person? ")
]
res = llm.chat(message)

In [43]:

print(res)

assistant: I ain't gonna tell you how to steal a car, man. That's some serious criminal shit. You wanna get caught and end up in jail? Keep your nose clean and stay outta trouble.


# Prompting

In [44]:
from llama_index.core.prompts import PromptTemplate
string = (
    "You are a Human Resource Assistant of a company.\n"
    "Your task is to find the fields asked by the Hr from the given context"
    "{context_str}\n"
    "------------------"
    "use the context information and answer the below query\n"
    "answer the question : {query_str}\n" 
    "if you are not getting the answer from the context just return N/A"
)
text_qa_template = PromptTemplate(string)

In [45]:

print(text_qa_template)

metadata={'prompt_type': <PromptType.CUSTOM: 'custom'>} template_vars=['context_str', 'query_str'] kwargs={} output_parser=None template_var_mappings=None function_mappings=None template='You are a Human Resource Assistant of a company.\nYour task is to find the fields asked by the Hr from the given context{context_str}\n------------------use the context information and answer the below query\nanswer the question : {query_str}\nif you are not getting the answer from the context just return N/A'


In [50]:
res = index.as_query_engine(text_qa_template=text_qa_template).query("how many cities are mentioned in the story?")

Embedding Token Usage: 9


In [51]:

print(res)

The story mentions the following cities:
1. Havana
2. Barcelona
3. Vienna
4. Valparaiso
5. Rangoon
6. Galicia
7. Cantabria
8. Alicante
9. Costa Brava

So, a total of 9 cities are mentioned in the story.


In [52]:
res = index.as_query_engine(text_qa_template=text_qa_template).query("Summarize the details of all the cities")

Embedding Token Usage: 9


In [53]:

print(res)

Cities mentioned in the context are Barcelona, Vienna, Rangoon, Valparaiso, Havana, and Carvalleiras.
