In [30]:
# Silence every Python warning for the rest of the session (no category is given, so all are ignored).
import warnings
warnings.filterwarnings("ignore")
import os
# NOTE(review): an empty CURL_CA_BUNDLE is a common workaround that makes HTTP clients skip TLS
# certificate verification - confirm this is intentional and only used on a trusted network.
os.environ['CURL_CA_BUNDLE'] = ""

In [31]:
from llama_index.llms.ollama import Ollama

# Ollama-served Llama 3.1 8B instruct model (q8_0 tag); this object is registered on Settings and
# also passed to the agent further down.
# An earlier iteration used model="llama3:8b-instruct-q8_0" with request_timeout=120.0.
# temperature is set once, here in the constructor - no post-construction reassignment is needed.
llm = Ollama(model="llama3.1:8b-instruct-q8_0", request_timeout=150.0, temperature=0.1)

In [32]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# No model name is passed, so whichever embedding model HuggingFaceEmbedding defaults to is used.
# NOTE(review): consider pinning the model name explicitly so the index is reproducible.
embed_model = HuggingFaceEmbedding()

In [33]:
from llama_index.core import Settings
from llama_index.core import PromptTemplate
from llama_index.core.agent import ReActAgent
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.vector_stores import MetadataFilters
from llama_index.core import VectorStoreIndex
from llama_index.core.vector_stores.types import ExactMatchFilter, MetadataFilters, MetadataFilter
from llama_index.core.vector_stores import FilterOperator, FilterCondition
from typing import List, Optional
from llama_index.core.vector_stores import FilterCondition
from llama_index.core.tools import FunctionTool

# Register the LLM and the embedding model on the llama_index Settings object; components built
# later without explicit llm / embed_model arguments presumably pick these up (confirm).
Settings.llm = llm
Settings.embed_model = embed_model

In [34]:
import nest_asyncio

# Presumably required because the notebook kernel already runs an event loop and later calls
# need to run async code inside it - confirm against the llama_index version in use.
nest_asyncio.apply()

In [35]:
# Folder holding the blog-post PDFs. NOTE(review): the trailing "//" looks accidental - confirm.
transcript_directory = r"./data/Sandy_blogspot_pdf//"

# Derive post_title and post_year for each chunk from the file name, which is expected to
# follow the pattern "<post_title>__<post_year>.<ext>".
def filename_fn(filename):
    """Build the extra metadata for one document from its file name.

    Args:
        filename: Path (or bare name) of the file, e.g. "A decade of transition__2020.pdf".

    Returns:
        {'post_title': ..., 'post_year': ...} when the stem contains "__".
        {'post_title': <whole stem>} when it does not, so a stray file in the folder
        no longer aborts loading with an IndexError.
    """
    stem = os.path.splitext(os.path.basename(filename))[0]
    # Split on the LAST "__" so a title that itself contains "__" keeps its full text
    # and the trailing field is always treated as the year.
    title, separator, year = stem.rpartition('__')
    if not separator:
        return {'post_title': stem}
    return {'post_title': title, 'post_year': year}

# Load every file found under transcript_directory. filename_fn is passed as file_metadata, which
# is where the post_title / post_year entries in each document's metadata come from.
documents = SimpleDirectoryReader(transcript_directory, filename_as_id=True, 
                                  file_metadata=filename_fn).load_data()

In [36]:
# Spot-check the metadata of one loaded document (index 12 looks like an arbitrary sample).
documents[12].metadata
# Output recorded from an earlier run:
# ----------------------
# {'page_label': '1',
#  'file_name': 'A decade of transition__2020.pdf',
#  'post_title': 'A decade of transition',
#  'post_year': '2020'}

{'page_label': '1',
 'file_name': 'A decade of transition__2020.pdf',
 'post_title': 'A decade of transition',
 'post_year': '2020'}

In [37]:
# Chunk the loaded documents with SentenceSplitter (chunk_size=1024) and build the vector index
# over the resulting nodes. `nodes` and `sandy_index` are reused by later cells.
splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents)
sandy_index = VectorStoreIndex(nodes)

In [38]:
# print(nodes[0].get_content(metadata_mode='all'))

In [39]:
# Sanity-check metadata filtering directly on the index.
# With FilterCondition.OR a chunk qualifies when EITHER filter matches:
# post_year "2018" or post_title "Dreams Do Come True".
filters = MetadataFilters(
    filters=[
        MetadataFilter(key="post_year", value="2018"),
        MetadataFilter(key="post_title", value="Dreams Do Come True")
    ],
    condition=FilterCondition.OR 
)

# Query engine over the index with the filters above, asking for the top 5 similar chunks.
query_engine_test = sandy_index.as_query_engine(
    similarity_top_k=5,
    filters=filters,
)

In [40]:
# Only retrieve the matching nodes here (they are iterated in the next cell to inspect their
# metadata); the commented-out variant below runs the full query and prints the answer instead.
response = query_engine_test.retrieve(
    "What is mentioned about Ford EcoSport?"
)

# response = query_engine_test.query(
#     "What is mentioned about Ford EcoSport?"
# )
# print(str(response))

# print(str(response))

In [41]:
# Print the metadata of each retrieved node to confirm that every hit satisfies the OR filter
# (post_year '2018' or post_title 'Dreams Do Come True').
for i in response:
    print(i.metadata)

{'page_label': '4', 'file_name': 'Car Drives - So Far__2018.pdf', 'post_title': 'Car Drives - So Far', 'post_year': '2018'}
{'page_label': '1', 'file_name': 'Road Trip to Pondicherry__2018.pdf', 'post_title': 'Road Trip to Pondicherry', 'post_year': '2018'}
{'page_label': '1', 'file_name': 'Car Drives - So Far__2018.pdf', 'post_title': 'Car Drives - So Far', 'post_year': '2018'}
{'page_label': '3', 'file_name': 'Car Drives - So Far__2018.pdf', 'post_title': 'Car Drives - So Far', 'post_year': '2018'}
{'page_label': '3', 'file_name': 'Dreams Do Come True__2018.pdf', 'post_title': 'Dreams Do Come True', 'post_year': '2018'}


In [42]:
# for n in response.source_nodes:
#     print(n.metadata)

### Define an Auto-Retrieval Tool

In [43]:
def _as_str_list(values) -> List[str]:
    """Normalise a tool argument into a list of strings (None -> [], scalar -> [scalar])."""
    if values is None:
        return []
    if isinstance(values, (str, int)):
        # A bare "2020" would otherwise be iterated character by character.
        values = [values]
    return [str(value) for value in values]


def post_specific(input: str, post_year: Optional[List[str]] = None, post_title: Optional[List[str]] = None, conditionType: Optional[str] = 'AND') -> str:
    """Answer a question using only chunks whose metadata matches the given years / titles.

    Args:
        input: The natural-language question forwarded to the query engine.
        post_year: Years to filter on. None, a single value, or a list are accepted;
            values are coerced to str because the index stores post_year as a string.
        post_title: Post titles to filter on; same normalisation as post_year.
        conditionType: 'OR' (any case, surrounding whitespace ignored) combines the filters
            with FilterCondition.OR; anything else, including None, uses FilterCondition.AND.

    Returns:
        The query engine's response rendered as a string, matching the declared return type.

    Note:
        With 'AND', several values for the same key (e.g. two different years) can never all
        match a single chunk, so the result is empty; pass 'OR' for "any of these".
    """
    metadata_dicts = [
        {'key': 'post_year', 'value': year} for year in _as_str_list(post_year)
    ] + [
        {'key': 'post_title', 'value': title} for title in _as_str_list(post_title)
    ]

    # The argument comes from an LLM, so be lenient about case and whitespace.
    wants_or = isinstance(conditionType, str) and conditionType.strip().upper() == 'OR'
    condition = FilterCondition.OR if wants_or else FilterCondition.AND

    query_engine = sandy_index.as_query_engine(
        similarity_top_k=5,
        filters=MetadataFilters.from_dicts(
            metadata_dicts,
            condition=condition
        )
    )

    return str(query_engine.query(input))


# Expose post_specific to the agent as a callable tool.
# The description is built from adjacent string literals rather than a backslash continuation
# inside one literal, which would embed the next line's indentation in the text sent to the LLM.
post_specific_tool = FunctionTool.from_defaults(
    name='post_specific',
    description=(
        "Use this tool to fetch year or title specific blog articles. "
        "Provide the original query as input to the tool to get relevant chunks"
    ),
    fn=post_specific
)

In [44]:
# Query engine over the whole index with no metadata filters, asking for the top 5 similar chunks.
sandy_engine = sandy_index.as_query_engine(
    similarity_top_k=5
)

In [47]:
# Tools offered to the agent: the unfiltered "sandy" query engine for generic questions, plus
# the metadata-filtered post_specific tool for year- or title-specific ones.
query_engine_tools = [
    QueryEngineTool(
        query_engine=sandy_engine,
        metadata=ToolMetadata(
            name="sandy",
            description=(
                "Provides generic information about Sandy or author blog post articles. "
                "Use a detailed plain text question as input to the tool."
            ),
        ),
    ),
    post_specific_tool
]

In [48]:
# Extra context text handed to the agent together with the tool list.
context = """\
You are helpful assistant and provided with blog posts articles.
Answer the questions asked based on the blogposts.
"""

# ReAct agent over both tools; verbose=True prints the Thought / Action / Observation trace
# seen in the outputs of the query cells.
agent = ReActAgent.from_tools(
    query_engine_tools,
    llm=llm,
    verbose=True,
    context=context
)

In [49]:
{'page_label': '1', 'file_name': 'Dreams Do Come True__2018.pdf', 'post_title': 'Dreams Do Come True', 'post_year': '2018'}
{'page_label': '5', 'file_name': 'Inch by Inch__2018.pdf', 'post_title': 'Inch by Inch', 'post_year': '2018'}
{'page_label': '1', 'file_name': 'Car Drives - So Far__2018.pdf', 'post_title': 'Car Drives - So Far', 'post_year': '2018'}
{'page_label': '3', 'file_name': 'Inch by Inch__2018.pdf', 'post_title': 'Inch by Inch', 'post_year': '2018'}
{'page_label': '2', 'file_name': 'Inch by Inch__2018.pdf', 'post_title': 'Inch by Inch', 'post_year': '2018'}

{'page_label': '2',
 'file_name': 'Inch by Inch__2018.pdf',
 'post_title': 'Inch by Inch',
 'post_year': '2018'}

In [52]:
%%time
# reset() is called first, presumably so state from earlier queries does not leak into this one.
agent.reset()
# In the recorded run the agent called post_specific with post_year=['2017', '2021'] and
# conditionType='AND', which returned "Empty Response".
response = agent.query("Give me name of the article which was published in 2017 and again in 2021?")

# Prefix the final answer so it stands out below the verbose agent trace.
print('\n\n---'+str(response))

> Running step 2d8644e7-b919-402b-bab5-a329f543fdeb. Step input: Give me name of the article which was published in 2017 and again in 2021?
[1;3;38;5;200mThought: The current language of the user is English. I need to use a tool to help me answer the question.
Action: post_specific
Action Input: {'input': 'article published in 2017 and again in 2021', 'post_year': ['2017', '2021'], 'conditionType': 'AND'}
[0m[1;3;34mObservation: Empty Response
[0m> Running step 6a7268c7-983a-402c-8eb7-bb9c2e4dc511. Step input: None
[1;3;38;5;200mThought: The tool did not provide any relevant information. I need to try another approach.
Action: post_specific
Action Input: {'input': 'article published in 2017 and again in 2021', 'post_title': [], 'conditionType': 'AND'}
[0m[1;3;34mObservation: There is no article that was published in both 2017 and 2021. However, there are two articles from different years with similar themes.

One article, titled "Are You Giving Your Best", was published in 2017.

In [None]:
# 'Dreams Do Come True' was published in 2018, so this query needs that post plus every article from 2020.

In [55]:
%%time
# reset() is called first, presumably so state from earlier queries does not leak into this one.
agent.reset()
# The request mixes a year constraint (2020) with a title constraint; in the recorded run the
# agent passed both to post_specific with conditionType='OR'.
response = agent.query("Give one line summary for each of the following articles - \
1. articles published in 2020 and \
2. article titled - 'Dreams Do Come True'?")

# Prefix the final answer so it stands out below the verbose agent trace.
print('\n\n---'+str(response))

> Running step 1be30c00-dce7-4d2e-9ede-2e8720ba264b. Step input: Give one line summary for each of the following articles - 1. articles published in 2020 and 2. article titled - 'Dreams Do Come True'?
[1;3;38;5;200mThought: The user has asked me to provide a one-line summary for two specific articles. I need to use a tool to help me answer the question.
Action: post_specific
Action Input: {'input': "Provide a one-line summary for each of the following articles - 1. articles published in 2020 and 2. article titled - 'Dreams Do Come True'?", 'post_year': ['2020'], 'post_title': ['Dreams Do Come True'], 'conditionType': 'OR'}
[0m[1;3;34mObservation: Here are the summaries:

1. Articles published in 2020:
The author reflects on their experiences with rejections in 2019, including job opportunities, higher studies, and marriage proposals, and how they have grown from these setbacks.

2. Article titled - 'Dreams Do Come True':
The author shares stories of how their childhood dreams and wi