In [1]:
from llama_index.readers.file import PandasExcelReader
import importlib
from pathlib import Path

base_reader = PandasExcelReader()
base_docs = base_reader.load_data(Path("Employee_Sample_Data.xlsx"))

In [2]:
from llama_index.llms.together import TogetherLLM
from llama_index.core import Settings

together_llm = TogetherLLM(model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", 
api_key="87cac61702f938de91651d60977c8a2978163b011f9ba6e991f0e954b9c2c2ce")
Settings.llm = together_llm

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
Settings.chunk_size = 1024
Settings.chunk_overlap = 100
Settings.context_window = 8100

In [4]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)
Settings.embed_model = embed_model

In [5]:
import nest_asyncio
nest_asyncio.apply()

In [6]:
from llama_parse import LlamaParse
api_key = "llx-wL9ha5jpi9ZJJXl6eJHhyudfEpbl3braaUoQsMH3kwKluajm"

parser = LlamaParse(
    api_key=api_key,
    result_type="markdown",
)
documents = parser.load_data("./Tv_catalogue.xlsx")

Started parsing the file under job_id c6bdaa59-f9e3-4fca-b0dd-f7517b492474


In [7]:

# %pip install llama-index-vector-stores-faiss

In [8]:
# !pip install llama-index

In [9]:
from llama_index.core import VectorStoreIndex

index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()

In [10]:
# %pip install llama-index-vector-stores-qdrant llama-index-readers-file llama-index-embeddings-fastembed llama-index-llms-openai

In [11]:
from llama_index.core import (
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    ServiceContext,
)
from llama_index.vector_stores.qdrant import QdrantVectorStore
import qdrant_client

# Create a folder for local Qdrant Client
client = qdrant_client.QdrantClient(path="qdrant_excel")

# Create a folder text_collection Qdrant VectorStore
text_store = QdrantVectorStore(client=client, collection_name="text_collection")
storage_context = StorageContext.from_defaults(vector_store=text_store)
service_context = ServiceContext.from_defaults(llm=together_llm, chunk_size=1024, chunk_overlap=100, embed_model=embed_model)

# VectorStoreIndex will return index object
index2 = VectorStoreIndex.from_documents(documents, embed_model=embed_model, storage_context=storage_context)

  service_context = ServiceContext.from_defaults(llm=together_llm, chunk_size=1024, chunk_overlap=100, embed_model=embed_model)


In [12]:
from llama_index.core.postprocessor import SentenceTransformerRerank
# Initializing reranking model
rerank = SentenceTransformerRerank(model="cross-encoder/ms-marco-MiniLM-L-2-v2", top_n=7)



In [13]:
from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.core.prompts import ChatPromptTemplate

In [14]:
chat_text_qa_msgs = [
# Defines the system role message for the chat interaction like guidelines and tone for the AI model.
ChatMessage(
    role=MessageRole.SYSTEM,
    content=("""
        You are a AI Chatbot assistance to answer the question for the excel data i have given.
        Your goal is to answer questions as accurately as possible based on the instructions and context provided.
        After answering to the query at the end tell 'Let me Know if You Have any Queries'
        If the context is Outside the Excel date Don't answer related to the question.
        Just say that, 'The asked question is outside the context of the Excel data, I will answer to the questions related to the excel only'.
        If the context is Outside the Excel Don't try to make up an answer.
        For general questions like 'How are you?' or 'Who are you?', respond accordingly,"""
    ),
),
# Defines the system user message for the chat interaction like query and passing relevent chunks to the AI model.
ChatMessage(
    role=MessageRole.USER,
    content=(
        "Context information is below.\n"
        "---------------------\n"
        "{context_str}\n"
        "---------------------\n"
        "Given the context information and not prior knowledge, "
        "answer the question: {query_str} in bullet points or numbered list where appropriate.\n"
    ),
),
                    ]

In [15]:
text_qa_template = ChatPromptTemplate(chat_text_qa_msgs)

In [16]:
query_engine = index2.as_query_engine(similarity_top_k=3, node_postprocessors=[
                                                            rerank], text_qa_template=text_qa_template)

In [17]:
user_query = "How can you help me"
response__2 = query_engine.query(user_query)
response__2

Response(response='I can help you with the following:\n\n1. **Answering questions about the TV data**: I can provide information about the TVs listed in the context, such as their prices, display sizes, screen types, resolutions, and other features.\n2. **Filtering and sorting data**: I can help you filter the data based on specific criteria, such as brand, price range, or screen size, and sort the results in a specific order.\n3. **Comparing TVs**: I can assist you in comparing the features and prices of different TVs listed in the context.\n4. **Providing summary statistics**: I can provide summary statistics, such as the average price of TVs from a specific brand or the number of TVs with a certain feature.\n5. **Answering specific questions**: I can answer specific questions about the data, such as "What is the price of the Samsung 43 Inches Full HD LED TV?" or "Which TV has the highest speaker output RMS?"\n\nLet me know if you have any specific questions or if there\'s anything e

In [18]:
user_query = "List the Tv brands available"
response__2 = query_engine.query(user_query)
response__2

Response(response='Based on the provided Excel data, the TV brands available are:\n\n1. Samsung\n2. LG\n3. Panasonic\n\nLet me know if you have any queries.', source_nodes=[NodeWithScore(node=TextNode(id_='67fdb07b-767b-4e07-b4c7-5612450e477f', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='4fedd84d-4b58-4186-a7f6-d2fcd004708d', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='ff2825b404f1c0f63a1e3740c7ce34b1d334eb810a1a9d8ab42bb3a1c8038323'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='d78a9c08-fd8c-4b43-bc49-305eb27dc4bf', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='21505b245aee5c2278cfaa7fb12b7e7ed3bf2f8b1590deae4a301318c3cb6168')}, text='|Brand    |Product name                                                            |Model                                            |Model Name    |Price  |Display Size|Screen Type|Resolution Standard|No

In [19]:
user_query = "What was the maximum, minimum and average price of the TV"
response__2 = query_engine.query(user_query)
response__2

Response(response='Based on the provided Excel data, here are the maximum, minimum, and average prices of the TVs:\n\n* Maximum Price: ₹ 599,900 (Samsung 75 Inches Ultra HD (4K) LED Smart TV (75MU7000, Black))\n* Minimum Price: ₹ 14,499 (Samsung 32 Inches HD Ready LED Smart TV (Series 4 UA32N4305ARXXL, Black))\n* Average Price: ₹ 143,919.47 (calculated by summing up all the prices and dividing by the total number of TVs)\n\nLet me know if you have any queries.', source_nodes=[NodeWithScore(node=TextNode(id_='67fdb07b-767b-4e07-b4c7-5612450e477f', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='4fedd84d-4b58-4186-a7f6-d2fcd004708d', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='ff2825b404f1c0f63a1e3740c7ce34b1d334eb810a1a9d8ab42bb3a1c8038323'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='d78a9c08-fd8c-4b43-bc49-305eb27dc4bf', node_type=<ObjectType.TE

In [20]:
user_query = "Which brand has the highest number of TV models listed?"
response__2 = query_engine.query(user_query)
response__2

Response(response='Based on the provided context information, the answer to the question is:\n\n* LG has the highest number of TV models listed with 11 models.\n* Samsung has 10 models listed.\n* Panasonic has 3 models listed.\n\nLet me know if you have any queries.', source_nodes=[NodeWithScore(node=TextNode(id_='67fdb07b-767b-4e07-b4c7-5612450e477f', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='4fedd84d-4b58-4186-a7f6-d2fcd004708d', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='ff2825b404f1c0f63a1e3740c7ce34b1d334eb810a1a9d8ab42bb3a1c8038323'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='d78a9c08-fd8c-4b43-bc49-305eb27dc4bf', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='21505b245aee5c2278cfaa7fb12b7e7ed3bf2f8b1590deae4a301318c3cb6168')}, text='|Brand    |Product name                                                            |Model     

In [21]:
user_query = "List all the model name in the TV Brand Panasonic"
response__2 = query_engine.query(user_query)
response__2

Response(response='Here are the model names of Panasonic TVs listed in bullet points:\n\n• TH-65FX800D\n• TH-32ES480DX\n• TH-32FS600D\n• TH-55FX800D\n• TH-55FX600D\n• TH-49FX730D\n• TH-50FS600D\n• TH-55FX650D\n\nLet me know if you have any queries.', source_nodes=[NodeWithScore(node=TextNode(id_='67fdb07b-767b-4e07-b4c7-5612450e477f', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='4fedd84d-4b58-4186-a7f6-d2fcd004708d', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='ff2825b404f1c0f63a1e3740c7ce34b1d334eb810a1a9d8ab42bb3a1c8038323'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='d78a9c08-fd8c-4b43-bc49-305eb27dc4bf', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='21505b245aee5c2278cfaa7fb12b7e7ed3bf2f8b1590deae4a301318c3cb6168')}, text='|Brand    |Product name                                                            |Model                       

In [22]:
user_query = "List all the model name in the TV Brand Sony"
response__2 = query_engine.query(user_query)
response__2

Response(response='Here are the model names of Sony TVs listed in bullet points:\n\n• KD-55X8200E\n• KD-55X9000H\n• KLV-40W562D\n• KDL-43W800D\n• KLV-40W672E\n• KD-65X9300E\n• KD-49X7500E\n\nLet me know if you have any queries.', source_nodes=[NodeWithScore(node=TextNode(id_='67fdb07b-767b-4e07-b4c7-5612450e477f', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='4fedd84d-4b58-4186-a7f6-d2fcd004708d', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='ff2825b404f1c0f63a1e3740c7ce34b1d334eb810a1a9d8ab42bb3a1c8038323'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='d78a9c08-fd8c-4b43-bc49-305eb27dc4bf', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='21505b245aee5c2278cfaa7fb12b7e7ed3bf2f8b1590deae4a301318c3cb6168')}, text='|Brand    |Product name                                                            |Model                                            

In [23]:

%pip install llama-index-llms-groq

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Note: you may need to restart the kernel to use updated packages.
