In [1]:
from dotenv import load_dotenv
load_dotenv()
import os

In [2]:
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

client = NVIDIAEmbeddings(
  model="nvidia/llama-3.2-nv-embedqa-1b-v2", 
  api_key=os.getenv("NVIDIA_API_KEY"),
  truncate="NONE", 
  )

embedding = client.embed_query("What is the capital of France?")
print(embedding)

[-0.01519775390625, 0.040557861328125, -0.01161956787109375, 0.0278472900390625, 0.04833984375, 0.0193634033203125, -0.032623291015625, -0.0029201507568359375, -0.03680419921875, 0.003387451171875, -0.0019197463989257812, -0.01422119140625, -0.01528167724609375, -0.0010986328125, 0.00099945068359375, 0.017333984375, 0.015106201171875, 0.021514892578125, 0.01508331298828125, -0.004764556884765625, -0.0150909423828125, 0.005458831787109375, -0.0290985107421875, 0.0301055908203125, -0.03314208984375, -0.0019369125366210938, -0.02838134765625, -0.034759521484375, 0.02508544921875, 0.006561279296875, 0.0124664306640625, 0.0196533203125, -0.01392364501953125, 0.035491943359375, -0.0228729248046875, 0.0016717910766601562, 0.0316162109375, -0.01372528076171875, 0.023529052734375, 0.016326904296875, -0.0548095703125, -0.0094757080078125, -0.0188751220703125, -0.03106689453125, -0.01397705078125, -0.020355224609375, -0.034210205078125, 0.03253173828125, -0.00921630859375, 0.015228271484375, 0.01

In [3]:
from langchain_core.documents import Document

docmuents = [
    Document(id="1", page_content="Ate pizza for 20$ in New York City."),
        Document(id="2", page_content="movie for 10 on 10-01-2025"),
        Document(id="3", page_content="food for 5 yesterday"),
]

In [4]:
from langchain.vectorstores import Chroma

# Directory to store Chroma DB
persist_directory = "../vector_db"

# Initialize Chroma with embedding model
vector_db = Chroma(embedding_function=client, persist_directory=persist_directory)


  vector_db = Chroma(embedding_function=client, persist_directory=persist_directory)


In [5]:
vector_db.add_texts([doc.page_content for doc in docmuents])

['3a1384b1-b130-4374-8e34-12c8b613914c',
 'd6f6e5b3-4b88-4b93-b45a-5217ff60da1b',
 '26f19eff-551f-41a1-b742-7ff7419c5bca']

In [6]:
vector_db.similarity_search("what i spend for 20", k=3)

[Document(metadata={}, page_content='Ate pizza for 20$ in New York City.'),
 Document(metadata={}, page_content='food for 5 yesterday'),
 Document(metadata={}, page_content='movie for 10 on 10-01-2025')]

In [7]:
import os
from dotenv import load_dotenv
from langchain_groq import ChatGroq
load_dotenv()

os.environ['GROQ_API_KEY'] = os.getenv('GROQ_API_KEY')

In [8]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, VectorDBQA

In [9]:
llm = ChatGroq(model='llama-3.3-70b-versatile')

In [10]:
query_prompt = PromptTemplate(
    template=(
        "You are an assistant for a personal expense management tool. "
        "The user may ask questions about their expenses. Interpret the user's query accurately, "
        "extract relevant details, and frame a clear search query for retrieving data "
        "for your information, today's date is {current_date}."
        "keep the output consice"
        "\n\n"
        "User Query: {query} \n\n"
    ),
    input_variables=["query","current_date"]
)
chain = LLMChain(prompt=query_prompt, llm=llm)

  chain = LLMChain(prompt=query_prompt, llm=llm)


In [11]:
qa_chain = VectorDBQA.from_chain_type(
    llm=llm,
    vectorstore=vector_db,
    chain_type="stuff"  # Combine content into the response
)



In [12]:
def query_expenses(user_query):
    # Use LLM to refine query
    refined_query = chain.run(query=user_query, current_date="11-01-2025")

    # Perform semantic search
    results = qa_chain.run(refined_query)

    return results

In [13]:
query = "total how much money did i spend?"

# Process query
response = query_expenses(query)

  refined_query = chain.run(query=user_query, current_date="11-01-2025")
Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


In [14]:
response

'Based on the provided context, let\'s assume the expenses data is stored in a table called "expenses" with the following columns:\n\n- id (primary key)\n- user_id\n- amount\n- category\n- date\n\nAnd the data is:\n\n| id | user_id | amount | category | date       |\n|----|---------|--------|----------|------------|\n| 1  | [user_id] | 20    | food     | (unknown)  |\n| 2  | [user_id] | 5     | food     | 14-01-2025 |\n| 3  | [user_id] | 10    | movie    | 10-01-2025 |\n\nThe SQL query you provided:\n\n```sql\nSELECT SUM(amount) FROM expenses WHERE user_id = [user_id]\n```\n\nWill return the sum of all expenses for the given user_id across all categories and time periods.\n\nThe result would be:\n\n35 (20 for pizza + 5 for food + 10 for movie)\n\nNote: The date for the pizza expense is not provided, so it\'s not possible to retrieve the exact date of the pizza purchase.'

In [15]:
json_res = {"userId": "987h78", "action": "CREATE", "amount": 30.0, "category": "Food", "date": "15-01-2025", "description": "pizza expense of 30 on today (15-01-2025)"}

In [16]:
from langchain_core.documents import Document

doc = Document(
    page_content=json_res["description"],
    metadata={
        "userId": json_res["userId"],
        "action": json_res["action"],
        "amount": json_res["amount"],
        "category": json_res["category"],
        "date": json_res["date"]
    }
)

In [17]:
doc

Document(metadata={'userId': '987h78', 'action': 'CREATE', 'amount': 30.0, 'category': 'Food', 'date': '15-01-2025'}, page_content='pizza expense of 30 on today (15-01-2025)')

In [18]:
vector_db.add_documents([doc])

['19e60c2a-2423-4fec-b47a-255efa7f3e67']

In [19]:
vector_db.similarity_search_with_relevance_scores("what i spend today (15-01-2025)", k=3)

  vector_db.similarity_search_with_relevance_scores("what i spend today (15-01-2025)", k=3)


[(Document(metadata={'action': 'CREATE', 'amount': 30.0, 'category': 'Food', 'date': '15-01-2025', 'userId': '987h78'}, page_content='pizza expense of 30 on today (15-01-2025)'),
  0.18445400992034278),
 (Document(metadata={}, page_content='movie for 10 on 10-01-2025'),
  -0.05016164506345899),
 (Document(metadata={}, page_content='food for 5 yesterday'),
  -0.13889979253722662)]