In [1]:
import pandas as pd
from langchain_core.documents import Document

In [2]:
# Load the Ford listings CSV (path is relative to the notebook's working directory)
# and preview the first rows.
df = pd.read_csv('data/ford.csv')
df.head()

Unnamed: 0,model,year,price,transmission,mileage,fuelType,tax,mpg,engineSize
0,Fiesta,2017,12000,Automatic,15944,Petrol,150,57.7,1.0
1,Focus,2018,14000,Manual,9083,Petrol,150,57.7,1.0
2,Focus,2017,13000,Manual,12456,Petrol,150,57.7,1.0
3,Fiesta,2019,17500,Manual,10460,Petrol,145,40.3,1.5
4,Fiesta,2019,16500,Automatic,1482,Petrol,145,48.7,1.0


In [3]:
df.columns

Index(['model', 'year', 'price', 'transmission', 'mileage', 'fuelType', 'tax',
       'mpg', 'engineSize'],
      dtype='object')

In [4]:
df.shape

(17966, 9)

In [5]:
# Keep only the listing attributes used further down; tax and mpg are dropped.
kept_columns = [
    'model',
    'year',
    'price',
    'transmission',
    'mileage',
    'fuelType',
    'engineSize',
]
df = df[kept_columns]
df.head()

Unnamed: 0,model,year,price,transmission,mileage,fuelType,engineSize
0,Fiesta,2017,12000,Automatic,15944,Petrol,1.0
1,Focus,2018,14000,Manual,9083,Petrol,1.0
2,Focus,2017,13000,Manual,12456,Petrol,1.0
3,Fiesta,2019,17500,Manual,10460,Petrol,1.5
4,Fiesta,2019,16500,Automatic,1482,Petrol,1.0


In [105]:
# Trim surrounding whitespace in every object-dtype (text) column; other columns pass through untouched.
df = df.apply(
    lambda column: column.str.strip() if column.dtype == "object" else column
)

In [117]:
# Store prices as floats; astype returns a new frame, so re-running this cell is harmless.
df = df.astype({"price": float})

In [118]:
# Build one plain dict per listing holding only the fields used to create documents.
# to_dict("records") replaces the row-by-row iterrows() loop, and removes the loop
# variable that was named `object`: that name shadowed the Python builtin and stayed
# rebound in the kernel for every later cell.
product_list = df[["model", "year", "price"]].to_dict("records")

In [120]:
product_list[0]

{'model': 'Fiesta', 'year': 2017, 'price': 12000.0}

In [121]:
len(product_list)

17966

In [122]:
product_list[0]

{'model': 'Fiesta', 'year': 2017, 'price': 12000.0}

In [123]:
from langchain_core.documents import Document

In [125]:
# One Document per listing: the model name is the text content,
# while price and year travel along as metadata.
docs = [
    Document(
        page_content=entry['model'],
        metadata={"price": entry['price'], "year": entry['year']},
    )
    for entry in product_list
]

In [126]:
docs[0]

Document(metadata={'price': 12000.0, 'year': 2017}, page_content='Fiesta')

In [127]:
len(docs)

17966

In [None]:
# Service credentials and endpoints are read from the environment so that no secret
# is stored in the notebook.
# SECURITY: this cell previously contained literal API keys/tokens in plain text.
# Treat those values as compromised and revoke/rotate them with each provider.
import os
import warnings


def read_env_setting(name, default=None):
    """Return the environment variable ``name``, or ``default`` when it is unset.

    A warning is emitted when the resulting value is missing or empty, so the
    problem is visible here rather than only at the first API call that needs it.
    """
    value = os.environ.get(name, default)
    if not value:
        warnings.warn(
            f"Environment variable {name} is not set; calls that need it will fail.",
            stacklevel=2,
        )
    return value


GROQ_API_KEY = read_env_setting("GROQ_API_KEY")
ASTRA_DB_API_ENDPOINT = read_env_setting("ASTRA_DB_API_ENDPOINT")
ASTRA_DB_APPLICATION_TOKEN = read_env_setting("ASTRA_DB_APPLICATION_TOKEN")
# The keyspace is not a secret, so the previous literal is kept as the fallback.
ASTRA_DB_KEYSPACE = read_env_setting("ASTRA_DB_KEYSPACE", "default_keyspace")
HF_TOKEN = read_env_setting("HF_TOKEN")

In [129]:
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings

In [130]:
# Embedding client backed by the Hugging Face Inference API.
embedding = HuggingFaceInferenceAPIEmbeddings(
    api_key=HF_TOKEN,
    model_name="BAAI/bge-base-en-v1.5",
)

In [131]:
from langchain_astradb import AstraDBVectorStore

In [103]:
# Vector store handle for the "ford_bot" collection in Astra DB,
# using the embedding client defined earlier in the notebook.
vstore = AstraDBVectorStore(
    embedding=embedding,
    collection_name="ford_bot",
    api_endpoint=ASTRA_DB_API_ENDPOINT,
    token=ASTRA_DB_APPLICATION_TOKEN,
    namespace=ASTRA_DB_KEYSPACE,
)

In [134]:
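# NOTE(review): ingestion is commented out, so retrieval presumably relies on the
# "ford_bot" collection already holding these documents - confirm before a fresh run.
#insert_ids = vstore.add_documents(docs)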
# for doc in docs:
#     try:
#        insert_ids = vstore.add_documents([doc])
#     except ValueError as e:
#         print(f"Error in document: {doc}")
#         print(e)

In [135]:
from langchain_groq import ChatGroq

In [136]:
# Groq-hosted chat model, used both to rewrite questions and to write the answers.
model = ChatGroq(
    groq_api_key=GROQ_API_KEY,
    model="llama-3.1-70b-versatile",
    temperature=0.5,
)

In [137]:
# System prompt for the question-rewriting step.
# Adjacent string literals are concatenated verbatim, so each piece ends with an
# explicit space; without it the sentences ran together ("history,formulate",
# "history.Do NOT").
retriever_prompt = (
    "Given a chat history and the latest user question which might reference context in the chat history, "
    "formulate a standalone question which can be understood without the chat history. "
    "Do NOT answer the question, just reformulate it if needed and otherwise return it as is."
)

In [74]:
# Retriever over the vector store; "k" is set to 3 in the search arguments.
retriever = vstore.as_retriever(
    search_kwargs={"k": 3},
)

In [138]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

In [139]:
# Prompt for the rewriting step: system instructions, then prior turns, then the new question.
contextualize_messages = [
    ("system", retriever_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(contextualize_messages)

In [140]:
# Combine the chat model, the vector-store retriever and the rewriting prompt
# into a retriever that takes the chat history into account.
history_aware_retriever = create_history_aware_retriever(model,retriever,contextualize_q_prompt)

In [141]:
# PRODUCT_BOT_TEMPLATE = """
#     Your ecommercebot bot is an expert in product recommendations and customer queries.
#     It analyzes product model and price to provide accurate and helpful responses.
#     Ensure your answers are relevant to the product context and refrain from straying off-topic.
#     Your responses should be concise and informative.

#     CONTEXT:
#     {context}

#     QUESTION: {input}

#     YOUR ANSWER:

#     """

In [174]:
# System prompt for the answering step. `{context}` and `{input}` are template
# placeholders that are filled in when the prompt is formatted.
# NOTE(review): nothing in this text tells the model to answer only from CONTEXT -
# confirm whether answers should be restricted to the retrieved listings.
PRODUCT_BOT_TEMPLATE = """
    List the prices of the best Ford car models in bullet points. For each model, include:
    - Car model name
    - Price range
    - Unique feature or key selling point
    CONTEXT:
    {context}

    QUESTION: {input}

    YOUR ANSWER:

 """

In [175]:
# Prompt for the answering step: system instructions, then prior turns, then the current question.
qa_messages = [
    ("system", PRODUCT_BOT_TEMPLATE),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
]
qa_prompt = ChatPromptTemplate.from_messages(qa_messages)

In [176]:
# Chain that passes the retrieved documents and the question to the chat model via qa_prompt.
question_answer_chain = create_stuff_documents_chain(model, qa_prompt)

In [177]:
# Full RAG pipeline: history-aware retrieval followed by the answering chain.
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [178]:
# NOTE(review): this list is not referenced by any later visible cell; per-session
# history is kept in `store` through get_session_history instead - confirm it can be removed.
chat_history = []

In [179]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

In [180]:
# In-memory registry of chat histories keyed by session id; re-running this cell
# resets it to an empty dict and so discards every stored conversation.
store = {}

In [181]:
store

{}

In [182]:
def get_session_history(session_id: str)-> BaseChatMessageHistory:
  """Return the chat history for ``session_id``, creating an empty one on first use.

  Histories live in the module-level ``store`` dict, so the same object is
  returned on every later call with the same id.
  """
  try:
    return store[session_id]
  except KeyError:
    # First request for this session: register a fresh history and hand it back.
    history = ChatMessageHistory()
    store[session_id] = history
    return history

In [183]:
# Wrap the RAG chain so that each session's messages are looked up through
# get_session_history and supplied to the chain under "chat_history".
chat_with_memory = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    # The keyword was misspelled as `input_message_key`; the documented parameter
    # is `input_messages_key`, so the intended setting was never applied.
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [151]:
# Ask about the best model in session "murali" and display only the answer text.
model_reply = chat_with_memory.invoke(
    {"input": "can you tell me the best ford model?"},
    config={"configurable": {"session_id": "murali"}},
)
model_reply["answer"]

"The best Ford model is subjective and depends on personal preferences. However, the Ford Mustang is a popular and iconic model known for its performance, style, and affordability. It's available in various trims, including the EcoBoost, GT, and Shelby GT500, offering options for different budgets and driving experiences."

In [184]:
# Ask about prices in session "murali" and display only the answer text.
price_reply = chat_with_memory.invoke(
    {"input": "can you tell me the best ford car price?"},
    config={"configurable": {"session_id": "murali"}},
)
price_reply["answer"]

'Here are some of the best Ford car models, along with their prices and unique features:\n\n* **Ford Mustang Shelby GT500**: $72,900 - $80,000\n\t+ Unique feature: 760-horsepower supercharged V8 engine, making it one of the fastest production cars on the market.\n* **Ford Mustang Mach 1**: $52,400 - $60,000\n\t+ Unique feature: A high-performance variant of the Mustang, with a 460-horsepower V8 engine and unique styling cues.\n* **Ford Mustang GT Convertible**: $44,400 - $55,000\n\t+ Unique feature: A convertible version of the iconic Mustang, with a 460-horsepower V8 engine and a retractable soft top.\n* **Ford Mustang EcoBoost**: $27,400 - $35,000\n\t+ Unique feature: A more affordable and fuel-efficient version of the Mustang, with a 310-horsepower turbocharged engine.\n* **Ford Mustang GT Fastback**: $36,400 - $45,000\n\t+ Unique feature: A high-performance version of the Mustang, with a 460-horsepower V8 engine and a sleek fastback design.\n\nNote: Prices may vary depending on loc