In [None]:
from langchain.document_loaders import CSVLoader
from langchain.schema import Document
import glob, os
from dotenv import load_dotenv
load_dotenv()

In [None]:
# Corpus of LangChain documents that will be embedded and stored in the vector DB.
phones_corpus = []

# Raw string literal: in a normal string "\U" starts a \UXXXXXXXX unicode escape,
# so "C:\Users\..." does not even compile (unicodeescape SyntaxError).
csv_loader = CSVLoader(file_path=r"C:\Users\Lenovo\Downloads\updated_phone_specs.csv")
docs = csv_loader.load()

# Attach the metadata that the prompt and format_docs rely on.
for doc in docs:
    # NOTE(review): "source" is presumably the CSV file path set by the loader, which
    # would make phone_name identical for every row — confirm. If the CSV has a
    # model-name column, use it instead (e.g. via CSVLoader's `source_column`).
    phone_name = doc.metadata.get("source", "unknown").replace(".csv", "")
    doc.metadata.update({
        "phone_name": phone_name,
        "phone_brand": "samsung"
    })
    phones_corpus.append(doc)

len(phones_corpus)

In [None]:
# Peek at the last document to sanity-check its content and metadata.
phones_corpus[-1]

In [None]:
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings

# The previous `os.environ[k] = os.getenv(k)` lines were a no-op when the key is
# set and raised an opaque TypeError when it is missing. Check explicitly and
# fail early with a message that says what to fix.
for required_key in ("HF_TOKEN", "GROQ_API_KEY"):
    if os.getenv(required_key) is None:
        raise EnvironmentError(
            f"{required_key} is not set; add it to your .env file or environment."
        )

# Embedding model used both for indexing documents and for embedding queries.
embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
# Groq-hosted chat model that generates the final answers.
language_model = ChatGroq(model="meta-llama/llama-4-scout-17b-16e-instruct")

In [None]:
# Smoke test: send a throwaway prompt to confirm the chat model responds.
language_model.invoke("test line")

In [None]:
# Length of one embedding vector; it should equal the `dimension` used when creating the Pinecone index.
len(embedder.embed_query("Test"))

In [None]:
from pinecone import Pinecone
# Read the key once and fail with a clear message when it is missing. The previous
# `os.environ[k] = os.getenv(k)` self-assignment did nothing when the key was set
# and raised a bare TypeError when it was not.
pine_api_key = os.getenv("PINECONE_API_KEY")
if pine_api_key is None:
    raise EnvironmentError(
        "PINECONE_API_KEY is not set; add it to your .env file or environment."
    )

In [None]:
# Pinecone client used below to check for, create, and open the index.
pine_client = Pinecone(api_key=pine_api_key)

In [None]:
from pinecone import ServerlessSpec  # serverless: the cloud provider manages the servers, not us

# Create the Pinecone index on the first run; later runs reuse the existing one.
index_name = "samsung-db"

index_missing = not pine_client.has_index(index_name)
if index_missing:
    serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
    pine_client.create_index(
        name=index_name,
        # Must match the length of the vectors produced by `embedder`.
        dimension=768,
        metric="dotproduct",
        spec=serverless_spec,
    )

In [None]:
# Open a handle to the index by name; the bare expression shows it as the cell output.
pine_index = pine_client.Index(index_name)
pine_index

In [None]:
from langchain_pinecone import PineconeVectorStore

# PineconeVectorStore takes the index handle through the `index` keyword;
# `pine_index=` is not a constructor parameter and raises TypeError.
vs = PineconeVectorStore(index=pine_index, embedding=embedder)
vs

In [None]:
# Deterministic IDs make this cell idempotent: re-running it overwrites the same
# vectors instead of inserting duplicates under fresh random IDs (duplicates would
# crowd the top-k retrieval results with copies of the same row).
vs.add_documents(
    documents=phones_corpus,
    ids=[f"phone-spec-{position}" for position in range(len(phones_corpus))],
)

In [None]:
# Query the vector store directly (bypassing the retriever) and show the first scored result.
resp = vs.similarity_search_with_score(query="What are the camera specs of the Samsung Galaxy S23 ultra")
resp[0]

In [None]:
# Retriever view of the vector store: plain similarity search, 3 documents per query.
retr = vs.as_retriever(search_type="similarity", search_kwargs={"k": 3})

In [None]:
# Run the same kind of query through the retriever and display the documents it returns.
resp = retr.invoke("What are the camera specs of the Samsung Galaxy S23 Ultra?")
resp

In [None]:
from langchain_core.prompts import PromptTemplate

# Prompt for the RAG chain. `{context}` receives the formatted retrieved documents
# and `{question}` the raw user question.
# The number of documents is deliberately not stated in the prompt: it is set by
# the retriever's `k`, and a hard-coded count ("two documents") contradicted it.
qa_prompt = PromptTemplate(
    template="""
You are a helpful AI assistant and an expert at answering Samsung phone-related questions.
Your job is to provide **clear and direct answers** using the correct document.

Do not narrate your reasoning or lookup process. For example, do NOT write a response like this:

    The user is asking about the camera specs of the Samsung Galaxy S23 Ultra.

    After checking the metadata of the retrieved documents, I found an exact match: `samsung_galaxy_s23_ultra`.

Instead, answer the user's question clearly and directly, based on the context.

- You will receive one or more retrieved documents as context.
- Only use the document where the metadata `phone_name` exactly matches the phone model in the user's question.
- If no document matches, say: "Sorry, I do not know the answer because the correct phone model was not found in the retrieved context."
- Do not explain how you found the information. Just provide the final answer clearly and concisely.

Example of phone name matching:  
If the user question mentions **"Samsung Galaxy S23 Ultra"**  
and a document metadata has **phone_name: samsung_galaxy_s23_ultra**  
→ this is an exact match.

---

User question:  
{question}

---

Retrieved documents:  
{context}
""",
    input_variables=["question", "context"]
)


In [None]:
from langchain_core.runnables import RunnablePassthrough

def format_docs(retriever_docs):
    """Render retrieved documents as one context string for the prompt.

    Each document contributes a "Phone name (metadata): ..." header line
    (falling back to "unknown" when the metadata has no `phone_name`),
    followed by its page content and a blank line.

    Args:
        retriever_docs: Iterable of objects exposing a `metadata` dict and a
            `page_content` string.

    Returns:
        The concatenated text with leading and trailing whitespace stripped;
        an empty string when no documents are given.
    """
    rendered_blocks = [
        f"Phone name (metadata): {entry.metadata.get('phone_name', 'unknown')}\n"
        f"{entry.page_content}\n\n"
        for entry in retriever_docs
    ]
    return "".join(rendered_blocks).strip()


In [None]:

from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

from IPython.display import display, Image
# Both prompt variables are fed from the single user question: "context" runs the
# retriever and formats its documents, "question" passes the input through as-is.
prompt_inputs = {
    "context": retr | format_docs,
    "question": RunnablePassthrough(),
}

# retrieve -> fill prompt -> call the chat model -> extract plain text
rag_pipeline = prompt_inputs | qa_prompt | language_model | StrOutputParser()

# Render the chain's graph as a PNG and show it inline.
pipeline_graph_png = rag_pipeline.get_graph().draw_mermaid_png()
display(Image(pipeline_graph_png))

In [None]:
# End-to-end check: run one question through the full RAG chain and print the answer text.
question = "Which Samsung phone has the best battery life?"
resp = rag_pipeline.invoke(question)
print(resp)

In [None]:
# Run every question through the chain. Previously the comparison question was
# overwritten by the second assignment before it was ever used, so it never ran.
# The battery question stays last, so `question` and `resp` end with the same
# values as before.
questions = [
    "How does the Galaxy S23 compare to the S24 ultra in terms of performance?",
    "Which Samsung phone has the best battery life?",
]
for question in questions:
    resp = rag_pipeline.invoke(question)
    print(f"Q: {question}\n{resp}\n")