In [2]:
import os
import json # Import the json module
from dotenv import load_dotenv
from langchain_community.document_loaders import TextLoader # We'll still use this for individual text entries
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import FastEmbedEmbeddings
from langchain_groq import ChatGroq
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.schema import Document # Import Document for creating LangChain documents

In [3]:
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this is where the Groq API key used by ChatGroq
# further down comes from — confirm the .env file defines it.
load_dotenv()

True

In [5]:
# --- 2. Load Your FAQ Data (Updated for your JSON structure) ---
# Layout this cell reads: {"questions": [{"question": "...", "answer": "..."}, ...]}
# Each entry becomes one LangChain Document whose text is "Question: ...\nAnswer: ..."
# and whose metadata is the original JSON entry.
FAQ_PATH = 'Ecommerce_FAQ_Chatbot_dataset.json'

try:
    with open(FAQ_PATH, 'r', encoding='utf-8') as f:
        full_json_content = json.load(f)
except FileNotFoundError as err:
    # Raise instead of calling exit(): the message names the real file, the
    # cell fails visibly, and a script run ends with a non-zero status.
    raise SystemExit(
        f"Error: '{FAQ_PATH}' not found. Please create this file with your FAQ content."
    ) from err
except json.JSONDecodeError as err:
    raise SystemExit(
        f"Error: Could not decode '{FAQ_PATH}'. Please ensure it's a valid JSON file."
    ) from err

# Access the list of questions using the 'questions' key. Guard against a
# top-level value that is not a dict and against a 'questions' value that is
# not a list, so a malformed file yields a warning instead of an unrelated
# AttributeError/TypeError.
raw_questions = (
    full_json_content.get('questions')
    if isinstance(full_json_content, dict)
    else None
)
json_questions_list = raw_questions if isinstance(raw_questions, list) else []

if not json_questions_list:
    print(f"Warning: 'questions' key is missing, empty, or not a list in '{FAQ_PATH}'.")

faq_data = []
for item in json_questions_list:
    # Only dict entries can be read with .get(); anything else is reported and skipped.
    if isinstance(item, dict):
        content = f"Question: {item.get('question', '')}\nAnswer: {item.get('answer', '')}"
        faq_data.append(Document(page_content=content, metadata=item))
    else:
        print(f"Skipping malformed item in JSON (not a dictionary): {item}")

documents = faq_data

In [6]:
# --- 3. Split the Documents into Chunks ---
# Operates on the LangChain Document objects produced by the loading cell.
# NOTE(review): CharacterTextSplitter only cuts at its separator (a blank line
# by default, per the LangChain docs — confirm). The FAQ entries contain a
# single newline, so they are likely passed through as one chunk each.
CHUNK_SIZE = 1000     # upper size limit handed to the splitter
CHUNK_OVERLAP = 100   # overlap between consecutive chunks

text_splitter = CharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
docs = text_splitter.split_documents(documents)

In [7]:
# --- 4. Create Embeddings and a Vector Store ---
# Embeds every chunk in `docs` and indexes the vectors in an in-memory FAISS store.
EMBEDDING_MODEL_NAME = "BAAI/bge-small-en-v1.5"

# Fail early with a clear message: building a FAISS index from an empty list
# otherwise surfaces as an opaque IndexError inside the library, and only
# after the embedding model has already been set up.
if not docs:
    raise ValueError(
        "No FAQ documents to index: `docs` is empty. "
        "Check that the FAQ JSON file was loaded and contains questions."
    )

# NOTE(review): the recorded output shows model files being fetched here and a
# Windows symlink-privilege error (WinError 1314) during that download.
embeddings = FastEmbedEmbeddings(model_name=EMBEDDING_MODEL_NAME)
vectorstore = FAISS.from_documents(docs, embeddings)

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/706 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


model_optimized.onnx:   0%|          | 0.00/66.5M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

[32m2025-07-07 15:42:40.655[0m | [31m[1mERROR   [0m | [36mfastembed.common.model_management[0m:[36mdownload_model[0m:[36m430[0m - [31m[1mCould not download model from HuggingFace: [WinError 1314] A required privilege is not held by the client: '..\\..\\blobs\\0d7726d0cdccb62ee17c03bd1595cff07199b8f8' -> 'C:\\Users\\hp\\AppData\\Local\\Temp\\fastembed_cache\\models--qdrant--bge-small-en-v1.5-onnx-q\\snapshots\\52398278842ec682c6f32300af41344b1c0b0bb2\\config.json' Falling back to other sources.[0m
[32m2025-07-07 15:42:40.655[0m | [31m[1mERROR   [0m | [36mfastembed.common.model_management[0m:[36mdownload_model[0m:[36m452[0m - [31m[1mCould not download model from either source, sleeping for 3.0 seconds, 2 retries left.[0m


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

In [8]:
# --- 5. Build the Conversational Retrieval Chain with Groq ---
# Chat model served by Groq that rephrases follow-ups and writes the answers.
llm = ChatGroq(temperature=0.7, model_name="meta-llama/llama-4-scout-17b-16e-instruct")

# Retriever over the FAISS index, using the vector store's default settings.
retriever = vectorstore.as_retriever()

# verbose=True makes the chain print each formatted prompt (visible in the
# chat cell's output); return_source_documents=True asks the chain to include
# the retrieved documents in its result.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
    verbose=True,
)

In [9]:
# --- 6. Start the Conversation ---
# Interactive loop: read a question, run it through the chain together with the
# accumulated (question, answer) history, print the answer, and record the turn.
chat_history = []
print("Welcome to the FAQ Bot powered by Groq! Ask me anything. Type 'exit' to end.")

while True:
    try:
        # Strip surrounding whitespace so inputs like " exit " are recognised.
        query = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Interrupting the prompt (or a closed stdin) ends the chat cleanly
        # instead of leaving a traceback in the cell output.
        print()
        break

    # Ignore blank lines rather than sending an empty question to the LLM.
    if not query:
        continue

    if query.lower() in ['exit', 'quit']:
        break

    result = qa.invoke({"question": query, "chat_history": chat_history})
    answer = result['answer']

    print(f"Bot: {answer}")
    # Earlier turns are passed back on the next call so the chain can rephrase
    # follow-up questions in context.
    chat_history.append((query, answer))

Welcome to the FAQ Bot powered by Groq! Ask me anything. Type 'exit' to end.


You:  What payment method is accepted?




[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
Question: What payment methods do you accept?
Answer: We accept major credit cards, debit cards, and PayPal as payment methods for online orders.

Question: Are my personal and payment details secure?
Answer: Yes, we take the security of your personal and payment details seriously. We use industry-standard encryption and follow strict security protocols to ensure your information is protected.

Question: Can I order by phone?
Answer: Unfortunately, we do not accept orders over the phone. Please place your order through our website for a smooth and secure transaction.

Question: Can I request an invoice for my order?
Answer: Yes, an invoice is usually included with your or

You:  Are website is going to save my card details?




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: What payment method is accepted?
Assistant: We accept major credit cards, debit cards, and PayPal as payment methods for online orders.
Follow Up Input: Are website is going to save my card details?
Standalone question:[0m

[1m> Finished chain.[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
Question: Can I order a product if it is listed as 'discontinued' but still visible on the website?
Answer: If a product is listed as 'discontinued' but still visible on the websi

You:  How do i need to check wheather product is available or not?




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:

Human: What payment method is accepted?
Assistant: We accept major credit cards, debit cards, and PayPal as payment methods for online orders.
Human: Are website is going to save my card details?
Assistant: Yes, we take the security of your personal and payment details seriously. We use industry-standard encryption and follow strict security protocols to ensure your information is protected.
Follow Up Input: How do i need to check wheather product is available or not?
Standalone question:[0m

[1m> Finished chain.[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the

You:  exit
