In [1]:
### 1. Imports and Setup




In [2]:
import pandas as pd
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_google_genai import GoogleGenerativeAI
import os
import warnings
import pickle

# --- Configuration ---
print("Starting...")
# Hide DeprecationWarning messages so cell output stays readable.
warnings.filterwarnings("ignore", category=DeprecationWarning)

# SECURITY: the Google API key must come from the environment, never from the
# notebook source. A key was previously hard-coded at this spot; treat that key
# as compromised and rotate it with the provider.
if not os.environ.get("GOOGLE_API_KEY"):
    print(
        "WARNING: GOOGLE_API_KEY is not set. Set it in your environment before "
        "running the LLM cells (e.g. `export GOOGLE_API_KEY=...`)."
    )

# Location on disk where the FAISS index is saved to and loaded from.
FAISS_INDEX_PATH = "faiss_index"

Starting...


In [3]:
def _cell(record, column):
    """Return ``record[column]``, or ``'N/A'`` when the column is absent or the cell is empty."""
    value = record.get(column, "N/A")
    # pd.isna() returns an array for list-like input, so only test scalar cells.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return "N/A"
    return value


def _iter_records(df):
    """Yield one ``{column: value}`` dict per row of ``df`` without building them all at once."""
    columns = list(df.columns)
    # itertuples keeps each column's own dtype (iterrows coerces every row to a
    # single-dtype Series) and is considerably faster on large frames.
    for values in df.itertuples(index=False, name=None):
        yield dict(zip(columns, values))


def _historical_document(record):
    """Render one historical-sales row as a single-line document string."""
    return (
        f"Historical Sale: Site={_cell(record, 'Site')}, Date={_cell(record, 'Date')}, "
        f"Invoice={_cell(record, 'Invoice Number')}, Customer={_cell(record, 'Customer Code')} ({_cell(record, 'Name')}), "
        f"Item={_cell(record, 'Item Code')} ({_cell(record, 'Item Description')}), Quantity={_cell(record, 'Quantity')}"
    )


def _forecast_document(record):
    """Render one forecast row as a single-line document string."""
    # 'Forcasted Sales qty' is spelled this way on purpose: it is the column
    # name this notebook has always looked up in the forecast workbook.
    return (
        f"Forecast: Item Code={_cell(record, 'Item Code')}, "
        f"Forecasted Sales Quantity={_cell(record, 'Forcasted Sales qty')}"
    )


def load_and_process_data(data_path="data.xlsx", forecast_path="Forcast.xlsx"):
    """Load the two Excel workbooks and turn every row into a document string.

    Args:
        data_path: Path of the historical-sales workbook.
        forecast_path: Path of the forecast workbook.

    Returns:
        A list of strings: one ``"Historical Sale: ..."`` entry per row of the
        sales workbook followed by one ``"Forecast: ..."`` entry per row of the
        forecast workbook. Columns that are missing, and cells that are empty,
        are rendered as ``N/A``. Returns ``None`` (after printing the error) if
        either workbook cannot be read.
    """
    print("Loading data from Excel files...")
    try:
        sales_df = pd.read_excel(data_path)
        forecast_df = pd.read_excel(forecast_path)
        print("Data loaded successfully.")
    except FileNotFoundError as e:
        print(f"Error loading Excel file: {e}")
        return None
    except Exception as e:
        # Deliberately broad: any read failure (bad format, missing engine, ...)
        # is reported and signalled to the caller as None.
        print(f"An error occurred while loading data: {e}")
        return None

    documents = []
    print("Processing historical data into documents...")
    documents.extend(_historical_document(record) for record in _iter_records(sales_df))

    print("Processing forecast data into documents...")
    documents.extend(_forecast_document(record) for record in _iter_records(forecast_df))

    print(f"Data processing complete. Created {len(documents)} documents.")
    return documents

documents = load_and_process_data()

Loading data from Excel files...
Data loaded successfully.
Processing historical data into documents...
Processing forecast data into documents...
Data processing complete. Created 678049 documents.


In [None]:
def create_or_load_vectorstore(docs, batch_size=10_000):
    """Load the FAISS vector store from disk, or build and save it from ``docs``.

    Args:
        docs: List of document strings to embed. Only used when no saved index
            exists at ``FAISS_INDEX_PATH``.
        batch_size: Number of documents embedded and added to the index per
            step when building a new store.

    Returns:
        The FAISS vector store, or ``None`` when there is no saved index and
        ``docs`` is empty/``None``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    index_exists = os.path.exists(FAISS_INDEX_PATH)

    # Bail out before loading the embedding model when there is nothing to do.
    if not index_exists and not docs:
        print("No documents to process. Cannot create vector store.")
        return None

    print("Initializing embeddings model...")
    embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

    if index_exists:
        print(f"Loading existing FAISS vector store from '{FAISS_INDEX_PATH}'...")
        # SECURITY: load_local unpickles data from disk, which can execute
        # arbitrary code. Only enable this flag for an index this notebook
        # wrote itself; never point FAISS_INDEX_PATH at an untrusted file.
        # NOTE: an existing index is reused as-is even if `docs` has changed;
        # delete the saved index to force a rebuild.
        vectorstore = FAISS.load_local(FAISS_INDEX_PATH, embeddings_model, allow_dangerous_deserialization=True)
        print("FAISS vector store loaded.")
        return vectorstore

    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    print("Creating new FAISS vector store...")
    docs = list(docs)
    total = len(docs)
    # Embed in batches so the vectors for the whole corpus are never held in
    # memory at once, and so long builds report progress.
    vectorstore = FAISS.from_texts(texts=docs[:batch_size], embedding=embeddings_model)
    print(f"Embedded {min(batch_size, total)}/{total} documents...")
    for start in range(batch_size, total, batch_size):
        vectorstore.add_texts(docs[start:start + batch_size])
        print(f"Embedded {min(start + batch_size, total)}/{total} documents...")

    print(f"Saving FAISS vector store to '{FAISS_INDEX_PATH}'...")
    vectorstore.save_local(FAISS_INDEX_PATH)
    print("FAISS vector store created and saved.")
    return vectorstore

vector_store = create_or_load_vectorstore(documents)

Initializing embeddings model...


  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


Creating new FAISS vector store...


In [None]:
def initialize_qa_chain(vectorstore, model="gemini-pro", k=5):
    """Build a RetrievalQA chain that answers questions from ``vectorstore``.

    Args:
        vectorstore: Vector store to retrieve context from. A falsy value
            (e.g. ``None`` from a failed build) short-circuits to ``None``.
        model: Model id passed to ``GoogleGenerativeAI``.
        k: Number of documents the retriever returns per query.

    Returns:
        The RetrievalQA chain, or ``None`` when no vector store was supplied.
    """
    if not vectorstore:
        return None
    print("Initializing language model...")
    # NOTE(review): the default "gemini-pro" id may no longer be served by the
    # Gemini API; confirm the current model id and pass it via `model`.
    llm = GoogleGenerativeAI(model=model)
    # Only the top-k most similar documents are given to the LLM as context.
    retriever = vectorstore.as_retriever(search_kwargs={'k': k})
    # "stuff" puts all retrieved documents into a single prompt.
    qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
    print("Question-answering chain created.")
    return qa_chain

qa_chain = initialize_qa_chain(vector_store)

In [None]:
# Run one sample question through the chain, or report that no chain exists.
if not qa_chain:
    print("Could not initialize QA Agent.")
else:
    print("\nQA Agent is ready. Ask your questions.")

    # --- Example Question ---
    # NOTE(review): the chain answers from a handful of retrieved documents,
    # so an aggregate question like this one is presumably not computed over
    # the full dataset; verify the answer independently.
    question = "What is the total quantity sold for item code NBOMOATS16X350GPB?"
    print(f"You: {question}")

    try:
        # The chain returns a mapping; the generated text is under 'result'.
        answer = qa_chain.invoke({"query": question})
        print(f"Agent: {answer['result']}")
    except Exception as exc:
        # Best-effort demo: report any failure instead of aborting the cell.
        print(f"An error occurred: {exc}")