In [2]:
#!pip install -Uq langchain langchain_community unstructured[md] faiss-cpu transformers accelerate

In [3]:
import os
from langchain_community.document_loaders import UnstructuredMarkdownLoader, CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

In [4]:
# Step 2: Load and process data
md_files = ["/content/employee_handbook.md",
            "/content/engineering_master_doc.md",
            "/content/financial_summary.md",
            "/content/market_report_q4_2024.md",
            "/content/marketing_report_2024.md",
            "/content/marketing_report_q1_2024.md",
            "/content/marketing_report_q2_2024.md",
            "/content/marketing_report_q3_2024.md",
            "/content/quarterly_financial_report.md"]

csv_file = "/content/hr_data.csv"


In [5]:
department_mapping = {
    "/content/employee_handbook.md": "General",
    "/content/engineering_master_doc.md": "Engineering",
    "/content/financial_summary.md": "Finance",
    "/content/market_report_q4_2024.md": "Marketing",
    "/content/marketing_report_2024.md": "Marketing",
    "/content/marketing_report_q1_2024.md": "Marketing",
    "/content/marketing_report_q2_2024.md": "Marketing",
    "/content/marketing_report_q3_2024.md": "Marketing",
    "/content/quarterly_financial_report.md": "Finance",
    "/content/hr_data.csv": "HR"
}


In [6]:

# Load Markdown files
documents = []
for file_path in md_files:
    loader = UnstructuredMarkdownLoader(file_path)
    docs = loader.load()
    for doc in docs:
        doc.metadata["department"] = department_mapping.get(file_path, "Unknown")
    documents.extend(docs)

In [7]:
documents

 Document(metadata={'source': '/content/financial_summary.md', 'department': 'Finance'}, page_content="Financial Report for FinSolve Technologies Inc. - 2024\n\nExecutive Summary:\n\n2024 marked a year of both opportunity and challenge for FinSolve Technologies. Despite a robust revenue increase, we saw significant pressure in certain expense categories, notably vendor-related costs and software subscriptions. However, these pressures were balanced by cost-saving measures in operational efficiency, strong gross margin performance, and strategic investment in growth areas. The company is well-positioned to continue scaling its core offerings, but focused attention on cost optimization will be essential for maintaining profitability in the coming years.\n\nYear-Over-Year (YoY) Analysis:\n\nFinSolve Technologies's revenue grew by 25% in 2024, driven largely by the global expansion of its services, especially in Asia and Europe. This was accompanied by a 10% increase in vendor-related expe

In [8]:
# Split Markdown documents
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
split_docs = text_splitter.split_documents(documents)

In [9]:
# Load CSV file
csv_loader = CSVLoader(file_path=csv_file)
csv_docs = csv_loader.load()
for doc in csv_docs:
    doc.metadata["department"] = department_mapping.get(csv_file, "Unknown")

In [10]:
# Combine all documents
all_docs = split_docs + csv_docs

In [11]:
all_docs

[Document(metadata={'source': '/content/employee_handbook.md', 'department': 'General'}, page_content='Employee Handbook\n\nTable of Contents\n\nWelcome & Introduction\n\nEmployee Onboarding & Benefits\n\nLeave Policies\n\nWork Hours & Attendance\n\nCode of Conduct & Workplace Behavior\n\nHealth & Safety\n\nCompensation & Payroll\n\nReimbursement Policies\n\nTraining & Development\n\nPerformance & Feedback\n\nPrivacy & Data Security\n\nExit Policy\n\nFAQs\n\nMiscellaneous\n\nWelcome & Introduction\n\nCompany Vision and Mission'),
 Document(metadata={'source': '/content/employee_handbook.md', 'department': 'General'}, page_content='Company Vision and Mission\n\nAt FinSolve Technologies, our vision is to empower businesses and individuals through innovative technology solutions. Our mission is to deliver high-quality, sustainable products and services that create value for our stakeholders.\n\nCore Values\n\nIntegrity: We act with honesty and transparency.\n\nRespect: We value diversity 

In [12]:
# Step 3: Create vector store
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(all_docs, embeddings)

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


In [14]:
import torch
# Step 4: Set up the LLM
model_name = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto"  )
llm_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, max_length=1000)
llm = HuggingFacePipeline(pipeline=llm_pipeline)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0
  llm = HuggingFacePipeline(pipeline=llm_pipeline)


In [20]:
# Step 5: Implement RBAC and RAG
user_department = input("Enter your department (e.g., HR, Finance): ")
filtered_docs = [doc for doc in all_docs if doc.metadata.get("department") == user_department]
if not filtered_docs:
    print(f"No documents found for department: {user_department}")
else:
    filtered_vectorstore = FAISS.from_documents(filtered_docs, embeddings)
    retriever = filtered_vectorstore.as_retriever()
    rag_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

Enter your department (e.g., HR, Finance): Marketing


In [21]:
# Step 6: Run the chatbot
while True:
    user_query = input("Ask a question (or type 'exit' to quit): ")
    if user_query.lower() == "exit":
        break
    result = rag_chain({"query": user_query})
    print("Answer:", result["result"])

Ask a question (or type 'exit' to quit): What are the key higlights of q4


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Answer: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Q4 - Marketing Overview

In Q4 2024, our marketing strategy centered on three core pillars: finalizing market penetration in key demographics, fostering stronger customer relationships through personalized engagement, and laying the groundwork for sustained growth in 2025. Key initiatives included:

Targeted Campaigns: Launched multi-channel campaigns across digital, social media, and traditional platforms to capture end-of-year demand.

Q4 - Projections & Targets

Our projections for Q4 2024 were ambitious yet grounded in historical performance and market analysis. The key targets were:

Customer Acquisition Target: 220,000 new customers

Focused on expanding our retail and enterprise customer base through targeted campaigns and promotional offers.

Revenue Target: $11 million

Driven by increased sales volume durin

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Answer: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Q3 - Projections & Targets

Our Q3 2024 targets were designed to balance growth in new markets with the retention of existing customers. The key projections were:

Customer Acquisition Target: 180,000 new customers

Focused on acquiring customers in Latin America (50%) and existing markets (50%) through digital and offline channels.

Revenue Target: $7.5 million

Driven by increased transaction volume from loyalty programs and new market sales.

Marketing Spend: $2 million

Q3 - Marketing Overview

In Q3 2024, our marketing strategy revolved around two key priorities: strengthening customer retention to ensure long-term loyalty and penetrating Latin American markets to diversify our customer base. Key initiatives included:

Customer Retention Programs: Rolled out enhanced loyalty programs to reward repeat customers a