In [7]:
# 0) Imports 
import os
import pandas as pd 
from pathlib import Path

# A LangChain container that holds text and metadata
from langchain.schema import Document  

# Splits long text into overlapping chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Fast vector index for similarity search
from langchain_community.vectorstores import FAISS

# OpenAIEmbeddings calls OpenAI's embedding API to convert text into vectors;
# ChatOpenAI wraps OpenAI's chat completion model
from langchain_openai import OpenAIEmbeddings,ChatOpenAI

# Safely defines prompts with placeholders
from langchain.prompts import PromptTemplate

# Prebuilt chain that wires together retriever + LLM
from langchain.chains import RetrievalQA

In [8]:
# Read the OpenAI API key from a local text file.
# NOTE(review): machine-specific absolute path - adjust when running elsewhere.
key_file = Path(r"C:\Users\fayab\Desktop\AI\GENAI\API_Keys\OPENAI_API_KEY.txt")

# "utf-8-sig" decodes plain UTF-8/ASCII and also drops a leading byte-order
# mark, which some Windows editors prepend and which would otherwise become
# part of the key string.
OPENAI_API_KEY = key_file.read_text(encoding="utf-8-sig").strip()

# Fail here with a clear message instead of later with an opaque auth error.
if not OPENAI_API_KEY:
    raise ValueError(f"No API key found in {key_file}")

In [10]:
# Export the key into this process's environment
# (presumably where the OpenAI clients created below look for it).
os.environ.update({"OPENAI_API_KEY": OPENAI_API_KEY})

In [11]:
# Load the training dataset CSV into a DataFrame.
dataset_csv = r"C:\Users\fayab\Desktop\AI\GENAI\Customer_Support_Training_Dataset.csv"
df = pd.read_csv(dataset_csv)

In [12]:
# Build one LangChain Document per CSV row, pairing each instruction with its
# response.
# - Rows missing either field are skipped; formatting a NaN would otherwise put
#   the literal text "nan" into the indexed content.
# - The two fields are separated by "|" (the previous stray "I" was a typo that
#   leaked into every embedded text).
# - The DataFrame index label is kept as metadata so a retrieved chunk can be
#   traced back to its source row.
qa_pairs = df[["instruction", "response"]].dropna()
docs = [
    Document(
        page_content=f"instruction: {instruction} | response: {response}",
        metadata={"row": row_label},
    )
    # Column-wise zip avoids the per-row Series construction of iterrows().
    for row_label, instruction, response in zip(
        qa_pairs.index, qa_pairs["instruction"], qa_pairs["response"]
    )
]

In [13]:
# 3. Chunking
# Splitter configuration: target chunk size of 500 with an overlap of 20
# between consecutive chunks.
splitter_settings = {"chunk_size": 500, "chunk_overlap": 20}
splitter = RecursiveCharacterTextSplitter(**splitter_settings)

# Break every document into chunks according to the settings above.
chunks = splitter.split_documents(docs)

In [14]:
# 4. Embeddings
# Name of the OpenAI embedding model used to vectorise the chunks.
embedding_model = "text-embedding-3-small"
embeddings = OpenAIEmbeddings(model=embedding_model)

In [16]:
# Build a FAISS index from the chunks using the embedding model.
vectordb = FAISS.from_documents(chunks, embeddings)

# Wrap the index as a retriever; k=4 is passed through as the search setting.
retrieval_options = dict(k=4)
retriever = vectordb.as_retriever(search_kwargs=retrieval_options)

In [17]:
# 5. Prompt and LLM setup
# Template text with two placeholders, {context} and {question}.
# The indentation inside the string literal is part of the prompt text itself.
support_template = """You are a concise support assistant.
    Answer only using the context. If unsure, say: "I don't know based on provided documents."
    Cite sources like [Doc 1].

    Context:
    {context}

    Question:
    {question}

    Answer:"""

prompt = PromptTemplate.from_template(support_template)

In [25]:
# Chat model that writes the final answers.
llm = ChatOpenAI(temperature=0.2, model="gpt-4o-mini")

# Create the final RetrievalQA chain: the retriever supplies the context and
# the LLM answers using the custom prompt. The retrieved documents are returned
# alongside the answer.
stuff_chain_options = {"prompt": prompt}
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=True,
    chain_type_kwargs=stuff_chain_options,
)

In [30]:
# 6. Simple chat
def chat(chain=None):
    """Run an interactive question/answer loop on the console.

    Args:
        chain: Object exposing ``invoke({"query": ...})`` that returns a
            mapping with a ``"result"`` entry and, optionally, a
            ``"source_documents"`` list whose items have ``page_content``.
            Defaults to the notebook-level ``qa`` chain.

    The loop ends when the user types ``exit`` or ``quit`` (any case) or
    interrupts the prompt (Ctrl+C / end of input). Blank input is ignored.
    """
    if chain is None:
        chain = qa
    preview_chars = 200  # how much of each source document to show

    print("\n Chat with the RAGbot ( type 'exit' to stop)")
    while True:
        try:
            q = input("\nYou:").strip()
        except (EOFError, KeyboardInterrupt):
            # Leave quietly instead of dumping a traceback into the output.
            break
        if q.lower() in {"exit", "quit"}:
            break
        if not q:
            # Nothing typed: do not spend an LLM call on an empty question.
            continue

        # invoke() replaces the deprecated direct call of the chain object.
        result = chain.invoke({"query": q})
        print("\nAssistant:\n", result["result"])

        print("\nSource used:")
        for i, doc in enumerate(result.get("source_documents", []), start=1):
            print(f"[Doc {i}]")
            text = doc.page_content
            # Only show an ellipsis when the preview really is truncated.
            suffix = "..." if len(text) > preview_chars else ""
            print(text[:preview_chars], suffix + "\n")