In [None]:
import os
import shutil
from dotenv import load_dotenv
import gradio as gr
import requests
from typing import List, Dict

In [None]:
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
from langchain.schema import Document
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [None]:
# Chat model name handed to ChatOpenAI wherever this notebook builds an LLM.
MODEL: str = "gpt-4o-mini"

# Folder used as the Chroma persist directory.
DB_DIR: str = "vector_db"

# Default for the `threshold` argument of is_rag_response_relevant.
RELEVANCE_THRESHOLD: float = 0.3

In [None]:
# Read variables from a local .env file; override=True asks python-dotenv to
# let .env values replace ones already present in the process environment.
load_dotenv(override=True)

# Make sure OPENAI_API_KEY exists in the environment, falling back to a
# placeholder string when nothing was configured.
os.environ.setdefault("OPENAI_API_KEY", "your-api-key")

# Key sent in the X-API-KEY header by serper_search; same placeholder fallback.
SERPER_API_KEY = os.environ.get("SERPER_API_KEY", "your-serper-api-key")

In [None]:
# Root folders scanned for Markdown source files.
folders = ["knowledge-base"]
documents = []

for folder in folders:
    # Match every .md file at any depth below the folder and read it as UTF-8.
    loader = DirectoryLoader(
        folder,
        glob="**/*.md",
        loader_cls=TextLoader,
        loader_kwargs={"encoding": "utf-8"},
    )
    documents += loader.load()

# Break the loaded documents into overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents)
print(f"Loaded {len(chunks)} chunks from knowledge base.")

In [None]:
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Reset the vector store so a re-run never mixes stale chunks with fresh ones.
# The collection is dropped through Chroma itself rather than by deleting
# DB_DIR from disk: removing the files underneath a Chroma client that is
# still alive in this kernel (i.e. when this cell is executed a second time)
# leaves that client pointing at a database that no longer exists, and the
# rebuild below then fails.
if os.path.exists(DB_DIR):
    Chroma(persist_directory=DB_DIR, embedding_function=embeddings).delete_collection()

# Embed every chunk and persist the resulting vectors under DB_DIR.
vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=DB_DIR)
print(f"Vectorstore created with {vectorstore._collection.count()} documents")

In [None]:
# Sanity check: report how many vectors were stored and how wide they are.
# `_collection` is the collection object held inside the Chroma wrapper.
collection = vectorstore._collection
count = collection.count()

# Fetch a single stored record, asking for its embedding, to measure its length.
sample = collection.get(limit=1, include=["embeddings"])
sample_embedding = sample["embeddings"][0]
dimensions = len(sample_embedding)
print(f"There are {count:,} vectors with {dimensions:,} dimensions in the vector store")

In [None]:
# Deterministic chat model (temperature 0) used by the RAG chain.
llm = ChatOpenAI(model=MODEL, temperature=0)

# Retriever over the vector store, configured with k=5.
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

# Prompt with two placeholders: {context} and {input}.
prompt = ChatPromptTemplate.from_template(
    """
Answer the question based on the context below in a natural, conversational way.
If you cannot answer based on the context, say so briefly.

Context: {context}

Question: {input}

Answer:"""
)

# Combine the documents + LLM step with the retriever into one chain.
document_chain = create_stuff_documents_chain(llm, prompt)
qa_chain = create_retrieval_chain(retriever, document_chain)

In [None]:
def serper_search(query: str, num_results: int = 5) -> List[Dict]:
    """Search the web through the Serper API, scoped to FAST University Karachi.

    The text " FAST University Karachi" is appended to ``query`` before the
    request is sent, so every search carries that context.

    Args:
        query: The user's search text.
        num_results: Value sent as ``"num"`` in the request payload.

    Returns:
        One dict per entry of the response's ``"organic"`` list, each with the
        keys ``"title"``, ``"snippet"`` and ``"link"`` (empty string when the
        entry lacks a field). An empty list is returned when the response has
        no ``"organic"`` key, or when the request or parsing raises; in that
        case the error is printed rather than propagated.
    """
    endpoint = "https://google.serper.dev/search"

    # Request body and headers are assembled up front, outside the try block.
    body = {"q": f"{query} FAST University Karachi", "num": num_results}
    request_headers = {"X-API-KEY": SERPER_API_KEY, "Content-Type": "application/json"}

    try:
        reply = requests.post(endpoint, json=body, headers=request_headers, timeout=10)
        reply.raise_for_status()
        data = reply.json()

        organic_hits = data["organic"] if "organic" in data else []
        return [
            {field: hit.get(field, "") for field in ("title", "snippet", "link")}
            for hit in organic_hits
        ]
    except Exception as e:
        # Best-effort: callers get an empty result list instead of an exception.
        print(f"Error in Serper search: {e}")
        return []

In [None]:
def is_rag_response_relevant(source_documents: List[Document], answer: str, threshold: float = RELEVANCE_THRESHOLD) -> bool:
    """Heuristically decide whether the RAG answer is good enough to use as-is.

    The check is purely textual and deliberately lenient. The answer is
    rejected (``False``) when any of the following holds:

    * no source documents were retrieved, or the answer is empty/``None``;
    * the retrieved documents contain fewer than 80 characters of stripped
      text in total;
    * the answer contains one of the explicit refusal phrases
      (case-insensitive; typographic apostrophes are treated like ``'``);
    * the stripped answer is shorter than 30 characters.

    Args:
        source_documents: Documents the retriever returned for the question.
        answer: Text the RAG chain produced.
        threshold: Kept for interface compatibility. No check in this function
            reads it, so changing it has no effect.

    Returns:
        ``True`` when none of the rejection rules fire, ``False`` otherwise.
    """
    if not source_documents or not answer:
        return False

    # Too little retrieved text means the retriever found nothing useful.
    total_content_length = sum(len(doc.page_content.strip()) for doc in source_documents)
    if total_content_length < 80:
        return False

    # Only reject if the answer explicitly says it cannot answer.
    strong_refusal_phrases = (
        "cannot answer",
        "don't have information",
        "no information",
        "context does not provide",
        "not available in the context",
    )

    # Models sometimes emit curly apostrophes; fold them to the ASCII one so
    # "don\u2019t have information" is recognised as a refusal too.
    answer_normalized = answer.lower().replace("\u2019", "'").replace("\u2018", "'")
    if any(phrase in answer_normalized for phrase in strong_refusal_phrases):
        return False

    # Answers under 30 characters are treated as non-answers.
    return len(answer.strip()) >= 30

In [None]:
def answer_with_web_search(question: str, search_results: List[Dict]) -> str:
    """Answer ``question`` conversationally from web search hits.

    Args:
        question: The user's question, inserted verbatim into the prompt.
        search_results: Dicts that each provide ``"title"``, ``"snippet"``
            and ``"link"``; a missing key raises ``KeyError``.

    Returns:
        The ``content`` of the chat model's reply.
    """
    # Separate model instance with temperature 0.3 for this answer.
    web_llm = ChatOpenAI(model=MODEL, temperature=0.3)

    # One text block per hit, separated by blank lines.
    hit_blocks = []
    for hit in search_results:
        hit_blocks.append(f"Source: {hit['title']}\n{hit['snippet']}\nURL: {hit['link']}")
    context = "\n\n".join(hit_blocks)

    web_prompt = f"""Based on the web search results below, answer the question naturally as if you're having a conversation.
Don't mention that you're using web search results. Just provide a helpful, conversational answer.

Question: {question}

Search Results:
{context}

Answer:"""

    return web_llm.invoke(web_prompt).content