# The commented-out code below builds "vectorstore.faiss", the FAISS index holding the vector embeddings of the PDF and website text.
# Building it takes a while, so the pre-built embedding files are provided and can be uploaded directly to Colab to reproduce the results.


In [15]:
# import PyPDF2
# import requests
# from bs4 import BeautifulSoup
# from langchain_community.vectorstores import FAISS
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain_community.embeddings import HuggingFaceEmbeddings
# import warnings
# warnings.filterwarnings('ignore')

# def extract_text_from_pdf(pdf_path):
#     extracted_text = ""
#     try:
#         with open(pdf_path, 'rb') as pdf_file:
#             reader = PyPDF2.PdfReader(pdf_file)
#             for page_number, page in enumerate(reader.pages, start=1):
#                 text = page.extract_text()
#                 extracted_text += f"\n--- Page {page_number} ---\n{text}"
#     except Exception as e:
#         print(f"Error reading PDF file: {e}")
#     return extracted_text

# def extract_text_from_website(url):
#     try:
#         response = requests.get(url, verify=False)
#         response.raise_for_status()
#         soup = BeautifulSoup(response.content, 'html.parser')
#         text_content = soup.get_text(separator="\n", strip=True)
#         return text_content
#     except requests.exceptions.RequestException as e:
#         print(f"Error fetching the webpage: {e}")
#         return None
#     except Exception as e:
#         print(f"An error occurred: {e}")
#         return None

# def create_vectorstore():
#     # File paths and URLs
#     pdf_path = "GATE2025_InformationBrochure.pdf"
#     faq_url = "https://gate2025.iitr.ac.in/faqs.html"

#     # Extract text from PDF
#     print("Extracting text from PDF...")
#     pdf_text = extract_text_from_pdf(pdf_path)

#     # Extract text from website
#     print("Extracting text from website...")
#     website_text = extract_text_from_website(faq_url)

#     # Combine texts
#     print("Combining texts...")
#     joined_text = pdf_text + "\n\n" + website_text if website_text else pdf_text

#     # Create text splitter
#     text_splitter = RecursiveCharacterTextSplitter(
#         chunk_size=1000,
#         chunk_overlap=200
#     )

#     # Split the combined text into overlapping chunks
#     print("Splitting text into chunks...")
#     chunks = text_splitter.split_text(joined_text)

#     # Create the embedding model (HuggingFaceEmbeddings defaults)
#     print("Creating embeddings...")
#     embeddings = HuggingFaceEmbeddings()

#     # Build the FAISS index from the chunks and save it to disk
#     print("Creating and saving vectorstore...")
#     vectorstore = FAISS.from_texts(chunks, embeddings)
#     vectorstore.save_local("vectorstore.faiss")

#     print("Vectorstore created and saved successfully!")

# if __name__ == "__main__":
#     create_vectorstore()


In [14]:
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage, AIMessage
import pickle
import os
from datetime import datetime

class SimpleGateAgent:
    """Conversational question-answering agent for the GATE exam.

    The agent loads a previously saved FAISS vectorstore, wires it to a Groq
    chat model through a ``ConversationalRetrievalChain`` and keeps the
    conversation in a ``ConversationBufferMemory``.

    Attributes:
        groq_api_key: API key passed to ``ChatGroq``.
        vectorstore_path: Directory the FAISS index is loaded from.
        model_name: Groq model identifier passed to ``ChatGroq``.
        memory: Conversation memory shared with the chain.
        vectorstore: The loaded FAISS index (set by ``load_agent``).
        qa_chain: The retrieval chain (set by ``load_agent``).
    """

    def __init__(self, groq_api_key, vectorstore_path="vectorstore.faiss/",
                 model_name="mixtral-8x7b-32768"):
        """Store the configuration, create the memory and build the chain.

        Args:
            groq_api_key: API key used to authenticate against Groq.
            vectorstore_path: Directory containing the saved FAISS index.
            model_name: Groq model to query.
                NOTE(review): hosted models get retired over time; confirm
                the default is still served before relying on it.
        """
        self.groq_api_key = groq_api_key
        self.vectorstore_path = vectorstore_path
        self.model_name = model_name
        self.memory = ConversationBufferMemory(
            memory_key="chat_history",
            # The chain returns several outputs (answer, source documents,
            # generated question); the memory must be told which one to store.
            output_key="answer",
            return_messages=True
        )
        self.load_agent()

    def load_agent(self):
        """Load the vectorstore and assemble the retrieval chain.

        Sets ``self.vectorstore`` and ``self.qa_chain``. Any exception raised
        while loading the index or constructing the model/chain propagates to
        the caller.
        """
        print("Loading existing vectorstore...")
        embeddings = HuggingFaceEmbeddings()

        # SECURITY: allow_dangerous_deserialization=True lets FAISS unpickle
        # the stored docstore. Unpickling can execute arbitrary code, so only
        # load index files that you created yourself or fully trust.
        self.vectorstore = FAISS.load_local(
            self.vectorstore_path,
            embeddings,
            allow_dangerous_deserialization=True
        )

        llm = ChatGroq(
            temperature=0.1,
            groq_api_key=self.groq_api_key,
            model_name=self.model_name
        )

        # Prompt used to rewrite a follow-up into a standalone question; it
        # also asks the model to emit the OUT_OF_CONTEXT sentinel for
        # questions that are not about GATE.
        condense_question_prompt = PromptTemplate.from_template(
            """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
            You must strictly check if the question is related to GATE exam. The question must explicitly be about GATE exam, its procedures, dates, eligibility, or any other GATE-specific topic.
            If the question is not explicitly about GATE exam, return exactly "OUT_OF_CONTEXT".
            Do not try to find indirect connections to GATE exam.

            Examples:
            - "When is GATE exam?" -> "When is the GATE exam scheduled?"
            - "What are fees?" -> "What are the GATE exam fees?"
            - "When is fifa worldcup?" -> "OUT_OF_CONTEXT"
            - "Tell me about sports" -> "OUT_OF_CONTEXT"

            Chat History:
            {chat_history}

            Follow Up Input: {question}

            Standalone question:"""
        )

        # Prompt used to answer the (standalone) question from the retrieved
        # context.
        qa_prompt = PromptTemplate.from_template(
            """You are a GATE exam support agent. You must ONLY answer questions related to the GATE exam using the provided context.
            Pay special attention to information that might be in tables or structured formats in the text.
            Look for dates, schedules, and numerical information that might be presented in tabular form.
            If you find the information in the context, be very specific in your answer.
            If you truly cannot find a relevant answer in the context, or if the question is not about GATE exam,
            respond with exactly "This is out of context question."

            Context: {context}

            Question: {question}

            Chat History: {chat_history}

            Answer:"""
        )

        self.qa_chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=self.vectorstore.as_retriever(
                # Passed through unchanged to the vectorstore search.
                search_kwargs={
                    'k': 5,
                    'fetch_k': 8,
                    'score_threshold': 0.5
                }
            ),
            memory=self.memory,
            return_source_documents=True,
            # Expose the condensed question so get_response() can detect the
            # OUT_OF_CONTEXT sentinel; result["question"] only echoes the
            # user's raw input.
            return_generated_question=True,
            verbose=False,
            condense_question_prompt=condense_question_prompt,
            combine_docs_chain_kwargs={'prompt': qa_prompt}
        )

    @staticmethod
    def _format_references(source_documents):
        """Return the "References" suffix for the given source documents.

        Page numbers are de-duplicated in first-seen order. Documents without
        a ``page`` metadata entry are skipped, and an empty string is returned
        when no page number is known, so the answer is not followed by a list
        of identical "Page N/A" lines.
        """
        pages = []
        for doc in source_documents:
            page = doc.metadata.get('page')
            if page is not None and page not in pages:
                pages.append(page)
        if not pages:
            return ""
        return "\nReferences:\n" + "\n".join(f"- Page {page}" for page in pages)

    def get_response(self, query):
        """Answer ``query`` and return the text to show to the user.

        Args:
            query: The user's question.

        Returns:
            The answer, followed by page references when the retrieved
            documents carry page metadata; the fixed string
            ``"This is out of context question"`` when either the condensed
            question or the answer marks the query as off-topic; or
            ``"An error occurred: ..."`` if anything in the chain raises.
        """
        # Top-level boundary of the chat loop: report failures to the user
        # instead of crashing the session.
        try:
            # chat_history is supplied by the memory attached to the chain,
            # so only the question is passed here.
            result = self.qa_chain.invoke({"question": query})

            answer = result["answer"]
            generated_question = result.get("generated_question") or ""

            if ("OUT_OF_CONTEXT" in generated_question
                    or "This is out of context question" in answer):
                return "This is out of context question"

            return answer + self._format_references(result.get("source_documents") or [])

        except Exception as e:
            return f"An error occurred: {str(e)}"

    def show_chat_history(self):
        """Return the conversation stored in memory as printable text.

        Returns:
            ``"No chat history available."`` when memory is empty, otherwise a
            header followed by one entry per message. Each entry shows the
            message's ``timestamp`` from ``additional_kwargs`` (``'N/A'`` when
            absent) and is followed by a dashed separator line.
        """
        messages = self.memory.chat_memory.messages
        if not messages:
            return "No chat history available."

        parts = ["\nChat History:\n" + "="*50 + "\n"]
        for msg in messages:
            stamp = msg.additional_kwargs.get('timestamp', 'N/A')
            if isinstance(msg, HumanMessage):
                parts.append(f"User ({stamp}): {msg.content}\n")
            elif isinstance(msg, AIMessage):
                parts.append(f"Agent ({stamp}): {msg.content}\n")
            # The separator is written after every message, whatever its type.
            parts.append("-"*50 + "\n")
        return "".join(parts)

    def save_chat_history(self):
        """Write the formatted chat history to a timestamped text file.

        The file is created in the current working directory and named
        ``chat_history_<YYYYmmdd_HHMMSS>.txt``.

        Returns:
            A confirmation message containing the file name.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"chat_history_{timestamp}.txt"

        with open(filename, 'w', encoding='utf-8') as f:
            f.write(self.show_chat_history())
        return f"Chat history saved to {filename}"

    def introduce(self):
        """Return the greeting and the list of available commands."""
        return """Hello! I am your GATE Support Agent. I can help you with information about the GATE exam based on the official information brochure and FAQs.
        Please feel free to ask any questions about GATE exam procedures, eligibility, registration, or other related topics.

        Commands available:
        - Type 'history' to view chat history
        - Type 'save' to save chat history to file
        - Type 'exit' to end conversation

        How may I assist you today?"""

def main():
    """Run the interactive command-line chat loop for the GATE support agent.

    The Groq API key is read from the ``GROQ_API_KEY`` environment variable;
    if it is not set, the user is prompted for it without echoing the input.

    Commands (case-insensitive): ``exit`` ends the session, ``history`` prints
    the stored conversation, ``save`` writes it to a file. Anything else is
    sent to the agent as a question. End-of-input or Ctrl-C also ends the
    session.
    """
    import getpass  # local import: only needed when the env var is missing

    # SECURITY: the API key must never be committed to source control. Any
    # key that was previously hard-coded here should be revoked.
    groq_api_key = os.environ.get("GROQ_API_KEY") or getpass.getpass("Enter your Groq API key: ")
    if not groq_api_key:
        print("A Groq API key is required. Set GROQ_API_KEY and try again.")
        return

    agent = SimpleGateAgent(groq_api_key)

    print("\n" + "="*50)
    print(agent.introduce())
    print("="*50 + "\n")

    while True:
        try:
            query = input("\nYour question (or type 'exit' to end): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Treat a closed stdin or an interrupt like an explicit 'exit'.
            query = 'exit'

        # Only the command comparison is case-insensitive; the question itself
        # is forwarded with its original casing.
        command = query.lower()

        if command == 'exit':
            print("\nThank you for using GATE Support Agent. Goodbye!")
            break
        elif command == 'history':
            print(agent.show_chat_history())
            continue
        elif command == 'save':
            print(agent.save_chat_history())
            continue
        elif not query:
            # Nothing was typed; ask again instead of querying the model.
            continue

        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        stored_before = len(agent.memory.chat_memory.messages)

        response = agent.get_response(query)

        # The chain's memory records the exchange itself when the call
        # succeeds. Stamp those messages instead of appending a second copy,
        # which would duplicate every turn in the history.
        new_messages = agent.memory.chat_memory.messages[stored_before:]
        if new_messages:
            for msg in new_messages:
                msg.additional_kwargs["timestamp"] = current_time
        else:
            # Nothing was stored (e.g. the chain raised and get_response
            # returned an error string): record the exchange manually so it
            # still appears in the history.
            human_msg = HumanMessage(content=query, additional_kwargs={"timestamp": current_time})
            ai_msg = AIMessage(content=response, additional_kwargs={"timestamp": current_time})
            agent.memory.chat_memory.messages.extend([human_msg, ai_msg])

        print("\nAgent:", response)

# Start the interactive chat loop only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Loading existing vectorstore...

Hello! I am your GATE Support Agent. I can help you with information about the GATE exam based on the official information brochure and FAQs. 
        Please feel free to ask any questions about GATE exam procedures, eligibility, registration, or other related topics. 
        
        Commands available:
        - Type 'history' to view chat history
        - Type 'save' to save chat history to file
        - Type 'exit' to end conversation
        
        How may I assist you today?


Your question (or type 'exit' to end): when is fifa worldcup?

Agent: This is out of context question

Your question (or type 'exit' to end): when is gate exam?

Agent: The GATE exam is typically scheduled in February every year. However, the exact date may vary and it's best to check the official GATE website for the most accurate information. Please note that the context provided does not contain the specific date for the upcoming GATE exam.

Your question (or type 'e