In [17]:
# Step 1: Import required libraries
from langchain_community.chat_models import ChatOllama
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OllamaEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA  
from PyPDF2 import PdfReader
from langchain.schema import Document


In [None]:
# Step 2: Function to load and extract text from the PDF
def extract_text_from_pdf(pdf_path):
    try:
        pdf_reader = PdfReader(pdf_path)
        text = "\n".join([page.extract_text() for page in pdf_reader.pages if page.extract_text()])
        if not text:
            raise ValueError("No text found in the PDF!")
        return text
    except Exception as e:
        print(f"Error loading PDF: {e}")
        return None

In [19]:
# Step 4: Function to initialize the language model
def initialize_llm(model_name="llama3.1"):
    return ChatOllama(model=model_name, keep_alive="3h", temperature=0)

In [26]:
llm.invoke("hello!!")

AIMessage(content='Hello!!! How are you today? Is there something I can help you with or would you like to chat?', additional_kwargs={}, response_metadata={'model': 'llama3.1', 'created_at': '2025-03-22T18:09:42.2618829Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 4098981200, 'load_duration': 67567500, 'prompt_eval_count': 12, 'prompt_eval_duration': 745879000, 'eval_count': 23, 'eval_duration': 3279744000}, id='run-529cb6fb-4508-45ee-b53c-c62425aaf9c1-0')

In [28]:
llm.invoke("What is the location of VIT Chennai?")

AIMessage(content='The location of Vellore Institute of Technology (VIT) Chennai is in Taramani, Chennai, Tamil Nadu, India. Specifically, it is situated at:\n\nVIT Bhopal Campus\nTaramani,\nChennai - 600113.\n\nPlease note that VIT has multiple campuses across the country, including Vellore, Chennai, and Bhopal.', additional_kwargs={}, response_metadata={'model': 'llama3.1', 'created_at': '2025-03-22T18:10:20.5851648Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 12276715000, 'load_duration': 64216700, 'prompt_eval_count': 19, 'prompt_eval_duration': 1016335000, 'eval_count': 77, 'eval_duration': 11193216000}, id='run-1849a12c-1490-4493-b28a-03303ad583f6-0')

In [21]:
# Step 5: Function to process text and create vector embeddings
def create_vector_store(text):
    docs = [Document(page_content=text, metadata={"source": "faculty_information"})]
    text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
    chunked_documents = text_splitter.split_documents(docs)

    print(f"Total Chunks Created: {len(chunked_documents)}")
    embeddings = OllamaEmbeddings(model="nomic-embed-text", show_progress=False)
    db = FAISS.from_documents(chunked_documents, embeddings)
    return db

In [22]:
# Step 6: Function to set up the RAG model
def setup_qa_system(vector_store, llm):
    retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={'k': 4})
    return RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff")

In [23]:
# Step 7: Function to query the chatbot
def ask_question(qa_system, question):
    response = qa_system({"query": question})
    return response["result"]

In [25]:

# ---- MAIN EXECUTION ----
if __name__ == "__main__":
    print("WELCOME TO VIT ASSIST CHATBOT")

    # Load PDF and initialize components
    pdf_path = "data/VITC_FACULTY.pdf"
    extracted_text = extract_text_from_pdf(pdf_path)

    if extracted_text:
        llm = initialize_llm()
        vector_store = create_vector_store(extracted_text)
        qa_system = setup_qa_system(vector_store, llm)

        # Example Queries
        questions = ["HELLO", "Sreedevi V T has cabin detail", "What is the location of VIT Chennai?"]
        for q in questions:
            print(f"\nUSER: {q}")
            print(f"BOT: {ask_question(qa_system, q)}")
    else:
        print("Chatbot setup failed due to PDF extraction error.")

WELCOME TO VIT ASSIST CHATBOT
Total Chunks Created: 1


  embeddings = OllamaEmbeddings(model="nomic-embed-text", show_progress=False)



USER: HELLO


  response = qa_system({"query": question})


BOT: Hello! How can I assist you today? You had some faculty details from VIT Chennai that you wanted to refer to, right?

USER: Sreedevi V T has cabin detail
BOT: Dr. Sreedevi V T has a cabin detail of AB1, 7th floor, Annex, 7th Cabin.

USER: What is the location of VIT Chennai?
BOT: I don't know. The provided context only contains faculty details and does not mention the location of VIT Chennai.
