In [None]:
import os
from dotenv import load_dotenv, find_dotenv
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.document_loaders import PyPDFLoader
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain

# Load environment variables from the .env file located by find_dotenv().
# The OpenAI key is mandatory, so stop immediately when it is missing or empty.
load_dotenv(find_dotenv())
openai_api_key = os.environ.get('OPENAI_API_KEY')
if openai_api_key is None or openai_api_key == "":
    raise ValueError("OPENAI_API_KEY not found in environment variables.")
    
# Load the PDF and build a retriever over its contents
def load_pdf_to_retriever(file_path):
    """Build a similarity retriever over the contents of a PDF file.

    The file at ``file_path`` is read with ``PyPDFLoader``, split with a
    ``RecursiveCharacterTextSplitter`` (chunk_size=1000, chunk_overlap=150),
    embedded with ``OpenAIEmbeddings`` using the module-level
    ``openai_api_key``, and indexed in a ``DocArrayInMemorySearch`` store.

    Returns:
        The store's retriever, configured for similarity search with k=3.
    """
    pages = PyPDFLoader(file_path).load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    chunks = splitter.split_documents(pages)

    # The store is rebuilt from scratch on every call.
    store = DocArrayInMemorySearch.from_documents(
        chunks,
        OpenAIEmbeddings(openai_api_key=openai_api_key),
    )

    return store.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# Build the conversational retrieval chain
def build_conversational_chain(retriever):
    """Assemble a ``ConversationalRetrievalChain`` around ``retriever``.

    The chain pairs a ``gpt-3.5-turbo`` chat model (temperature 0) with a
    ``ConversationBufferMemory`` stored under the ``chat_history`` key.

    Returns:
        The chain produced by ``ConversationalRetrievalChain.from_llm``.
    """
    return ConversationalRetrievalChain.from_llm(
        memory=ConversationBufferMemory(memory_key="chat_history", return_messages=True),
        llm=ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0),
        retriever=retriever,
    )

# Interactive command-line chat loop
def start_chat(chatbot):
    """Run an interactive question/answer loop on the command line.

    Args:
        chatbot: Callable invoked as ``chatbot({"question": text})``. It must
            return a mapping; the reply is read from its ``"answer"`` key,
            with a fallback message when that key is absent.

    The loop ends when the user types ``exit`` (case-insensitive, surrounding
    whitespace ignored) or when input ends (EOF or a keyboard interrupt).
    Blank lines are ignored rather than forwarded to ``chatbot``.
    """
    print("Welcome to the PDF Question-Answering Chatbot!")
    print("Type 'exit' to quit.")
    while True:
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # Closed stdin, Ctrl-D or Ctrl-C: end the session cleanly instead
            # of crashing with a traceback. The empty print moves off the
            # prompt line first.
            print()
            print("Chatbot: Thanks for chatting! Goodbye!")
            break

        command = user_input.strip()
        if not command:
            # Nothing was asked; don't spend a model call on an empty question.
            continue
        if command.lower() == 'exit':
            print("Chatbot: Thanks for chatting! Goodbye!")
            break

        response = chatbot({"question": user_input})
        print("Chatbot:", response.get("answer", "Sorry, I couldn't process that."))

# Script entry point
if __name__ == "__main__":
    # Index the lecture PDF, wrap the resulting retriever in a conversational
    # chain, then hand control to the interactive loop. The intermediate
    # values stay bound at module level.
    pdf_file_path = "docs/cs229_lectures/MachineLearning-Lecture01.pdf"  # Update path as needed
    retriever = load_pdf_to_retriever(pdf_file_path)
    chatbot = build_conversational_chain(retriever)
    start_chat(chatbot)


Welcome to the PDF Question-Answering Chatbot!
Type 'exit' to quit.
You: What is the document about?
Chatbot: The document seems to be about a computer science class, specifically discussing the prerequisites for the class and mentioning topics like big-O notation, data structures, and programming skills.
