In [1]:
import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_openai.chat_models import ChatOpenAI
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from dotenv import load_dotenv

In [2]:
# Load environment variables from .env so the API key is never hardcoded here.
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")

# Fail fast with an actionable message rather than letting a missing key
# surface later when the OpenAI clients are constructed or first called.
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to a .env file or export it "
        "before running this notebook."
    )

In [3]:
# Load all PDFs in the data folder
data_folder = '../data'
documents = []
# os.listdir() returns entries in arbitrary order; sorting keeps the document
# order (and everything derived from it) reproducible across runs.
for file_name in sorted(os.listdir(data_folder)):
    # Compare the extension case-insensitively so "Notes.PDF" is not skipped.
    if file_name.lower().endswith('.pdf'):
        loader = PyPDFLoader(os.path.join(data_folder, file_name))
        documents.extend(loader.load())

# Stop here with a clear message instead of handing an empty corpus to the
# splitting / embedding cells that follow.
if not documents:
    raise FileNotFoundError(f"No PDF documents could be loaded from {data_folder!r}")

In [4]:
# Chunk the loaded documents. chunk_size and chunk_overlap are handed straight
# to CharacterTextSplitter; the resulting pieces are collected in `docs`.
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
docs = text_splitter.split_documents(documents)

In [5]:
# Sanity-check the chunks produced by the previous cell before they are
# embedded (re-running the split here would only repeat identical work).
assert docs, "Text splitting produced no chunks; check the loaded documents"
print(f"Split {len(documents)} documents into {len(docs)} chunks")

In [6]:
# Build the OpenAI embedding client, then index every chunk in a Chroma
# vector store using that client.
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
)

In [7]:
# Create the chat model, then build a RetrievalQA chain that pairs it with a
# retriever obtained from the vector store.
llm = ChatOpenAI(
    model="gpt-4",
    openai_api_key=openai_api_key,
)
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectorstore.as_retriever(),
)

In [None]:
# Chatbot interaction: keep prompting until the user types "exit".
while True:
    try:
        # Strip surrounding whitespace so inputs like "exit " are still recognised.
        query = input("Ask a question about CTSE lecture notes: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupt while waiting for input ends the
        # session cleanly instead of leaving a traceback in the output.
        break
    if query.lower() == "exit":
        break
    if not query:
        # Nothing was asked; re-prompt rather than sending an empty query
        # to the chain.
        continue
    try:
        response = qa_chain.invoke({"query": query})
        print(f"Answer: {response['result']}")
    except Exception as e:
        # Keep the session alive when a single request fails: report the
        # error and prompt again.
        print(f"An error occurred: {e}")