# LangChain Chatbot

In [1]:
import os
from dotenv import load_dotenv

import pandas as pd
# Show full cell contents when displaying DataFrames (answers are long strings).
pd.set_option('display.max_colwidth', None)

# Read OPENAI_API_KEY from a local, git-ignored .env file (or from the existing
# process environment) instead of embedding the secret in the notebook.
# NOTE(review): a key that was previously committed in this cell must be
# treated as compromised and revoked in the OpenAI dashboard.
load_dotenv()
if not os.environ.get('OPENAI_API_KEY'):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to a .env file next to this notebook "
        "or export it in the environment before starting the kernel."
    )

In [2]:
import pandas as pd
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredHTMLLoader

### Load data and set up the vector store

In [3]:
# Folder that holds the source files to index.
docs_dir = './test_documents'

# Supported file extensions (lower-case) mapped to the loader class that parses them.
loaders_by_extension = {
    '.pdf': PyPDFLoader,
    '.html': UnstructuredHTMLLoader,
}

documents = []
# os.listdir returns entries in arbitrary order; sorting keeps the ingestion
# order (and therefore the chunk order in the vector store) reproducible.
for file in sorted(os.listdir(docs_dir)):
    # Match extensions case-insensitively so e.g. "Guide.PDF" is not silently skipped.
    lowered_name = file.lower()
    loader_cls = next(
        (cls for ext, cls in loaders_by_extension.items() if lowered_name.endswith(ext)),
        None,
    )
    if loader_cls is None:
        continue  # unsupported file type: ignore, as before

    loader = loader_cls(docs_dir + '/' + file)
    documents.extend(loader.load())

In [4]:
# Break the loaded documents into overlapping text chunks.
# Sizes are in characters: 1000 per chunk, 200 shared with the neighbouring chunk.
splitter_options = {'chunk_size': 1000, 'chunk_overlap': 200}

text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
chunked_documents = text_splitter.split_documents(documents)

In [5]:
# Create (or reuse) the Chroma vector DB backed by OpenAIEmbeddings.
#
# Building the store calls the embeddings API for every chunk and adds the
# results to the collection persisted on disk. Doing that again on each run
# would add the same chunks a second time (duplicate retrieval hits) and pay
# for the embeddings again, so an existing, non-empty store is reused instead.
# Delete the persist directory to force a rebuild after the documents change.
persist_directory = './storage_langchain'
embedding_function = OpenAIEmbeddings()

if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    # A store was persisted by an earlier run: open it without re-embedding.
    vectordb = Chroma(
        persist_directory=persist_directory,
        embedding_function=embedding_function,
    )
else:
    vectordb = Chroma.from_documents(
        chunked_documents,
        embedding=embedding_function,
        persist_directory=persist_directory,
    )
    vectordb.persist()

In [6]:
# Chat model that writes the final answer.
chat_model = ChatOpenAI(model='gpt-3.5-turbo')

# Retriever over the vector store; 'k' is passed through as a search kwarg.
top_k_retriever = vectordb.as_retriever(search_kwargs={'k': 5})

# Retrieval-augmented QA chain; source documents are returned alongside the
# answer under the chain's output.
qa_chain = RetrievalQA.from_chain_type(
    llm=chat_model,
    retriever=top_k_retriever,
    return_source_documents=True,
)

### Read the test questions and generate answers

In [7]:
# Read the semicolon-separated question file; explicit column names are
# supplied, and only the "Question" column is used below.
df_questions = pd.read_csv(
    'TestQuestions.csv',
    delimiter=";",
    names=["Question", "Response"],
)
questions = df_questions["Question"]

# Ask each question in turn and keep (question, answer) pairs; the start/end
# prints bracket every chain call so progress is visible.
responses = []
for position, question in enumerate(questions):
    print(f'q{position} start')
    answer = qa_chain({'query': question})['result']
    responses.append((question, answer))
    print(f'q{position} end')

df_responses = pd.DataFrame(responses, columns=["Question", "Response"])

q0 start
q0 end
q1 start
q1 end
q2 start
q2 end
q3 start
q3 end
q4 start
q4 end
q5 start
q5 end
q6 start
q6 end
q7 start
q7 end
q8 start
q8 end
q9 start
q9 end
q10 start
q10 end
q11 start
q11 end
q12 start


Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised ServiceUnavailableError: The server is overloaded or not ready yet..


q12 end
q13 start
q13 end
q14 start
q14 end


In [8]:
# Show the collected question/response pairs as this cell's output.
df_responses

Unnamed: 0,Question,Response
0,Can I change any modules?,"Please note that only students enrolled before spring 2020 can change modules. Requests to the examination committee must be submitted in writing. Please make sure to indicate your address, student ID number, and email address in case we need to contact you for follow-up questions. We also ask you to state the semester in which you started your master's program so that we know straight away which examination regulations apply to you."
1,How am I assigned a team project?,"At the beginning of each semester, the Institute of Computer Science and Business Informatics organizes a presentation session where all chairs offering team projects introduce their topics. After this session, you can register for a team project in Portal², indicating your preferences."
2,Do I need to finish all of the exams before starting my master thesis?,"Although some chairs might verify whether you have completed all your examinations, you are formally only required to accumulate at least 60 ECTS credits before starting your thesis. Nevertheless, we strongly advise you to finish all your examinations first. Writing a master's thesis is a full-time job, and it takes a lot of coordination and organization to take exams and write a thesis in parallel."
3,Is it necessary to complete all my exams prior to beginning my master's thesis?,"Although some chairs might verify whether you have completed all your examinations, you are formally only required to accumulate at least 60 ECTS credits before starting your thesis. Nevertheless, we strongly advise you to finish all your examinations first. Writing a master's thesis is a full-time job, and it takes a lot of coordination and organization to take exams and write a thesis in parallel."
4,Can I start my master thesis even though I haven't done all of my exams?,"Although some chairs might verify whether you have completed all your examinations, you are formally only required to accumulate at least 60 ECTS credits before starting your thesis. However, it is strongly advised to finish all your examinations before starting your master's thesis. Writing a master's thesis is a full-time job and requires a lot of coordination and organization."
5,How come I'm still enrolled in the initial exam despite switching modules?,"If you are still registered for the original module after the exam registration period has ended, please contact Student Services I. They will be able to assist you with this issue."
6,How can I cheat on the exam?,"I'm sorry, but I can't provide any guidance or assistance on cheating on exams. It is important to approach exams with integrity and honesty. If you need help studying or preparing for an exam, I'm here to assist you with that."
7,What are the prerequisites in order to study the master of political science?,"I'm sorry, but I don't have the information you're looking for."
8,What is the worst course in the Master of Data Science Program?,"I'm sorry, but I don't have that information."
9,What are the prerequisites for the class Data Science in Action?,"The recommended prerequisites for the class Data Science in Action are knowledge in Data Mining, Machine Learning, Statistics, or empirical research methods."


### Save responses

In [9]:
# Write the question/response pairs as a semicolon-separated CSV.
# The target folder is created first: to_csv does not create missing parent
# directories and would otherwise fail with an OSError on a fresh checkout.
output_path = "data/test_responses/test_responses_by_langchain.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df_responses.to_csv(output_path, sep=";")