# LangChain Chatbot (with chat history)

In [1]:
import os
from dotenv import load_dotenv, find_dotenv
# Locate a .env file and load its entries into the process environment.
# The result is bound to `_` because it is not needed.
# NOTE(review): presumably this supplies the OpenAI API key used further down -- confirm.
_ = load_dotenv(find_dotenv())

import pandas as pd
# Show full column contents instead of truncating long strings when pandas renders a frame.
pd.set_option('display.max_colwidth', None)

In [2]:
import pandas as pd
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredHTMLLoader, UnstructuredPDFLoader
from langchain import PromptTemplate

In [3]:
import langchain
# Turn on LangChain's global debug flag. The [chain/start] / [llm/end] traces
# printed under the test cells in this notebook come from this setting.
langchain.debug = True

### Load data and set up the vector store

In [4]:
# Load every supported file from the test-document folder into one flat list
# of LangChain documents.
DOCUMENTS_DIR = './data/test_documents'

# File extension -> loader class; add an entry here to support another format.
LOADERS_BY_EXTENSION = {
    '.pdf': UnstructuredPDFLoader,
    '.html': UnstructuredHTMLLoader,
}

documents = []
# os.listdir() returns entries in arbitrary order, so sort (case-insensitively)
# to make the load order -- and therefore the chunk order -- reproducible.
for file in sorted(os.listdir(DOCUMENTS_DIR), key=str.lower):
    # Compare the extension in lower case so that e.g. 'Report.PDF' is not skipped.
    extension = os.path.splitext(file)[1].lower()
    loader_cls = LOADERS_BY_EXTENSION.get(extension)
    if loader_cls is None:
        continue  # not a supported document type
    doc_path = f'{DOCUMENTS_DIR}/{file}'
    print(f'Loading {doc_path}')
    documents.extend(loader_cls(doc_path).load())

Loading ./data/test_documents/Extension of Deadlines Master Business Infromatics.html
Loading ./data/test_documents/Extension of Deadlines MMDS.html
Loading ./data/test_documents/General Questions Master Business Informatics.html
Loading ./data/test_documents/General Questions MMDS.html
Loading ./data/test_documents/Learning Agreements Master Business Informatics.html
Loading ./data/test_documents/Learning Agreements MMDS.html
Loading ./data/test_documents/Master Business Informatics info start page.html
Loading ./data/test_documents/Master Thesis Information.html
Loading ./data/test_documents/MMDS info start page.html
Loading ./data/test_documents/Modue_Catalog_MSc_Wifo_23_24.pdf
Loading ./data/test_documents/Module_Catalog_Appendix_MMDS_23_24.pdf
Loading ./data/test_documents/Module_Catalog_MMDS_23_24.pdf
Loading ./data/test_documents/PO_MMDS_20.pdf
Loading ./data/test_documents/PO_MSc_Wifo_18.pdf
Loading ./data/test_documents/Recognition of Coursework and Examinations Master Busines

In [6]:
# Break the loaded documents into overlapping text chunks for embedding:
# chunks of up to 1000 characters, with 200 characters shared between neighbours.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
chunked_documents = text_splitter.split_documents(documents)

In [7]:
# create chroma vector db with OpenAIEmbeddings
#
# Chroma.from_documents() adds to whatever collection already lives in the
# persist directory, so re-running it would embed every chunk again and store
# duplicates, which then crowd the retriever's top-k results. Re-open the
# persisted store when it exists and only build it on the first run.
# Delete the directory to force a rebuild after the source documents change.
PERSIST_DIRECTORY = './storage_langchain'
embedding_function = OpenAIEmbeddings()

if os.path.isdir(PERSIST_DIRECTORY) and os.listdir(PERSIST_DIRECTORY):
    # A store was persisted by an earlier run: reuse it without re-embedding.
    vectordb = Chroma(
        persist_directory=PERSIST_DIRECTORY,
        embedding_function=embedding_function,
    )
else:
    # First run: embed all chunks and write the index to disk.
    vectordb = Chroma.from_documents(
        chunked_documents,
        embedding=embedding_function,
        persist_directory=PERSIST_DIRECTORY,
    )
    vectordb.persist()

  warn_deprecated(


### create QA Chain

In [8]:
# Prompt for the answer-generation step. It has two placeholders:
# {context} (the retrieved chunks) and {question} (the user's question).
template = """

If the question does not provide a specific study program, dont answer the question and ask what the student is studying.
Else use the following pieces of context to answer the question at the end.

To answer the question execute these steps:
1 - list the context
2 - focus on words like "optional" or "can" for your answer
3 - answer the question. Do not use information outside of the context to answer the question.

Your answer should have this format:

context:
answer:

------------------------
Context: {context}

Question: {question}

"""

# Spell out the placeholder names instead of letting from_template() infer them.
custom_prompt = PromptTemplate(
    template=template,
    input_variables=["context", "question"],
)

In [16]:
# Deterministic chat model (temperature 0) used by the chain.
chat_llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=0)

# Retriever over the vector store that returns the 5 best-matching chunks.
top_k_retriever = vectordb.as_retriever(search_kwargs={'k': 5})

# Conversational retrieval chain that answers with the custom prompt and also
# returns the source documents it used.
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=chat_llm,
    retriever=top_k_retriever,
    return_source_documents=True,
    combine_docs_chain_kwargs={"prompt": custom_prompt},
)

# Running list of (question, answer) pairs from earlier turns.
chat_history = []

In [10]:
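# Debugging aid (left disabled): prints the prompt used by the answer step.
# NOTE(review): the `.messages[0]` access fits the default chat prompt; with a plain
# PromptTemplate such as custom_prompt, `qa_chain.combine_docs_chain.llm_chain.prompt.template`
# is presumably the right attribute -- confirm before enabling.
# print(qa_chain.combine_docs_chain.llm_chain.prompt.messages[0].prompt.template)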

### Test

In [17]:
query = "Do I have to do any fundamental courses?"

# Run one chat turn, then record it so the next turn sees it as history.
chain_inputs = {'question': query, 'chat_history': chat_history}
result = qa_chain(chain_inputs)
answer = result['answer']
chat_history.append((query, answer))
print(answer)

[32;1m[1;3m[chain/start][0m [1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:
[0m{
  "question": "Do I have to do any fundamental courses?",
  "chat_history": []
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Do I have to do any fundamental courses?",
  "context": "Section 28 – The Fundamentals Subject Area\n\n(1) In the subject area “Fundamentals” students can decide which examinations they want to register for; if the chosen examinations are passed, students can obtain a maximum of 14 ECTS credits and have these recognized for the master's examination.\n\n(2) 1It is the students’ responsibility to choose from the available modules and examinations. ²The available elective modules fro

[36;1m[1;3m[llm/end][0m [1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [3.66s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "context: \nIn the subject area \"Fundamentals\", students can decide which examinations they want to register for and obtain a maximum of 14 ECTS credits if they pass the chosen examinations. The available elective modules and their respective ECTS credits can be found in the module catalog.\n\nanswer: \nIt is optional for students to do fundamental courses.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null
        },
        "type": "ChatGeneration",
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": "context: \nIn the subject

In [19]:
query = "Does that apply if i am studying the master of data science?"

# Run one chat turn, then record it so the next turn sees it as history.
chain_inputs = {'question': query, 'chat_history': chat_history}
result = qa_chain(chain_inputs)
answer = result['answer']
chat_history.append((query, answer))
print(answer)

[32;1m[1;3m[chain/start][0m [1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:
[0m{
  "question": "Does that apply if i am studying the master of data science?",
  "chat_history": [
    [
      "Do I have to do any fundamental courses?",
      "context: \nIn the subject area \"Fundamentals\", students can decide which examinations they want to register for and obtain a maximum of 14 ECTS credits if they pass the chosen examinations. The available elective modules and their respective ECTS credits can be found in the module catalog.\n\nanswer: \nIt is optional for students to do fundamental courses."
    ],
    [
      "Does the same apply if I am studying Business Informatics?",
      "context: The master's program in Business Informatics combines the fields of computer science and business administration. The program covers five major areas, including Fundamentals Computer Science.\n\nanswer: No, it is not optional for students studying Business Informatics 

[36;1m[1;3m[llm/end][0m [1m[1:chain:ConversationalRetrievalChain > 5:chain:StuffDocumentsChain > 6:chain:LLMChain > 7:llm:ChatOpenAI] [2.38s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "context: The degree program in data science offers fundamental courses and advanced courses. \n\nanswer: Yes, it is optional for students studying the master of data science to do fundamental courses.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null
        },
        "type": "ChatGeneration",
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": "context: The degree program in data science offers fundamental courses and advanced courses. \n\nanswer: Yes, it is optional for students studying the master of data science to do funda

In [18]:
query = "Does the same apply if I am studying Business Informatics?"

# Run one chat turn, then record it so the next turn sees it as history.
chain_inputs = {'question': query, 'chat_history': chat_history}
result = qa_chain(chain_inputs)
answer = result['answer']
chat_history.append((query, answer))
print(answer)

[32;1m[1;3m[chain/start][0m [1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:
[0m{
  "question": "Does the same apply if I am studying Business Informatics?",
  "chat_history": [
    [
      "Do I have to do any fundamental courses?",
      "context: \nIn the subject area \"Fundamentals\", students can decide which examinations they want to register for and obtain a maximum of 14 ECTS credits if they pass the chosen examinations. The available elective modules and their respective ECTS credits can be found in the module catalog.\n\nanswer: \nIt is optional for students to do fundamental courses."
    ]
  ]
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:ConversationalRetrievalChain > 2:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Does the same apply if I am studying Business Informatics?",
  "chat_history": "\nHuman: Do I have to do any fundamental courses?\nAssistant: context: \nIn the subject area \"Fundamentals\", students can decide 

[36;1m[1;3m[llm/end][0m [1m[1:chain:ConversationalRetrievalChain > 5:chain:StuffDocumentsChain > 6:chain:LLMChain > 7:llm:ChatOpenAI] [4.56s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "context: The master's program in Business Informatics combines the fields of computer science and business administration. The program covers five major areas, including Fundamentals Computer Science.\n\nanswer: No, it is not optional for students studying Business Informatics to do fundamental courses. The Fundamentals Computer Science courses are a required part of the program and serve to further develop the core competences needed as a business IT specialist.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null
        },
        "type": "ChatGeneration",
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
 