# LangChain Chatbot - with Additional LLM-generated Metadata

In [1]:
import os
import pandas as pd
from dotenv import load_dotenv, find_dotenv
from functools import partial
from copy import deepcopy

from langchain.document_loaders import UnstructuredHTMLLoader, UnstructuredPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_community.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.chains.combine_documents import collapse_docs, split_list_of_docs
from langchain.chat_models import ChatOpenAI
from langchain.document_transformers.openai_functions import create_metadata_tagger
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.schema import Document, StrOutputParser
from langchain_core.prompts import format_document
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

from openai.error import InvalidRequestError

pd.set_option('display.max_colwidth', None)

_ = load_dotenv(find_dotenv())

In [2]:
import langchain
#langchain.debug = True

## Load Data

In [3]:
documents = []
doc_directory = 'data/test_documents'

for file in os.listdir(doc_directory):
    if file.endswith('.pdf'):
        pdf_path = f'./{doc_directory}/' + file
        print(f'Loading {pdf_path}')
        loader = UnstructuredPDFLoader(pdf_path)
        documents.extend(loader.load())
    elif file.endswith('.html'):
        doc_path = f'./{doc_directory}/' + file
        print(f'Loading {doc_path}')
        loader = UnstructuredHTMLLoader(doc_path)
        documents.extend(loader.load())

Loading ./data/test_documents/Extension of Deadlines MMDS.html
Loading ./data/test_documents/Extension of Deadlines Master Business Infromatics.html
Loading ./data/test_documents/General Questions MMDS.html
Loading ./data/test_documents/General Questions Master Business Informatics.html
Loading ./data/test_documents/Learning Agreements MMDS.html
Loading ./data/test_documents/Learning Agreements Master Business Informatics.html
Loading ./data/test_documents/MMDS info start page.html
Loading ./data/test_documents/Master Business Informatics info start page.html
Loading ./data/test_documents/Master Thesis Information.html
Loading ./data/test_documents/Modue_Catalog_MSc_Wifo_23_24.pdf
Loading ./data/test_documents/Module_Catalog_Appendix_MMDS_23_24.pdf
Loading ./data/test_documents/Module_Catalog_MMDS_23_24.pdf
Loading ./data/test_documents/PO_MMDS_20.pdf
Loading ./data/test_documents/PO_MSc_Wifo_18.pdf
Loading ./data/test_documents/Recognition of Coursework and Examinations MMDS.html
Load

In [4]:
# clean text from tab characters "\t"
for d in documents:
    d.page_content = d.page_content.replace("\t", "")

## Add Metadata

- source
- tag every document either with **general** or with relevant **studyprograms**
- (summary for documents)

In [5]:
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0125")

metadata_prompt = ChatPromptTemplate.from_template(
    """Extract relevant information from the following document.
The document is related to the University of Mannheim. Some documents are only relevant to a specific \
study program, while others provide general information about the University of several study \
programs at once (use the "general" tag). If you think the document is relevant to a specific study \
program which is not in the list use the "other" tag.

{input}
"""
)

schema = {
    "properties": {
        "study_program": {
            "type": "string",
            "enum": [
                "B.Sc. Business Informatics",
                "M.Sc. Business Informatics",
                "B.Sc. Mathematics in Business and Economics",
                "M.Sc. Mathematics in Business and Economics",
                "Mannheim Master in Data Science",
                "general",
                "other"
            ],
            "description": "The study program this document is relevant to"
        },
        "short_description": {
            "type": "string",
            "description": "A short summary that describes what information can be found in this document in at most 3 sentences"
        },
    },
    "required": ["study_program", "short_description"]
}

document_transformer = create_metadata_tagger(metadata_schema=schema, llm=llm, prompt=metadata_prompt)

  warn_deprecated(


In [6]:
# Prompt and method for converting Document -> str.
document_prompt = PromptTemplate.from_template("{page_content}")
partial_format_document = partial(format_document, prompt=document_prompt)


# A text splitter that recursively splits a document into multiple chunks until
# the maximum chunck size is below a predefined value (without overlap).

def get_num_tokens_single_doc(doc):
    return llm.get_num_tokens(partial_format_document(doc))

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 16000,
    chunk_overlap  = 0,
    length_function = llm.get_num_tokens,
    is_separator_regex = False,
)


# The chain we'll apply to each individual document.
# Returns a summary of the document.

map_chain = (
    {"context": partial_format_document}
    | PromptTemplate.from_template("Summarize this content:\n\n{context}")
    | llm
    | StrOutputParser()
)

# A wrapper chain to keep the original Document metadata
map_as_doc_chain = (
    RunnableParallel({"doc": RunnablePassthrough(), "content": map_chain})
    | (lambda x: Document(page_content=x["content"], metadata=x["doc"].metadata))
).with_config(run_name="Summarize (return doc)")


# The chain we'll repeatedly apply to collapse subsets of the documents
# into a consolidate document until the total token size of our
# documents is below some max size.

def format_docs(docs):
    return "\n\n".join(partial_format_document(doc) for doc in docs)

collapse_chain = (
    {"context": format_docs}
    | PromptTemplate.from_template("Collapse this content:\n\n{context}")
    | llm
    | StrOutputParser()
)

def get_num_tokens(docs):
    return llm.get_num_tokens(format_docs(docs))

def collapse(
    docs,
    config,
    token_max=16000,
):
    collapse_ct = 1
    while get_num_tokens(docs) > token_max:
        config["run_name"] = f"Collapse {collapse_ct}"
        invoke = partial(collapse_chain.invoke, config=config)
        split_docs = split_list_of_docs(docs, get_num_tokens, token_max)
        docs = [collapse_docs(_docs, invoke) for _docs in split_docs]
        collapse_ct += 1
    return docs


# The chain we'll use to combine our individual document summaries
# (or summaries over subset of documents if we had to collapse the map results)
# into a final summary.

reduce_chain = (
    {"context": format_docs}
    | PromptTemplate.from_template("Combine these summaries:\n\n{context}")
    | llm
    | StrOutputParser()
).with_config(run_name="Reduce")


# The final full chain for summarizing documents
map_reduce_summarizer = (text_splitter.split_documents | map_as_doc_chain.map() | collapse | reduce_chain).with_config(
    run_name="Map reduce"
)


def get_metadata(docs, metadata_tagger, summarizer, llm, max_tokens):
    document_prompt = PromptTemplate.from_template("{page_content}")
    partial_format_document = partial(format_document, prompt=document_prompt)
    docs_transformed = []
    for i, doc in enumerate(docs):
        print(f"Processing document {i}/{len(docs)-1}")
        # directly apply metadata tagger if number of tokens is below threshold
        if llm.get_num_tokens(partial_format_document(doc)) <= max_tokens:
            print(f"\tApply metadata tagger...")
            doc_w_metadata = metadata_tagger.transform_documents([doc])[0]
            docs_transformed.append(doc_w_metadata)
        # otherwise, summarize document first to get metadata
        else:
            print(f"\tSummarize document...")
            summary = summarizer.invoke([doc])
            summary_doc = Document(page_content=summary, metadata=doc.metadata)
            print(f"\tApply metadata tagger...")
            summary_w_metadata = metadata_tagger.transform_documents([summary_doc])[0]
            doc_w_metadata = Document(page_content=doc.page_content, metadata=summary_w_metadata.metadata)
            docs_transformed.append(doc_w_metadata)
    print("done!")
    return docs_transformed

In [7]:
documents_w_metadata = get_metadata(documents, metadata_tagger=document_transformer, summarizer=map_reduce_summarizer, llm=llm, max_tokens=16000)

Processing document 0/15
	Apply metadata tagger...
Processing document 1/15
	Apply metadata tagger...
Processing document 2/15
	Apply metadata tagger...
Processing document 3/15
	Apply metadata tagger...
Processing document 4/15
	Apply metadata tagger...
Processing document 5/15
	Apply metadata tagger...
Processing document 6/15
	Apply metadata tagger...
Processing document 7/15
	Apply metadata tagger...
Processing document 8/15
	Apply metadata tagger...
Processing document 9/15
	Summarize document...
	Apply metadata tagger...
Processing document 10/15
	Apply metadata tagger...
Processing document 11/15
	Apply metadata tagger...
Processing document 12/15
	Summarize document...
	Apply metadata tagger...
Processing document 13/15
	Summarize document...
	Apply metadata tagger...
Processing document 14/15
	Apply metadata tagger...
Processing document 15/15
	Apply metadata tagger...
done!


In [8]:
for i, d in enumerate(documents_w_metadata):
    print(f"### document {i} ###")
    for k, v in d.metadata.items():
        print(f"{k}: {v}")
    print()

### document 0 ###
study_program: Mannheim Master in Data Science
short_description: Information about requesting deadline extensions and the structure of the Mannheim Master in Data Science program.
source: ./data/test_documents/Extension of Deadlines MMDS.html

### document 1 ###
study_program: M.Sc. Business Informatics
short_description: Information about deadline extensions, degree plans, course schedules, examination regulations, and advisory services for the Master's program in Business Informatics at the University of Mannheim.
source: ./data/test_documents/Extension of Deadlines Master Business Infromatics.html

### document 2 ###
study_program: Mannheim Master in Data Science
short_description: This document provides information about the Mannheim Master in Data Science program at the University of Mannheim, including details on module changes, team projects, individual projects, studying abroad, degree plans, course schedules, examination regulations, the examination committ

## Split Documents and Create Embeddings

In [9]:
# split documents into text chunks

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunked_documents = text_splitter.split_documents(documents_w_metadata)

In [10]:
chunked_documents[1]

Document(page_content="How can I request a deadline extension?Extension requests must be submitted to the examination committee in writing. Please make sure to indicate your address, student ID number and e-mail address in case we need to contact you for follow-up questions. We also ask you to state the semester in which you started your master's program so that we know straight away which examination regulations apply to you.The examination regulations stipulate that students need to resit any examinations they fail or did not take due to illness at the earliest opportunity. If this is the case, you will be automatically registered by Student Services (mandatory registration). Further information is provided by Student Services.Your request must include a detailed justification explaining why you should be granted an extension of the applicable deadline.Please make sure to attach any documents justifying your claim (if applicable to the individual case). Documents of this kind may inc

In [11]:
for d in chunked_documents:
    d.page_content = f"""Study Program: {d.metadata['study_program']}
Short Description: {d.metadata['short_description']}
Content: {d.page_content}
------------------------
"""

In [13]:
print(chunked_documents[1].page_content)

Study Program: M.Sc. Business Informatics
Short Description: Information about the Master's Program in Business Informatics
Content: How can I request a deadline extension?Extension requests must be submitted to the examination committee in writing. Please make sure to indicate your address, student ID number and e-mail address in case we need to contact you for follow-up questions. We also ask you to state the semester in which you started your master's program so that we know straight away which examination regulations apply to you.The examination regulations stipulate that students need to resit any examinations they fail or did not take due to illness at the earliest opportunity. If this is the case, you will be automatically registered by Student Services (mandatory registration). Further information is provided by Student Services.Your request must include a detailed justification explaining why you should be granted an extension of the applicable deadline.Please make sure to att

In [14]:
# create chroma vector db with OpenAIEmbeddings

vectordb = Chroma.from_documents(
  chunked_documents,
  embedding=OpenAIEmbeddings(),
  persist_directory='./storage_langchain_w_metadata'
)
vectordb.persist()

  warn_deprecated(


In [15]:
qa_chain = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model='gpt-3.5-turbo', temperature=0),
    retriever=vectordb.as_retriever(search_kwargs={'k': 3}),
    return_source_documents=True
    #chain_type_kwargs={"prompt": custom_prompt}
)

## Read Questions and Answer

In [16]:
q = "Can I change any modules?"

r = qa_chain({'query': q})
print(r['result'])

# print('\nSources:')
# for source_doc in r['source_documents']:
#     print(source_doc)
#     print('====================\n')

  warn_deprecated(


[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "Can I change any modules?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Can I change any modules?",
  "context": "Study Program: Mannheim Master in Data Science\nShort Description: Information about module changes, team projects, individual projects, studying abroad, degree plans, course schedules, examination regulations, advisory service, and master's thesis.\nContent: Can I change any modules?\r\n\r\n\r\n\r\n\r\n\r\nPlease note that only students enrolled before spring\r\n2020 can change modules.\r\nRequests\r\nto the examination committee must be submitted in writing.\r\nPlease make\r\nsure to indicate your address, student ID number an

In [17]:
df_questions = pd.read_csv('TestQuestions.csv', delimiter=";", names=["Question", "Response"] )
questions = df_questions["Question"]

responses = []
counter = 0
for q in questions:
    print(f'q{counter} start')
    r = qa_chain({'query': q})['result']
    responses.append((q, r))
    print(f'q{counter} end')
    counter += 1

df_responses = pd.DataFrame(responses, columns=["Question", "Response"])

q0 start
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "Question"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Question",
  "context": "Study Program: M.Sc. Business Informatics\nShort Description: This document provides a comprehensive overview of the examination regulations for the Master's Program 'Business Informatics' at a university.\nContent: (5) 1When grading term papers or an equivalent, examination committee and examiners are authorized to use software to detect plagiarism. 2For the grading of their work, students shall submit a digital data file and a hard copy of their work to the examiners. 3In accordance with section 3 subsection 6 of the Data Protection Act of the Land of

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Can I change any modules?",
  "context": "Study Program: Mannheim Master in Data Science\nShort Description: Information about module changes, team projects, individual projects, studying abroad, degree plans, course schedules, examination regulations, advisory service, and master's thesis.\nContent: Can I change any modules?\r\n\r\n\r\n\r\n\r\n\r\nPlease note that only students enrolled before spring\r\n2020 can change modules.\r\nRequests\r\nto the examination committee must be submitted in writing.\r\nPlease make\r\nsure to indicate your address, student ID number and e-mail\r\naddress in\r\ncase we need to contact you for follow-up questions. We also ask\r\nyou to\r\nstate the semester in which you

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "How am I assigned a team project?",
  "context": "Study Program: Mannheim Master in Data Science\nShort Description: Information about module changes, team projects, individual projects, studying abroad, degree plans, course schedules, examination regulations, advisory service, and master's thesis.\nContent: Why am I still registered for the original exam after a\r\nmodule change?\r\n\r\n\r\n\r\n\r\n\r\nIn case you are still registered for the original module after\r\nthe exam registration period has ended, please contact Student Services I.\n\nHow am I assigned a team project? \r\n\r\n\r\n\r\n\r\n\r\nAt\r\nthe beginning of each semester, the Institute of Computer\r\nScience and\r\nBusiness Informatics

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Do I need to finish all of the exams before starting my master thesis?",
  "context": "Study Program: Mannheim Master in Data Science\nShort Description: Information about module changes, team projects, individual projects, studying abroad, degree plans, course schedules, examination regulations, advisory service, and master's thesis.\nContent: Do I need to finish all of my exams before starting my\r\nmaster's thesis? \r\n\r\n\r\n\r\n\r\n\r\nAlthough\r\nsome chairs might verify whether you have completed all your\r\nexaminations, you are formally only required to accumulate\r\nat least 60 ECTS\r\ncredits before starting your thesis. Nevertheless, we strongly\r\nadvise\r\nyou to finish all your examinat

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Is it necessary to complete all my exams prior to beginning my master's thesis?",
  "context": "Study Program: Mannheim Master in Data Science\nShort Description: Information about module changes, team projects, individual projects, studying abroad, degree plans, course schedules, examination regulations, advisory service, and master's thesis.\nContent: Do I need to finish all of my exams before starting my\r\nmaster's thesis? \r\n\r\n\r\n\r\n\r\n\r\nAlthough\r\nsome chairs might verify whether you have completed all your\r\nexaminations, you are formally only required to accumulate\r\nat least 60 ECTS\r\ncredits before starting your thesis. Nevertheless, we strongly\r\nadvise\r\nyou to finish all your

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Can I start my master thesis even though I haven't done all of my exams?",
  "context": "Study Program: Mannheim Master in Data Science\nShort Description: Information about module changes, team projects, individual projects, studying abroad, degree plans, course schedules, examination regulations, advisory service, and master's thesis.\nContent: Do I need to finish all of my exams before starting my\r\nmaster's thesis? \r\n\r\n\r\n\r\n\r\n\r\nAlthough\r\nsome chairs might verify whether you have completed all your\r\nexaminations, you are formally only required to accumulate\r\nat least 60 ECTS\r\ncredits before starting your thesis. Nevertheless, we strongly\r\nadvise\r\nyou to finish all your examin

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "How come I'm still enrolled in the initial exam despite switching modules?",
  "context": "Study Program: Mannheim Master in Data Science\nShort Description: Information about module changes, team projects, individual projects, studying abroad, degree plans, course schedules, examination regulations, advisory service, and master's thesis.\nContent: Why am I still registered for the original exam after a\r\nmodule change?\r\n\r\n\r\n\r\n\r\n\r\nIn case you are still registered for the original module after\r\nthe exam registration period has ended, please contact Student Services I.\n\nHow am I assigned a team project? \r\n\r\n\r\n\r\n\r\n\r\nAt\r\nthe beginning of each semester, the Institute of Comput

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "How can I cheat on the exam?",
  "context": "Study Program: M.Sc. Business Informatics\nShort Description: This document provides a comprehensive overview of the examination regulations for the Master's Program 'Business Informatics' at a university.\nContent: Part 4: Violation of the Examination Regulations\n\nSection 35 – Cheating and other Misconduct\n\n(1) 1If a student manipulates or tries to manipulate his or her examination by cheating or using unauthorized resources or by influencing the examiner or supervisor in favor of himself or herself or a third person, the respective examination or further examinations may be graded \"failed\" or the student may be excluded from the examination depending

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "What are the prerequisites in order to study the master of political science?",
  "context": "Study Program: Mannheim Master in Data Science\nShort Description: This document provides information on the examination regulations for the Master's Program Mannheim Master in Data Science (M.Sc.) at the University of Mannheim.\nContent: to deepen their qualifications and competencies after they completed a subject area and thus register for the other examinations in this subject area as elective examinations which they did not complete or fail yet if, at this point in time, they can still have further elective examinations recognized for the master's examination. 4Aside from the additional requirements for s

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "What is the worst course in the Master of Data Science Program?",
  "context": "Study Program: Mannheim Master in Data Science\nShort Description: Module catalog for the Mannheim Master in Data Science program\nContent: Lectures with exercises\n\nMathematics C\n\nMaster\n\n5\n\n28 hours lectures 122 hours self-studies\n\nReinforcement Learning\n\nImplementation of standard algorithms in reinforcement learning\n\nBandit algorithms (UCB) • TD algorithms (Q-learning, TD) • Policy gradient algorithms (SAC, PPO)\n\nMK1, M02, M03\n\nMF1, MF3 (cf, “Erläuterungen zu den Abkürzungen”)\n\nBlackboard, Slides\n\nOriginal articles\n\nLectures, programmig tasks\n\nwritten exam\n\n\n\n90 min\n\nEnglish\n\nirregular\

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "What are the prerequisites for the class Data Science in Action?",
  "context": "Study Program: Mannheim Master in Data Science\nShort Description: Information about recognition of coursework and examinations, required documents, and where to submit them.\nContent: support, over text analytics and natural language processing, to\r\nadvanced social science methods such as cross-sectional and\r\nlongitudinal\r\ndata analysis. The range of methodological courses is enhanced\r\nby\r\ncourses on optimization, visualization, mathematics and\r\ninformation, and\r\nalgebraic statistics.\r\nResponsible Data Science (3 – 10 ECTS)\r\nThere\r\nare great concerns about the use of data. A variety of\r\nstakeholders 

[36;1m[1;3m[llm/end][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [813ms] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "I don't know the answer to that question.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null
        },
        "type": "ChatGeneration",
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": "I don't know the answer to that question.",
            "additional_kwargs": {}
          }
        }
      }
    ]
  ],
  "llm_output": {
    "token_usage": {
      "prompt_tokens": 829,
      "completion_tokens": 10,
      "total_tokens": 839
    },
    "model_name": "gpt-3.5-turbo",
    "system_fingerprint": null
  },
  "run": null
}
[36;1m[1;3m[chain/en

[36;1m[1;3m[llm/end][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [762ms] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "Data Security and Privacy is offered in the Fall semester.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null
        },
        "type": "ChatGeneration",
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": "Data Security and Privacy is offered in the Fall semester.",
            "additional_kwargs": {}
          }
        }
      }
    ]
  ],
  "llm_output": {
    "token_usage": {
      "prompt_tokens": 785,
      "completion_tokens": 11,
      "total_tokens": 796
    },
    "model_name": "gpt-3.5-turbo",
    "system_fingerprint": null
  },
  "r

[36;1m[1;3m[llm/end][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [1.56s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "Yes, it is highly recommended to attend the web data integration lecture in the same semester as the web data integration project. The schedules of both courses are aligned to each other.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null
        },
        "type": "ChatGeneration",
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": "Yes, it is highly recommended to attend the web data integration lecture in the same semester as the web data integration project. The schedules of both courses are aligned to each other.",
            "additional_k

[36;1m[1;3m[llm/end][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [2.46s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "Yes, according to the provided information, you are required to earn at least 18 ECTS credits in the Fundamentals Business Administration subject area. These courses are designed to develop core competences needed as a business IT specialist and provide you with knowledge in areas such as Accounting and Taxation, Banking, Finance and Insurance, Management, Marketing, and Operations Management.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null
        },
        "type": "ChatGeneration",
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": "Yes, a

[36;1m[1;3m[llm/end][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [1.67s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "To recognize a module from your previous study, you can contact the chair offering the equivalent local course in your degree program. They will be able to provide you with information on whether the module can be directly recognized.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null
        },
        "type": "ChatGeneration",
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": "To recognize a module from your previous study, you can contact the chair offering the equivalent local course in your degree program. They will be able to provide you 

[36;1m[1;3m[llm/end][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [3.89s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "According to the provided information, if you are a student with a disability or chronic illness, you can request disadvantage compensation. The examination committee is obliged to take the recommendation of the Commissioner and Counselor for disabled students and students with chronic illnesses into account in their decision-making process. However, the specific exam conditions for dyslexic students are not mentioned in the given context. It would be best to consult the examination regulations or contact the Commissioner and Counselor for disabled students and students with chronic illnesses at your university for more information on the accommodations available for dyslexic students.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": nul

[36;1m[1;3m[llm/end][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [1.77s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "Based on the provided context, there is no specific information about studying more than one study program. It is recommended to contact the relevant department or student services at the University of Mannheim for more accurate and detailed information regarding studying multiple study programs.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null
        },
        "type": "ChatGeneration",
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": "Based on the provided context, there is no specific information about studying more than one study progra

[36;1m[1;3m[llm/end][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [6.34s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "The regular examination dates for oral examinations and similar forms of examinations are at the start of the semester break. The alternate examination dates are before the following lecture period begins or at the latest during the first weeks. There should be at least three weeks between announcing results from the examinations taken on the regular dates and the alternate examination dates. The alternate examination dates count for the semester in which the regular dates occurred. For supervised, written examinations, the regular examination dates are also at the start of the semester break, and the alternate examination dates are before the following lecture period begins or at the latest during the first week. The same three-week gap between announcing results applies, an

[36;1m[1;3m[llm/end][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [2.21s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "Based on the provided context, it is not clear whether the study program requires specific key qualifications. It is recommended to refer to the program's official documentation or contact the University of Mannheim for more information on the required qualifications for the M.Sc. Business Informatics program.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null
        },
        "type": "ChatGeneration",
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": "Based on the provided context, it is not clear whether the study program requires specific 

[36;1m[1;3m[llm/end][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [1.34s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "Die Information über das Semester, in dem Lineare Algebra 1 angeboten wird, ist nicht in den gegebenen Kontexten enthalten.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null
        },
        "type": "ChatGeneration",
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": "Die Information über das Semester, in dem Lineare Algebra 1 angeboten wird, ist nicht in den gegebenen Kontexten enthalten.",
            "additional_kwargs": {}
          }
        }
      }
    ]
  ],
  "llm_output": {
    "token_usage": {
      "prompt_tokens": 976,
      "co

[36;1m[1;3m[llm/end][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [1.78s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "Based on the provided information, it is not explicitly mentioned which courses from the Master of Data Science program have no prerequisites. Therefore, it is not possible to provide a list of courses that require no prerequisites.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null
        },
        "type": "ChatGeneration",
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": "Based on the provided information, it is not explicitly mentioned which courses from the Master of Data Science program have no prerequisites. Therefore, it is not possib

[36;1m[1;3m[llm/end][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [3.13s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "You can find information about studying abroad for a semester on the website of the School of Business Informatics and Mathematics and the University of Mannheim. They provide a list of partner universities where you can study abroad. You will need to verify if these universities offer courses related to data science. Additionally, you can find more helpful information on the web page of the International Office of the University of Mannheim. If you have any questions, you can contact Juliane Roth.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null
        },
        "type": "ChatGeneration",
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
      

[36;1m[1;3m[llm/end][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain > 5:llm:ChatOpenAI] [3.44s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "Yes, it is possible to request an extension for your degree if you are facing difficulties due to personal circumstances such as taking care of a family member. You will need to submit a written request to the examination committee, including a detailed justification explaining why you should be granted an extension of the applicable deadline. It is also recommended to attach any documents justifying your claim, such as medical certificates or official letters from public authorities/institutions. The examination committee will then consider your request and notify you of their decision.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null
        },
        "type": "ChatGeneration",
        "message": {
          "lc": 1,
          "ty

In [18]:
df_responses

Unnamed: 0,Question,Response
0,Question,What is the purpose of using plagiarism detection software in the examination regulations for the Master's Program 'Business Informatics'?
1,Can I change any modules?,"Yes, you can change modules. However, please note that the ability to change modules may depend on the specific study program and the semester in which you enrolled. It is recommended to check the examination regulations and contact the responsible examination committee for more information on how to request a module change."
2,How am I assigned a team project?,"At the beginning of each semester, the Institute of Computer Science and Business Informatics organizes a presentation session where all chairs offering team projects introduce their topics. After this session, you can register for a team project in Portal², indicating your preferences."
3,Do I need to finish all of the exams before starting my master thesis?,"Although some chairs might verify whether you have completed all your examinations, you are formally only required to accumulate at least 60 ECTS credits before starting your thesis. Nevertheless, we strongly advise you to finish all your examinations first. Writing a master's thesis is a full-time job, and it takes a lot of coordination and organization to take exams and write a thesis in parallel."
4,Is it necessary to complete all my exams prior to beginning my master's thesis?,"Although some chairs might verify whether you have completed all your examinations, you are formally only required to accumulate at least 60 ECTS credits before starting your thesis. Nevertheless, it is strongly advised to finish all your examinations first. Writing a master's thesis is a full-time job and requires a lot of coordination and organization, so it is recommended to focus solely on the thesis without the added pressure of exams."
5,Can I start my master thesis even though I haven't done all of my exams?,"Although some chairs might verify whether you have completed all your examinations, you are formally only required to accumulate at least 60 ECTS credits before starting your thesis. Nevertheless, we strongly advise you to finish all your examinations first. Writing a master's thesis is a full-time job, and it takes a lot of coordination and organization to take exams and write a thesis in parallel."
6,How come I'm still enrolled in the initial exam despite switching modules?,"According to the provided information, if you are still registered for the original module after the exam registration period has ended, you should contact Student Services I for assistance. They will be able to help you resolve the issue and update your registration for the correct module."
7,How can I cheat on the exam?,"I'm sorry, but I can't assist with that."
8,What are the prerequisites in order to study the master of political science?,"I'm sorry, but the provided context does not mention anything about the prerequisites for studying the Master of Political Science."
9,What is the worst course in the Master of Data Science Program?,"I'm sorry, but I don't have enough information to determine which course is considered the worst in the Master of Data Science Program."


### Save Responses

In [None]:
# df_responses.to_csv("test_responses_by_langchain_w_metadata.csv", sep=";")