# LangChain Chatbot - with Additional LLM-generated Metadata

In [1]:
import os
import pandas as pd
from dotenv import load_dotenv, find_dotenv
from functools import partial
from copy import deepcopy

from langchain.document_loaders import UnstructuredHTMLLoader, UnstructuredPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_community.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.chains.combine_documents import collapse_docs, split_list_of_docs
from langchain.chat_models import ChatOpenAI
from langchain.document_transformers.openai_functions import create_metadata_tagger
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.schema import Document, StrOutputParser
from langchain_core.prompts import format_document
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

from openai.error import InvalidRequestError

pd.set_option('display.max_colwidth', None)

_ = load_dotenv(find_dotenv())

In [2]:
# import langchain
#langchain.debug = True

In [3]:
import time

## Load Data

In [4]:
documents = []
doc_directory = 'data/scraped_data'

for file in os.listdir(doc_directory):
    if file.endswith('.pdf'):
        pdf_path = f'./{doc_directory}/' + file
        print(f'Loading {pdf_path}')
        loader = UnstructuredPDFLoader(pdf_path)
        documents.extend(loader.load())
    elif file.endswith('.html'):
        doc_path = f'./{doc_directory}/' + file
        print(f'Loading {doc_path}')
        loader = UnstructuredHTMLLoader(doc_path)
        documents.extend(loader.load())

Loading ./data/scraped_data/101123_MK_B.Sc._Wima_2023_2024._Appendix.pdf
Loading ./data/scraped_data/111www_uni-mannheim_de_en_academics_advice-and-services.html
Loading ./data/scraped_data/131123_MK_M.Sc._Wima___Mathe_2023_2024._Appendix.pdf
Loading ./data/scraped_data/160407_Praesentation_Wima_Master.pdf
Loading ./data/scraped_data/2019_MasterInfoPra__si.pdf
Loading ./data/scraped_data/20230329-Lesefassung_ErpSatzung_ePruefungen_veroeffentlicht.pdf
Loading ./data/scraped_data/310823_MK_M.Sc._Wima___Mathe__2023_24.pdf
Loading ./data/scraped_data/Aktualisiert_Stundenplan_MMDS_HWS2023.pdf
Loading ./data/scraped_data/Antrag_Anerkennung_B.Sc.Wifo_Stand_2023.pdf
Loading ./data/scraped_data/Antrag_Anerkennung_M.Sc.Wifo_MMDS_2023.pdf
Loading ./data/scraped_data/Antrag_auf_Rueckerstattung.pdf
Loading ./data/scraped_data/Antrag_auf_Verlaengerung_der_Bearbeitungszeit_fuer_die_Abschlussarbeit.pdf
Loading ./data/scraped_data/Antrag_Befreiung_neu.pdf
Loading ./data/scraped_data/Antrag_Modulwechsel

Loading ./data/scraped_data/Stundenplan_LAG_FSS20_Sem02.pdf
Loading ./data/scraped_data/Stundenplan_LAG_FSS20_Sem04.pdf
Loading ./data/scraped_data/Stundenplan_LAG_FSS20_Sem06.pdf
Loading ./data/scraped_data/Stundenplan_LAG_HWS22_Sem01.pdf
Loading ./data/scraped_data/Stundenplan_LAG_HWS22_Sem03_27092022.pdf
Loading ./data/scraped_data/Stundenplan_LAG_HWS22_Sem05_27092022.pdf
Loading ./data/scraped_data/Stundenplan_MSc_Wifo_HWS2023.pdf
Loading ./data/scraped_data/Stundenplan_Wifo_HWS23_Sem01.pdf
Loading ./data/scraped_data/Stundenplan_Wifo_HWS23_Sem03.pdf
Loading ./data/scraped_data/Stundenplan_Wifo_HWS23_Sem05.pdf
Loading ./data/scraped_data/Stundenplan_Wima_FSS23_Wahlpflichtveranstaltungen.pdf
Loading ./data/scraped_data/Stundenplan_Wima_HWS23_Wahlpflichtveranstaltungen.pdf
Loading ./data/scraped_data/stuo_mmm_doppelabschlussprogramm_2satzung_en.pdf
Loading ./data/scraped_data/unbedenklichkeitsbescheinigung.pdf
Loading ./data/scraped_data/unbedenklichkeitsbescheinigung_en.pdf
Loading 

Loading ./data/scraped_data/www.uni-mannheim.de_studium_im-studium_studienorganisation_immatrikulation.html
Loading ./data/scraped_data/www.uni-mannheim.de_studium_im-studium_studienorganisation_parallelstudium.html
Loading ./data/scraped_data/www.uni-mannheim.de_studium_im-studium_studienorganisation_rueckmeldung.html
Loading ./data/scraped_data/www.uni-mannheim.de_studium_termine_semesterzeiten.html
Loading ./data/scraped_data/www.uni-mannheim.de_studium_vom-ausland-nach-mannheim_internationale-vollzeitstudierende_vor-der-anreise_studiengebuehren-fuer-internationale-studierende.html
Loading ./data/scraped_data/www.wim.uni-mannheim.de_en_academics_contact-and-advising.html
Loading ./data/scraped_data/www.wim.uni-mannheim.de_en_academics_organizing-your-studies.html
Loading ./data/scraped_data/www.wim.uni-mannheim.de_en_academics_organizing-your-studies_bsc-business-informatics.html
Loading ./data/scraped_data/www.wim.uni-mannheim.de_en_academics_organizing-your-studies_bsc-business-in

In [5]:
# clean text from tab characters "\t"
for d in documents:
    d.page_content = d.page_content.replace("\t", "")

## Add Metadata

- source
- tag every document either with **general** or with relevant **studyprograms**
- (summary for documents)

In [6]:
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0125")

metadata_prompt = ChatPromptTemplate.from_template(
    """Extract relevant information from the following document.
The document is related to the University of Mannheim. Some documents are only relevant to a specific \
study program, while others provide general information about the University of several study \
programs at once (use the "general" tag). If you think the document is relevant to a specific study \
program which is not in the list use the "other" tag.

{input}
"""
)

schema = {
    "properties": {
        "study_program": {
            "type": "string",
            "enum": [
                "B.Sc. Business Informatics",
                "M.Sc. Business Informatics",
                "B.Sc. Mathematics in Business and Economics",
                "M.Sc. Mathematics in Business and Economics",
                "Mannheim Master in Data Science",
                "general",
                "other"
            ],
            "description": "The study program this document is relevant to"
        },
        "short_description": {
            "type": "string",
            "description": "A short summary that describes what information can be found in this document in at most 3 sentences"
        },
    },
    "required": ["study_program", "short_description"]
}

document_transformer = create_metadata_tagger(metadata_schema=schema, llm=llm, prompt=metadata_prompt)

  warn_deprecated(


In [7]:
# Prompt and method for converting Document -> str.
document_prompt = PromptTemplate.from_template("{page_content}")
partial_format_document = partial(format_document, prompt=document_prompt)


# A text splitter that recursively splits a document into multiple chunks until
# the maximum chunck size is below a predefined value (without overlap).

def get_num_tokens_single_doc(doc):
    return llm.get_num_tokens(partial_format_document(doc))

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 16000,
    chunk_overlap  = 0,
    length_function = llm.get_num_tokens,
    is_separator_regex = False,
)


# The chain we'll apply to each individual document.
# Returns a summary of the document.

map_chain = (
    {"context": partial_format_document}
    | PromptTemplate.from_template("Summarize this content:\n\n{context}")
    | llm
    | StrOutputParser()
)

# A wrapper chain to keep the original Document metadata
map_as_doc_chain = (
    RunnableParallel({"doc": RunnablePassthrough(), "content": map_chain})
    | (lambda x: Document(page_content=x["content"], metadata=x["doc"].metadata))
).with_config(run_name="Summarize (return doc)")


# The chain we'll repeatedly apply to collapse subsets of the documents
# into a consolidate document until the total token size of our
# documents is below some max size.

def format_docs(docs):
    return "\n\n".join(partial_format_document(doc) for doc in docs)

collapse_chain = (
    {"context": format_docs}
    | PromptTemplate.from_template("Collapse this content:\n\n{context}")
    | llm
    | StrOutputParser()
)

def get_num_tokens(docs):
    return llm.get_num_tokens(format_docs(docs))

def collapse(
    docs,
    config,
    token_max=16000,
):
    collapse_ct = 1
    while get_num_tokens(docs) > token_max:
        config["run_name"] = f"Collapse {collapse_ct}"
        invoke = partial(collapse_chain.invoke, config=config)
        split_docs = split_list_of_docs(docs, get_num_tokens, token_max)
        docs = [collapse_docs(_docs, invoke) for _docs in split_docs]
        collapse_ct += 1
    return docs


# The chain we'll use to combine our individual document summaries
# (or summaries over subset of documents if we had to collapse the map results)
# into a final summary.

reduce_chain = (
    {"context": format_docs}
    | PromptTemplate.from_template("Combine these summaries:\n\n{context}")
    | llm
    | StrOutputParser()
).with_config(run_name="Reduce")


# The final full chain for summarizing documents
map_reduce_summarizer = (text_splitter.split_documents | map_as_doc_chain.map() | collapse | reduce_chain).with_config(
    run_name="Map reduce"
)


def get_metadata(docs, docs_transformed, metadata_tagger, summarizer, llm, max_tokens):
    document_prompt = PromptTemplate.from_template("{page_content}")
    partial_format_document = partial(format_document, prompt=document_prompt)
    for i, doc in enumerate(docs):
        print(f"Processing document {i}/{len(docs)-1}")
        # directly apply metadata tagger if number of tokens is below threshold
        if llm.get_num_tokens(partial_format_document(doc)) <= max_tokens:
            print(f"\tApply metadata tagger...")
            doc_w_metadata = metadata_tagger.transform_documents([doc])[0]
            docs_transformed.append(doc_w_metadata)
        # otherwise, summarize document first to get metadata
        else:
            print(f"\tSummarize document...")
            
            # sleep if rate limit on tokens per minutes is reached
            try:
                summary = summarizer.invoke([doc])
            except:
                time.sleep(15)
            
            summary_doc = Document(page_content=summary, metadata=doc.metadata)
            print(f"\tApply metadata tagger...")
            summary_w_metadata = metadata_tagger.transform_documents([summary_doc])[0]
            doc_w_metadata = Document(page_content=doc.page_content, metadata=summary_w_metadata.metadata)
            docs_transformed.append(doc_w_metadata)
    print("done!")
    return docs_transformed

In [8]:
documents_w_metadata = []

In [12]:
documents_w_metadata = get_metadata(documents, documents_w_metadata, metadata_tagger=document_transformer, summarizer=map_reduce_summarizer, llm=llm, max_tokens=16000)

Processing document 0/261
	Apply metadata tagger...
Processing document 1/261
	Apply metadata tagger...
Processing document 2/261
	Apply metadata tagger...
Processing document 3/261
	Apply metadata tagger...
Processing document 4/261
	Apply metadata tagger...
Processing document 5/261
	Apply metadata tagger...


KeyboardInterrupt: 

In [34]:
len(documents_w_metadata)

261

In [14]:
len(documents)

262

In [None]:
documents[131]
# gave this error:
# JSONDecodeError: Expecting ',' delimiter

In [33]:
documents_w_metadata = get_metadata(documents[259:], documents_w_metadata, metadata_tagger=document_transformer, summarizer=map_reduce_summarizer, llm=llm, max_tokens=16000)

Processing document 0/2
	Apply metadata tagger...
Processing document 1/2
	Apply metadata tagger...
Processing document 2/2
	Apply metadata tagger...
done!


In [35]:
for i, d in enumerate(documents_w_metadata):
    print(f"### document {i} ###")
    for k, v in d.metadata.items():
        print(f"{k}: {v}")
    print()
    if i==10:
        break

### document 0 ###
study_program: B.Sc. Mathematics in Business and Economics
short_description: This document provides information about the module 'Elemente der Funktionentheorie' in the Bachelor of Science in Business Mathematics program at the University of Mannheim for the academic year HWS 2023 / FSS 2024.
source: ./data/scraped_data/101123_MK_B.Sc._Wima_2023_2024._Appendix.pdf

### document 1 ###
study_program: general
short_description: This document provides information about academic advice and services offered at the University of Mannheim, including support for prospective students, program ambassadors, learning and study techniques, financing options, changing programs, support for students with disabilities, family responsibilities, and queer students.
source: ./data/scraped_data/111www_uni-mannheim_de_en_academics_advice-and-services.html

### document 2 ###
study_program: M.Sc. Business Informatics
short_description: This document provides information on the Modulkatalo

In [44]:
len(documents_w_metadata)

261

In [45]:
documents_w_metadata[0]

Document(page_content='Bachelor of Science (B.Sc.)\n\n„Wirtschaftsmathematik“\n\nder Universität Mannheim\n\n– Modulkatalog –\n\nAppendix\n\nAkademisches Jahr\n\nHWS 2023 / FSS 2024\n\n1\n\nDie folgenden Veranstaltungen wurden nach Veröffentlichung des Modulkatalogs dem Kursprogramm hinzugefügt.\n\nModulnr\n\nName des Moduls\n\nSemester\n\nSprache\n\nMAA 409\n\nElemente der Funktionentheorie\n\nFSS\n\nDeutsch\n\nECTS\n\n5\n\nSeite\n\n3\n\n2\n\nMAA 409\n\nForm der Veranstaltung\n\nTyp der Veranstaltung\n\nModulniveau\n\nECTS\n\nArbeitsaufwand\n\nVorausgesetzte Kenntnisse\n\nLehrinhalte\n\nLern- und Kompetenzziele\n\nMedienformen\n\nBegleitende Literatur\n\nElemente der Funktionentheorie Introductory complex analysis\n\nVorlesung mit Übung\n\nWahlpflichtveranstaltung Mathematik A\n\nBachelor\n\n5\n\nPräsenzstudium: 42 h pro Semester (3 SWS)\n\nEigenstudium: 77 h pro Semester • davon Vor- und Nachbereitung der Veranstaltung und freies\n\nSelbststudium: 63 h pro Semester\n\ndavon Vorbereit

In [46]:
documents_w_metadata

[Document(page_content='Bachelor of Science (B.Sc.)\n\n„Wirtschaftsmathematik“\n\nder Universität Mannheim\n\n– Modulkatalog –\n\nAppendix\n\nAkademisches Jahr\n\nHWS 2023 / FSS 2024\n\n1\n\nDie folgenden Veranstaltungen wurden nach Veröffentlichung des Modulkatalogs dem Kursprogramm hinzugefügt.\n\nModulnr\n\nName des Moduls\n\nSemester\n\nSprache\n\nMAA 409\n\nElemente der Funktionentheorie\n\nFSS\n\nDeutsch\n\nECTS\n\n5\n\nSeite\n\n3\n\n2\n\nMAA 409\n\nForm der Veranstaltung\n\nTyp der Veranstaltung\n\nModulniveau\n\nECTS\n\nArbeitsaufwand\n\nVorausgesetzte Kenntnisse\n\nLehrinhalte\n\nLern- und Kompetenzziele\n\nMedienformen\n\nBegleitende Literatur\n\nElemente der Funktionentheorie Introductory complex analysis\n\nVorlesung mit Übung\n\nWahlpflichtveranstaltung Mathematik A\n\nBachelor\n\n5\n\nPräsenzstudium: 42 h pro Semester (3 SWS)\n\nEigenstudium: 77 h pro Semester • davon Vor- und Nachbereitung der Veranstaltung und freies\n\nSelbststudium: 63 h pro Semester\n\ndavon Vorberei

In [47]:
import pickle

In [48]:
file_path = "documents_w_metadata.pkl"

In [49]:
with open(file_path, "wb") as f:
    pickle.dump(documents_w_metadata, f)

## Split Documents and Create Embeddings

In [36]:
# split documents into text chunks

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunked_documents = text_splitter.split_documents(documents_w_metadata)

In [37]:
chunked_documents[1]

Document(page_content='Selbststudium: 63 h pro Semester\n\ndavon Vorbereitung für die Prüfung: 14 h pro Semester\n\nAnalysis I & II, Lineare Algebra I\n\nKomplexe Differenzierbarkeit • holomorphe und meromorphe Funktionen • Residuenkalkül\n\nFachkompetenz: • Wegintegrale im Komplexen (BK1) • Potenzreihenkalkül (BK1) • Fundamentalsatz der Algebra (BK1) • Cauchyscher Integralsatz und Integralformel (BF1, BK1) • Residuensatz (BK1, BO3)\n\nMethodenkompetenz: • Zusammenhang zwischen reeller und komplexer\n\nDifferenzierbarkeit (BF1, BO2) • Berechnen von Residuen (BO3) • Berechnen von reellen Integralen mit dem Residuensatz (BF1,\n\nBO3)\n\nVerständnis von lokalen Eigenschaften holomorpher Funktionen (BF1, BO2)\n\nPersonale Kompetenz: • Teamarbeit (BF4)\n\nPräsentationen mit Tafelanschrieb, Beamer und Folien', metadata={'study_program': 'B.Sc. Mathematics in Business and Economics', 'short_description': "This document provides information about the module 'Elemente der Funktionentheorie' in 

In [38]:
for d in chunked_documents:
    d.page_content = f"""Study Program: {d.metadata['study_program']}
Short Description: {d.metadata['short_description']}
Content: {d.page_content}
------------------------
"""

In [39]:
print(chunked_documents[1].page_content)

Study Program: B.Sc. Mathematics in Business and Economics
Short Description: This document provides information about the module 'Elemente der Funktionentheorie' in the Bachelor of Science in Business Mathematics program at the University of Mannheim for the academic year HWS 2023 / FSS 2024.
Content: Selbststudium: 63 h pro Semester

davon Vorbereitung für die Prüfung: 14 h pro Semester

Analysis I & II, Lineare Algebra I

Komplexe Differenzierbarkeit • holomorphe und meromorphe Funktionen • Residuenkalkül

Fachkompetenz: • Wegintegrale im Komplexen (BK1) • Potenzreihenkalkül (BK1) • Fundamentalsatz der Algebra (BK1) • Cauchyscher Integralsatz und Integralformel (BF1, BK1) • Residuensatz (BK1, BO3)

Methodenkompetenz: • Zusammenhang zwischen reeller und komplexer

Differenzierbarkeit (BF1, BO2) • Berechnen von Residuen (BO3) • Berechnen von reellen Integralen mit dem Residuensatz (BF1,

BO3)

Verständnis von lokalen Eigenschaften holomorpher Funktionen (BF1, BO2)

Personale Kompetenz

In [40]:
# # create chroma vector db with OpenAIEmbeddings

# vectordb = Chroma.from_documents(
#   chunked_documents,
#   embedding=OpenAIEmbeddings(),
#   persist_directory='./storage_scaled_w_metadata_2'
# )


  warn_deprecated(
Retrying langchain_community.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for text-embedding-ada-002 in organization org-pWqF0162ShAmQYFRz23kiL1H on tokens per min (TPM): Limit 1000000, Used 787943, Requested 306141. Please try again in 5.645s. Visit https://platform.openai.com/account/rate-limits to learn more..


RateLimitError: Rate limit reached for text-embedding-ada-002 in organization org-pWqF0162ShAmQYFRz23kiL1H on tokens per min (TPM): Limit 1000000, Used 694144, Requested 306141. Please try again in 17ms. Visit https://platform.openai.com/account/rate-limits to learn more.

In [41]:
persist_directory='./storage_scaled_w_metadata_2'

vectordb = Chroma(
    persist_directory=persist_directory, 
    embedding_function=OpenAIEmbeddings()
)

In [42]:
total_len = len(chunked_documents)

for i, d in enumerate(chunked_documents):
    print(f'embedding chunk {i}/{total_len}')
    try:
        vectordb.add_documents([d])
    except:
        print("waiting 15s")
        time.sleep(15)
        vectordb.add_documents([d])

embedding chunk 0/6715
embedding chunk 1/6715
embedding chunk 2/6715
embedding chunk 3/6715
embedding chunk 4/6715
embedding chunk 5/6715
embedding chunk 6/6715
embedding chunk 7/6715
embedding chunk 8/6715
embedding chunk 9/6715
embedding chunk 10/6715
embedding chunk 11/6715
embedding chunk 12/6715
embedding chunk 13/6715
embedding chunk 14/6715
embedding chunk 15/6715
embedding chunk 16/6715
embedding chunk 17/6715
embedding chunk 18/6715
embedding chunk 19/6715
embedding chunk 20/6715
embedding chunk 21/6715
embedding chunk 22/6715
embedding chunk 23/6715
embedding chunk 24/6715
embedding chunk 25/6715
embedding chunk 26/6715
embedding chunk 27/6715
embedding chunk 28/6715
embedding chunk 29/6715
embedding chunk 30/6715
embedding chunk 31/6715
embedding chunk 32/6715
embedding chunk 33/6715
embedding chunk 34/6715
embedding chunk 35/6715
embedding chunk 36/6715
embedding chunk 37/6715
embedding chunk 38/6715
embedding chunk 39/6715
embedding chunk 40/6715
embedding chunk 41/6715
em

embedding chunk 333/6715
embedding chunk 334/6715
embedding chunk 335/6715
embedding chunk 336/6715
embedding chunk 337/6715
embedding chunk 338/6715
embedding chunk 339/6715
embedding chunk 340/6715
embedding chunk 341/6715
embedding chunk 342/6715
embedding chunk 343/6715
embedding chunk 344/6715
embedding chunk 345/6715
embedding chunk 346/6715
embedding chunk 347/6715
embedding chunk 348/6715
embedding chunk 349/6715
embedding chunk 350/6715
embedding chunk 351/6715
embedding chunk 352/6715
embedding chunk 353/6715
embedding chunk 354/6715
embedding chunk 355/6715
embedding chunk 356/6715
embedding chunk 357/6715
embedding chunk 358/6715
embedding chunk 359/6715
embedding chunk 360/6715
embedding chunk 361/6715
embedding chunk 362/6715
embedding chunk 363/6715
embedding chunk 364/6715
embedding chunk 365/6715
embedding chunk 366/6715
embedding chunk 367/6715
embedding chunk 368/6715
embedding chunk 369/6715
embedding chunk 370/6715
embedding chunk 371/6715
embedding chunk 372/6715


embedding chunk 661/6715
embedding chunk 662/6715
embedding chunk 663/6715
embedding chunk 664/6715
embedding chunk 665/6715
embedding chunk 666/6715
embedding chunk 667/6715
embedding chunk 668/6715
embedding chunk 669/6715
embedding chunk 670/6715
embedding chunk 671/6715
embedding chunk 672/6715
embedding chunk 673/6715
embedding chunk 674/6715
embedding chunk 675/6715
embedding chunk 676/6715
embedding chunk 677/6715
embedding chunk 678/6715
embedding chunk 679/6715
embedding chunk 680/6715
embedding chunk 681/6715
embedding chunk 682/6715
embedding chunk 683/6715
embedding chunk 684/6715
embedding chunk 685/6715
embedding chunk 686/6715
embedding chunk 687/6715
embedding chunk 688/6715
embedding chunk 689/6715
embedding chunk 690/6715
embedding chunk 691/6715
embedding chunk 692/6715
embedding chunk 693/6715
embedding chunk 694/6715
embedding chunk 695/6715
embedding chunk 696/6715
embedding chunk 697/6715
embedding chunk 698/6715
embedding chunk 699/6715
embedding chunk 700/6715


Retrying langchain_community.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised APIError: Bad gateway. {"error":{"code":502,"message":"Bad gateway.","param":null,"type":"cf_bad_gateway"}} 502 {'error': {'code': 502, 'message': 'Bad gateway.', 'param': None, 'type': 'cf_bad_gateway'}} {'Date': 'Thu, 15 Feb 2024 18:10:26 GMT', 'Content-Type': 'application/json', 'Content-Length': '84', 'Connection': 'keep-alive', 'X-Frame-Options': 'SAMEORIGIN', 'Referrer-Policy': 'same-origin', 'Cache-Control': 'private, max-age=0, no-store, no-cache, must-revalidate, post-check=0, pre-check=0', 'Expires': 'Thu, 01 Jan 1970 00:00:01 GMT', 'Server': 'cloudflare', 'CF-RAY': '855f81e68c285d8c-FRA', 'alt-svc': 'h3=":443"; ma=86400'}.


embedding chunk 828/6715
embedding chunk 829/6715
embedding chunk 830/6715
embedding chunk 831/6715
embedding chunk 832/6715
embedding chunk 833/6715
embedding chunk 834/6715
embedding chunk 835/6715
embedding chunk 836/6715
embedding chunk 837/6715
embedding chunk 838/6715
embedding chunk 839/6715
embedding chunk 840/6715
embedding chunk 841/6715
embedding chunk 842/6715
embedding chunk 843/6715
embedding chunk 844/6715
embedding chunk 845/6715
embedding chunk 846/6715
embedding chunk 847/6715
embedding chunk 848/6715
embedding chunk 849/6715
embedding chunk 850/6715
embedding chunk 851/6715
embedding chunk 852/6715
embedding chunk 853/6715
embedding chunk 854/6715
embedding chunk 855/6715
embedding chunk 856/6715
embedding chunk 857/6715
embedding chunk 858/6715
embedding chunk 859/6715
embedding chunk 860/6715
embedding chunk 861/6715
embedding chunk 862/6715
embedding chunk 863/6715
embedding chunk 864/6715
embedding chunk 865/6715
embedding chunk 866/6715
embedding chunk 867/6715


embedding chunk 1150/6715
embedding chunk 1151/6715
embedding chunk 1152/6715
embedding chunk 1153/6715
embedding chunk 1154/6715
embedding chunk 1155/6715
embedding chunk 1156/6715
embedding chunk 1157/6715
embedding chunk 1158/6715
embedding chunk 1159/6715
embedding chunk 1160/6715
embedding chunk 1161/6715
embedding chunk 1162/6715
embedding chunk 1163/6715
embedding chunk 1164/6715
embedding chunk 1165/6715
embedding chunk 1166/6715
embedding chunk 1167/6715
embedding chunk 1168/6715
embedding chunk 1169/6715
embedding chunk 1170/6715
embedding chunk 1171/6715
embedding chunk 1172/6715
embedding chunk 1173/6715
embedding chunk 1174/6715
embedding chunk 1175/6715
embedding chunk 1176/6715
embedding chunk 1177/6715
embedding chunk 1178/6715
embedding chunk 1179/6715
embedding chunk 1180/6715
embedding chunk 1181/6715
embedding chunk 1182/6715
embedding chunk 1183/6715
embedding chunk 1184/6715
embedding chunk 1185/6715
embedding chunk 1186/6715
embedding chunk 1187/6715
embedding ch

embedding chunk 1466/6715
embedding chunk 1467/6715
embedding chunk 1468/6715
embedding chunk 1469/6715
embedding chunk 1470/6715
embedding chunk 1471/6715
embedding chunk 1472/6715
embedding chunk 1473/6715
embedding chunk 1474/6715
embedding chunk 1475/6715
embedding chunk 1476/6715
embedding chunk 1477/6715
embedding chunk 1478/6715
embedding chunk 1479/6715
embedding chunk 1480/6715
embedding chunk 1481/6715
embedding chunk 1482/6715
embedding chunk 1483/6715
embedding chunk 1484/6715
embedding chunk 1485/6715
embedding chunk 1486/6715
embedding chunk 1487/6715
embedding chunk 1488/6715
embedding chunk 1489/6715
embedding chunk 1490/6715
embedding chunk 1491/6715
embedding chunk 1492/6715
embedding chunk 1493/6715
embedding chunk 1494/6715
embedding chunk 1495/6715
embedding chunk 1496/6715
embedding chunk 1497/6715
embedding chunk 1498/6715
embedding chunk 1499/6715
embedding chunk 1500/6715
embedding chunk 1501/6715
embedding chunk 1502/6715
embedding chunk 1503/6715
embedding ch

embedding chunk 1782/6715
embedding chunk 1783/6715
embedding chunk 1784/6715
embedding chunk 1785/6715
embedding chunk 1786/6715
embedding chunk 1787/6715
embedding chunk 1788/6715
embedding chunk 1789/6715
embedding chunk 1790/6715
embedding chunk 1791/6715
embedding chunk 1792/6715
embedding chunk 1793/6715
embedding chunk 1794/6715
embedding chunk 1795/6715
embedding chunk 1796/6715
embedding chunk 1797/6715
embedding chunk 1798/6715
embedding chunk 1799/6715
embedding chunk 1800/6715
embedding chunk 1801/6715
embedding chunk 1802/6715
embedding chunk 1803/6715
embedding chunk 1804/6715
embedding chunk 1805/6715
embedding chunk 1806/6715
embedding chunk 1807/6715
embedding chunk 1808/6715
embedding chunk 1809/6715
embedding chunk 1810/6715
embedding chunk 1811/6715
embedding chunk 1812/6715
embedding chunk 1813/6715
embedding chunk 1814/6715
embedding chunk 1815/6715
embedding chunk 1816/6715
embedding chunk 1817/6715
embedding chunk 1818/6715
embedding chunk 1819/6715
embedding ch

embedding chunk 2098/6715
embedding chunk 2099/6715
embedding chunk 2100/6715
embedding chunk 2101/6715
embedding chunk 2102/6715
embedding chunk 2103/6715
embedding chunk 2104/6715
embedding chunk 2105/6715
embedding chunk 2106/6715
embedding chunk 2107/6715
embedding chunk 2108/6715
embedding chunk 2109/6715
embedding chunk 2110/6715
embedding chunk 2111/6715
embedding chunk 2112/6715
embedding chunk 2113/6715
embedding chunk 2114/6715
embedding chunk 2115/6715
embedding chunk 2116/6715
embedding chunk 2117/6715
embedding chunk 2118/6715
embedding chunk 2119/6715
embedding chunk 2120/6715
embedding chunk 2121/6715
embedding chunk 2122/6715
embedding chunk 2123/6715
embedding chunk 2124/6715
embedding chunk 2125/6715
embedding chunk 2126/6715
embedding chunk 2127/6715
embedding chunk 2128/6715
embedding chunk 2129/6715
embedding chunk 2130/6715
embedding chunk 2131/6715
embedding chunk 2132/6715
embedding chunk 2133/6715
embedding chunk 2134/6715
embedding chunk 2135/6715
embedding ch

embedding chunk 2414/6715
embedding chunk 2415/6715
embedding chunk 2416/6715
embedding chunk 2417/6715
embedding chunk 2418/6715
embedding chunk 2419/6715
embedding chunk 2420/6715
embedding chunk 2421/6715
embedding chunk 2422/6715
embedding chunk 2423/6715
embedding chunk 2424/6715
embedding chunk 2425/6715
embedding chunk 2426/6715
embedding chunk 2427/6715
embedding chunk 2428/6715
embedding chunk 2429/6715
embedding chunk 2430/6715
embedding chunk 2431/6715
embedding chunk 2432/6715
embedding chunk 2433/6715
embedding chunk 2434/6715
embedding chunk 2435/6715
embedding chunk 2436/6715
embedding chunk 2437/6715
embedding chunk 2438/6715
embedding chunk 2439/6715
embedding chunk 2440/6715
embedding chunk 2441/6715
embedding chunk 2442/6715
embedding chunk 2443/6715
embedding chunk 2444/6715
embedding chunk 2445/6715
embedding chunk 2446/6715
embedding chunk 2447/6715
embedding chunk 2448/6715
embedding chunk 2449/6715
embedding chunk 2450/6715
embedding chunk 2451/6715
embedding ch

embedding chunk 2730/6715
embedding chunk 2731/6715
embedding chunk 2732/6715
embedding chunk 2733/6715
embedding chunk 2734/6715
embedding chunk 2735/6715
embedding chunk 2736/6715
embedding chunk 2737/6715
embedding chunk 2738/6715
embedding chunk 2739/6715
embedding chunk 2740/6715
embedding chunk 2741/6715
embedding chunk 2742/6715
embedding chunk 2743/6715
embedding chunk 2744/6715
embedding chunk 2745/6715
embedding chunk 2746/6715
embedding chunk 2747/6715
embedding chunk 2748/6715
embedding chunk 2749/6715
embedding chunk 2750/6715
embedding chunk 2751/6715
embedding chunk 2752/6715
embedding chunk 2753/6715
embedding chunk 2754/6715
embedding chunk 2755/6715
embedding chunk 2756/6715
embedding chunk 2757/6715
embedding chunk 2758/6715
embedding chunk 2759/6715
embedding chunk 2760/6715
embedding chunk 2761/6715
embedding chunk 2762/6715
embedding chunk 2763/6715
embedding chunk 2764/6715
embedding chunk 2765/6715
embedding chunk 2766/6715
embedding chunk 2767/6715
embedding ch

embedding chunk 3046/6715
embedding chunk 3047/6715
embedding chunk 3048/6715
embedding chunk 3049/6715
embedding chunk 3050/6715
embedding chunk 3051/6715
embedding chunk 3052/6715
embedding chunk 3053/6715
embedding chunk 3054/6715
embedding chunk 3055/6715
embedding chunk 3056/6715
embedding chunk 3057/6715
embedding chunk 3058/6715
embedding chunk 3059/6715
embedding chunk 3060/6715
embedding chunk 3061/6715
embedding chunk 3062/6715
embedding chunk 3063/6715
embedding chunk 3064/6715
embedding chunk 3065/6715
embedding chunk 3066/6715
embedding chunk 3067/6715
embedding chunk 3068/6715
embedding chunk 3069/6715
embedding chunk 3070/6715
embedding chunk 3071/6715
embedding chunk 3072/6715
embedding chunk 3073/6715
embedding chunk 3074/6715
embedding chunk 3075/6715
embedding chunk 3076/6715
embedding chunk 3077/6715
embedding chunk 3078/6715
embedding chunk 3079/6715
embedding chunk 3080/6715
embedding chunk 3081/6715
embedding chunk 3082/6715
embedding chunk 3083/6715
embedding ch

embedding chunk 3362/6715
embedding chunk 3363/6715
embedding chunk 3364/6715
embedding chunk 3365/6715
embedding chunk 3366/6715
embedding chunk 3367/6715
embedding chunk 3368/6715
embedding chunk 3369/6715
embedding chunk 3370/6715
embedding chunk 3371/6715
embedding chunk 3372/6715
embedding chunk 3373/6715
embedding chunk 3374/6715
embedding chunk 3375/6715
embedding chunk 3376/6715
embedding chunk 3377/6715
embedding chunk 3378/6715
embedding chunk 3379/6715
embedding chunk 3380/6715
embedding chunk 3381/6715
embedding chunk 3382/6715
embedding chunk 3383/6715
embedding chunk 3384/6715
embedding chunk 3385/6715
embedding chunk 3386/6715
embedding chunk 3387/6715
embedding chunk 3388/6715
embedding chunk 3389/6715
embedding chunk 3390/6715
embedding chunk 3391/6715
embedding chunk 3392/6715
embedding chunk 3393/6715
embedding chunk 3394/6715
embedding chunk 3395/6715
embedding chunk 3396/6715
embedding chunk 3397/6715
embedding chunk 3398/6715
embedding chunk 3399/6715
embedding ch

embedding chunk 3678/6715
embedding chunk 3679/6715
embedding chunk 3680/6715
embedding chunk 3681/6715
embedding chunk 3682/6715
embedding chunk 3683/6715
embedding chunk 3684/6715
embedding chunk 3685/6715
embedding chunk 3686/6715
embedding chunk 3687/6715
embedding chunk 3688/6715
embedding chunk 3689/6715
embedding chunk 3690/6715
embedding chunk 3691/6715
embedding chunk 3692/6715
embedding chunk 3693/6715
embedding chunk 3694/6715
embedding chunk 3695/6715
embedding chunk 3696/6715
embedding chunk 3697/6715
embedding chunk 3698/6715
embedding chunk 3699/6715
embedding chunk 3700/6715
embedding chunk 3701/6715
embedding chunk 3702/6715
embedding chunk 3703/6715
embedding chunk 3704/6715
embedding chunk 3705/6715
embedding chunk 3706/6715
embedding chunk 3707/6715
embedding chunk 3708/6715
embedding chunk 3709/6715
embedding chunk 3710/6715
embedding chunk 3711/6715
embedding chunk 3712/6715
embedding chunk 3713/6715
embedding chunk 3714/6715
embedding chunk 3715/6715
embedding ch

Retrying langchain_community.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised APIError: The server had an error while processing your request. Sorry about that! You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID req_af30f6c0b686079809d23b7dcf0d119d in your message.) {
  "error": {
    "message": "The server had an error while processing your request. Sorry about that! You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID req_af30f6c0b686079809d23b7dcf0d119d in your message.)",
    "type": "server_error",
    "param": null,
    "code": null
  }
}
 500 {'error': {'message': 'The server had an error while processing your request. Sorry about that! You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the reque

embedding chunk 3847/6715
embedding chunk 3848/6715
embedding chunk 3849/6715
embedding chunk 3850/6715
embedding chunk 3851/6715
embedding chunk 3852/6715
embedding chunk 3853/6715
embedding chunk 3854/6715
embedding chunk 3855/6715
embedding chunk 3856/6715
embedding chunk 3857/6715
embedding chunk 3858/6715
embedding chunk 3859/6715
embedding chunk 3860/6715
embedding chunk 3861/6715
embedding chunk 3862/6715
embedding chunk 3863/6715
embedding chunk 3864/6715
embedding chunk 3865/6715
embedding chunk 3866/6715
embedding chunk 3867/6715
embedding chunk 3868/6715
embedding chunk 3869/6715
embedding chunk 3870/6715
embedding chunk 3871/6715
embedding chunk 3872/6715
embedding chunk 3873/6715
embedding chunk 3874/6715
embedding chunk 3875/6715
embedding chunk 3876/6715
embedding chunk 3877/6715
embedding chunk 3878/6715
embedding chunk 3879/6715
embedding chunk 3880/6715
embedding chunk 3881/6715
embedding chunk 3882/6715
embedding chunk 3883/6715
embedding chunk 3884/6715
embedding ch

embedding chunk 4163/6715
embedding chunk 4164/6715
embedding chunk 4165/6715
embedding chunk 4166/6715
embedding chunk 4167/6715
embedding chunk 4168/6715
embedding chunk 4169/6715
embedding chunk 4170/6715
embedding chunk 4171/6715
embedding chunk 4172/6715
embedding chunk 4173/6715
embedding chunk 4174/6715
embedding chunk 4175/6715
embedding chunk 4176/6715
embedding chunk 4177/6715
embedding chunk 4178/6715
embedding chunk 4179/6715
embedding chunk 4180/6715
embedding chunk 4181/6715
embedding chunk 4182/6715
embedding chunk 4183/6715
embedding chunk 4184/6715
embedding chunk 4185/6715
embedding chunk 4186/6715
embedding chunk 4187/6715
embedding chunk 4188/6715
embedding chunk 4189/6715
embedding chunk 4190/6715
embedding chunk 4191/6715
embedding chunk 4192/6715
embedding chunk 4193/6715
embedding chunk 4194/6715
embedding chunk 4195/6715
embedding chunk 4196/6715
embedding chunk 4197/6715
embedding chunk 4198/6715
embedding chunk 4199/6715
embedding chunk 4200/6715
embedding ch

embedding chunk 4479/6715
embedding chunk 4480/6715
embedding chunk 4481/6715
embedding chunk 4482/6715
embedding chunk 4483/6715
embedding chunk 4484/6715
embedding chunk 4485/6715
embedding chunk 4486/6715
embedding chunk 4487/6715
embedding chunk 4488/6715
embedding chunk 4489/6715
embedding chunk 4490/6715
embedding chunk 4491/6715
embedding chunk 4492/6715
embedding chunk 4493/6715
embedding chunk 4494/6715
embedding chunk 4495/6715
embedding chunk 4496/6715
embedding chunk 4497/6715
embedding chunk 4498/6715
embedding chunk 4499/6715
embedding chunk 4500/6715
embedding chunk 4501/6715
embedding chunk 4502/6715
embedding chunk 4503/6715
embedding chunk 4504/6715
embedding chunk 4505/6715
embedding chunk 4506/6715
embedding chunk 4507/6715
embedding chunk 4508/6715
embedding chunk 4509/6715
embedding chunk 4510/6715
embedding chunk 4511/6715
embedding chunk 4512/6715
embedding chunk 4513/6715
embedding chunk 4514/6715
embedding chunk 4515/6715
embedding chunk 4516/6715
embedding ch

embedding chunk 4795/6715
embedding chunk 4796/6715
embedding chunk 4797/6715
embedding chunk 4798/6715
embedding chunk 4799/6715
embedding chunk 4800/6715
embedding chunk 4801/6715
embedding chunk 4802/6715
embedding chunk 4803/6715
embedding chunk 4804/6715
embedding chunk 4805/6715
embedding chunk 4806/6715
embedding chunk 4807/6715
embedding chunk 4808/6715
embedding chunk 4809/6715
embedding chunk 4810/6715
embedding chunk 4811/6715
embedding chunk 4812/6715
embedding chunk 4813/6715
embedding chunk 4814/6715
embedding chunk 4815/6715
embedding chunk 4816/6715
embedding chunk 4817/6715
embedding chunk 4818/6715
embedding chunk 4819/6715
embedding chunk 4820/6715
embedding chunk 4821/6715
embedding chunk 4822/6715
embedding chunk 4823/6715
embedding chunk 4824/6715
embedding chunk 4825/6715
embedding chunk 4826/6715
embedding chunk 4827/6715
embedding chunk 4828/6715
embedding chunk 4829/6715
embedding chunk 4830/6715
embedding chunk 4831/6715
embedding chunk 4832/6715
embedding ch

embedding chunk 5111/6715
embedding chunk 5112/6715
embedding chunk 5113/6715
embedding chunk 5114/6715
embedding chunk 5115/6715
embedding chunk 5116/6715
embedding chunk 5117/6715
embedding chunk 5118/6715
embedding chunk 5119/6715
embedding chunk 5120/6715
embedding chunk 5121/6715
embedding chunk 5122/6715
embedding chunk 5123/6715
embedding chunk 5124/6715
embedding chunk 5125/6715
embedding chunk 5126/6715
embedding chunk 5127/6715
embedding chunk 5128/6715
embedding chunk 5129/6715
embedding chunk 5130/6715
embedding chunk 5131/6715
embedding chunk 5132/6715
embedding chunk 5133/6715
embedding chunk 5134/6715
embedding chunk 5135/6715
embedding chunk 5136/6715
embedding chunk 5137/6715
embedding chunk 5138/6715
embedding chunk 5139/6715
embedding chunk 5140/6715
embedding chunk 5141/6715
embedding chunk 5142/6715
embedding chunk 5143/6715
embedding chunk 5144/6715
embedding chunk 5145/6715
embedding chunk 5146/6715
embedding chunk 5147/6715
embedding chunk 5148/6715
embedding ch

embedding chunk 5427/6715
embedding chunk 5428/6715
embedding chunk 5429/6715
embedding chunk 5430/6715
embedding chunk 5431/6715
embedding chunk 5432/6715
embedding chunk 5433/6715
embedding chunk 5434/6715
embedding chunk 5435/6715
embedding chunk 5436/6715
embedding chunk 5437/6715
embedding chunk 5438/6715
embedding chunk 5439/6715
embedding chunk 5440/6715
embedding chunk 5441/6715
embedding chunk 5442/6715
embedding chunk 5443/6715
embedding chunk 5444/6715
embedding chunk 5445/6715
embedding chunk 5446/6715
embedding chunk 5447/6715
embedding chunk 5448/6715
embedding chunk 5449/6715
embedding chunk 5450/6715
embedding chunk 5451/6715
embedding chunk 5452/6715
embedding chunk 5453/6715
embedding chunk 5454/6715
embedding chunk 5455/6715
embedding chunk 5456/6715
embedding chunk 5457/6715
embedding chunk 5458/6715
embedding chunk 5459/6715
embedding chunk 5460/6715
embedding chunk 5461/6715
embedding chunk 5462/6715
embedding chunk 5463/6715
embedding chunk 5464/6715
embedding ch

embedding chunk 5743/6715
embedding chunk 5744/6715
embedding chunk 5745/6715
embedding chunk 5746/6715
embedding chunk 5747/6715
embedding chunk 5748/6715
embedding chunk 5749/6715
embedding chunk 5750/6715
embedding chunk 5751/6715
embedding chunk 5752/6715
embedding chunk 5753/6715
embedding chunk 5754/6715
embedding chunk 5755/6715
embedding chunk 5756/6715
embedding chunk 5757/6715
embedding chunk 5758/6715
embedding chunk 5759/6715
embedding chunk 5760/6715
embedding chunk 5761/6715
embedding chunk 5762/6715
embedding chunk 5763/6715
embedding chunk 5764/6715
embedding chunk 5765/6715
embedding chunk 5766/6715
embedding chunk 5767/6715
embedding chunk 5768/6715
embedding chunk 5769/6715
embedding chunk 5770/6715
embedding chunk 5771/6715
embedding chunk 5772/6715
embedding chunk 5773/6715
embedding chunk 5774/6715
embedding chunk 5775/6715
embedding chunk 5776/6715
embedding chunk 5777/6715
embedding chunk 5778/6715
embedding chunk 5779/6715
embedding chunk 5780/6715
embedding ch

embedding chunk 6059/6715
embedding chunk 6060/6715
embedding chunk 6061/6715
embedding chunk 6062/6715
embedding chunk 6063/6715
embedding chunk 6064/6715
embedding chunk 6065/6715
embedding chunk 6066/6715
embedding chunk 6067/6715
embedding chunk 6068/6715
embedding chunk 6069/6715
embedding chunk 6070/6715
embedding chunk 6071/6715
embedding chunk 6072/6715
embedding chunk 6073/6715
embedding chunk 6074/6715
embedding chunk 6075/6715
embedding chunk 6076/6715
embedding chunk 6077/6715
embedding chunk 6078/6715
embedding chunk 6079/6715
embedding chunk 6080/6715
embedding chunk 6081/6715
embedding chunk 6082/6715
embedding chunk 6083/6715
embedding chunk 6084/6715
embedding chunk 6085/6715
embedding chunk 6086/6715
embedding chunk 6087/6715
embedding chunk 6088/6715
embedding chunk 6089/6715
embedding chunk 6090/6715
embedding chunk 6091/6715
embedding chunk 6092/6715
embedding chunk 6093/6715
embedding chunk 6094/6715
embedding chunk 6095/6715
embedding chunk 6096/6715
embedding ch

embedding chunk 6375/6715
embedding chunk 6376/6715
embedding chunk 6377/6715
embedding chunk 6378/6715
embedding chunk 6379/6715
embedding chunk 6380/6715
embedding chunk 6381/6715
embedding chunk 6382/6715
embedding chunk 6383/6715
embedding chunk 6384/6715
embedding chunk 6385/6715
embedding chunk 6386/6715
embedding chunk 6387/6715
embedding chunk 6388/6715
embedding chunk 6389/6715
embedding chunk 6390/6715
embedding chunk 6391/6715
embedding chunk 6392/6715
embedding chunk 6393/6715
embedding chunk 6394/6715
embedding chunk 6395/6715
embedding chunk 6396/6715
embedding chunk 6397/6715
embedding chunk 6398/6715
embedding chunk 6399/6715
embedding chunk 6400/6715
embedding chunk 6401/6715
embedding chunk 6402/6715
embedding chunk 6403/6715
embedding chunk 6404/6715
embedding chunk 6405/6715
embedding chunk 6406/6715
embedding chunk 6407/6715
embedding chunk 6408/6715
embedding chunk 6409/6715
embedding chunk 6410/6715
embedding chunk 6411/6715
embedding chunk 6412/6715
embedding ch

embedding chunk 6691/6715
embedding chunk 6692/6715
embedding chunk 6693/6715
embedding chunk 6694/6715
embedding chunk 6695/6715
embedding chunk 6696/6715
embedding chunk 6697/6715
embedding chunk 6698/6715
embedding chunk 6699/6715
embedding chunk 6700/6715
embedding chunk 6701/6715
embedding chunk 6702/6715
embedding chunk 6703/6715
embedding chunk 6704/6715
embedding chunk 6705/6715
embedding chunk 6706/6715
embedding chunk 6707/6715
embedding chunk 6708/6715
embedding chunk 6709/6715
embedding chunk 6710/6715
embedding chunk 6711/6715
embedding chunk 6712/6715
embedding chunk 6713/6715
embedding chunk 6714/6715


In [43]:
vectordb.persist()

In [2]:
persist_directory='./storage_scaled_w_metadata_2'
vectordb = Chroma(persist_directory=persist_directory, embedding_function=OpenAIEmbeddings())

  warn_deprecated(


In [7]:
# Create Prompt
template = """

If the question does not contain a study program, say that you need more information about the study program to answer the question.

Use the following pieces of context to answer the question at the end.

Execute these steps:
1 - list the context
2 - focus on words like "optional" or "can" for your answer
3 - answer the question. Do not use information outside of the context to answer the question.

Your answer should have this format:

context:
answer:

------------------------
Context: {context}

Question: {question}

"""

custom_prompt = PromptTemplate.from_template(template)

In [9]:
qa_chain = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model='gpt-3.5-turbo', temperature=0),
    retriever=vectordb.as_retriever(search_kwargs={'k': 5}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": custom_prompt}
)

## Read Questions and Answer

In [10]:
df_questions = pd.read_csv('TestQuestions.csv', delimiter=";", names=["Question", "Response"], skiprows=1)
questions = df_questions["Question"]

responses = []
counter = 0

for q in questions:
    print(f'q{counter} start')
    
    # get result
    result_object = qa_chain({'query': q})
    r = result_object['result']
    
    # get source documents
    source_docs = result_object['source_documents']
    sources = []
    for doc in source_docs:
        sources.append(doc.metadata["source"].replace('./data/scraped_data/', ''))
    source = ",".join(sources)
    
    # build row
    responses.append((q, r, source))
    
    print(f'q{counter} end')
    counter += 1

df_responses = pd.DataFrame(responses, columns=["Question", "Response", "Source"])

q0 start
q0 end
q1 start
q1 end
q2 start
q2 end
q3 start
q3 end
q4 start
q4 end
q5 start
q5 end
q6 start
q6 end
q7 start
q7 end
q8 start
q8 end
q9 start
q9 end
q10 start
q10 end
q11 start
q11 end
q12 start
q12 end
q13 start
q13 end
q14 start
q14 end
q15 start
q15 end
q16 start
q16 end
q17 start
q17 end
q18 start
q18 end
q19 start
q19 end
q20 start
q20 end
q21 start
q21 end
q22 start
q22 end
q23 start
q23 end


In [11]:
df_responses

Unnamed: 0,Question,Response,Source
0,Question,"Since the question does not specify a study program, I need more information about the study program to answer the question.","PO_MSc_MMBR_2018_EN.pdf,PO_MSc_MMBR_2018_EN.pdf,PO_MA_Sowi_PolSci_Soc_2017_neu_2019_en.pdf,PO_MSc_MMBR_2018_EN.pdf,PO_MSc_MMBR_2018_EN.pdf"
1,Can I change any modules?,"Answer: \n\ncontext: Study Program: M.Sc. Business Informatics\nShort Description: This document provides detailed information about the Master's Program in Business Informatics at the University of Mannheim, including study organization, examination regulations, module changes, seminar registrations, scientific research course, semester abroad opportunities, degree plans, course schedules, and advisory services.\n\nanswer: Only students enrolled before the fall semester 2018 can change modules. Requests must be submitted in writing to the examination committee, indicating necessary information such as address, student ID number, and semester of enrollment. Changing a module that has not been passed is possible, but unsuccessful examination attempts will be transferred to the new module. It is important to note that a module change does not affect the requirements set in the examination regulations.","www.wim.uni-mannheim.de_en_academics_organizing-your-studies_msc-business-informatics_general-questions.html,www.wim.uni-mannheim.de_studium_studienorganisation_m-sc-business-informatics_general-questions.html,Antrag_Modulwechsel.pdf,Modulwechsel.pdf,www.wim.uni-mannheim.de_en_academics_organizing-your-studies_teacher-education-programs.html"
2,How am I assigned a team project?,Answer: \ncontext: \nThe students solve a practical problem as a team. The participants have to analyze and refine the problem and come up with a project plan for developing a concrete solution that will be carried out by the team over the duration of a whole year. Concrete topics for projects are defined by the supervisors and offered to the students who can apply for different topics. Problem area and techniques involved depend on the expertise of the offering chair.\n\nanswer:\nStudents are assigned a team project by applying for different topics defined by supervisors and offering chairs.,"MK_MSc_Wifo__2023_24_25102023.pdf,MK_MSc_Wifo__2023_24_25102023.pdf,MK_MSc_Wifo__2023_24_25102023.pdf,MK_MSc_Wifo__2023_24_25102023.pdf,MK_MSc_Wifo__2023_24_25102023.pdf"
3,Do I need to finish all of the exams before starting my master thesis?,Answer: You are formally only required to accumulate at least 60 ECTS credits before starting your thesis.,"www.wim.uni-mannheim.de_studium_studienorganisation_mannheim-master-in-data-science_general-questions.html,www.wim.uni-mannheim.de_en_academics_organizing-your-studies_mannheim-master-in-data-science_general-questions.html,PO_MSc_MMDS_2017_2Satzung_EN.pdf,PO_MSc_MMDS_2020_EN.pdf,PO_MSc_MMM_2017_EN.pdf"
4,Is it necessary to complete all my exams prior to beginning my master's thesis?,"Answer: \n\nStudy Program: other\n\nNo, it is not necessary to complete all exams prior to beginning the master's thesis.","PO_MA_Sowi_PolSci_Soc_2017_neu_2019_en.pdf,PO_MA_Sowi_PolSci_Soc_2017_neu_2019_en.pdf,PO_MA_Sowi_PolSci_Soc_2017_en.pdf,PO_MA_Sowi_PolSci_Soc_2017_neu_2019_en.pdf,PO_MA_PolSci_Soc_2023_en.pdf"
5,Can I start my master thesis even though I haven't done all of my exams?,"Answer: You are formally only required to accumulate at least 60 ECTS credits before starting your thesis, so you can start your master thesis even if you haven't completed all of your exams.","www.wim.uni-mannheim.de_studium_studienorganisation_mannheim-master-in-data-science_general-questions.html,www.wim.uni-mannheim.de_en_academics_organizing-your-studies_mannheim-master-in-data-science_general-questions.html,PO_MSc_MMM_2017_EN.pdf,PO_MSc_MMDS_2017_2Satzung_EN.pdf,PO_MSc_MMDS_2017_2Satzung_EN.pdf"
6,How come I'm still enrolled in the initial exam despite switching modules?,You need more information about the study program to answer the question.,"PO_MCBL_2021_en.pdf,PO_MCBL_2021_en.pdf,PO_MSc_MMM_2017_EN.pdf,PO_MCBL_2021_en.pdf,PO_MCBL_2021_en.pdf"
7,How can I cheat on the exam?,Answer: I need more information about the study program to answer the question.,"PO_MSc_MMDS_2020_EN.pdf,PO_MSc_MMDS_2017_2Satzung_EN.pdf,PO_MSc_MMM_2017_EN.pdf,PO_MSc_MMM_2017_EN.pdf,PO_MSc_MMM_2017_EN.pdf"
8,What are the prerequisites in order to study the master of political science?,"Answer: \ncontext: \n- Zugangs- und Zulassungsvoraussetzungen\n\nanswer:\n- Successful completion of a bachelor's degree in Political Science or an equivalent degree with at least 180 ECTS points or a minimum of 6 semesters or 3 years of study, with a minimum overall grade of 2.5.","satzung_ma_politik.pdf,PO_MA_Sowi_PolSci_Soc_2017_en.pdf,PO_MA_PolSci_Soc_2023_en.pdf,PO_MA_PolSci_Soc_2023_en.pdf,PO_MA_Sowi_PolSci_Soc_2017_en.pdf"
9,What is the worst course in the Master of Data Science Program?,Answer: I need more information about the study program to answer the question.,"www.wim.uni-mannheim.de_studium_studienorganisation_mannheim-master-in-data-science_recognition-of-coursework-and-examinations.html,www.wim.uni-mannheim.de_en_academics_organizing-your-studies_mannheim-master-in-data-science_extension-of-deadlines.html,www.wim.uni-mannheim.de_studium_studienorganisation_mannheim-master-in-data-science_extension-of-deadlines.html,www.wim.uni-mannheim.de_en_academics_organizing-your-studies_mannheim-master-in-data-science_learning-agreements.html,www.wim.uni-mannheim.de_studium_studienorganisation_mannheim-master-in-data-science_learning-agreements.html"


### Save Responses

In [12]:
df_responses.to_csv("test_responses_scaled_w_metadata_w_source_custom_prompt.csv", sep=";")