# Chat with embedded Websites
* need to run the Website embeddings ipynb first
* this notebook aims to compare the different prompt and output parser
* Note that we should compare different LLM models performance from huggingface to save the cost

# Information Retrieval
* https://www.youtube.com/watch?v=KUDn7bVyIfc&list=PL8motc6AQftk1Bs42EW45kwYbyJ4jOdiZ&index=26

In [2]:
import sys
sys.path.append("..") # Adds higher directory to python module path
from dotenv import find_dotenv, load_dotenv

load_dotenv(find_dotenv())

In [3]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings() # text-embedding-ada-002

persist_directory = 'vectorDatabase'

## Here is the new embedding being used
embedding = embeddings

# Now we can load the persisted database from disk, and use it as normal. 
vectordb = Chroma(persist_directory=persist_directory, 
                  embedding_function=embedding)

In [4]:
# Here is the key, we pass the db as the retriever
# K cannot be too small, this will impact the performance -> this k is all the {context} the LLM will have
retriever = vectordb.as_retriever(search_type='similarity', search_kwargs={"k":3}) # force only return 3 results

# Make Chain

In [5]:
from langchain.chains import RetrievalQA 
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate

In [7]:
## Cite sources
import textwrap

def wrap_text_preserve_newlines(text, width=110):
    # Split the input text into lines based on newline characters
    lines = text.split('\n')

    # Wrap each line individually
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]

    # Join the wrapped lines back together using newline characters
    wrapped_text = '\n'.join(wrapped_lines)

    return wrapped_text

def process_llm_response(llm_response):
    print(wrap_text_preserve_newlines(llm_response['result']))
    print('\n\nSources:')
    for source in llm_response["source_documents"]:
        print(source.metadata['source'])


In [1]:
from langchain.llms import OpenAI

In [2]:
llm=OpenAI(temperature=0)

In [3]:
llm

OpenAI(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, client=<class 'openai.api_resources.completion.Completion'>, model_name='text-davinci-003', temperature=0.0, max_tokens=256, top_p=1, frequency_penalty=0, presence_penalty=0, n=1, best_of=1, model_kwargs={}, openai_api_key='"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" #my open AI key', openai_api_base='', openai_organization='', openai_proxy='', batch_size=20, request_timeout=None, logit_bias={}, max_retries=6, streaming=False, allowed_special=set(), disallowed_special='all', tiktoken_model_name=None)

In [8]:
# https://platform.openai.com/docs/models/gpt-3-5
chat=ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo-0613') 

## Compare different ways of setting up prompt - SystemMessagePromptTemplate and Human MessagePromptTemplate
1. chain_type_kwargs
2. qa_chain.combine_documents_chain.llm_chain.prompt.messages[0].prompt.template


Method 1 & Method 2 are exactly the same

Method 3 outperforms others!

The systemMessagePromptTempalte and HumanMessagePromptTemplate is different for this 3 approaches

In [9]:
customer_facing_template = '''You are a education consultant named TaiGer who specializes study programs in Germany.
you provide up-to-date and accurate information for students who want to apply German study programs.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Always answer from the perspective of being TaiGer 
{context}
'''

### Method 1: ChatPromptTemplate.from_template & chain_type_kwargs

In [22]:
customer_prompt_template = ChatPromptTemplate.from_template(customer_facing_template)

In [23]:
customer_prompt_template # you can check here, 

ChatPromptTemplate(input_variables=['context'], output_parser=None, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], output_parser=None, partial_variables={}, template="You are a education consultant named TaiGer who specializes study programs in Germany.\nyou provide up-to-date and accurate information for students who want to apply German study programs.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\nAlways answer from the perspective of being TaiGer \n{context}\n", template_format='f-string', validate_template=True), additional_kwargs={})])

In [24]:
customer_qa_chain_1 = RetrievalQA.from_chain_type(llm=chat, 
                                       chain_type="stuff",
                                       retriever=retriever,
                                       chain_type_kwargs = {"prompt":customer_prompt_template},
                                       return_source_documents=True)

In [36]:
# This is HumanMessagePrompt!!!
print(customer_qa_chain_1.combine_documents_chain.llm_chain.prompt.messages)

[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], output_parser=None, partial_variables={}, template="You are a education consultant named TaiGer who specializes study programs in Germany.\nyou provide up-to-date and accurate information for students who want to apply German study programs.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\nAlways answer from the perspective of being TaiGer \n{context}\n", template_format='f-string', validate_template=True), additional_kwargs={})]


### Method 2: ChatPromptTemplate.from_messages & chain_type_kwargs

In [10]:
system_message_prompt = SystemMessagePromptTemplate.from_template(customer_facing_template)

In [11]:
# this part is insane, if i miss this, things will get very werid.
# https://python.langchain.com/docs/modules/model_io/prompts/prompt_templates/#chat-prompt-template
human_template = '''{question}'''
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [12]:
chat_prompt = ChatPromptTemplate.from_messages([
    system_message_prompt,
    human_message_prompt
])

In [40]:
chat_prompt

ChatPromptTemplate(input_variables=['context', 'question'], output_parser=None, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], output_parser=None, partial_variables={}, template="You are a education consultant named TaiGer who specializes study programs in Germany.\nyou provide up-to-date and accurate information for students who want to apply German study programs.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\nAlways answer from the perspective of being TaiGer \n{context}\n", template_format='f-string', validate_template=True), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], output_parser=None, partial_variables={}, template='{question}', template_format='f-string', validate_template=True), additional_kwargs={})])

In [13]:
customer_qa_chain_2 = RetrievalQA.from_chain_type(llm=chat, 
                                       chain_type="stuff",
                                       retriever=retriever,
                                       chain_type_kwargs = {"prompt":chat_prompt},
                                       return_source_documents=True)

In [42]:
# This is SystemMessagePromptTemplate!!!
print(customer_qa_chain_2.combine_documents_chain.llm_chain.prompt.messages)

[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], output_parser=None, partial_variables={}, template="You are a education consultant named TaiGer who specializes study programs in Germany.\nyou provide up-to-date and accurate information for students who want to apply German study programs.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\nAlways answer from the perspective of being TaiGer \n{context}\n", template_format='f-string', validate_template=True), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], output_parser=None, partial_variables={}, template='{question}', template_format='f-string', validate_template=True), additional_kwargs={})]


### Method3: customer_qa_chain_2.combine_documents_chain.llm_chain.prompt.messages[0].prompt.template

In [43]:
customer_qa_chain_3 = RetrievalQA.from_chain_type(llm=chat, 
                                       chain_type="stuff",
                                       retriever=retriever,
                                       return_source_documents=True)

In [44]:
# The performance of this is much better
# https://www.youtube.com/watch?v=KUDn7bVyIfc&list=PL8motc6AQftk1Bs42EW45kwYbyJ4jOdiZ&index=27
customer_qa_chain_3.combine_documents_chain.llm_chain.prompt.messages[0].prompt.template = customer_facing_template

In [45]:
# This one has the same SystemMessagePromptTemplate but Keep the HumanMessagePromptTempalte
print(customer_qa_chain_3.combine_documents_chain.llm_chain.prompt.messages)

[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], output_parser=None, partial_variables={}, template="You are a education consultant named TaiGer who specializes study programs in Germany.\nyou provide up-to-date and accurate information for students who want to apply German study programs.\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\nAlways answer from the perspective of being TaiGer \n{context}\n", template_format='f-string', validate_template=True), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], output_parser=None, partial_variables={}, template='{question}', template_format='f-string', validate_template=True), additional_kwargs={})]


In [46]:
print(customer_qa_chain_3.combine_documents_chain.llm_chain.prompt.messages[0].prompt.template)

You are a education consultant named TaiGer who specializes study programs in Germany.
you provide up-to-date and accurate information for students who want to apply German study programs.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Always answer from the perspective of being TaiGer 
{context}



## Compare the performance - with the different method

In [14]:
query_1 = "what is TUM Informatics about?"

In [26]:
# full example
llm_response = customer_qa_chain_1(query_1)
process_llm_response(llm_response)

As an education consultant specializing in study programs in Germany, I can provide up-to-date and accurate
information about the Master Informatics program at the TUM School of Computation, Information and Technology
at the Technical University of Munich.

The Master Informatics program is a research-oriented degree that builds upon the fundamentals of computer
science acquired during undergraduate studies. It offers the opportunity for individual specialization in one
or more subject areas. The program is taught in English and has a standard duration of 4 semesters, with a
total of 120 ECTS credits.

The main location for the program is in Garching, and it can be started in either the summer or winter
semester. The program is available for both full-time and part-time study. It is important to note that there
are semester fees, but no tuition fees for this program.

For questions about the application process, you can contact the TUM School of Computation, Information and
Technology 

In [41]:
llm_response['result']

"As an education consultant specializing in study programs in Germany, I can provide up-to-date and accurate information about the Master Informatics program at the TUM School of Computation, Information and Technology at the Technical University of Munich.\n\nThe Master Informatics program is a research-oriented degree that builds upon the fundamentals of computer science acquired during undergraduate studies. It offers the opportunity for individual specialization in one or more subject areas. The program is taught in English and has a standard duration of 4 semesters, with a total of 120 ECTS credits.\n\nThe main location for the program is in Garching, and it can be started in either the summer or winter semester. The program is available for both full-time and part-time study. It is important to note that there are semester fees, but no tuition fees for this program.\n\nFor questions about the application process, you can contact the TUM School of Computation, Information and Tech

In [15]:
llm_response = customer_qa_chain_2(query_1)
process_llm_response(llm_response)

TUM Informatics, also known as the Master's in Informatics program at the Technical University of Munich, is a
research-oriented degree program that focuses on the field of computer science. It is designed for students
who have already completed their undergraduate studies in computer science or a related field and want to
further deepen their knowledge and skills in informatics.

The program offers a wide range of courses and research opportunities in various areas of informatics,
including but not limited to artificial intelligence, data science, software engineering, computer graphics,
and human-computer interaction. Students have the flexibility to choose their own specialization based on
their interests and career goals.

The program is taught in English, making it accessible to international students. It has a standard duration
of four semesters and is available on a full-time or part-time basis. The curriculum is structured to provide
a solid foundation in computer science princ

In [24]:
def wrap_text_preserve_newlines(text, width=110):
    # Split the input text into lines based on newline characters
    lines = text.split('\n')

    # Wrap each line individually
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]

    # Join the wrapped lines back together using newline characters
    wrapped_text = '\n'.join(wrapped_lines)

    return wrapped_text

In [None]:
llm_response = customer_qa_chain_3(query_1)
process_llm_response(llm_response)

In [None]:
query_2 = "when is deadline for applying this program?"

In [None]:
# full example
llm_response = customer_qa_chain_1(query_2)
process_llm_response(llm_response)

In [None]:
llm_response = customer_qa_chain_2(query_2)
process_llm_response(llm_response)

In [None]:
llm_response = customer_qa_chain_3(query_2)
process_llm_response(llm_response)

In [None]:
query_3 = "what application documents are required to apply for this program?"

In [None]:
# full example
llm_response = customer_qa_chain_1(query_3)
process_llm_response(llm_response)

In [None]:
llm_response = customer_qa_chain_2(query_3)
process_llm_response(llm_response)

In [None]:
llm_response = customer_qa_chain_3(query_3)
process_llm_response(llm_response)

In [None]:
# full example -> i want this to be off!
query_4 = "do you like Mission impossible 7?"
llm_response = customer_qa_chain_1(query_4)
process_llm_response(llm_response)

In [None]:
llm_response = customer_qa_chain_2(query_4)
process_llm_response(llm_response)

In [None]:
llm_response = customer_qa_chain_3(query_4)
process_llm_response(llm_response)

## Pydantic Object

In [63]:
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field, validator
from typing import List, Optional
from datetime import datetime
import pandas as pd

# Make an RetrievalQA chain

In [69]:
from langchain.callbacks import get_openai_callback


## Make a fundtion

In [64]:
def add_source_into_cell(data_dict, sources):
    '''
    add the source into the cell, update the dataframe
    '''
    updated_dict = {key: f"{value}, source = {sources}" for key, value in data_dict.items()}
    return updated_dict


In [74]:
def retreival_qa_chain_wrapper(university_name, program_name, pydanticObject, retriever, chat_model, query, verbose=False):
    '''
    This is just a wrapper function to avoid repeated patten code
    This function includes the system and message message -> TBD: can think of to take it out later
    TODO: if it makes snese to pass the university_name and program_name
    '''
        
    system_message_template = '''
    You are a education consultant named TaiGer who specializes study programs.
    You are helping taiwanese students to apply study programs. so please be explicit include the requirement for taiwanese students
    you provides the information based on the program and university that student want to apply. do not answer other programs' inforamtion
    
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    Always answer from the perspective of being TaiGer 
    ----
    {context}
    '''

    human_tamplate = '''
    I am student from Taiwan who would like to apply for {program_name} at {university_name}
    extract the above information in the following format
    \n{format_instructions}\n
    '''

    # Initialize the output_parser from the pydantic object
    output_parser = PydanticOutputParser(pydantic_object=pydanticObject)

    # This is how the RetrievalQA chain
    prompt = ChatPromptTemplate(
        messages=[
            SystemMessagePromptTemplate.from_template(system_message_template),
            HumanMessagePromptTemplate.from_template(human_tamplate)  
        ],
        partial_variables={"format_instructions": output_parser.get_format_instructions(), 
                           "university_name":university_name, 
                           "program_name":program_name},
        output_parser=output_parser # here we add the output parser to the Prompt template
    )

    with get_openai_callback() as cb:
        # Form RetrievalQA chain
        chain = RetrievalQA.from_chain_type(llm=chat_model, 
                                        chain_type="stuff", # stuff cost you less.
                                        retriever=retriever,
                                        chain_type_kwargs = {"prompt":prompt},
                                        return_source_documents=True)

        llm_response = chain(query) #by-pass the query here
    print(f"Total Tokens: {cb.total_tokens}")
    print(f"Prompt Tokens: {cb.prompt_tokens}")
    print(f"Completion Tokens: {cb.completion_tokens}")
    print(f"Successful Requests: {cb.successful_requests}")
    print(f"Total Cost (USD): ${cb.total_cost}")

    # results -> use output parser to parse
    result_dict = output_parser.parse(llm_response['result']).dict() # this covert the pydantic object to dict

    source_list = []
    for source in llm_response["source_documents"]:
        source_list.append(source.metadata['source'])

    # print
    if verbose: 
        process_llm_response(llm_response)

    #todo: here need to clean up the output and make it json or pandas like!
    #remember that the output source need to be kept as well for traceability!
    return result_dict, source_list
    
    

In [75]:
def process_program_information(university_name,
                                program_name,
                                chat_model, 
                                retriever, 
                                verbose=False):
    '''
    process program inforamtion based on the given university + program name
    TODO: Break each session into a smaller unit so i can test the prompt
    return pandas dataframe
    '''

    # Set up a prefix prompt
    prefix_template =  "I am student from Taiwan who would like to apply for {program_name} at {university_name}.\n"
    prefix_prompt = PromptTemplate.from_template(prefix_template)
    formatted_prefix_prompt = prefix_prompt.format(program_name=program_name, university_name=university_name)

    # 1. Get ProgramBase Info
    class ProgramBaseInfo(BaseModel):
        university_name: str                = Field(description='This is the name of the university')
        program_name: str                   = Field(description='This is the name of the program')
        degree: str                         = Field(description='This describes what is type of this degree. please also tell is a science or art related degree')
        country: str                        = Field(description='This is the country of this university')
        tuition_fee: Optional[str]               = Field(description='This describes the tuition fee of this program per semester')


    # do not need the source
    query_base_info = formatted_prefix_prompt + "Can you tell me information about this program?"
    programBaseinfo_result, _ = retreival_qa_chain_wrapper(university_name, program_name, ProgramBaseInfo, retriever, chat, query_base_info, verbose)
    
    # 2. Get Application Period Info
    class ApplicationPeriod(BaseModel):
        semester: str                       = Field(description='Thsi describe does this program has winster or summer semester, or both. Please output WS/SS/Both')
        winter_semester_application_start: str                   = Field(description="This is the applicaiton start date for applying this program for Winter semester")
        winter_semester_application_deadline: str                = Field(description="This is the applicaiton deadline for applying this program for Winter semester")
        summer_semester_application_start: Optional[str]                   = Field(description="This is the applicaiton start date for applying this program for Summer semester")
        summer_semester_application_deadline: Optional[str]                = Field(description="This is the applicaiton deadline for applying this program for Summer semester")
    
    # do not need the source
    query_application_period = formatted_prefix_prompt + "Can you tell me the application period of this program?"
    applicationPeriod_result, _ = retreival_qa_chain_wrapper(university_name, program_name, ApplicationPeriod, retriever, chat, query_application_period, verbose)

    # 3. Get the Required Document Info
    class RequiredDocument(BaseModel):
        ml_required: str                    = Field(description='This describes whether this programs requires Motiviation letter for application, provide yes, no, unknown')
        ml_requirement: Optional[str]                 = Field(description='This describes the detailed requirement for the motivation letter e.g. number of words')
        rl_required: str                    = Field(description='This describes whether this programs requires Recommendation letter for application, provide yes, no, unknown')
        rl_requirement: Optional[str]                 = Field(description='This describes the detailed requirement for the Recommendation letter e.g. how many recommendation letters are required?')
        essay_required: str                 = Field(description='This describes whether this programs requires Scientific essay for application, provide yes, no, unknown')
        essay_requirement: Optional[str]              = Field(description='This describes the detailed requirement for the Scientific essay e.g. what is the topic? how many words is required?')
        supplementary_form_required: str    = Field(description='This describes whether this programs requires any supplementary form for application, provide yes, no, unknown')
        supplementary_form_requirement: Optional[str] = Field(description='This describes the detailed requirement for the supplementary form')
    
    # need source of this
    query_required_documents = formatted_prefix_prompt + "Can you tell me all the requird application documents for applying this program?"
    requiredDocument_result, sources = retreival_qa_chain_wrapper(university_name, program_name, RequiredDocument, retriever, chat, query_required_documents, verbose)
    requiredDocument_result = add_source_into_cell(requiredDocument_result, sources[0]) # add the source into it

    # 4. Get the Required Language Info
    class RequiredLanguage(BaseModel):
        language: str                       = Field(description='This describe the official teaching language for this program')
        toefl: str                          = Field(description="This is the required TOEFL score for applying this program")
        ielts: str                          = Field(description="This is the required IELTS score for applying this program")
        gre: str                            = Field(description='This is the required GRE score for applying this program')
        gmat: str                           = Field(description='This is the required GMAT score for applying this program')
        testdaf: str                        = Field(description='If this program is taught in German, this is the required TESTDAF score for applying this program')

    # don't need the source
    query_required_language = formatted_prefix_prompt + "Can you tell me the required TOFEL/IELTS/GRE/GMAT score to apply this program?"
    requiredLanguage_result, _  = retreival_qa_chain_wrapper(university_name, program_name, RequiredLanguage,  retriever, chat,query_required_language, verbose)


    # 5. Get the required academic performance info
    class RequiredAcademicPerf(BaseModel):
        gpa: Optional[str]          =  Field(description='This describes the minimal required GPA to apply for this program, please output the full details, not just the number')
        ects: Optional[str]           = Field(description='This describes required ECTS in any field to apply for this program e.g. required 30 ECTs in linear algebra')
    
    # need source of this
    query_academic_perf = formatted_prefix_prompt + "Can you tell me the detailed academic requirement e.g. required ETCs to apply this program?"
    requiredAcademicPerf_result, sources  = retreival_qa_chain_wrapper(university_name, program_name, RequiredAcademicPerf, retriever, chat, query_academic_perf, verbose)
    requiredAcademicPerf_result = add_source_into_cell(requiredAcademicPerf_result, sources[0])

    # 6. Get the URL links
    class UrlLinks(BaseModel):
        application_portal_a: str           = Field(description='This describes the url link to the application portal for applying this program')
        website: str                        = Field(description='This describes the url link to the website of this program')
        fpso: Optional[str]                           = Field(description='This describes the url link to the Fachprüfungs- und Studienordnung (FPSO) of this program')

    # no need for the source
    query_urls = formatted_prefix_prompt + "Can you provide me the application protal, website, and FPSO links for apply this program?"
    urlLinks_result, _ = retreival_qa_chain_wrapper(university_name, program_name, UrlLinks, retriever,  chat, query_urls, verbose)

    
    # Merge the results
    merge_result = {**programBaseinfo_result, **applicationPeriod_result, **requiredDocument_result, **requiredLanguage_result, **requiredAcademicPerf_result, **urlLinks_result}


    df_result = pd.DataFrame(merge_result, index=[0])

    return df_result


In [76]:
df_mannheim = process_program_information(university_name="Mannehim", program_name="Master in Management", chat_model=chat, retriever=retriever, verbose=True)

Total Tokens: 885
Prompt Tokens: 827
Completion Tokens: 58
Successful Requests: 1
Total Cost (USD): $0.0013565
{
  "university_name": "Mannheim",
  "program_name": "Master in Management",
  "degree": "Master",
  "country": "Germany",
  "tuition_fee": "Please visit the website of the Business School for information on tuition fees"
}


Sources:
https://www.uni-mannheim.de/en/academics/programs/mannheim-master-in-management/
https://www.uni-mannheim.de/en/academics/programs/mannheim-master-in-management/
https://www.uni-mannheim.de/en/academics/programs/mannheim-master-in-management/
Total Tokens: 1160
Prompt Tokens: 1101
Completion Tokens: 59
Successful Requests: 1
Total Cost (USD): $0.0017695
{
  "semester": "Both",
  "winter_semester_application_start": "01 February",
  "winter_semester_application_deadline": "31 May",
  "summer_semester_application_start": "01 September",
  "summer_semester_application_deadline": "30 November"
}


Sources:
https://www.uni-mannheim.de/en/academics/pro

In [70]:
with get_openai_callback() as cb:
    df_mannheim = process_program_information(university_name="Mannehim", 
                                              program_name="Master in Management", 
                                              chat_model=chat, retriever=retriever, verbose=True)
    print(f"Total Tokens: {cb.total_tokens}")
    print(f"Prompt Tokens: {cb.prompt_tokens}")
    print(f"Completion Tokens: {cb.completion_tokens}")
    print(f"Successful Requests: {cb.successful_requests}")
    print(f"Total Cost (USD): ${cb.total_cost}")

{
  "university_name": "Mannheim University",
  "program_name": "Master in Management",
  "degree": "Master's degree in Business Administration",
  "country": "Germany",
  "tuition_fee": "Please refer to the university website for the current tuition fee per semester"
}


Sources:
https://www.uni-mannheim.de/en/academics/programs/mannheim-master-in-management/
https://www.uni-mannheim.de/en/academics/programs/mannheim-master-in-management/
https://www.uni-mannheim.de/en/academics/programs/mannheim-master-in-management/
{
  "semester": "Both",
  "winter_semester_application_start": "01 February",
  "winter_semester_application_deadline": "31 May",
  "summer_semester_application_start": "01 September",
  "summer_semester_application_deadline": "30 November"
}


Sources:
https://www.uni-mannheim.de/en/academics/programs/mannheim-master-in-management/
https://www.cit.tum.de/en/cit/studies/degree-programs/master-informatics/
https://www.cit.tum.de/en/cit/studies/degree-programs/master-infor

In [56]:
df_mannheim

Unnamed: 0,university_name,program_name,degree,country,tuition_fee,semester,winter_semester_application_start,winter_semester_application_deadline,summer_semester_application_start,summer_semester_application_deadline,...,toefl,ielts,gre,gmat,testdaf,gpa,ects,application_portal_a,website,fpso
0,Universität Mannheim,Mannheim Master in Management,Master,Germany,Please refer to the university website for the...,Both,01 April,15 May,01 September,30 November,...,C1 level,C1 level,Not required,Not required,Not applicable,A betriebswirtschaftlicher Bachelor is require...,A betriebswirtschaftlicher Bachelor is require...,https://www.uni-mannheim.de/en/programs/master...,https://www.uni-mannheim.de/en/programs/master...,https://www.uni-mannheim.de/en/programs/master...


In [77]:
with get_openai_callback() as cb:
    df_mannheim = process_program_information(university_name="TUM", 
                                              program_name="Informatics", 
                                              chat_model=chat, retriever=retriever, verbose=True)
    print(f"Total Tokens: {cb.total_tokens}")
    print(f"Prompt Tokens: {cb.prompt_tokens}")
    print(f"Completion Tokens: {cb.completion_tokens}")
    print(f"Successful Requests: {cb.successful_requests}")
    print(f"Total Cost (USD): ${cb.total_cost}")

Total Tokens: 1074
Prompt Tokens: 1018
Completion Tokens: 56
Successful Requests: 1
Total Cost (USD): $0.001639
{
  "university_name": "Technical University of Munich",
  "program_name": "Informatics",
  "degree": "Master of Science",
  "country": "Germany",
  "tuition_fee": "Please contact the university for information on tuition fees"
}


Sources:
https://www.cit.tum.de/en/cit/studies/degree-programs/master-informatics/
https://www.cit.tum.de/en/cit/studies/degree-programs/master-informatics/
https://www.cit.tum.de/en/cit/studies/degree-programs/master-informatics/
Total Tokens: 1131
Prompt Tokens: 1072
Completion Tokens: 59
Successful Requests: 1
Total Cost (USD): $0.001726
{
  "semester": "Both",
  "winter_semester_application_start": "01 February",
  "winter_semester_application_deadline": "31 May",
  "summer_semester_application_start": "01 September",
  "summer_semester_application_deadline": "30 November"
}


Sources:
https://www.cit.tum.de/en/cit/studies/degree-programs/maste