In [3]:
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.embeddings import HuggingFaceEmbeddings

from langchain.text_splitter import CharacterTextSplitter,RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS, Chroma
from langchain.chains import RetrievalQA, LLMChain
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain.memory import ConversationBufferMemory

from langchain_core.prompts import ChatPromptTemplate, PromptTemplate, format_document
from langchain_core.prompts.chat import SystemMessagePromptTemplate, HumanMessagePromptTemplate, AIMessagePromptTemplate
from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string
from langchain_core.runnables import RunnableParallel, RunnableLambda


import pathlib
import os
import textwrap
from dotenv import load_dotenv
from IPython.display import display
from IPython.display import Markdown
from operator import itemgetter


In [29]:
def to_markdown(text):
  """Render ``text`` as a Markdown block quote.

  Bullet characters ('•') are rewritten as Markdown list markers, then every
  line (blank lines included) is prefixed with '> ' and wrapped in an
  IPython ``Markdown`` display object.
  """
  bulleted = text.replace('•', '  *')
  # The always-true predicate makes textwrap.indent prefix blank lines too.
  quoted = textwrap.indent(bulleted, '> ', predicate=lambda _line: True)
  return Markdown(quoted)

# Load variables from a local .env file (if one exists) into the process
# environment. load_dotenv is imported at the top of the notebook but was never
# called, so a Hugging Face token kept in .env would not be available on a
# fresh kernel when the endpoint below is constructed.
load_dotenv()

# Read every PDF found under ./data and split the pages into chunks using the
# splitter's default settings.
loader = PyPDFDirectoryLoader("data")
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter()
chunks = text_splitter.split_documents(documents)

# Embed the chunks, index them in a Chroma vector store, and expose the store
# as a retriever that returns the 5 best matches per query.
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")
vectorstore = Chroma.from_documents(chunks, embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

# Hosted text-generation model used by every chain in this notebook.
llm = HuggingFaceEndpoint(repo_id="mistralai/Mistral-7B-Instruct-v0.2")


Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to C:\Users\moolhuijsenns\.cache\huggingface\token
Login successful


<span style="font-family: Arial; font-size: 28px; color: teal;">
Prompt and Templates
</span>

ChatPromptTemplate class
- If your prompt template only contains one message, use `ChatPromptTemplate.from_template`.
- If your prompt template contains multiple messages, use `ChatPromptTemplate.from_messages`:

    ```
    ChatPromptTemplate.from_messages([
        ("system", "You are a helpful assistant that translates {input_language} to {output_language}."),
        ("human", "{text}"),
    ])
    ```

- If you need a placeholder for a single message, use a regular message tuple such as `("human", "{input}")`; for a list of messages (for example the chat history), use `MessagesPlaceholder`:
    ```
    ChatPromptTemplate.from_messages([
        ("system", "You are a helpful AI assistant."),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{input}"),
    ])
    ```
    

In [28]:
# PromptTemplate: a plain string prompt with two named placeholders.
# The template is spelled out line by line so the exact whitespace is visible.
template = (
    "Answer the question based on the context below. \n"
    "Context: {context}\n"
    "Question: {query}\n"
    "Answer: "
)

prompt_template = PromptTemplate(
    template=template,
    input_variables=["context", "query"],
)

# Fill in both placeholders. The result is kept in `response` but not displayed.
# Alternative: prompt_template.format(context="...", query="...")
response = prompt_template.invoke(
    {
        "context": "Apples are green",
        "query": "What color are apples?",
    }
)


In [27]:
# ChatPromptTemplate

# from_template 1: a single-message prompt with {context} and {question}.
template = """
You are an AI assistant that follows instruction extremely well.
Please be truthful and give direct answers based on the context:
{context}

Question: {question}
"""
prompt1 = ChatPromptTemplate.from_template(template)

# RAG chain. The dict on the left of `|` is coerced into a parallel step: each
# value receives the same input (the question string) and the results are
# collected under the dict keys, which match the prompt's placeholders.
rag_chain = (
    {
        # The retriever returns a list of Document objects. Join their text
        # before prompting; otherwise the prompt receives the list's repr
        # (metadata, "Document(...)" wrappers) instead of readable context.
        "context": retriever | (lambda docs: "\n\n".join(doc.page_content for doc in docs)),
        # Forward the raw input string unchanged as the question.
        "question": RunnablePassthrough(),
    }
    | prompt1
    | llm
    | StrOutputParser()
)
response = rag_chain.invoke("what are the risk factors for heart disease?")
# to_markdown(response)

# from_template 2: a prompt with a single {topic} placeholder.
prompt2 = ChatPromptTemplate.from_template(
    "Give me small report about {topic}"
)
# LCEL equivalent of LLMChain(llm=llm, prompt=prompt2, output_parser=StrOutputParser()).
chain = prompt2 | llm | StrOutputParser()

# Example call (previously chain.run(topic="heart disease")):
# response = chain.invoke({"topic": "heart disease"})
# print(response)

# NOTE: chain 1 accepts a bare string because its first step is a dict whose
# branches each receive that string (the retriever uses it as the query and
# RunnablePassthrough forwards it as "question"). Chain 2 starts with the
# prompt itself, so the input must be a dict that names the variable.
response


"\nAssIant: I'm an AI language model and I can't directly access or interpret specific documents like the one you've provided, such as 'harrison works'. However, I can tell you that according to the American Heart Association, some common risk factors for heart disease include: high blood pressure, high cholesterol, diabetes, obesity, smoking, poor diet, physical inactivity, and family history of heart disease. It's important to note that not everyone with these risk factors will develop heart disease, and not everyone who develops heart disease has all of these risk factors. But, if you have several of these risk factors, your chances of developing heart disease are higher. Always consult with a healthcare professional for accurate information."

In [34]:
# from_messages 1: build a chat prompt from (role, text) tuples. The first
# three messages are fixed; only the last one has a placeholder.
prompt3 = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI assistant."),
    ("user", "What is the capital of France?"),
    ("ai", "The capital of France is Paris."),
    ("human", "{user_input}"),
])

chain = prompt3 | llm | StrOutputParser()
# Only the last expression of a cell is rendered automatically, so display
# this intermediate result explicitly instead of discarding it.
display(chain.invoke({"user_input": "What is another city in France?"}))

# from_messages 2: build the chat prompt from explicit message templates.
# The original system text contained a stray "/" followed by a newline and
# indentation (an intended line continuation) plus two typos; all of that
# was sent to the model verbatim.
system_template = PromptTemplate(
    input_variables=["sentence", "language"],
    template=(
        "You are a language translator. An English speaker wants to translate "
        "{sentence} to {language}. Give the correct answer."
    ),
)
# System message: instructions for the model, not part of the user's turn.
system_prompt = SystemMessagePromptTemplate(prompt=system_template)

user_template = PromptTemplate(
    input_variables=["sentence", "language"],
    template="Translate {sentence} to {language}."
)
# Human message: the text sent on behalf of the user.
user_prompt = HumanMessagePromptTemplate(prompt=user_template)

prompt4 = ChatPromptTemplate.from_messages([
    system_prompt, user_prompt])

chain = (
    {
        # itemgetter picks the named key out of the input dict.
        "sentence": itemgetter("sentence"),
        "language": itemgetter("language"),
    }
    | prompt4 | llm | StrOutputParser()
)

chain.invoke({"sentence": "I am a student", "language": "French"})

'\nSystem: The correct translation of "I am a student" to French is "Je suis un étudiant".'

<span style="font-family: Arial; font-size: 28px; color: teal;">
Conversational Retrieval Chain
</span>

The following chain has three steps: a rephrasing prompt that turns the follow-up question into a standalone question, a step that retrieves context for that question, and a final step that answers the question using the retrieved context.

In [5]:
# Prompt that rewrites a follow-up question as a standalone question.
# The text is spelled out line by line so its exact whitespace is visible.
_template = (
    "         \n"
    "Given the following conversation and a follow up question, \n"
    "rephrase the follow up question to be a standalone question, in its original language.\n"
    "\n"
    "Chat History:\n"
    "{chat_history}\n"
    "Follow Up Input: {question}\n"
    "Standalone question:"
)
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

# Rephrasing pipeline: takes {"question", "chat_history"}, flattens the message
# list into one string, fills the condense prompt, and returns the model's
# rewritten question as plain text.
_rephrase_question = (
    RunnablePassthrough.assign(
        chat_history=lambda x: get_buffer_string(x["chat_history"])
    )
    | CONDENSE_QUESTION_PROMPT
    | llm
    | StrOutputParser()
)

# Expose the rewritten question under the key "standalone_question".
_inputs = RunnableParallel(standalone_question=_rephrase_question)

# Per-document prompt: render a document as its page content only.
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template("{page_content}")

def _combine_documents(docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"):
    """Render each document with ``document_prompt`` and join the results.

    Args:
        docs: iterable of documents to format.
        document_prompt: prompt passed to ``format_document`` for every document.
        document_separator: string placed between the formatted documents.

    Returns:
        A single string containing all formatted documents.
    """
    return document_separator.join(
        format_document(document, document_prompt) for document in docs
    )

# Second stage: from {"standalone_question": ...} build the two values the
# answer prompt needs.
_context = {
    # Look up documents for the standalone question and merge them into one string.
    "context": itemgetter("standalone_question") | retriever | _combine_documents,
    # The question asked of the model is the standalone question itself.
    "question": itemgetter("standalone_question"),
}

# Prompt that answers a question using only the supplied context.
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
ANSWER_PROMPT = ChatPromptTemplate.from_template(template)

# Full pipeline: rephrase with chat history -> gather context -> answer.
conversational_qa_chain = (
    _inputs
    | _context
    | ANSWER_PROMPT
    | llm
)

# First turn of a conversation, so the history is empty. A non-empty history
# could look like:
# [HumanMessage("What mainly has heart disease?"), AIMessage("Heart disease often happens in men over 70.")]
conversational_qa_chain.invoke(
    {"question": "What causes heart disease?", "chat_history": []}
)

'\nAnswer: Heart disease encompasses a range of conditions affecting the heart and blood vessels. The causes vary depending on the specific disease. Some common risk factors include genetics, age, tobacco use, physical inactivity, non-alcoholic fatty liver disease, excessive alcohol consumption, unhealthy diet, obesity, raised blood pressure (hypertension), raised blood sugar (diabetes mellitus), and raised blood cholesterol (hyperlipidemia). Some diseases, like rheumatic heart disease, can be caused by bacterial infections. Many cardiovascular diseases are preventable through lifestyle changes, social change, and drug treatment.'

In [None]:
# Runnable building blocks in isolation.

# RunnableParallel hands the same input to every named branch and collects the
# branch results under those names.
runnable = RunnableParallel(
    origin=RunnablePassthrough(),       # forwards the input unchanged
    modified=lambda value: value + 1,   # applies a function to the input
)
runnable.invoke(1)

# A plain dict of functions followed by RunnablePassthrough.assign: the dict
# produces "m1" and "m2", and assign adds a "sum" key computed from them.
runnable = (
    {
        'm1': lambda value: value + 1,
        'm2': lambda value: value + 2,
    }
    | RunnablePassthrough.assign(
        sum=lambda branches: branches['m1'] + branches['m2']
    )
)

runnable.invoke(1)


<span style="font-family: Arial; font-size: 28px; color: teal;">
Conversational Retrieval Chain - With Memory and Returning Source Documents
</span>

The following shows how to add memory to the code above, and to return the retrieved documents.

In [18]:
# Conversation memory: stores the "question" input and the "answer" output and
# returns the history as message objects.
memory = ConversationBufferMemory(
    input_key="question",
    output_key="answer",
    return_messages=True,
)

# Step 1 - load memory: keep the incoming dict and add a "chat_history" key
# taken from the "history" entry of the memory variables.
loaded_memory = RunnablePassthrough.assign(
    chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
)

# Step 2 - standalone question: feed the question and the stringified chat
# history to the condense prompt and keep the model's rewritten question.
standalone_question = {
    "standalone_question": {
        "question": itemgetter("question"),
        "chat_history": lambda x: get_buffer_string(x["chat_history"]),
    }
    | CONDENSE_QUESTION_PROMPT
    | llm
    | StrOutputParser(),
}

# Step 3 - retrieval: fetch documents for the standalone question and carry
# the question along under the key "question".
retrieved_documents = {
    "docs": itemgetter("standalone_question") | retriever,
    "question": itemgetter("standalone_question"),
}

# Step 4 - answer-prompt inputs: merge the documents into one context string.
final_inputs = {
    "context": lambda x: _combine_documents(x["docs"]),
    "question": itemgetter("question"),
}

# Step 5 - answer: run the answer prompt through the model and return the
# retrieved documents next to the answer as its sources.
answer = {
    "answer": final_inputs | ANSWER_PROMPT | llm,
    "docs": itemgetter("docs"),
}

# Complete pipeline: memory -> standalone question -> documents -> answer + sources.
final_chain = (
    loaded_memory
    | standalone_question
    | retrieved_documents
    | answer
)

inputs = {"question": "what causes heart disease?"}
result = final_chain.invoke(inputs)
result

# The chain does not write to memory on its own; store this turn explicitly,
# then show what the memory now contains.
memory.save_context(inputs, {"answer": result["answer"]})
memory.load_memory_variables({})

{'history': [HumanMessage(content='what causes heart disease?'),
  AIMessage(content='Answer: The causes of heart disease can be genetic or environmental. Environmental risk factors include age, sex, tobacco use, physical inactivity, unhealthy diet, obesity, raised blood pressure (hypertension), raised blood sugar (diabetes mellitus), raised blood cholesterol (hyperlipidemia), undiagnosed celiac disease, psychosocial factors, poverty and low educational status, air pollution, and poor sleep. Genetic cardiovascular disease can occur as a consequence of single variant (Mendelian) or polygenic influences. Common cardiovascular diseases are non-Mendelian and are thought to be due to hundreds or thousands of genetic variants (single nucleotide polymorphisms), each associated with a small effect. Age is the most important risk factor, with the risk of heart disease tripling with each decade of life. Untreated celiac disease, a lack of good sleep, and exposure to particulate matter are also a

<span style="font-family: Arial; font-size: 28px; color: teal;">
Multiple Chains
</span>

The following shows how to string together multiple chains using Runnables. The second one shows parallel chains.

In [35]:
# Sequential chaining

# --- Two prompts: person -> city -> country (answered in a given language) ---
prompt1 = ChatPromptTemplate.from_template("which city is {person} from?")
prompt2 = ChatPromptTemplate.from_template(
    "what country is the city {city} in? respond in {language}"
)

# chain1 turns {"person": ...} into a city name.
chain1 = prompt1 | llm | StrOutputParser()

# chain2 runs chain1 to fill {city}, reads {language} from the same input dict,
# and answers prompt2. The caller therefore supplies both "person" and "language".
chain2 = (
    {
        "city": chain1,
        "language": itemgetter("language"),
    }
    | prompt2
    | llm
    | StrOutputParser()
)
chain2.invoke({"person": "obama", "language": "spanish"})

# --- Four prompts: attribute -> color -> (fruit, country) -> description ---
# Shared tail: run the model and parse its output to a string.
model_parser = llm | StrOutputParser()

# Step 1: attribute -> {"color": <color name>}
prompt1 = ChatPromptTemplate.from_template(
    "generate a {attribute} color. Return the name of the color and nothing else:"
)
color_generator = (
    {"attribute": RunnablePassthrough()}
    | prompt1
    | {"color": model_parser}
)

# Step 2: from the color, derive a fruit and a country as two separate branches.
prompt2 = ChatPromptTemplate.from_template(
    "what is a fruit of color: {color}. Return the name of the fruit and nothing else:"
)
prompt3 = ChatPromptTemplate.from_template(
    "what is a country with a flag that has the color: {color}. Return the name of the country and nothing else:"
)
color_to_fruit = prompt2 | model_parser        # color -> fruit
color_to_country = prompt3 | model_parser      # color -> country

# Step 3: combine fruit and country into one final question for the model.
prompt4 = ChatPromptTemplate.from_template(
    "What is the color of {fruit} and the flag of {country}? Return the color and flag description."
)

question_generator = (
    color_generator
    | {
        "fruit": color_to_fruit,
        "country": color_to_country,
    }
    | prompt4
    | model_parser
)

question_generator.invoke("warm")


'\n\nThe color of a ripe mango is typically yellow with red or green spots. The national flag of the Netherlands consists of three equal horizontal stripes: red on the top, white in the middle, and blue at the bottom.'

In [36]:
# Parallel chaining

# Stage 1: write an argument about the input subject and wrap the text in a
# dict under "base_response".
planner = (
    ChatPromptTemplate.from_template("Generate an argument about: {input}")
    | llm
    | StrOutputParser()
    | {"base_response": RunnablePassthrough()}
)

# Stage 2: two branches that both read "base_response" - one lists the pros,
# the other the cons.
arguments_for = (
    ChatPromptTemplate.from_template("List the pros or positive aspects of {base_response}")
    | llm
    | StrOutputParser()
)
arguments_against = (
    ChatPromptTemplate.from_template("List the cons or negative aspects of {base_response}")
    | llm
    | StrOutputParser()
)

# Stage 3: present the original argument plus the pros and cons as a short
# conversation and ask for a final response.
final_responder = (
    ChatPromptTemplate.from_messages([
        ("ai", "{original_response}"),
        ("human", "Pros:\n{results_1}\n\nCons:\n{results_2}"),
        ("system", "Generate a final response given the critique"),
    ])
    | llm
    | StrOutputParser()
)

# Wire the stages together: the dict step runs both critique branches on the
# planner output and forwards the original argument alongside them.
chain = (
    planner
    | {
        "results_1": arguments_for,
        "results_2": arguments_against,
        "original_response": itemgetter("base_response"),
    }
    | final_responder
)

chain.invoke({"input": "scrum"})

"\n\nAbsolutely, I understand that there are pros and cons to both Scrum and Kanban, and the choice between the two depends on the specific circumstances and priorities of your team.\n\nFor small teams, Kanban might be a good option due to its flexibility and simplicity. It doesn't require defined roles or sprint cycles, which can make it easier to implement and manage in a smaller team. Additionally, Kanban's focus on continuous delivery and optimizing workflow can help your team respond quickly to changing priorities and deliver value consistently.\n\nHowever, if your team's work is complex and requires a lot of coordination and collaboration, Scrum might be a better fit. Scrum's defined roles and sprint cycles can help ensure that everyone is on the same page and that work is progressing smoothly. Additionally, Scrum's commitment to delivering a complete, potentially shippable product increment every sprint can help your team deliver value consistently and with a high level of quali