### **Load Environment variables from .env file**

In [1]:
from langchain.llms import AzureOpenAI
import openai
from dotenv import load_dotenv
import os
from IPython.display import display, HTML, JSON, Markdown
import tiktoken

# Pull the settings defined in the .env file into the process environment.
load_dotenv()

# Azure OpenAI resource and the default chat deployment.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_DEPLOYMENT_ENDPOINT = os.environ.get("OPENAI_DEPLOYMENT_ENDPOINT")
OPENAI_DEPLOYMENT_NAME = os.environ.get("OPENAI_DEPLOYMENT_NAME")
OPENAI_MODEL_NAME = os.environ.get("OPENAI_MODEL_NAME")
OPENAI_DEPLOYMENT_VERSION = os.environ.get("OPENAI_DEPLOYMENT_VERSION")

# Embedding (ada) deployment.
OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME = os.environ.get("OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME")
OPENAI_ADA_EMBEDDING_MODEL_NAME = os.environ.get("OPENAI_ADA_EMBEDDING_MODEL_NAME")

# Davinci deployment.
OPENAI_DAVINCI_DEPLOYMENT_NAME = os.environ.get("OPENAI_DAVINCI_DEPLOYMENT_NAME")
OPENAI_DAVINCI_MODEL_NAME = os.environ.get("OPENAI_DAVINCI_MODEL_NAME")

# Configure the module-level `openai` client for Azure.
# Any variable missing from the environment is passed through as None.
openai.api_type = "azure"
openai.api_key = OPENAI_API_KEY
openai.api_base = OPENAI_DEPLOYMENT_ENDPOINT
openai.api_version = OPENAI_DEPLOYMENT_VERSION

### **Add personality to the model and start asking questions**
We call the Azure OpenAI ***ChatCompletion*** API directly.

In [2]:
# Build the chat history: a system message that sets the persona,
# followed by the user's turn.
messages = [
    {
        "role": "system",
        "content": "You are a HELPFUL assistant answering users trivia questions. Answer in a clear and concise manner.",
    },
    {
        "role": "user",
        "content": "Good morning, how are you today?",
    },
]

# The Azure deployment to call is selected through `engine`.
answer = openai.ChatCompletion.create(
    engine=OPENAI_DEPLOYMENT_NAME,
    messages=messages,
)
display(HTML("ChatCompletion (gpt-35-turbo) :" + answer.choices[0].message.content))

In [3]:
# prepare prompt with another question:
# The system message matches the one used in the previous cell, so only the
# user question differs between the two calls.
messages = [{"role": "system", "content": "You are a HELPFUL assistant answering users trivia questions. Answer in a clear and concise manner."},
            {"role": "user", "content": "What's string theory?"}]


answer = openai.ChatCompletion.create(engine=OPENAI_DEPLOYMENT_NAME,
                                      messages=messages,)

# Render the first choice's reply as HTML.
display(HTML(answer.choices[0].message.content))

In [4]:
# Same user question as before, but the system message now asks for an
# answer aimed at a five-year-old.
messages = [
    {
        "role": "system",
        "content": "You are a HELPFUL assistant answering users trivia questions. Answer as for a FIVE YEARS old child.",
    },
    {
        "role": "user",
        "content": "what's string theory?",
    },
]

answer = openai.ChatCompletion.create(
    engine=OPENAI_DEPLOYMENT_NAME,
    messages=messages,
)

# Render the first choice's reply as HTML.
display(HTML(answer.choices[0].message.content))

### **LangChain**

LangChain is a framework built around Large Language Models (LLMs).

The core idea of the library is that we can “chain” together different components to create more advanced use cases around LLMs.

LangChain components: Models, Prompts, Indexes(Document Loaders and Splitters, Vector DBs), Chains and Agents

In [5]:
def init_llm(model=OPENAI_MODEL_NAME,
             deployment_name=OPENAI_DEPLOYMENT_NAME,
             openai_api_version=OPENAI_DEPLOYMENT_VERSION,
             temperature=0,
             max_tokens=400,
             stop="<|im_end|>",
             ):
    """Create a LangChain ``AzureOpenAI`` completion LLM.

    Args:
        model: Model name passed to ``AzureOpenAI``.
        deployment_name: Azure deployment to call.
        openai_api_version: Azure OpenAI API version string.
        temperature: Sampling temperature.
        max_tokens: Maximum number of tokens to generate per call.
        stop: Stop sequence(s) forwarded to the API through ``model_kwargs``.
            Accepts a single string, an iterable of strings, or ``None`` to
            send no stop sequence.

    Returns:
        The configured ``AzureOpenAI`` instance.
    """
    # Previously the `stop` argument was ignored and "<|im_end|>" was always
    # sent; normalise whatever the caller passed into the list form instead.
    if stop is None:
        model_kwargs = {}
    elif isinstance(stop, str):
        model_kwargs = {"stop": [stop]}
    else:
        model_kwargs = {"stop": list(stop)}

    llm = AzureOpenAI(deployment_name=deployment_name,
                      model=model,
                      openai_api_version=openai_api_version,
                      temperature=temperature,
                      max_tokens=max_tokens,
                      model_kwargs=model_kwargs)
    return llm

# init LLM Azure OpenAI model
llm = init_llm()
# NOTE: the repr shown by the next line includes `openai_api_key` in plain
# text - clear this cell's output before sharing or committing the notebook.
llm

AzureOpenAI(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, client=<class 'openai.api_resources.completion.Completion'>, model_name='gpt-35-turbo', temperature=0.0, max_tokens=400, top_p=1, frequency_penalty=0, presence_penalty=0, n=1, best_of=1, model_kwargs={'stop': ['<|im_end|>']}, openai_api_key='<REDACTED>', openai_api_base='', openai_organization='', openai_proxy='', batch_size=20, request_timeout=None, logit_bias={}, max_retries=6, streaming=False, allowed_special=set(), disallowed_special='all', tiktoken_model_name=None, deployment_name='gpt-35-turbo', openai_api_type='azure', openai_api_version='2023-03-15-preview')

In [6]:
# model "gpt-35-turbo"
# Send a plain text prompt through the LangChain LLM wrapper and render the
# returned string as HTML.
# The reply shows that gpt-35-turbo has been trained in a Q&A conversational style.
answer = llm("Good morning, how are you? ")
display(HTML("gpt-35-turbo: " + answer))

In [7]:
# Re-create the LLM with the default settings, then ask for a code sample.
llm = init_llm()
answer = llm("Create a Python function that takes a string argument and reverses it.")
# The answer is rendered as Markdown.
display(Markdown("gpt-35-turbo: " + answer))

gpt-35-turbo:  Use a for loop, not the built-in reverse() function.

def reverse_string(string):
    reversed_string = ""
    for i in range(len(string)-1, -1, -1):
        reversed_string += string[i]
    return reversed_string

print(reverse_string("hello")) # "olleh"
print(reverse_string("racecar")) # "racecar"
print(reverse_string("python")) # "nohtyp"<|im_sep|>

#### Prompts with LangChain

In [8]:
from langchain import PromptTemplate

# Prompt skeleton: {profession}, {expertise} and {question} are substituted
# when the template is formatted.
template = """You are a {profession} answering users questions. 
            More specifically, you are an expert in {expertise}. Answer in a clear and concise manner. Assume that the user is not a subject expert.
            If a question is not clear or not related to {expertise} say: it's not clear or the question is not related to {expertise}.
            
            USER: {question}
            ASSISTANT:
            
            <|im_end|>
            """

llm = init_llm()
prompt_template = PromptTemplate(
    template=template,
    input_variables=["profession", "expertise", "question"],
)
answer = llm(
    prompt_template.format(
        profession="Financial Trading Consultant",
        expertise="Risk Management",
        question="How do you assess the risk tolerance of a new client?",
    )
)
display(Markdown("gpt-35-turbo: " + answer))

gpt-35-turbo:  To assess the risk tolerance of a new client, you can use a risk tolerance questionnaire. This questionnaire will ask the client a series of questions about their investment goals, investment experience, and risk preferences. Based on the answers provided, you can determine the client's risk tolerance and recommend an investment strategy that aligns with their risk tolerance.

In [9]:
# Ask something outside the declared expertise to exercise the
# "not related" instruction in the template.
prompt_template = PromptTemplate(
    template=template,
    input_variables=["profession", "expertise", "question"],
)
answer = llm(
    prompt_template.format(
        profession="Financial Trading Consultant",
        expertise="Risk Management",
        question="What's the fastest car in the world? Answer in one sentence. ",
    )
)
display(Markdown("gpt-35-turbo: " + answer))

gpt-35-turbo:  It's not clear or the question is not related to Risk Management.

##### ChatPromptTemplate is a template that is specifically designed for conversational use cases.

It infers the variables from the template, so no need to pass them in as arguments.

In [10]:
# Prompt text with three placeholders: {profession}, {expertise} and {question}.
template = """You are a {profession} answering users questions. 
            More specifically, you are an expert in {expertise}. Answer in a clear and concise manner. Assume that the user is not a subject expert.
            If a question is not clear or not related to {expertise} say: it's not clear or the question is not related to {expertise}.
            
            USER: {question}
            ASSISTANT:
            
            <|im_end|>
            """

In [11]:
from langchain.prompts import ChatPromptTemplate

# Build the chat prompt straight from the template string; no explicit
# `input_variables` list is passed here.
prompt_template = ChatPromptTemplate.from_template(template)

In [12]:
# ChatPromptTemplate figures out the template variables automatically,
# so only the values need to be supplied when formatting.
answer = llm(prompt_template.format(profession="Financial Trading Consultant",  expertise="Risk Management",
                                    question="What's the fastest car in the world? Answer in one sentence."))
display(Markdown("gpt-35-turbo: " + answer))

gpt-35-turbo:  It's not clear or the question is not related to Risk Management.

#### **OPTIONAL.** LLM output parsers with LangChain
You can define the output schema and LangChain will parse the output for you.

The main idea is to define parsing instructions as code rather than in textual form.


In [13]:
from langchain.prompts import ChatPromptTemplate

In [14]:

# Sample call-centre transcript used as the input text for the extraction prompts.
customer_call = """

**Customer**: Hello, my name is John, and I'm a customer of Imaginal Bank.
**Clerk**: Hello, John! My name is Sara, and I'm a customer service representative at Imaginal Bank. How can I assist you today?
**Customer**: Hi, Sara. I'm interested in your bank's investment programs. 
              Can you tell me more about them, especially in terms of risk management?

**Clerk**: Absolutely, John. We have a few key programs I can highlight.

First, there's our 'Balanced Growth Fund'. It's a diversified mutual fund that invests in a mix of equities and bonds to provide both growth and income, reducing risk through diversification. 

We also have the 'Index Tracker ETF', which is designed to replicate the performance of a specific market index. By spreading investments across the entire index, it inherently reduces the risk associated with individual stocks.

Additionally, for those with a lower risk tolerance, we have the 'Secure Income Bond Fund', which focuses on government and high-quality corporate bonds. 

Our financial advisors are always available to guide you in choosing the right program based on your financial goals and risk tolerance.

**Customer**: I see. Could you elaborate on how the Balanced Growth Fund manages risk?

**Clerk**: Sure. The Balanced Growth Fund mitigates risk by diversifying investments across a wide range of assets. If one investment performs poorly, it's likely to be offset by other investments that are performing well. Furthermore, our portfolio managers actively manage the fund, adjusting holdings based on changing market conditions to manage risk and enhance returns.

**Customer**: Does the bank provide any tools to monitor my investments?

**Clerk**: Yes, John. We offer an online platform called 'Imaginal Investor Dashboard'. It provides real-time tracking of your investments, balance updates, and market trends. You can also set up alerts to be notified about significant changes in your portfolio.

**Customer**: That sounds quite comprehensive. How can I get started?

**Clerk**: You can schedule an appointment with one of our financial advisors. They'll walk you through your options, help you understand your risk tolerance, and guide you in choosing the right investment program. Would you like me to arrange that for you?

**Customer**: Yes, please. That would be helpful.

**Clerk**: Fantastic, John! Let's get that set up for you..."""


# Extraction prompt: asks for four ratings, each from a fixed list of values,
# returned as a JSON object. {text} is replaced with the transcript.
# NOTE(review): the prompt text contains typos ("descibe", "politenes");
# correcting them changes what is sent to the model, so re-run the cells after.
call_center_prompt_template = """

For the following text, extract the following information:
agent politeness: How polite is the agent? use the following values to descibe agent politenes: very polite, polite, neutral, impolite, very impolite.
agent knowledge: How knowledgeable is the agent? use the following values to descibe agent knowledge: very knowledgeable, knowledgeable, neutral, not knowledgeable, very not knowledgeable.
customer issue resolution: How well did the agent resolve the issue? use the following values to descibe issue resolution: very well, well, neutral, not well, very not well.
customer satisfaction: How satisfied is the customer? use the following values to descibe customer satisfaction: very satisfied, satisfied, neutral, dissatisfied, very dissatisfied.

Format the output as a json object with the following keys: agent_politeness, agent_knowledge, customer_issue_resolution, customer_satisfaction.

text: {text}
"""

In [15]:
# Wrap the extraction prompt in a chat prompt template.
prompt_template = ChatPromptTemplate.from_template(call_center_prompt_template)
# print (prompt_template)
llm = init_llm()
# Note: the response is a plain string. It looks like a JSON object, but it
# has not been parsed into one.
response = llm(prompt_template.format(text=customer_call))
print(response)

 
Output:

{
    "agent_politeness": "very polite",
    "agent_knowledge": "knowledgeable",
    "customer_issue_resolution": "well",
    "customer_satisfaction": "very satisfied"
}


In [16]:
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

In [17]:

# One ResponseSchema per field we want back from the model. Each `name`
# becomes a key in the format instructions printed below.
agent_politeness_schema = ResponseSchema(
    name="agent_politeness", description="How polite is the agent?")
# Renamed from the misspelled `aggent_knowledge_schema`.
agent_knowledge_schema = ResponseSchema(
    name="agent_knowledge", description="How knowledgeable is the agent?")
customer_issue_resolution_schema = ResponseSchema(
    name="customer_issue_resolution", description="How well did the agent resolve the issue?")
customer_satisfaction_schema = ResponseSchema(
    name="customer_satisfaction", description="How satisfied is the customer?")
customer_service_schemas = [agent_politeness_schema, agent_knowledge_schema,
                            customer_issue_resolution_schema, customer_satisfaction_schema]

# Build the parser from the schemas and print the instructions it generates
# for inclusion in the prompt.
output_parser = StructuredOutputParser.from_response_schemas(
    customer_service_schemas)
format_instructions = output_parser.get_format_instructions()
print(format_instructions)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"agent_politeness": string  // How polite is the agent?
	"agent_knowledge": string  // How knowledgeable is the agent?
	"customer_issue_resolution": string  // How well did the agent resolve the issue?
	"customer_satisfaction": string  // How satisfied is the customer?
}
```


In [18]:
# Extraction prompt with two placeholders: {text} for the transcript and
# {format_instructions} for the output parser's formatting instructions.
call_center_prompt_template_with_output_parser = """

For the following text, extract the following information:
agent politeness: How polite is the agent? use the following values to descibe agent politenes: very polite, polite, neutral, impolite, very impolite.
agent knowledge: How knowledgeable is the agent? use the following values to descibe agent knowledge: very knowledgeable, knowledgeable, neutral, not knowledgeable, very not knowledgeable.
customer issue resolution: How well did the agent resolve the issue? use the following values to descibe issue resolution: very well, well, neutral, not well, very not well.
customer satisfaction: How satisfied is the customer? use the following values to descibe customer satisfaction: very satisfied, satisfied, neutral, dissatisfied, very dissatisfied.

Format the output as a json object with the following keys: agent_politeness, agent_knowledge, customer_issue_resolution, customer_satisfaction.

text: {text}

{format_instructions}
"""

In [19]:
# Fill the template with the transcript and the parser's format instructions.
prompt = ChatPromptTemplate.from_template(
    template=call_center_prompt_template_with_output_parser)
# NOTE(review): `input` shadows the built-in `input()` for the rest of the
# session. Later cells read this name, so a rename has to be done in all of
# them together.
input = prompt.format(text=customer_call,
                      format_instructions=format_instructions)

In [20]:
# Show the fully formatted prompt that will be sent to the model.
print(input)

Human: 

For the following text, extract the following information:
agent politeness: How polite is the agent? use the following values to descibe agent politenes: very polite, polite, neutral, impolite, very impolite.
agent knowledge: How knowledgeable is the agent? use the following values to descibe agent knowledge: very knowledgeable, knowledgeable, neutral, not knowledgeable, very not knowledgeable.
customer issue resolution: How well did the agent resolve the issue? use the following values to descibe issue resolution: very well, well, neutral, not well, very not well.
customer satisfaction: How satisfied is the customer? use the following values to descibe customer satisfaction: very satisfied, satisfied, neutral, dissatisfied, very dissatisfied.

Format the output as a json object with the following keys: agent_politeness, agent_knowledge, customer_issue_resolution, customer_satisfaction.

text: 

**Customer**: Hello, my name is John, and I'm a customer of Imaginal Bank.
**Cler

In [21]:
# Send the formatted prompt to the LLM and keep the raw reply for parsing.
response = llm(input)

In [22]:
# Parse the model's reply with the structured output parser.
# The result is stored as `parsed_response` rather than `dict`, so the
# built-in `dict` type stays usable for the rest of the kernel session.
parsed_response = output_parser.parse(response)
parsed_response.get("agent_politeness")

'polite'

#### Prompts management best practices
Reuse prompts as much as possible. This will help you to get more consistent results.

Treat your prompts as code and keep them in a version control system. This will help you to track changes and to revert them if needed.

Use the LangChain prompt template that matches your use case, e.g. ChatPromptTemplate for conversational flows.


###  **One-shot, Few-shot learning**

This technique can improve model performance considerably.
We include a few worked examples in the prompt so that the model can infer the expected behaviour before it generates text. This is called few-shot learning. When the prompt contains only a single example, it is called one-shot learning.

In [23]:
# Few-shot prompt: three example USER/ASSISTANT exchanges come before the
# real {question}. {profession} and {expertise} are placeholders as well.
template_few_shot = """You are a {profession} answering users questions. 
            More specifically, you are an expert in {expertise}. Answer in a clear and concise manner. Assume that a user is not a subject expert.
            If a question is not clear or not related to {expertise} say: it's not clear or the question is not related to {expertise}.
           
            USER: How do you assess the risk tolerance of a new client?
            ASSISTANT: I begin by having a comprehensive discussion with the client about their financial goals, investments horizon, and comfort level with different levels of risk.
            
            USER: Can you provide an example of a specific risk management strategy you'd recommended to a client in a volatile market situation?
            ASSISTANT: During the market volatility caused by the pandemic, I'd recommended that a client diversify their portfolio further to reduce risk exposure.
            
            USER: How do you handle the situation when a client wants to pursue a risky investment that goes beyond their risk tolerance?
            ASSISTANT: I would clearly communicate the potential risks associated with the investment and how it might not align with their established risk tolerance. 
            
            USER: {question}
            ASSISTANT:
            
            <|im_end|>
            """

In [24]:
from langchain.chains import LLMChain

In [25]:
# Combine the few-shot prompt and the LLM into a chain.
prompt_few_shot = PromptTemplate(
    template=template_few_shot,
    input_variables=["profession", "expertise", "question"],
)
chain = LLMChain(llm=llm, prompt=prompt_few_shot)

# Run the chain with values for the three template variables.
res = chain.run(
    profession="Financial Trading Consultant",
    expertise="Risk Management",
    question="How do you use technology or specific financial tools to assist in risk management for your clients?",
)
display(Markdown(res))

 There are many tools available to assist in risk management, including portfolio management software, risk assessment tools, and financial modeling software. I use these tools to help clients understand the potential risks associated with different investments and to develop strategies to mitigate those risks. Additionally, I use technology to monitor market trends and to keep clients informed about changes that may impact their investments.

The Large Language Models (LLMs) are stateless. This means that they don’t retain any information about the conversation history.

In [26]:
# No conversation history is stored between calls, so a follow-up question
# that relies on the previous answer reaches the model without its context.
res = chain.run(profession="Financial Trading Consultant",  expertise="Risk Management",
                question="Which software do you use?")
display(Markdown(res))

 It's not clear or the question is not related to Risk Management.

### **Retain conversation history** 

##### The Large Language Models (LLMs) are stateless. This means that they don’t retain any information about the conversation history.
Each transaction is independent of the previous one. Chatbots keep in memory the conversation history and use it to generate the next response. This is why they are able to generate more coherent responses.

Previously we saw that the model fails to answer a question that requires context. We can solve this problem by retaining the conversation history, using the LangChain ConversationBufferMemory.

In [27]:
# Prompt text with three placeholders: {profession}, {expertise} and {question}.
template = """You are a {profession} answering users questions. 
            More specifically, you are an expert in {expertise}. Answer in a clear and concise manner. Assume that the user is not a subject expert.
            If a question is not clear or not related to {expertise} say: it's not clear or the question is not related to {expertise}.
            
            USER: {question}
            ASSISTANT:

            <|im_end|>
            """

In [28]:
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
from langchain.prompts import ChatPromptTemplate

llm = init_llm()

# ConversationBufferMemory stores the conversation history.
memory = ConversationBufferMemory()

# Set verbose=True to see more details on each call.
conversation = ConversationChain(
    llm=llm,
    memory=memory,
    verbose=False,
)

In [29]:
prompt = ChatPromptTemplate.from_template(template=template)

# First turn: the formatted prompt is passed as the chain's `input`.
response = conversation.run(
    input=prompt.format(
        profession="Financial Trading Consultant",
        expertise="Risk Management",
        question="How do you use technology or specific financial tools to assist in risk management for your clients?",
    )
)

display(Markdown(response))

 Hello! I'm happy to help. There are many ways that technology and financial tools can be used to assist in risk management. One of the most common ways is through the use of risk management software. This software can help identify potential risks and provide tools to mitigate those risks. Additionally, there are many financial tools that can be used to help manage risk, such as options, futures, and other derivatives. These tools can be used to hedge against potential losses and protect against market volatility. Finally, there are many data analysis tools that can be used to help identify trends and patterns in financial data, which can be used to make more informed decisions about risk management. Does that help?

In [30]:
# The chain now carries the conversation history, so a follow-up question
# that depends on the earlier exchange can be answered.
response = conversation.run(
    input=prompt.format(
        profession="Financial Trading Consultant",
        expertise="Risk Management",
        question="Which software do you use? List software products in a separate line.",
    )
)

display(Markdown(response))

  There are many different risk management software products available, and the specific software that we use depends on the needs of our clients. Some of the most popular risk management software products include:

- RiskMetrics
- MSCI RiskManager
- Algorithmics RiskWatch
- SAS Risk Management
- Bloomberg Risk Management

Each of these products has its own strengths and weaknesses, and the best choice depends on the specific needs of the client. Does that answer your question?

In [31]:
# Print the conversation history buffer, one "<type>: <content>" line per
# stored message.
history = conversation.memory.chat_memory.messages
for msg in history:
    print(f"{msg.type}: {msg.content}")

human: Human: You are a Financial Trading Consultant answering users questions. 
            More specifically, you are an expert in Risk Management. Answer in a clear and concise manner. Assume that the user is not a subject expert.
            If a question is not clear or not related to Risk Management say: it's not clear or the question is not related to Risk Management.
            
            USER: How do you use technology or specific financial tools to assist in risk management for your clients?
            ASSISTANT:

            <|im_end|>
            
ai:  Hello! I'm happy to help. There are many ways that technology and financial tools can be used to assist in risk management. One of the most common ways is through the use of risk management software. This software can help identify potential risks and provide tools to mitigate those risks. Additionally, there are many financial tools that can be used to help manage risk, such as options, futures, and other derivatives. Th

In [32]:
# Note that the chain's own prompt contains ```Current conversation:{history}```,
# which is where LangChain inserts the stored history.
print(conversation.prompt)
print()
# Show what the memory currently provides for the `history` variable.
print(memory.load_memory_variables({}))

input_variables=['history', 'input'] output_parser=None partial_variables={} template='The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n\nCurrent conversation:\n{history}\nHuman: {input}\nAI:' template_format='f-string' validate_template=True

{'history': "Human: Human: You are a Financial Trading Consultant answering users questions. \n            More specifically, you are an expert in Risk Management. Answer in a clear and concise manner. Assume that the user is not a subject expert.\n            If a question is not clear or not related to Risk Management say: it's not clear or the question is not related to Risk Management.\n            \n            USER: How do you use technology or specific financial tools to assist in risk management for your clients?\n            ASSISTANT:\n\n            <|im_en

In [33]:
# Ask the chain to recall the earlier questions from this conversation.
ans = conversation.run(
    input=prompt.format(
        profession="Financial Trading Consultant",
        expertise="Risk Management",
        question="List all questions I've asked you about Risk Management?",
    )
)
display(Markdown(ans))

  Sure, here are all the questions you've asked me about risk management:

- How do you use technology or specific financial tools to assist in risk management for your clients?
- Which software do you use? List software products in a separate line.

Is there anything else I can help you with?

#### More conversation memory types.

Keeping full conversation history could be expensive and we can hit the Azure API limits. LangChain provides different types of conversation memory that can be used to keep the conversation history and mitigate the limits issues.

**1. ConversationBufferWindowMemory** - keeps the last N interactions (human/AI exchanges) in the conversation history.

**2. ConversationTokenBufferMemory** - keeps the last N tokens in the conversation history.

**3. ConversationSummaryBufferMemory** - keeps summary of the conversation over time.

Additional Memory Type includes:

**4. Vector data memory** - stores the text in a vector DB and retrieves the most semantically similar text.

**5. Entity memories** - use an LLM which remembers details about specific entities.

Note: You can use multiple memories at the same time. 

Finally, you can store the conversation history in a conventional database such as SQL or NoSQL.


In [34]:
from langchain.memory import ConversationBufferWindowMemory

# Window of size 1, then record two exchanges.
memory = ConversationBufferWindowMemory(k=1)
memory.save_context(
    {"input": "hi"},
    {"output": "hello"},
)
memory.save_context(
    {"input": "What's your name"},
    {"output": "My name is John"},
)

# Display what the memory returns as history.
memory.load_memory_variables({})

{'history': "Human: What's your name\nAI: My name is John"}