In [1]:
import os
import openai

key = os.environ['OPENAI_API_KEY']
assert key, "Please set your OPENAI_API_KEY environment variable."
openai.api_key = key

In [2]:
def get_completion(prompt: str, model: str = "gpt-3.5-turbo") -> str:
    """Use OpenAI API to generate text completion.

    Parameters
    ----------
    prompt
        The prompt to generate text completion for.
    model
        The LLM to use for generating text completion.

    Returns
    -------
    str
        The text completion generated by the LLM.
    """
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0, 
    )
    return response.choices[0].message["content"]

In [3]:
get_completion("What is 1+1?")


'1+1 equals 2.'

In [4]:
prompt_template = """
Translate the text that is delimited by triple backticks into a style that is {output_style}.
text: ```{input_text}```
"""

print(prompt_template)


Translate the text that is delimited by triple backticks into a style that is {output_style}.
text: ```{input_text}```



In [5]:
# fill prompt_template with values
output_style = "American English with a calm and polite tone"
input_text = "Tcheu, c'est pas vrai, ça ! Le machin pour la clim que vous m'avez vendu ne fonctionne pas ! Avec qui je dois parler pour me faire rembourser ???"

prompt = prompt_template.format(output_style=output_style, input_text=input_text)

print(prompt)


Translate the text that is delimited by triple backticks into a style that is American English with a calm and polite tone.
text: ```Tcheu, c'est pas vrai, ça ! Le machin pour la clim que vous m'avez vendu ne fonctionne pas ! Avec qui je dois parler pour me faire rembourser ???```



In [6]:
get_completion(prompt)

"Oh dear, this is quite frustrating! The air conditioning unit that you sold me doesn't seem to be working. Could you please let me know who I should speak to in order to request a refund? Thank you very much for your assistance."

# 1. Prompts

In [7]:
from langchain.prompts import ChatPromptTemplate


In [8]:
chat_prompt_template = ChatPromptTemplate.from_template(template=prompt_template)

In [9]:
chat_prompt_template.messages[0]

HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input_text', 'output_style'], output_parser=None, partial_variables={}, template='\nTranslate the text that is delimited by triple backticks into a style that is {output_style}.\ntext: ```{input_text}```\n', template_format='f-string', validate_template=True), additional_kwargs={})

In [10]:
print(chat_prompt_template.messages[0])
print(vars(chat_prompt_template.messages[0]))

prompt=PromptTemplate(input_variables=['input_text', 'output_style'], output_parser=None, partial_variables={}, template='\nTranslate the text that is delimited by triple backticks into a style that is {output_style}.\ntext: ```{input_text}```\n', template_format='f-string', validate_template=True) additional_kwargs={}
{'prompt': PromptTemplate(input_variables=['input_text', 'output_style'], output_parser=None, partial_variables={}, template='\nTranslate the text that is delimited by triple backticks into a style that is {output_style}.\ntext: ```{input_text}```\n', template_format='f-string', validate_template=True), 'additional_kwargs': {}}


In [11]:
chat_prompt_template.messages[0].prompt.input_variables

['input_text', 'output_style']

In [12]:
customer_messages = chat_prompt_template.format_messages(
    input_text=input_text,
    output_style=output_style,
    )


In [13]:
customer_messages[0]

HumanMessage(content="\nTranslate the text that is delimited by triple backticks into a style that is American English with a calm and polite tone.\ntext: ```Tcheu, c'est pas vrai, ça ! Le machin pour la clim que vous m'avez vendu ne fonctionne pas ! Avec qui je dois parler pour me faire rembourser ???```\n", additional_kwargs={}, example=False)

In [14]:
print(customer_messages[0].content)


Translate the text that is delimited by triple backticks into a style that is American English with a calm and polite tone.
text: ```Tcheu, c'est pas vrai, ça ! Le machin pour la clim que vous m'avez vendu ne fonctionne pas ! Avec qui je dois parler pour me faire rembourser ???```



# 2. Model

In [15]:
from langchain.chat_models import ChatOpenAI

In [16]:
chat_model = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0.0,
    cache=None,
)

In [17]:
chat_model

ChatOpenAI(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-3.5-turbo', temperature=0.0, model_kwargs={}, openai_api_key='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', openai_api_base='', openai_organization='', openai_proxy='', request_timeout=None, max_retries=6, streaming=False, n=1, max_tokens=None, tiktoken_model_name=None)

In [19]:
model_response = chat_model(customer_messages)
print(model_response.content)

Oh dear, this is quite frustrating! The air conditioning unit that you sold me isn't working at all. Could you please let me know who I should speak to in order to get a refund? Thank you very much for your assistance.


# 3. Parser

In [20]:
from langchain.output_parsers import ResponseSchema, StructuredOutputParser

In [21]:
paper_abstract = """
Title: Randomized Clinical Trial Investigating the Efficacy of Drug Y in the Management of Type 2 Diabetes Mellitus

Abstract:

In this double-blind, randomized, placebo-controlled trial, we aimed to evaluate the effectiveness and safety of Drug Y in lowering glycated hemoglobin (HbA1c) levels in patients with type 2 diabetes mellitus. The study enrolled participants aged 45-75 years, with four hundreds patients receiving Drug Y and two hundreds patients receiving a placebo over a 24-week period.

The primary endpoint was the change from baseline in HbA1c levels (measured in %) at 24 weeks. At the end of the trial, HbA1c levels had decreased by an average of 1.2% in the Drug Y group compared to a reduction of 0.2% in the placebo group (mean difference = -1.0%; 95% CI, -1.3 to -0.7; p<0.001).

Secondary endpoints included changes in fasting plasma glucose levels (FPG), measured in mg/dL. Patients in the Drug Y group exhibited a mean reduction of 30.6 mg/dL in FPG levels compared to a reduction of 5.4 mg/dL in the placebo group (mean difference = -25.2 mg/dL; 95% CI, -28.1 to -22.3 mg/dL; p<0.001).

Adverse events were reported in 14% of the participants taking Drug Y, and 10% in the placebo group, with the most common being mild hypoglycemia and gastrointestinal discomfort. Serious adverse events were similar in both groups.

In conclusion, Drug Y significantly lowered HbA1c and FPG levels over 24 weeks in patients with type 2 diabetes, demonstrating a superior glycemic control compared to placebo, with a manageable safety profile. Future studies should focus on the long-term effects of Drug Y in managing type 2 diabetes.
"""

In [22]:
population_size_schema = ResponseSchema(
    name="population_size",
    description="Total population size on which the study was performed, including both the treatment and control groups. \
        It should be expressed as an integer number, e.g. 100, not a string e.g. 'one hundred'. \
        If the population size is not found, set value to None.",
    type="int",
    )
fpg_reduction_schema = ResponseSchema(
    name="fpg_reduction",
    description="Reduction in fasting plasma glucose (FPG) levels in the treatment group compared to the control group. \
        It should be expressed as a float number followed by a measurement unit, e.g. '1.2 mg/L'. \
        If the FPG reduction is not found, set value to None.",
    type="str",
    )


response_schemas = [
    population_size_schema,
    fpg_reduction_schema,
    ]

In [23]:
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)


In [24]:
format_instructions = output_parser.get_format_instructions()


In [25]:
print(format_instructions)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"population_size": int  // Total population size on which the study was performed, including both the treatment and control groups.         It should be expressed as an integer number, e.g. 100, not a string e.g. 'one hundred'.         If the population size is not found, set value to None.
	"fpg_reduction": str  // Reduction in fasting plasma glucose (FPG) levels in the treatment group compared to the control group.         It should be expressed as a float number followed by a measurement unit, e.g. '1.2 mg/L'.         If the FPG reduction is not found, set value to None.
}
```


In [26]:
prompt_template = """\
For the following paper abstract (identified by ABSTRACT_BEGIN and ABSTRACT_END), extract the following output:
- population_size
- fpg_reduction

ABSTRACT_BEGIN
{paper_abstract}
ABSTRACT_END

{format_instructions}
"""

prompt = ChatPromptTemplate.from_template(template=prompt_template)

messages = prompt.format_messages(
    paper_abstract=paper_abstract,
    format_instructions=format_instructions,
    )

In [27]:
print(messages[0].content)

For the following paper abstract (identified by ABSTRACT_BEGIN and ABSTRACT_END), extract the following output:
- population_size
- fpg_reduction

ABSTRACT_BEGIN

Title: Randomized Clinical Trial Investigating the Efficacy of Drug Y in the Management of Type 2 Diabetes Mellitus

Abstract:

In this double-blind, randomized, placebo-controlled trial, we aimed to evaluate the effectiveness and safety of Drug Y in lowering glycated hemoglobin (HbA1c) levels in patients with type 2 diabetes mellitus. The study enrolled participants aged 45-75 years, with four hundreds patients receiving Drug Y and two hundreds patients receiving a placebo over a 24-week period.

The primary endpoint was the change from baseline in HbA1c levels (measured in %) at 24 weeks. At the end of the trial, HbA1c levels had decreased by an average of 1.2% in the Drug Y group compared to a reduction of 0.2% in the placebo group (mean difference = -1.0%; 95% CI, -1.3 to -0.7; p<0.001).

Secondary endpoints included chan

In [28]:
response = chat_model(messages)


In [29]:
print(response.content)


```json
{
	"population_size": 600,
	"fpg_reduction": "-25.2 mg/dL"
}
```


In [30]:
output_dict = output_parser.parse(response.content)
print(output_dict)


{'population_size': 600, 'fpg_reduction': '-25.2 mg/dL'}


In [31]:
# now a new parser, for the fpg reduction: separate numeric_value (float) from measurement_unit (str)
value_schema = ResponseSchema(
    name="value",
    description="Numerical value of measurement, e.g. for '-123.4 m/(kg*s^2)', the value is -123.4.\
        If the value is not found, set value to None.",
    type="float",
    )
unit_schema = ResponseSchema(
    name="unit",
    description="Measurement unit, e.g. for '-123.4 m/(kg*s^2)', the unit is 'm/(kg*s^2)'.\
        If the unit is not found, set value to None.",
    type="str",
    )

In [32]:
response_schemas = [
    value_schema,
    unit_schema,
    ]

fpg_reduction_parser = StructuredOutputParser.from_response_schemas(response_schemas)


In [33]:
fpg_reduction = output_dict["fpg_reduction"]
print(fpg_reduction)

-25.2 mg/dL


In [34]:
prompt_template = """\
    For the following fpg reduction, extract the following output:
    - value
    - unit

    fpg_reduction: {fpg_reduction}

    {format_instructions}
    """

format_instructions = fpg_reduction_parser.get_format_instructions()
print(format_instructions)

prompt = ChatPromptTemplate.from_template(template=prompt_template)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"value": float  // Numerical value of measurement, e.g. for '-123.4 m/(kg*s^2)', the value is -123.4.        If the value is not found, set value to None.
	"unit": str  // Measurement unit, e.g. for '-123.4 m/(kg*s^2)', the unit is 'm/(kg*s^2)'.        If the unit is not found, set value to None.
}
```


In [35]:
messages = prompt.format_messages(
    fpg_reduction=fpg_reduction,
    format_instructions=format_instructions,
    )
print(messages[0].content)

response = chat_model(messages)
print(response.content)


    For the following fpg reduction, extract the following output:
    - value
    - unit

    fpg_reduction: -25.2 mg/dL

    The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"value": float  // Numerical value of measurement, e.g. for '-123.4 m/(kg*s^2)', the value is -123.4.        If the value is not found, set value to None.
	"unit": str  // Measurement unit, e.g. for '-123.4 m/(kg*s^2)', the unit is 'm/(kg*s^2)'.        If the unit is not found, set value to None.
}
```
    
```json
{
	"value": -25.2,
	"unit": "mg/dL"
}
```


In [36]:
fpg_reduction_dict = fpg_reduction_parser.parse(response.content)
print(fpg_reduction_dict)


{'value': -25.2, 'unit': 'mg/dL'}


In [37]:
print(output_dict)
final_dict = {**output_dict}
final_dict["fpg_reduction"] = fpg_reduction_dict
print(final_dict)

{'population_size': 600, 'fpg_reduction': '-25.2 mg/dL'}
{'population_size': 600, 'fpg_reduction': {'value': -25.2, 'unit': 'mg/dL'}}


In [38]:
import json

print(json.dumps(final_dict, indent=4))

{
    "population_size": 600,
    "fpg_reduction": {
        "value": -25.2,
        "unit": "mg/dL"
    }
}
