**Notebook for experimenting with prompts**

In [1]:
## Imports
## System imports
import copy
import os

## Data manipulation
import pandas as pd

## LangChain
from langchain_groq import ChatGroq
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

## Misc
from dotenv import load_dotenv
from datetime import datetime
from pprint import pprint

## Self-defined modules
from utils import calculations, prompts

API keys (retrieved from .env file)

In [2]:
## Load environment variables
load_dotenv()

## Fail fast if any API key is missing or empty.
## os.environ.get() returns None for an absent key, so the assert fires with its
## helpful message; os.environ[...] would raise a bare KeyError before the assert ran.
for _required_key in ("LANGCHAIN_API_KEY", "GROQ_API_KEY", "OPENAI_API_KEY"):
    assert os.environ.get(_required_key), f"Please set the {_required_key} environment variable"

Update the data source paths below to match your local file structure.

In [3]:
## Data sources
## Directory holding the raw input files (relative to this notebook)
DATA_DIR = "../../data/raw"
## Full path of the CSV file loaded during pre-processing
DATA_CSV_PATH = f"{DATA_DIR}/data.csv"

In [4]:
## Data pre-processing
df = pd.read_csv(DATA_CSV_PATH)

## Parse the timestamp columns (day-first, e.g. "14/01/2024 09:30") with the vectorised
## pandas parser. Unlike a per-row datetime.strptime, missing values become NaT
## instead of raising a TypeError.
DATE_FORMAT = "%d/%m/%Y %H:%M"
for date_col in ("CreateDate", "DeleteDate"):
    df[date_col] = pd.to_datetime(df[date_col], format=DATE_FORMAT)

In [5]:
## Getting metrics (similar to those on dashboard)
## calculate_metrics returns seven results, unpacked here one name per line
(
    bi1_overview,
    bi2_sector,
    bi3_capacity,
    bi4_lifetime,
    bi5_price_instruction,
    bi6_trades_over_time,
    bi7_sankey_diagram,
) = calculations.calculate_metrics(df)

## Show the overview metrics as a plain string (last expression = cell output)
str(bi1_overview)

"{'current': {'total_number_of_orders': 5000, 'total_volume_of_orders': 20176480, 'total_value_of_orders': 80582268.83}, 'average_historical': {'total_number_of_orders': 5000, 'total_volume_of_orders': 20176480, 'total_value_of_orders': 80582268.83}, 'percentage_difference': {'in_total_orders': 0.0, 'in_total_volume': 0.0, 'in_total_value': 0.0}}"

In [6]:
## Configuring LLM
## Groq-hosted model; no key is passed explicitly here
## (presumably picked up from GROQ_API_KEY in the environment -- confirm)
groq_llm = ChatGroq(model="llama3-8b-8192")

## OpenAI model, with the API key passed explicitly from the environment
openai_llm = ChatOpenAI(
    model="gpt-4o-mini",
    api_key=os.environ['OPENAI_API_KEY'],
)

**Prompts**

Use PromptTemplate to create a prompt, specifying the input variables to be replaced by data

In [7]:
## Format a prompt with the data
## Sample output sentence to be substituted into the {example} placeholder
example_output = "Today's revenue was $300, with costs of $50."

## Template text with two placeholders: {data} and {example}
example_template = """
    Generate a summary of the financial data.
    Data required: {data}
    Provide an example of output: {example}
"""

example_prompt = PromptTemplate(
    template=example_template,
    input_variables=["data", "example"],
)

## Pipe the prompt into the OpenAI model, then parse the response into a plain string
example_chain = example_prompt | openai_llm | StrOutputParser()

## Invoke the chain by filling in the input variables
# print(example_chain.invoke({"data": bi1_overview, "example": example_output}))

In [8]:
## Try out the prompt for the Overview section
overview_prompt_str = prompts.overview_order_prompt

## Add a final sentence to include today's data
overview_prompt_str += """
    Use the above template for today's data: {data}
"""

overview_prompt = PromptTemplate(
    input_variables=["data"],
    template=overview_prompt_str
)

overview_chain = overview_prompt | openai_llm | StrOutputParser()

## First run: the metrics exactly as calculated
pprint(overview_chain.invoke({"data": bi1_overview}))

## Change up the imported data to have some varied output.
## deepcopy (not dict.copy) because the metrics are nested dicts: a shallow copy shares
## the inner dicts, so the edits below would silently overwrite bi1_overview as well
## and make this cell non-idempotent on re-run.
bi1_overview_new = copy.deepcopy(bi1_overview)
bi1_overview_new["average_historical"]["total_number_of_orders"] = 4000
bi1_overview_new["percentage_difference"]["in_total_orders"] = 20
pprint(bi1_overview_new)

## Second run: the hand-modified metrics
pprint(overview_chain.invoke({"data": bi1_overview_new}))

('**Overview**  \n'
 'For the period of 11/01/2024 - 14/01/2024, there was a total of 5000 orders '
 'placed. The Total Volume done for the day was 20,176,480, and the Total '
 'Value was 80,582,268.83. This is the same compared to the previous 3 days, '
 'which had 5000 orders placed with a Total Volume of 20,176,480 and Total '
 'Value of 80,582,268.83.')
{'average_historical': {'total_number_of_orders': 4000,
                        'total_value_of_orders': 80582268.83,
                        'total_volume_of_orders': 20176480},
 'current': {'total_number_of_orders': 5000,
             'total_value_of_orders': 80582268.83,
             'total_volume_of_orders': 20176480},
 'percentage_difference': {'in_total_orders': 20,
                           'in_total_value': 0.0,
                           'in_total_volume': 0.0}}
('**Order Overview**  \n'
 'For the period of 11/01/2024 - 14/01/2024, there was a total of 5000 orders '
 'placed. The Total Volume done for the day was 20,176,48