In [6]:
import ast

import fitz
from tiktoken import get_encoding

In [4]:
# Tokenizer used for every token count in this notebook. The same counts are
# later priced against the Claude entries of the pricing table as well, so for
# those models they are presumably only an approximation -- TODO confirm.
encoder = get_encoding("cl100k_base")

In [7]:
def _extract_pdf_text(pdf_path: str) -> str:
    """Return the text of all pages of a PDF as one string.

    Every page's text is preceded by a blank-line separator ("\\n\\n"), so the
    result also starts with that separator when the document has pages.

    Args:
        pdf_path: Path of the PDF file to open with ``fitz``.

    Returns:
        The concatenated page texts, or "" for a document without pages.
    """
    with fitz.open(pdf_path) as doc:
        # A single join instead of repeated string concatenation in a loop.
        return "".join("\n\n" + page.get_text() for page in doc)


out_text_2008 = _extract_pdf_text('../data/directiva_residuos/2008_98_ce_boetxt.pdf')
out_text_2018 = _extract_pdf_text('../data/directiva_residuos/2018_851_boetxt.pdf')

In [12]:
def _load_literal_dict(path: str) -> dict:
    """Parse the literal stored in ``path`` without executing code.

    ``ast.literal_eval`` accepts the same literal syntax that the previous
    ``eval`` call parsed (dicts, lists, strings, numbers, ...) but rejects
    arbitrary expressions, so a tampered data file cannot run code.

    NOTE(review): if these files are strict JSON, ``json.load`` would be the
    more natural loader -- confirm how the files were written before switching.

    Args:
        path: Path of the file to read.

    Returns:
        The parsed object; the cells below call ``.values()`` on it, so a
        dict is expected.

    Raises:
        ValueError or SyntaxError: If the content is not a plain literal.
    """
    with open(path) as file:
        return ast.literal_eval(file.read())


mods_2018 = _load_literal_dict('../data/directiva_residuos/mods_2018.json')
articles_2008 = _load_literal_dict('../data/directiva_residuos/articles_2008.json')

In [8]:
# Tokenize the full text extracted from each PDF.
tokens_2008 = encoder.encode(out_text_2008)
tokens_2018 = encoder.encode(out_text_2018)

In [17]:
# Join the values of each mapping with newlines, then tokenize the joined text.
articles_2008_text = "\n".join(articles_2008.values())
mods_2018_text = "\n".join(mods_2018.values())
tokens_articles_2008 = encoder.encode(articles_2008_text)
tokens_mods_2018 = encoder.encode(mods_2018_text)

In [31]:
# Per-model pricing table used by the cost estimates below.
#   "input" / "output": price of one token. The literals are divided by 1000
#       or 1000000, i.e. the source figures were quoted per 1K or per 1M
#       tokens. Dollars, going by the summary printed at the end.
#   "context_len": context size in tokens; used to estimate the number of
#       calls needed to process a text.
# NOTE(review): these are hard-coded snapshots -- verify against current
# vendor pricing and context sizes before reusing the numbers.
api_token_pricing = {
    "gpt-4-8k": {"input": 0.03/1000, "output": 0.06/1000, "context_len": 8000},
    "gpt-4-32k": {"input": 0.06/1000, "output": 0.12/1000, "context_len": 32000},
    "gpt-3.5-Turbo-4k": {"input": 0.0015/1000, "output": 0.002/1000, "context_len": 4000},
    "gpt-3.5-Turbo-16k": {"input": 0.003/1000, "output": 0.004/1000, "context_len": 16000},
    "claude_instant-100k": {"input": 1.63/1000000, "output": 5.51/1000000, "context_len": 100000},
    "claude_2-100k": {"input": 11.02/1000000, "output": 32.68/1000000, "context_len": 100000},
}

In [11]:
# Token counts of the raw text extracted from the 2008 and 2018 PDFs.
len(tokens_2008), len(tokens_2018)

(24332, 41562)

In [19]:
# Token counts of the newline-joined texts loaded from articles_2008 and mods_2018.
len(tokens_articles_2008), len(tokens_mods_2018)

(14934, 21211)

In [26]:
# Quick estimate: gpt-4-8k input price applied to twice the raw 2008 token count.
# NOTE(review): the reason for the factor 2 is not stated -- presumably two
# passes over the text; confirm.
api_token_pricing["gpt-4-8k"]["input"] * len(tokens_2008)*2

1.4599199999999999

# Clean text and extract articles
Two scenarios:
1. Both tasks (text cleaning and article extraction) are solved properly with a single prompt call
2. Each task needs its own prompt call: one for cleaning the text and another one for article extraction

Assumptions:
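- Each prompt call outputs slightly fewer tokens than it receives: a ratio of 0.85 for the single-call scenario, and 0.945 followed by 0.9 for the two chained calls (0.945 × 0.9 ≈ 0.85)
- All models can be applied sequentially over chunks of the text, so context length is not treated as a hard limit; it only determines the number of calls, and therefore how many times the system prompt is billed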

In [38]:
import math

In [61]:
def get_single_task_cost(
    input_token_len: int,
    output_token_len: int,
    cost_dict: dict,
    sys_prompt_token_len: int = 1000,
    verbose: bool = False,
) -> float:
    """Estimate the price of running one task over a text with one model.

    Args:
        input_token_len: Number of tokens sent as input.
        output_token_len: Number of tokens expected as output.
        cost_dict: Pricing entry providing the per-token "input" and "output"
            prices and the model's "context_len".
        sys_prompt_token_len: Size of the system prompt in tokens; it is
            billed at the input price once per call.
        verbose: When true, print the three partial costs.

    Returns:
        System prompt cost + input cost + output cost, rounded to 2 decimals.
    """
    # The output has to fit in the context next to the input, so the number
    # of calls is taken as twice the number of context-sized input chunks.
    input_chunks = math.ceil(input_token_len / cost_dict["context_len"])
    n_calls = input_chunks * 2

    price_in = cost_dict["input"]
    prompt_total = sys_prompt_token_len * n_calls * price_in
    input_total = input_token_len * price_in
    output_total = output_token_len * cost_dict["output"]

    if verbose:
        report = (
            "\n"
            f"sys_prompt_cost = {prompt_total}\n"
            f"input_cost = {input_total}\n"
            f"output_cost = {output_total}\n"
        )
        print(report)

    return round(prompt_total + input_total + output_total, 2)

In [63]:
def get_models_cost(
    model_cost_dict: dict, input_token_len: int, output_ratio: float = 0.85, sys_prompt_tokens: int = 1000
) -> dict:
    """Estimate the cost of one task for every model in a pricing table.

    Args:
        model_cost_dict: Mapping of model name to its pricing entry.
        input_token_len: Number of input tokens of the task.
        output_ratio: Expected output size as a fraction of the input size.
        sys_prompt_tokens: System prompt size in tokens.

    Returns:
        Mapping of model name to the value returned by
        ``get_single_task_cost`` for that model.
    """
    # The expected output length is the same for every model.
    expected_output_len = input_token_len * output_ratio
    return {
        name: get_single_task_cost(input_token_len, expected_output_len, pricing, sys_prompt_tokens)
        for name, pricing in model_cost_dict.items()
    }

## Scenario 1
The tasks are solved properly with just one prompt call

In [66]:
# Scenario 1 parameters: the input is the combined token count of both texts.
s1_input = len(tokens_articles_2008) + len(tokens_mods_2018)
s1_output_ratio = 0.85  # expected output tokens as a fraction of input tokens
s1_sys_prompt_token_len = 1000  # system prompt size in tokens

In [67]:
# Scenario 1 cost for every model in the pricing table.
s1_costs = get_models_cost(api_token_pricing, s1_input, s1_output_ratio, s1_sys_prompt_token_len)

In [68]:
s1_costs

{'gpt-4-8k': 3.23,
 'gpt-4-32k': 6.1,
 'gpt-3.5-Turbo-4k': 0.15,
 'gpt-3.5-Turbo-16k': 0.25,
 'claude_instant-100k': 0.23,
 'claude_2-100k': 1.42}

## Scenario 2
Each task needs its own prompt call: one for cleaning the text and another one for article extraction

In [78]:
# Combined token reduction of the two chained calls: the product of the two
# scenario 2 output ratios, which is roughly the 0.85 used in scenario 1.
0.9*0.945

0.8504999999999999

In [77]:
# Scenario 2 parameters. The first call receives the same input as scenario 1;
# the second call receives the expected output of the first one.
s2_1_input = len(tokens_articles_2008) + len(tokens_mods_2018)
s2_1_output_ratio = 0.945
s2_2_input = s2_1_input * s2_1_output_ratio  # float, not rounded to whole tokens
s2_2_output_ratio = 0.9
s2_sys_prompt_token_len = 1000  # system prompt size in tokens, same for both calls

In [79]:
# Per-model cost of each of the two chained calls of scenario 2.
s2_1_costs = get_models_cost(api_token_pricing, s2_1_input, s2_1_output_ratio, s2_sys_prompt_token_len)
s2_2_costs = get_models_cost(api_token_pricing, s2_2_input, s2_2_output_ratio, s2_sys_prompt_token_len)

In [80]:
# Total scenario 2 cost per model: first call plus second call. Both addends
# are already rounded to 2 decimals, so the sum is rounded again to avoid
# binary floating-point artefacts such as 0.29000000000000004.
s2_costs = {
    model_name: round(s2_1_costs[model_name] + s2_2_costs[model_name], 2)
    for model_name in s2_1_costs
}
s2_costs

{'gpt-4-8k': 6.6,
 'gpt-4-32k': 12.49,
 'gpt-3.5-Turbo-4k': 0.29000000000000004,
 'gpt-3.5-Turbo-16k': 0.5,
 'claude_instant-100k': 0.48,
 'claude_2-100k': 2.94}

# Cost range per model (min = scenario 1, max = scenario 2)

In [81]:
# Per-model cost range: scenario 1 gives the lower bound, scenario 2 the upper bound.
min_max_costs = {
    name: {"min": s1_costs[name], "max": s2_costs[name]}
    for name in s2_costs
}
min_max_costs

{'gpt-4-8k': {'min': 3.23, 'max': 6.6},
 'gpt-4-32k': {'min': 6.1, 'max': 12.49},
 'gpt-3.5-Turbo-4k': {'min': 0.15, 'max': 0.29000000000000004},
 'gpt-3.5-Turbo-16k': {'min': 0.25, 'max': 0.5},
 'claude_instant-100k': {'min': 0.23, 'max': 0.48},
 'claude_2-100k': {'min': 1.42, 'max': 2.94}}

In [85]:
# Build the plain-text summary line by line and join once at the end.
report_lines = ["Text clean and split cost by model in dollars:"]
for model_name, bounds in min_max_costs.items():
    report_lines.append(f"  Model {model_name}:")
    report_lines.append(f"    Min cost {bounds['min']}$")
    report_lines.append(f"    Max cost {bounds['max']}$")
cost_text = "\n".join(report_lines)
print(cost_text)

Text clean and split cost by model in dollars:
  Model gpt-4-8k:
    Min cost 3.23$
    Max cost 6.6$
  Model gpt-4-32k:
    Min cost 6.1$
    Max cost 12.49$
  Model gpt-3.5-Turbo-4k:
    Min cost 0.15$
    Max cost 0.29000000000000004$
  Model gpt-3.5-Turbo-16k:
    Min cost 0.25$
    Max cost 0.5$
  Model claude_instant-100k:
    Min cost 0.23$
    Max cost 0.48$
  Model claude_2-100k:
    Min cost 1.42$
    Max cost 2.94$
