In [14]:
# read test data from data/{dataname}/Queried_{dataname}_all_models_clean_test

import pandas as pd
import numpy as np
import os
import sys
import json
import re
import tiktoken

sys.path.append('src')

from FrugalGPT import optimizer
import FrugalGPT

In [15]:
supported_LLM = FrugalGPT.getservicename()
print("supported LLMs:",supported_LLM)
supported_LLM_names = [llm.split("/")[1] for llm in supported_LLM]
print("supported_LLM_names:", supported_LLM_names)

supported LLMs: ['google/gemini-1.5-flash-002', 'google/gemini-1.5-pro-002', 'google/gemini-1.0-pro', 'openaichat/gpt-4o-mini', 'openaichat/gpt-4o', 'azure/Phi-3-mini-4k-instruct', 'azure/Phi-3.5-mini-instruct', 'azure/Phi-3-small-8k-instruct', 'azure/Phi-3-medium-4k-instruct', 'deepinfra/llama-3-8B', 'deepinfra/llama-3-70B', 'deepinfra/mixtral-8x7B']
supported_LLM_names: ['gemini-1.5-flash-002', 'gemini-1.5-pro-002', 'gemini-1.0-pro', 'gpt-4o-mini', 'gpt-4o', 'Phi-3-mini-4k-instruct', 'Phi-3.5-mini-instruct', 'Phi-3-small-8k-instruct', 'Phi-3-medium-4k-instruct', 'llama-3-8B', 'llama-3-70B', 'mixtral-8x7B']


In [16]:
service_names = ['openaichat/gpt-4o-mini',
                'openaichat/gpt-4o',
                'google/gemini-1.5-flash-002',
                'google/gemini-1.5-pro-002',
                'google/gemini-1.0-pro',
                'azure/Phi-3-mini-4k-instruct',
                'azure/Phi-3.5-mini-instruct',
                'azure/Phi-3-small-8k-instruct',
                'azure/Phi-3-medium-4k-instruct',
                'deepinfra/llama-3-8B',
                'deepinfra/llama-3-70B',
                'deepinfra/mixtral-8x7B',
                ]

In [17]:
# dataname = "OVERRULING"
dataname = "AGNEWS"
# dataname = "HEADLINES"
# read data
test_data_df = pd.read_csv(f"data/{dataname}/Queried_{dataname}_all_models_clean_test.csv", header=0)

test_data_df.head()

Unnamed: 0,query_raw,query,ref_answer,gpt-4o-mini,gpt-4o,llama-3-8B,llama-3-70B,mixtral-8x7B,gemini-1.5-flash-002,gemini-1.0-pro,gemini-1.5-pro-002,Phi-3.5-mini-instruct,Phi-3-small-8k-instruct,Phi-3-mini-4k-instruct,Phi-3-medium-4k-instruct
0,Q: America West Backs Away From ATA Bid Americ...,"Please answer which category (World, Sports, B...",business,business,business,business,business,business,business,business,business,business,business,business,business
1,"Q: Compete against your friends, SI experts an...","Please answer which category (World, Sports, B...",sports,sports,sports,sports,sports,sports,sports,sports,sports,sports,sports,sports,sports
2,Q: Oracle expected to push on content manageme...,"Please answer which category (World, Sports, B...",sci/tech,business,sci/tech,business,business,business,business,business,sci/tech,business,business,business,sci/tech
3,"Q: Bosox strike deal with Mirabelli; Yanks, Fl...","Please answer which category (World, Sports, B...",sports,sports,sports,sports,sports,sports,sports,sports,sports,business,sports,business,sports
4,Q: Bonds deserves a quot;C quot; for historic...,"Please answer which category (World, Sports, B...",sports,sports,sports,sports,sports,sports,sports,sports,sports,world,sports,sports,sports


In [5]:
test_data = []
for index, row in test_data_df.iterrows():
    query = row['query']
    ref_answer = row['ref_answer']
    _id = index
    model_answer = {}
    for model_name in supported_LLM_names:
        model_answer[model_name] = row[model_name]
    test_data.append([query, ref_answer, _id, model_answer])

In [6]:
print(test_data_df.iloc[1]["query"])
# calculate the number of words in the query
encoding = tiktoken.get_encoding('cl100k_base')
in_token_num = len(encoding.encode(test_data_df.iloc[303]["query"]))
print(in_token_num)

Please answer which category (World, Sports, Business or Sci/Tech) a provided news follows into.

Q: Five-year ban for Blackburn fan One of the two Blackburn Rovers Football Club fans charged with public disorder for racially abusing Dwight Yorke has been handed a five-year ban.
A: Sports

Q: Major software pirates caught A multimillion-euro software piracy ring has been broken following synchronized raids in Athens and London yesterday, Attica police said.
A: Sci/Tech

Q: Compete against your friends, SI experts and celebrities in this &lt;b&gt;...&lt;/b&gt; OWINGS MILLS, Maryland (Ticker) --  quot;Prime Time quot; has decided this is the right time to return to the NFL. Deion Sanders, regarded as perhaps the most electrifying cornerback in league history, arrived here 
A:
160


In [7]:
# calculate the average token length in query column using tiktoken
def get_avg_token_length(dataframe):
    token_num = 0
    for q in dataframe["query"]:
        token_num += len(tiktoken.get_encoding('cl100k_base').encode(q))
    return token_num / len(dataframe)
print(get_avg_token_length(test_data_df))

153.53684210526316


In [18]:
# read from config/serviceinfo_thrift.json, get the price per token for each model

with open("config/serviceinfo_thrift_actual.json", "r") as f:
    serviceinfo = json.load(f)

def get_input_price_per_token(model_name):
    for service in serviceinfo:
        if model_name in serviceinfo[service]:
            return serviceinfo[service][model_name]["cost_input"]
    return None

def get_output_price_per_token(model_name):
    for service in serviceinfo:
        if model_name in serviceinfo[service]:
            return serviceinfo[service][model_name]["cost_output"]
    return None

def get_fixed_price(model_name):
    for service in serviceinfo:
        if model_name in serviceinfo[service]:
            return serviceinfo[service][model_name]["cost_fixed"]
    return None

print(get_input_price_per_token("gemini-1.5-flash-002"))
print(get_output_price_per_token("gpt-4o"))
print(get_fixed_price("gpt-4o"))

7.5e-08
1.5e-05
0


In [10]:
data = test_data
llm_vanilla = FrugalGPT.llmvanilla.LLMVanilla()

for i in range(len(data)):
    for name in service_names:
        service_name = name
        query = data[i][0]
        cost = llm_vanilla.compute_cost(input_text=query, output_text="no", service_name=service_name)
        print("data index is: ", data[i][2], "and cost for", service_name, " is: ", cost)
        # 计算average cost in the test data for each model
        print("average cost for", service_name, " is: ", sum(cost) / len(cost))
        

data index is:  0 and cost for openaichat/gpt-4o-mini  is:  2.325e-05


TypeError: 'float' object is not iterable

In [19]:
# calculate the total cost of a cascaded models (depth = 3), by summing the cost of input and output tokens
# first read from /strategy/{dataname}_1015/cascade_strategy.json, to get the model_list with corresponding budget
# this file is like:
# {
    # "budget": {
    #     "1e-05": {
    #         "thres_list": [
    #             0.05691420375880523,
    #             0.10379833045938076,
    #             1.0
    #         ],
    #         "model_list": [
    #             "deepinfra/llama-3-8B",
    #             "azure/Phi-3-mini-4k-instruct",
    #             "openaichat/gpt-4o-mini"
    #         ],
    #         "quantile": [
    #             0.02565051282051282,
    #             0.02565051282051282
    #         ]
    #     },
    #     "5e-05": {

    # need to calculate the total cost of a cascaded models (depth = 3), by summing the cost of input and output tokens for each model
    # and use the quantile to decide the weight, e.g., 0.02565051282051282 for the first model, 0.02565051282051282 for using both the first and second model, 1-0.02565051282051282-0.02565051282051282 for using all three models
    

with open(f"strategy/{dataname}_1015/cascade_strategy.json", "r") as f:
    cascade_strategy = json.load(f)

def get_model_list(budget):
    # split and only need the model name, e.g., from 'openaichat/gpt-4o-mini' to 'gpt-4o-mini'
    model_list = cascade_strategy["budget"][budget]["model_list"]
    return [re.split("/", model)[1] for model in model_list]

def get_quantile(budget):
    # need to fill up the third value which is 1-quantile[0]-quantile[1]
    quantile = cascade_strategy["budget"][budget]["quantile"]
    quantile.append(1-quantile[0]-quantile[1])
    return quantile

print(get_model_list("1e-05"))
print(get_quantile("0.0005"))

['llama-3-8B', 'Phi-3-medium-4k-instruct', 'Phi-3.5-mini-instruct']
[0.21643250430689104, 0.4379214029447115, 0.3456460927483974]


In [24]:
def get_total_cost(dataframe, budget):
    # need to calculate the total cost of a cascaded models (depth = 3), by summing the cost of input and output tokens for each model
    # and use the quantile to decide the weight, e.g., 0.02565051282051282 for the first model, 0.02565051282051282 for using both the first and second model, 1-0.02565051282051282-0.02565051282051282 for using all three models
    model_list = get_model_list(budget)
    use_first_cost = (get_input_price_per_token(model_list[0]) * get_avg_token_length(dataframe) + get_output_price_per_token(model_list[0]) * 1) * get_quantile(budget)[2]
    # use_first2_cost = (get_input_price_per_token(model_list[0]) * get_avg_token_length(dataframe) + get_output_price_per_token(model_list[0]) * 1 
    #                    + (get_input_price_per_token(model_list[1]) * get_avg_token_length(dataframe) + get_output_price_per_token(model_list[1]) * 1)) * get_quantile(budget)[1]
    # use_all3_cost = (get_input_price_per_token(model_list[0]) * get_avg_token_length(dataframe) + get_output_price_per_token(model_list[0]) * 1
                        # + (get_input_price_per_token(model_list[1]) * get_avg_token_length(dataframe) + get_output_price_per_token(model_list[1]) * 1)
                        # + (get_input_price_per_token(model_list[2]) * get_avg_token_length(dataframe) + get_output_price_per_token(model_list[2]) * 1)) * get_quantile(budget)[0]
    use_first2_cost = (get_input_price_per_token(model_list[1]) * get_avg_token_length(dataframe) + get_output_price_per_token(model_list[1]) * 1) * get_quantile(budget)[1]
    use_all3_cost = (get_input_price_per_token(model_list[2]) * get_avg_token_length(dataframe) + get_output_price_per_token(model_list[2]) * 1) * get_quantile(budget)[0]
    
    total_cost = use_first_cost + use_first2_cost + use_all3_cost
    return total_cost


def get_total_cost_cascaded(dataframe, budget):
    model_list = get_model_list(budget)
    total_cost = 0
    for i in range(len(model_list)):
        model = model_list[i]
        total_cost += get_input_price_per_token(model) * get_avg_token_length(dataframe)
        total_cost += get_output_price_per_token(model) * 1
        if total_cost < float(budget):
            print(f"up to model {model} cost: {total_cost}")
            continue
        else:
            break

In [25]:
print(get_total_cost_cascaded(test_data_df, "1e-05"))
# print(get_total_cost(test_data_df, "0.001"))

up to model llama-3-8B cost: 8.499526315789475e-06
3.528078947368421e-05
