# GPT Calc

## GPT-4 API Pricing

In [30]:
input_tokens_1k_usd  = 0.01 # $0.01 per 1000 tokens
output_tokens_1k_usd = 0.03 # $0.03 per 1000 tokens

## Import Libraries

In [1]:
import tiktoken
from main import *
from datetime import datetime

In [2]:
with open("conversations.json") as f:
    sessions = json.loads(f.read())
    topics = flatten(sessions)
mapping = [s.get("mapping", {}) for s in sessions]  # Use get() to avoid KeyError

entries = process_mapping(mapping)

entries_df = pd.DataFrame(
    data=entries,
    columns=["model", "role", "text", "create_time"])
entries_df["create_time"] = pd.to_datetime(entries_df["create_time"] * 1_000_000_000)

In [3]:
entries_df.head()

Unnamed: 0,model,role,text,create_time
0,,system,,NaT
1,gpt-4,user,my friend showed me an app which lets you try ...,2024-02-12 05:16:20.824388864
2,gpt-4,assistant,To find an app that allows you to experiment w...,2024-02-12 05:16:26.078672896
3,,system,,NaT
4,gpt-4,user,"Hi, how do I politely word a text to a client ...",2024-02-11 15:05:09.858119936


In [4]:
entries_df["role"].unique()

array(['system', 'user', 'assistant', 'tool'], dtype=object)

In [5]:
# text-davinci-002-render-sha := GPT 3.5
entries_df["model"].unique()

array(['', 'gpt-4', 'text-davinci-002-render-sha', 'gpt-4-gizmo'],
      dtype=object)

In [6]:
gizmo_df = entries_df[entries_df["model"] == "gpt-4-gizmo"]

In [7]:
gizmo_df

Unnamed: 0,model,role,text,create_time
2021,gpt-4-gizmo,tool,,2024-01-11 08:27:46.109074176
2023,gpt-4-gizmo,tool,"{""items"":[{""paper_title"":""Signal acquisition a...",2024-01-11 08:29:09.108526080
2024,gpt-4-gizmo,assistant,The state of the art (SOTA) in Surface Electro...,2024-01-11 08:29:09.108745984


## Tokenise All Prompts and Responses

In [8]:
enc = tiktoken.get_encoding("cl100k_base")

def get_tokens(row):
    text = row["text"]
    tokens = enc.encode(text)
    return len(tokens)

token_count = entries_df.apply(lambda row: get_tokens(row), axis=1)

In [9]:
entries_df["token_count"] = token_count

In [27]:
jan_input_tokens = entries_df[
    (entries_df["model"] == "gpt-4") &
    (entries_df["role"]  == "assistant") &
    (entries_df["create_time"] > datetime(2024, 1, 1)) & 
    (entries_df["create_time"] < datetime(2024, 2, 1))
]["token_count"].sum()

In [28]:
jan_output_tokens = entries_df[
    (entries_df["model"] == "gpt-4") &
    (entries_df["role"]  == "user") &
    (entries_df["create_time"] > datetime(2024, 1, 1)) & 
    (entries_df["create_time"] < datetime(2024, 2, 1))
]["token_count"].sum()

## Calculate Costs

In [31]:
jan_input_cost_usd = (jan_input_tokens / 1000) * input_tokens_1k_usd
jan_output_cost_usd = (jan_output_tokens / 1000) * output_tokens_1k_usd
jan_input_cost_usd, jan_output_cost_usd

(4.0900799999999995, 9.51024)