In [2]:
import pandas as pd

!pip install -q openai backoff gpt-cost-estimator

import openai
from openai import OpenAI
from google.colab import userdata
import backoff
from gpt_cost_estimator import CostEstimator


#@title Setup for the OpenAI API

#@markdown We're using the new Colab Feature to store keys safely within the Colab Environment.
#@markdown Click on the key on the left to add your API key and enable it for this notebook.
#@markdown Enter the name of your API-Key below.
api_key_name = "openai-lehrstuhl-api" # @param {type: "string"}
api_key = userdata.get(api_key_name)

# Initialize OpenAI using the key
client = OpenAI(
    api_key=api_key
)



@CostEstimator()
def query_openai(model, temperature, messages, mock=True, completion_tokens=10):
    return client.chat.completions.create(
                      model=model,
                      temperature=temperature,
                      messages=messages,
                      max_tokens=600)

# We define the run_request method to wrap it with the @backoff decorator
@backoff.on_exception(backoff.expo, (openai.RateLimitError, openai.APIError))
def run_request(system_prompt, user_prompt, model, mock):
  messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_prompt}
  ]

  return query_openai(
          model=model,
          temperature=0.0,
          messages=messages,
          mock=mock
        )


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m262.9/262.9 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m51.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [19]:
system_prompt = """
Translate the text to german. Only write the german translation in your answer.

Text: [TEXT]
"""

In [20]:
prompt = """
Translate the text to german. Only write the german translation in your answer.

Text: [TEXT]
"""

In [27]:
df = pd.read_csv("/content/dair_ai_emotion.csv")
df

Unnamed: 0,text,label
0,i didnt feel humiliated,0
1,i can go from feeling so hopeless to so damned...,0
2,im grabbing a minute to post i feel greedy wrong,3
3,i am ever feeling nostalgic about the fireplac...,2
4,i am feeling grouchy,3
...,...,...
33995,im having ssa examination tomorrow in the morn...,0
33996,i constantly worry about their fight against n...,1
33997,i feel its important to share this info for th...,1
33998,i truly feel that if you are passionate enough...,1


In [29]:
from tqdm.auto import tqdm
#@title Running the request.
#@markdown The following code snippet uses my [gpt-cost-estimator](https://pypi.org/project/gpt-cost-estimator/) package to simulate API requests and calculate a cost estimate. Please run the estimation whne possible to asses the price-tag before sending requests to OpenAI!

#@markdown Make sure 'run_request' and 'system_prompt' are defined before this block by running the two blocks above!

#@markdown Do you want to mock the OpenAI request (dry run) to calculate the estimated price?
MOCK = False # @param {type: "boolean"}
#@markdown Do you want to reset the cost estimation when running the query?
RESET_COST = False # @param {type: "boolean"}
#@markdown What's the column name to save the results of the data extraction task to?
COLUMN = 'text_de' # @param {type: "string"}
#@markdown Do you want to run the request on a smaller sample of the whole data? (Useful for testing). Enter 0 to run on the whole dataset.
SAMPLE_SIZE = 0 # @param {type: "number", min: 0}

#@markdown Which model do you want to use?
MODEL = "gpt-3.5-turbo-0613" # @param ["gpt-3.5-turbo-0613", "gpt-4-1106-preview", "gpt-4-0613"] {allow-input: true}


# Initializing the empty column
if COLUMN not in df.columns:
  df[COLUMN] = None

# Reset Estimates
CostEstimator.reset()
print("Reset Cost Estimation")

filtered_df = df.copy()

# Skip previously annotated rows
filtered_df = filtered_df[pd.isna(filtered_df[COLUMN])]

if SAMPLE_SIZE > 0:
  filtered_df = filtered_df.sample(SAMPLE_SIZE)

for index, row in tqdm(filtered_df.iterrows(), total=len(filtered_df)):
    try:
        p = prompt.replace('[TEXT]', row['text'])
        response = run_request(system_prompt, p, MODEL, MOCK)

        if not MOCK:
          # Extract the response content
          # Adjust the following line according to the structure of the response
          r = response.choices[0].message.content

          # Update the 'new_df' DataFrame
          df.at[index, COLUMN] = r

    except Exception as e:
        print(f"An error occurred: {e}")
        # Optionally, handle the error (e.g., by logging or by setting a default value)

print()

Reset Cost Estimation


  0%|          | 0/34000 [00:00<?, ?it/s]

Cost: $0.0002 | Total: $5.9083


In [30]:
df.to_csv("/content/dair_ai_emotion_de.csv")

In [31]:
df

Unnamed: 0,text,label,text_de
0,i didnt feel humiliated,0,Ich habe mich nicht gedemütigt gefühlt.
1,i can go from feeling so hopeless to so damned...,0,Ich kann mich von so hoffnungslos fühlen zu so...
2,im grabbing a minute to post i feel greedy wrong,3,"Ich nehme mir eine Minute, um zu posten, ich f..."
3,i am ever feeling nostalgic about the fireplac...,2,Text: Wenn ich jemals nostalgische Gefühle für...
4,i am feeling grouchy,3,Ich fühle mich mürrisch.
...,...,...,...
33995,im having ssa examination tomorrow in the morn...,0,Text: Ich habe morgen früh eine SSA-Prüfung un...
33996,i constantly worry about their fight against n...,1,Ich mache mir ständig Sorgen über ihren Kampf ...
33997,i feel its important to share this info for th...,1,"Ich finde es wichtig, diese Informationen für ..."
33998,i truly feel that if you are passionate enough...,1,"Ich glaube wirklich, dass wenn man genug Leide..."
