# GPT

In [None]:
import matplotlib.pyplot as plt
import openai
import os
import pandas as pd
import random
import tiktoken
import time

from dotenv import load_dotenv
from openai import OpenAI
from sklearn.metrics import accuracy_score
from tqdm import tqdm

In [None]:
# Make sure the output directory exists before later cells write CSV files into it
os.makedirs("output/", exist_ok=True)

In [None]:
# Load environment variables (python-dotenv, presumably from a local .env file)
# and hand the OpenAI key to the openai module
load_dotenv()
openai.api_key = os.getenv('OPENAI_API_KEY')

In [None]:
# Pre-processed splits. Later cells read 'text', 'sentiment' and 'rating' from train,
# and 'review_id', 'text' and 'sentiment' from test.
train = pd.read_csv("data/processed/train.csv")
test = pd.read_csv("data/processed/test.csv")

# Create prompts

Few-shot prompting, where each prompt is constructed as follows:

Here is an example of a positive movie review, associated with a rating of 10: \*[ <...> ]\*

Here is an example of a negative movie review, associated with a rating of 2: \*[ <...> ]\*

Now, consider the following review. Based on its content, is the sentiment of the review positive or negative? Answer with a single word. \*[ <...> ]\*

Instructions:
- \*\*Positive\*\* reviews typically highlight enjoyment, satisfaction, or praise for aspects of the film (e.g., acting, storyline, direction).
- \*\*Negative\*\* reviews tend to criticize the film for its shortcomings or failures (e.g., poor pacing, bad acting, or unsatisfying plot).
- Focus on the general tone of the review as a whole, not isolated statements or minor contradictions.

The example reviews are restricted to fewer than 150 words. The order of the positive and negative examples is randomized.

In [None]:
# Review length, in whitespace-separated words.
# Counted on the review text itself: the 'sentiment' column only holds the
# one-word label, so counting there would always give 1 and the
# "fewer than 150 words" filter used for few-shot examples would never apply.
# The .str accessor leaves missing texts as NaN instead of raising.
train['nb_words'] = train['text'].str.split().str.len()

In [None]:
# Pools of few-shot candidates: short train reviews of each sentiment.
# They do not depend on the test row, so they are built once here instead of
# re-filtering and re-converting the whole train set on every iteration.
short_reviews = train[train['nb_words'] < 150]
positive_pool = short_reviews[short_reviews['sentiment'] == 'positive'].to_dict('records')
negative_pool = short_reviews[short_reviews['sentiment'] == 'negative'].to_dict('records')

prompts = []

# Iterate over the test dataset: one prompt per review to classify
for _, test_row in tqdm(test.iterrows(),
                        total=test.shape[0],
                        desc = "Generating Prompts",
                        unit = "prompts"):

    # Select a random positive and negative sentiment review from train
    positive_row = random.choice(positive_pool)
    negative_row = random.choice(negative_pool)

    positive_example = f"Here is an example of a positive movie review, associated with a rating of {positive_row['rating']}:\n*[{positive_row['text']}]*"
    negative_example = f"Here is an example of a negative movie review, associated with a rating of {negative_row['rating']}:\n*[{negative_row['text']}]*"

    # Randomize the order of positive and negative examples
    # (sampling both items without replacement yields a random permutation)
    examples = random.sample([positive_example, negative_example], 2)

    # Construct the prompt: two examples, the review to classify, then the instructions
    prompt = f"""
    {examples[0]}

    {examples[1]}

    Now, consider the following review. Based on its content, is the sentiment of the review positive or negative? Answer with a single word.
    *[{test_row['text']}]*

    Instructions:
    - **Positive** reviews typically highlight enjoyment, satisfaction, or praise for aspects of the film (e.g., acting, storyline, direction).
    - **Negative** reviews tend to criticize the film for its shortcomings or failures (e.g., poor pacing, bad acting, or unsatisfying plot).
    - Focus on the general tone of the review as a whole, not isolated statements or minor contradictions.
    """

    # Keep the review id so answers can be joined back to the test set
    prompts.append({
        'review_id': test_row['review_id'],
        'prompt': prompt
    })

prompts = pd.DataFrame(prompts)

In [None]:
# Repeats the conversion done at the end of the prompt-generation cell;
# wrapping an existing DataFrame again leaves its contents unchanged.
prompts = pd.DataFrame(prompts)

In [None]:
# Flatten a prompt to a single line: newlines become spaces, outer whitespace is trimmed
def clean_prompt(prompt):
    """Return *prompt* with newlines replaced by spaces and both ends stripped.

    Values that are not strings are handed back untouched.
    """
    if not isinstance(prompt, str):
        return prompt
    single_line = prompt.replace('\n', ' ')
    return single_line.strip()

# Apply clean_prompt to every prompt so each one is stored as a single line
prompts['prompt'] = prompts['prompt'].apply(clean_prompt)

In [None]:
# Save the cleaned prompts; the "Resume API calls" cell reloads this exact path
# when starting a fresh run.
# NOTE(review): "GTP" in the file name is presumably a typo for "GPT"; it is kept
# because the reader uses the same spelling.
prompts.to_csv("data/processed/GTP_prompts.csv", index=False)

# Estimate cost

In [None]:
# Count tokens per prompt with tiktoken's cl100k_base encoding
# NOTE(review): confirm this encoding matches every model priced in the cost cell
enc = tiktoken.get_encoding("cl100k_base")
prompts['tokens'] = prompts['prompt'].map(lambda text: len(enc.encode(text)))

# Overall input size across all prompts
total_tokens = prompts['tokens'].sum()
print(f"Total number of tokens: {total_tokens}")

In [None]:
# Histogram of the per-prompt token counts
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(prompts['tokens'], bins=30, color='cornflowerblue', edgecolor='midnightblue')
ax.set_title('Distribution of Token Counts')
ax.set_xlabel('Number of Tokens')
ax.set_ylabel('')
plt.show()

In [None]:
# Rough cost per model in USD: (tokens * price) scaled by 1e-6, i.e. prices are per million tokens.
# The first coefficient is applied to all prompt tokens; the second is applied once per
# prompt (presumably one output token per single-word answer).
# NOTE(review): the second coefficient is half the first for gpt-4o and gpt-4o-mini but
# three times the first for gpt-3.5-turbo; confirm all three are the intended output rates.
n_prompts = len(prompts)

gpt_4o = (total_tokens * 2.5 + n_prompts * 1.25) * 1e-6
gpt_35_turbo = (total_tokens * 0.5 + n_prompts * 1.5) * 1e-6
gpt_4o_mini = (total_tokens * 0.15 + n_prompts * 0.075) * 1e-6

for label, cost in (
    ("gpt-4o", gpt_4o),
    ("gpt-3.5-turbo", gpt_35_turbo),
    ("gpt-4o-mini", gpt_4o_mini),
):
    print(f"Cost with {label}: {cost:.2f} $")

# Send prompts

In [None]:
# Call the API
# Module-level client shared by get_openai_answer.
# NOTE(review): presumably picks up OPENAI_API_KEY from the environment; confirm
client = OpenAI()

def get_openai_answer(prompt, model="gpt-3.5-turbo"):
    """Send one prompt as a single user message and report whether the call worked.

    Args:
        prompt: Text sent as the content of the user message.
        model: Name of the chat model to query. Defaults to "gpt-3.5-turbo",
            the model this function used before the parameter existed.

    Returns:
        A 2-tuple ``(status, payload)``:
        - ``("request worked", completion)`` with the completion object
          returned by the client, or
        - ``("request failed", message)`` with the exception text.
    """
    try:
        completion = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
        )
    except Exception as e:
        # Deliberate catch-all: the caller counts failures and decides when to
        # stop, so any error is reported as data rather than raised.
        return "request failed", str(e)

    return "request worked", completion

In [None]:
# Resume API calls
if os.path.exists("output/GPT_in_progress.csv"):
    # A previous run left a checkpoint: continue from it
    responses = pd.read_csv("output/GPT_in_progress.csv")

    # Empty columns come back from CSV as float NaN. Restore usable dtypes so
    # answers (strings) can be stored and the failure counter can be incremented.
    responses['GPT'] = responses['GPT'].astype(object)
    responses['failed_calls'] = responses['failed_calls'].fillna(0).astype(int)

    # A row that already has an answer needs no retry; clearing its counter
    # keeps it from being selected (and sent to the API) again.
    responses.loc[responses['GPT'].notna(), 'failed_calls'] = 0

    calls_left = len(responses[(responses['GPT'].isna()) | (responses['failed_calls'] > 0)])
    print(f"Resuming API calls - {calls_left} left")

elif os.path.exists("data/processed/GTP_prompts.csv"):
    # Fresh run: start from the saved prompts with no answers yet
    responses = pd.read_csv("data/processed/GTP_prompts.csv")
    responses['GPT'] = pd.NA
    # Start the counter at 0 (not None) so that "+ 1" on failure actually counts
    responses['failed_calls'] = 0
    print("Starting API calls")

else:
    # Without either file `responses` would stay undefined and the next cell
    # would fail with an unrelated NameError; fail here with the real cause.
    raise FileNotFoundError(
        "data/processed/GTP_prompts.csv not found - regenerate prompts"
    )

In [None]:
# Answers are strings; make sure the column can hold them even when it was
# read back from CSV as an all-NaN float column.
responses['GPT'] = responses['GPT'].astype(object)

# Rows still to send: no answer yet, or flagged by a previous failed call
rows_to_process = responses[(responses['GPT'].isna()) | (responses['failed_calls'] > 0)]

time0 = time.time()
total_fails = 0

if len(rows_to_process) > 0:
    progress = tqdm(rows_to_process.iterrows(), total=len(rows_to_process), desc="Processing reviews")

    # n_done counts rows handled in this run; i is the row's index label in `responses`
    for n_done, (i, row) in enumerate(progress, start=1):
        review_id = row['review_id']
        prompt = row['prompt']
        status, response = get_openai_answer(prompt)

        if status == "request failed":
            # The counter may still be None/NaN (never failed before): treat that as 0
            previous_fails = responses.at[i, 'failed_calls']
            responses.at[i, 'failed_calls'] = 1 if pd.isna(previous_fails) else previous_fails + 1
            print(f"Error for review #{review_id}:\n{response}")

            total_fails += 1
            # Stop on the third failure, as the message says
            if total_fails >= 3:
                print("Stopping after 3 failures...")
                break

        else:
            responses.at[i, 'GPT'] = response.choices[0].message.content
            # Answer obtained: clear the counter so the row is not selected again
            responses.at[i, 'failed_calls'] = 0

        # Periodic checkpoint, every 25 rows processed in this run
        if n_done % 25 == 0:
            responses.to_csv('output/GPT_in_progress.csv', index=False)

        # Small pause between consecutive API calls
        time.sleep(0.05)

    # Final checkpoint: keeps the rows handled since the last periodic save,
    # including when the loop stopped early because of failures.
    responses.to_csv('output/GPT_in_progress.csv', index=False)

time1 = time.time()

In [None]:
# Wall-clock time spent in the API loop cell (time1 - time0)
print(f"API call duration: {time1-time0:.2f} seconds")

In [None]:
# Normalize the raw answers before comparing them with the labels: lower-case,
# then drop surrounding whitespace, quotes, markdown asterisks and end punctuation,
# so that e.g. "Positive." or " positive" matches the label "positive".
responses['GPT'] = responses['GPT'].str.lower().str.strip(" \t\r\n.!\"'*")

# Attach the answers to the test rows via the shared review id, then save
results = pd.merge(test, responses, on = "review_id")
results.to_csv("output/GTP.csv")

In [None]:
# Share of test reviews whose GPT answer equals the labelled sentiment
accuracy = accuracy_score(y_true=results['sentiment'], y_pred=results['GPT'])
print(f"Accuracy: {accuracy:.4f}")