In [None]:
# The classification cells call openai.ChatCompletion.create, the pre-1.0 client
# interface, so the package is pinned to 0.28. The %pip magic installs into the
# environment of the running kernel rather than whichever pip is first on PATH.
%pip install openai==0.28

Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.84.0
    Uninstalling openai-1.84.0:
      Successfully uninstalled openai-1.84.0
Successfully installed openai-0.28.0


In [None]:
import getpass
import os
import time

import openai
import pandas as pd
from tqdm import tqdm

# Read the CSV, draw a reproducible 1600-row sample (seed 42) and renumber the
# sampled rows from zero so positions line up with the prediction lists built
# later in the notebook.
df = pd.read_csv("Saudi.csv")
df = df.sample(n=1600, random_state=42)
df = df.reset_index(drop=True)

# Plain Python list of the "tweet" column, in the same order as the rows of df.
tweets = df["tweet"].to_list()

In [None]:


# Never store the API key in the notebook itself: a saved or shared notebook
# would leak it. Take it from the OPENAI_API_KEY environment variable and, when
# that is not set, ask for it interactively without echoing the input.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")


# Instructions sent as the system message of every request: a two-way
# classification whose answer must be exactly one of the two label words.
system_message = """
Classify the input text as 'offensive' or 'not'.
The text will be delimited by triple backticks ``` in the input.
Answer only with 'offensive' or 'not'.
Do not explain your answer.
"""

# Wrapper for each tweet; the triple backticks are the delimiter the system
# message announces.
user_message_template = "Text: ```{text}```"


In [None]:
def create_examples(df, n_per_class=3):
    """Draw a reproducible, class-balanced set of few-shot demonstrations.

    Labels are compared after stripping surrounding whitespace and lower-casing,
    which is the same normalisation applied to the label stored in each
    returned example, so a value such as " Offensive " counts as "offensive".

    Args:
        df: DataFrame with a "tweet" column and a string "label" column.
        n_per_class: Number of rows to draw for each of the two classes.

    Returns:
        A list of ``{"text": ..., "label": ...}`` dicts holding the
        "offensive" examples first, followed by the "not" examples. Sampling
        uses ``random_state=1``, so repeated calls on the same frame return
        the same rows.

    Raises:
        ValueError: If a class has fewer than ``n_per_class`` rows.

    Note:
        The sampled rows are not removed from ``df``. A caller that evaluates
        on the same frame will also score these rows.
    """
    normalized_labels = df["label"].str.strip().str.lower()

    examples = []
    for class_name in ("offensive", "not"):
        pool = df[normalized_labels == class_name]
        if len(pool) < n_per_class:
            raise ValueError(
                f"Need {n_per_class} rows labelled {class_name!r} "
                f"but only {len(pool)} are available."
            )
        chosen = pool.sample(n=n_per_class, random_state=1)
        for tweet_text in chosen["tweet"].tolist():
            examples.append({"text": tweet_text, "label": class_name})
    return examples

def create_prompt(system_msg, examples, user_template):
    """Build the chat history that precedes the tweet to be classified.

    Args:
        system_msg: Content of the opening "system" message.
        examples: Iterable of dicts with "text" and "label" keys.
        user_template: Format string with a ``{text}`` placeholder.

    Returns:
        A new list of chat messages: the system message, then for every
        example a "user" message (the template filled with the example text)
        followed by an "assistant" message carrying the example label.
    """
    conversation = [{"role": "system", "content": system_msg}]
    for shot in examples:
        question = {"role": "user", "content": user_template.format(text=shot["text"])}
        answer = {"role": "assistant", "content": shot["label"]}
        conversation.extend((question, answer))
    return conversation



In [None]:

# Every tweet is classified three times: with no demonstrations (zero-shot),
# with one demonstration per class (one-shot) and with three per class
# (three-shot). The three prediction lists stay aligned with `tweets`.

MODEL_NAME = "gpt-4o"
# max_tokens=1 would cut the reply after a single token, so a label that the
# tokenizer splits into several tokens (or a reply that opens with a quote
# character) would be stored truncated. Allow a few tokens and keep only the
# first word of the reply instead.
MAX_REPLY_TOKENS = 5
REQUEST_PAUSE_SECONDS = 0.2  # pause after each tweet to stay under the rate limit


def classify(messages):
    """Send one chat request and return the predicted label.

    Args:
        messages: Full chat history, ending with the user message that holds
            the tweet to classify.

    Returns:
        The first alphabetic word of the reply, lower-cased ("" when the reply
        contains no letters), or "error" when the request or the parsing of
        its response raises. Failures are logged rather than silently dropped.
    """
    try:
        response = openai.ChatCompletion.create(
            model=MODEL_NAME,
            messages=messages,
            max_tokens=MAX_REPLY_TOKENS,
            temperature=0,
        )
        reply = response.choices[0].message.content or ""
    except Exception as exc:
        # tqdm.write prints above the progress bar instead of breaking it.
        tqdm.write(f"OpenAI request failed: {exc!r}")
        return "error"

    # Drop quotes, punctuation and any trailing words: "'Offensive'." -> "offensive".
    letters_only = "".join(ch if ch.isalpha() else " " for ch in reply.lower())
    words = letters_only.split()
    return words[0] if words else ""


zero_shot_preds = []
one_shot_preds = []
three_shot_preds = []

# NOTE(review): the demonstrations are sampled from the same `df` whose tweets
# are classified below, so those rows are scored with their own gold label in
# the prompt. Consider excluding them when computing metrics.
one_example = create_examples(df, n_per_class=1)
three_examples = create_examples(df, n_per_class=3)

# The part of each prompt that precedes the tweet never changes, so it is
# built once here rather than on every iteration.
zero_shot_prefix = [{"role": "system", "content": system_message}]
one_shot_prefix = create_prompt(system_message, one_example, user_message_template)
three_shot_prefix = create_prompt(system_message, three_examples, user_message_template)

for text in tqdm(tweets):
    query = {"role": "user", "content": user_message_template.format(text=text)}

    zero_shot_preds.append(classify(zero_shot_prefix + [query]))
    one_shot_preds.append(classify(one_shot_prefix + [query]))
    three_shot_preds.append(classify(three_shot_prefix + [query]))

    time.sleep(REQUEST_PAUSE_SECONDS)


100%|██████████| 1600/1600 [48:55<00:00,  1.83s/it]


In [None]:
# Attach the predictions of each prompting strategy to df as a new column
# (one value per row, in row order) and write the combined table to disk
# without the index column.
prediction_columns = {
    "gpt4o_zero_shot": zero_shot_preds,
    "gpt4o_one_shot": one_shot_preds,
    "gpt4o_few_shot": three_shot_preds,
}
for column_name, predictions in prediction_columns.items():
    df[column_name] = predictions

df.to_csv("saudi_gpt4o_results.csv", index=False)