In [15]:
from sklearn.metrics import f1_score
import pandas as pd
from sklearn.metrics import f1_score
from pathlib import Path

# Per-event comparison: macro-F1 of the `label` column (reported as 'mine') and
# of the `gpt5_label` column (reported as 'theirs'), both scored against
# `class_label`, on the tweets the two sources share.
#
# Paths are composed with pathlib rather than backslash-separated literals.
# The original non-raw "..\data\humaid\k_zero_shot" only worked because "\d",
# "\h" and "\k" are invalid escape sequences that Python leaves untouched
# (with a warning on recent versions), and backslash separators resolve only
# on Windows. Joined Path segments behave the same on every platform.
humaid_dir = Path("..") / "data" / "humaid"
my_df = pd.read_csv(humaid_dir / "plabel" / "train" / "union.tsv", sep='\t')
their_folder = humaid_dir / "k_zero_shot"

rows = []

# sorted() keeps the row order reproducible; raw glob order depends on the filesystem.
for event_file in sorted(their_folder.glob("*.tsv")):
    their_df = pd.read_csv(event_file, sep='\t')
    # Event key = first three underscore-separated tokens of the file stem.
    # NOTE(review): assumes every event name has exactly three tokens — confirm.
    event = '_'.join(event_file.stem.split('_')[:3])

    # Inner join on tweet_id keeps only tweets present in both tables.
    merged = pd.merge(my_df[my_df['event'] == event], their_df, on='tweet_id')
    merged = merged[['class_label', 'label', 'gpt5_label']]

    if merged.empty:
        # No overlap (e.g. the derived event key matched nothing in my_df):
        # a score over zero samples is meaningless, so record NaN and say so.
        print(f"No overlapping tweets for event '{event}' ({event_file.name}); F1 set to NaN")
        rows.append({'event': event, 'mine': float('nan'), 'theirs': float('nan')})
        continue

    my_f1 = f1_score(merged['class_label'], merged['label'], average='macro')
    their_f1 = f1_score(merged['class_label'], merged['gpt5_label'], average='macro')

    rows.append({'event': event, 'mine': my_f1, 'theirs': their_f1})

# Copy the summary table to the system clipboard for pasting elsewhere.
pd.DataFrame(rows).to_clipboard()





In [2]:
from openai import OpenAI
import csv, pandas as pd
import os, time, random
from dotenv import load_dotenv

# --- Initialize API client ---
# The key is read from the environment after loading ../.env.
load_dotenv(dotenv_path="../.env")
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# --- Load HumAID test data ---
# Read the TSV with quoting disabled, drop the rows whose gold label is
# "other_relevant_information", and renumber the remaining rows from 0.
gold_table = (
    pd.read_csv("../data/humaid/joined/test.tsv", sep="\t", quoting=csv.QUOTE_NONE)
    .loc[lambda frame: frame["class_label"] != "other_relevant_information"]
    .reset_index(drop=True)
)

# --- Define classification prompt template ---
# Zero-shot prompt sent as the single user message for each tweet.
# `{tweet_text}` is a literal placeholder that classify_tweet fills with
# str.replace (not str.format), so braces inside a tweet cannot break it.
# The nine category names listed here are what the model is asked to echo back;
# classify_tweet lowercases the reply and turns spaces into underscores.
# "other_relevant_information" is not offered as a category: rows with that
# gold label are dropped from gold_table before prediction.
base_prompt = """Read the category names and their definitions below, then classify the following tweet into the appropriate category. 
In your response, mention only the category name.

Category name: category definition
- Caution and advice: Reports of warnings issued or lifted, guidance and tips related to the disaster.
- Sympathy and support: Tweets with prayers, thoughts, and emotional support.
- Requests or urgent needs: Reports of urgent needs or supplies such as food, water, clothing, money, etc.
- Displaced people and evacuations: People who have relocated due to the crisis, even for a short time.
- Injured or dead people: Reports of injured or dead people due to the disaster.
- Missing or found people: Reports of missing or found people due to the disaster.
- Infrastructure and utility damage: Reports of any type of damage to infrastructure such as buildings, houses, roads, power lines, etc.
- Rescue volunteering or donation effort: Reports of any type of rescue, volunteering, or donation efforts.
- Not humanitarian: If the tweet does not convey humanitarian aid-related information.

Tweet: {tweet_text}
Category:
"""

# --- Define classifier function ---
# Dry-run stub: uncomment to exercise the pipeline without calling the API.
# def classify_tweet(tweet, max_retries=3):
#     return "TEST"

def classify_tweet(tweet, max_retries=3):
    """Classify one tweet with gpt-4o-mini and return a normalized label.

    The tweet is substituted into ``base_prompt`` and sent as a single user
    message. The reply is stripped, spaces are replaced by underscores and the
    result is lowercased (e.g. "Not humanitarian" -> "not_humanitarian").

    Args:
        tweet: Tweet text to classify (a ``str``).
        max_retries: Maximum number of API attempts before giving up.

    Returns:
        The normalized reply, or the sentinel string "ERROR" when every
        attempt failed (or ``max_retries`` is 0).
    """
    # str.replace rather than str.format: tweets may contain literal braces.
    prompt = base_prompt.replace("{tweet_text}", tweet)
    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[{"role": "user", "content": prompt}],
                temperature=0,
                max_tokens=10,
            )
            return response.choices[0].message.content.strip().replace(' ', '_').lower()
        except Exception as e:
            # Deliberately broad: any failure (network, rate limit, malformed
            # reply) is treated as retryable and reported to the caller via
            # the "ERROR" sentinel rather than an exception.
            if attempt == max_retries - 1:
                # Last attempt: nothing follows, so do not announce a retry
                # or waste a backoff sleep before returning the sentinel.
                print(f"Error: {e} — giving up after {max_retries} attempts")
                break
            # Exponential backoff with jitter: ~1s, ~2s, ~4s, ...
            wait = 2 ** attempt + random.random()
            print(f"Error: {e} — retrying in {wait:.1f}s")
            time.sleep(wait)
    return "ERROR"

# --- Run predictions ---
labels = []


def _write_checkpoint():
    """Save the tweets classified so far, with their predictions, to the partial TSV."""
    done = len(labels)
    pd.DataFrame(
        {"tweet_text": gold_table["tweet_text"].iloc[:done], "prediction": labels}
    ).to_csv("../gpt4o_mini_predictions_partial.tsv", index=False, sep="\t")


# Iterate the text column by position so progress counting and checkpoint
# slicing do not depend on the frame's index labels.
for i, tweet_text in enumerate(gold_table["tweet_text"]):
    pred = classify_tweet(str(tweet_text))
    if pred == "ERROR":
        # Persist everything classified so far before aborting; otherwise up
        # to 49 predictions since the last periodic checkpoint would be lost.
        _write_checkpoint()
        raise RuntimeError(f"Error encountered at row {i}. Stopping run.")
    labels.append(pred)
    if (i + 1) % 50 == 0:
        print(f"Processed {i+1}/{len(gold_table)} tweets...")
        # Periodic checkpoint save
        _write_checkpoint()

# --- Save final predictions ---
gold_table["prediction"] = labels
gold_table.to_csv("../gpt4o_mini_predictions.tsv", index=False, sep="\t")
print("Done. Saved to ../gpt4o_mini_predictions.tsv.")


Processed 50/10423 tweets...
Processed 100/10423 tweets...
Processed 150/10423 tweets...
Processed 200/10423 tweets...
Processed 250/10423 tweets...
Processed 300/10423 tweets...
Processed 350/10423 tweets...
Processed 400/10423 tweets...
Processed 450/10423 tweets...
Processed 500/10423 tweets...
Processed 550/10423 tweets...
Processed 600/10423 tweets...
Processed 650/10423 tweets...
Processed 700/10423 tweets...
Processed 750/10423 tweets...
Processed 800/10423 tweets...
Processed 850/10423 tweets...
Processed 900/10423 tweets...
Processed 950/10423 tweets...
Processed 1000/10423 tweets...
Processed 1050/10423 tweets...
Processed 1100/10423 tweets...
Processed 1150/10423 tweets...
Processed 1200/10423 tweets...
Processed 1250/10423 tweets...
Processed 1300/10423 tweets...
Processed 1350/10423 tweets...
Processed 1400/10423 tweets...
Processed 1450/10423 tweets...
Processed 1500/10423 tweets...
Processed 1550/10423 tweets...
Processed 1600/10423 tweets...
Processed 1650/10423 tweets.