In [1]:
import argparse
import getpass
import logging
import os
import time

import openai
import pandas as pd

In [2]:
# Show the installed OpenAI SDK version; the cells below call the legacy
# interface (openai.ChatCompletion, openai.error) that this version provides.
print(openai.__version__)

0.28.0


In [6]:
df = pd.read_csv('clean_data.csv')

# Draw a reproducible random sample of (at most) 15 rows to use as the test set.
# The fixed seed makes Restart & Run All pick the same rows every time, and the
# min() guard keeps sampling from failing when the file holds fewer rows.
SAMPLE_SIZE = 15
SAMPLE_SEED = 42
random_rows = df.sample(n=min(SAMPLE_SIZE, len(df)), random_state=SAMPLE_SEED)

# Save the sampled rows to a CSV file
random_rows.to_csv('test_tweets.csv', index=False)

In [7]:
# Provide the OpenAI API key without ever storing it in the notebook:
# reuse the value already exported in the environment, otherwise prompt for it
# (getpass does not echo the input, so the key never lands in a cell or output).
# Any key that was ever saved inside a notebook must be treated as leaked and revoked.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")


In [24]:
# Hand the credential from the environment to the OpenAI client and stop
# immediately when nothing usable was provided.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    raise ValueError("API key not found. Please set the OPENAI_API_KEY environment variable.")

# Timestamped INFO-level logging for the whole notebook
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Closed set of labels the classifier is asked to choose from
categories = [
    "Medical assistance", "Shelter request",
    "Supplies needed", "Evacuation support",
    "Rescue operations", "Mental health support",
    "Infrastructure repair", "Animal rescue assistance",
    "No assistance needed",
    "Authority intervention (police, court, judges, ..)",
]

# Function to classify a single tweet using OpenAI's ChatGPT
def classify_tweet(content, max_retries=3, retry_delay=5.0):
    """Classify one tweet into one of the module-level ``categories``.

    Args:
        content: Text of the tweet sent as the user message.
        max_retries: How many times a transient rate-limit error is retried
            before giving up. Negative values are treated as 0.
        retry_delay: Base wait in seconds before the first retry; the wait
            doubles on every further retry (exponential backoff).

    Returns:
        The model's answer with surrounding whitespace stripped, or one of the
        sentinel strings ``"Rate limit error"`` (quota exhausted, or rate limit
        still hit after all retries) and ``"Error"`` (any other failure).
        The function does not raise for API failures.
    """
    retries = max(0, max_retries)

    for attempt in range(retries + 1):
        try:
            prompt = (
                "You are a classifier for emergency-related tweets. "
                "Please classify the following tweet into one of these categories only: "
                + ", ".join(categories) + ". "
                "Respond with the category name only."
            )
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": prompt},
                    {"role": "user", "content": content}
                ],
                temperature=0  # Setting temperature to 0 for deterministic output
            )
            return response.choices[0].message['content'].strip()
        except openai.error.RateLimitError as e:
            if 'insufficient_quota' in str(e):
                # An exhausted quota does not recover by waiting, so retrying
                # would only stall the run; report it and give up at once.
                logging.error("Insufficient funds in OpenAI account. Please add more credits.")
                return "Rate limit error"
            if attempt >= retries:
                logging.error(f"Rate limit error: {e}")
                return "Rate limit error"
            # Transient throttling: back off and try again a bounded number of times.
            wait = retry_delay * (2 ** attempt)
            logging.warning(
                f"Rate limited (attempt {attempt + 1} of {retries + 1}); retrying in {wait} seconds."
            )
            time.sleep(wait)
        except Exception as e:
            logging.error(f"Error classifying tweet: {e}")
            return "Error"

# Function to classify tweets in a DataFrame with periodic saving
def classify_tweets(df, output_file, save_interval=50, delay=1):
    """Label every row of ``df`` and write the result to ``output_file``.

    Each row's ``Title`` and ``Snippet`` are joined with a space (missing
    values are skipped rather than sent as the text "nan") and passed to
    ``classify_tweet``.

    Args:
        df: DataFrame with ``Title`` and ``Snippet`` columns. A ``Label``
            column is added to this frame in place once all rows are done.
        output_file: CSV path for both the checkpoints and the final result.
        save_interval: Write a checkpoint after every this many rows. A
            checkpoint contains only the rows labelled so far. ``0`` or
            ``None`` disables checkpoints.
        delay: Seconds to pause between consecutive requests to stay under
            the API rate limit. No pause follows the last row.

    Returns:
        The same ``df`` object, now carrying the ``Label`` column.
    """
    total = len(df)
    labels = []

    # Count rows by position: the frame's index labels need not be 0..n-1
    for position, (_, row) in enumerate(df.iterrows(), start=1):
        parts = [str(row[col]) for col in ("Title", "Snippet") if pd.notna(row[col])]
        labels.append(classify_tweet(" ".join(parts)))

        # Save progress every save_interval tweets. Only the processed rows are
        # written, because `labels` is shorter than `df` until the loop ends.
        if save_interval and position % save_interval == 0:
            df.iloc[:position].assign(Label=labels).to_csv(output_file, index=False)
            logging.info(f"Progress saved to {output_file} at tweet {position}")

        if delay and position < total:
            time.sleep(delay)  # Adding a delay to avoid rate limiting

    # Final save after all tweets are processed
    df["Label"] = labels
    df.to_csv(output_file, index=False)
    return df

def main(input_file, output_file):
    """Load tweets from a CSV, label them, save the result and print a summary.

    Args:
        input_file: Path of the CSV holding the tweets to classify.
        output_file: Path of the CSV that receives the labelled tweets.

    Returns:
        None. If ``input_file`` does not exist, an error is logged and the
        function returns without classifying anything.
    """
    # Load the input; a missing file is reported and ends the run quietly
    try:
        tweets = pd.read_csv(input_file)
    except FileNotFoundError:
        logging.error("Input CSV file not found.")
        return None

    # Label every tweet, checkpointing every 50 rows
    logging.info("Classifying tweets...")
    labelled = classify_tweets(tweets, output_file, 50)

    # Persist the labelled frame
    labelled.to_csv(output_file, index=False)
    logging.info(f"Classified tweets saved to {output_file}")

    # Report how many tweets landed in each category
    label_counts = labelled["Label"].value_counts()
    print("Classification Summary:", label_counts, sep="\n")

In [25]:
# Notebook-style invocation: the input and output paths are assigned directly
# in this cell and handed to main().
input_file, output_file = "test_tweets.csv", "classified_test_tweets.csv"
main(input_file, output_file)


2024-11-05 13:46:24,148 - INFO - Classifying tweets...
2024-11-05 13:46:48,000 - INFO - Classified tweets saved to classified_test_tweets.csv


Classification Summary:
Label
Authority intervention      6
No assistance needed        2
Animal rescue assistance    2
Rescue operations           2
Supplies needed             1
Evacuation support          1
Medical assistance          1
Name: count, dtype: int64


In [13]:
# Reload the classified tweets from disk and preview the first five rows
labeled = pd.read_csv("classified_test_tweets.csv")
labeled.head(n=5)

Unnamed: 0,Title,Snippet,Label
0,Israel pounds Beirut suburb Dahiyeh with airst...,Israel's air force pounded Beirut's southern s...,Rescue operations
1,PRESS RELEASE: FirstBank Joins Partnership For...,"October 31, (THEWILL) - Lagos- October 30, 202...",No assistance needed
2,Iran's supreme leader threatens Israel and US ...,"DUBAI, United Arab Emirates (AP) -- Iran's sup...",No assistance needed
3,91 Wholesome Animal Adoption Pics That Warmed ...,91 Wholesome Animal Adoption Pics That Warmed ...,Animal rescue assistance
4,After Detroit man slits throat of 7-year-old Y...,Authorities in the United States' city of Detr...,Mental health support


In [14]:
# Display the labelled frame reloaded from classified_test_tweets.csv
labeled

Unnamed: 0,Title,Snippet,Label
0,Israel pounds Beirut suburb Dahiyeh with airst...,Israel's air force pounded Beirut's southern s...,Rescue operations
1,PRESS RELEASE: FirstBank Joins Partnership For...,"October 31, (THEWILL) - Lagos- October 30, 202...",No assistance needed
2,Iran's supreme leader threatens Israel and US ...,"DUBAI, United Arab Emirates (AP) -- Iran's sup...",No assistance needed
3,91 Wholesome Animal Adoption Pics That Warmed ...,91 Wholesome Animal Adoption Pics That Warmed ...,Animal rescue assistance
4,After Detroit man slits throat of 7-year-old Y...,Authorities in the United States' city of Detr...,Mental health support
5,"Clarksville Pets of the Week for October 28th,...","Clarksville, TN - Every week, Clarksville Onli...",Animal rescue assistance
6,Where are the Israeli captives taken in the Ha...,Despite pressure to agree to a deal that would...,No assistance needed
7,President Macron says France will provide 100 ...,France announced a 100-million euro aid packag...,Supplies needed
8,West Asia crisis hits Iran's status as favoure...,Families of Kashmiri students in Iran are anxi...,Evacuation support
9,"Israeli strikes kill 27 in Lebanon, including ...","QANA, Lebanon -- Israeli airstrikes pounded ar...",Rescue operations
