In [2]:
import os
import openai
import pandas as pd
import time
import logging
import argparse

In [3]:
print(openai.__version__)

0.28.0


In [28]:
# Load the clean data
df = pd.read_csv('clean_data.csv')

# Select 50% of the data
half_data = df.sample(frac=0.5, random_state=1)

# Drop the selected rows from the original DataFrame
remaining_data = df.drop(half_data.index)

# Save the half data to 'test_tweets.csv'
half_data.to_csv('test_tweets.csv', index=False)

# Save the remaining data back to 'clean_data.csv' or to a new file if preferred
remaining_data.to_csv('clean_data_remaining.csv', index=False)

In [None]:
os.environ["OPENAI_API_KEY"] = "your key"


In [5]:
# Load API key from environment variable
openai.api_key = os.getenv("OPENAI_API_KEY")
if not openai.api_key:
    raise ValueError("API key not found. Please set the OPENAI_API_KEY environment variable.")

# Set up logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Define the new set of categories
categories = [
    "Medical assistance",
    "Shelter request",
    "Supplies needed",
    "Evacuation support",
    "Rescue operations",
    "Mental health support",
    "Infrastructure repair",
    "Animal rescue assistance",
    "No assistance needed",
    "Authority intervention (police, court, judges, ..)"
]

# Function to classify a single tweet using OpenAI's ChatGPT
def classify_tweet(content):
    try:
        prompt = (
            "You are a classifier for emergency-related tweets. "
            "Please classify the following tweet into one of these categories only: "
            + ", ".join(categories) + ". "
            "Respond with the category name only."
        )
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": prompt},
                {"role": "user", "content": content}
            ],
            temperature=0  # Setting temperature to 0 for deterministic output
        )
        label = response.choices[0].message['content'].strip()
        return label
    except openai.error.RateLimitError as e:
        if 'insufficient_quota' in str(e):
            logging.error("Insufficient funds in OpenAI account. Please add more credits.")
            # Optionally wait and retry, or exit the script
            time.sleep(60)  # Wait 1 minute and then retry (optional)
            return classify_tweet(content)  # Retry classification
        else:
            logging.error(f"Rate limit error: {e}")
            return "Rate limit error"
    except Exception as e:
        logging.error(f"Error classifying tweet: {e}")
        return "Error"

# Function to classify tweets in a DataFrame
def classify_tweets(df):
    labels = []
    for index, row in df.iterrows():
        content = f"{row['Title']} {row['Snippet']}"
        label = classify_tweet(content)
        labels.append(label)
        time.sleep(1)  # Adding a delay to avoid rate limiting
    df["Label"] = labels
    return df

def main(input_file, output_file):
    # Read the CSV file
    try:
        df = pd.read_csv(input_file)
    except FileNotFoundError:
        logging.error("Input CSV file not found.")
        return

    # Classify tweets
    logging.info("Classifying tweets...")
    df = classify_tweets(df)

    # Save the classified tweets to a new CSV file
    df.to_csv(output_file, index=False)
    logging.info(f"Classified tweets saved to {output_file}")

    # Print summary of classifications
    summary = df["Label"].value_counts()
    print("Classification Summary:")
    print(summary)

In [8]:
# Replace this section in Jupyter or interactive environments
input_file = "/home/moegho/Desktop/490_Project/clean_data_remaining.csv"
output_file = "classified_data_2.csv"
main(input_file, output_file)


2024-11-06 14:39:57,836 - INFO - Classifying tweets...
2024-11-06 17:14:29,774 - INFO - Classified tweets saved to classified_data_2.csv


Classification Summary:
Label
No assistance needed                                   2234
Authority intervention                                 1593
Rescue operations                                       665
Evacuation support                                      316
Medical assistance                                      286
Animal rescue assistance                                241
Infrastructure repair                                   165
Supplies needed                                         110
Shelter request                                          53
Mental health support                                    50
Authority intervention (police, court, judges, ...)      46
Authority intervention (police, court, judges)           33
Authority intervention (police, court, judges, ..)        6
Emergency assistance for Education in Lebanon             3
Humanitarian aid                                          3
Emergency assistance needed                               3
Humanitari