In [6]:
import os
import openai
import pandas as pd
import time
import logging
import argparse

In [13]:
df = pd.read_csv('new_data.csv')
# Select 10 random rows
random_rows = df.sample(n=10)

# Save the random rows to a CSV file
random_rows.to_csv('test_tweets.csv', index=False)

In [9]:
os.environ["OPENAI_API_KEY"] = ""


In [10]:
# Load API key from environment variable
openai.api_key = os.getenv("OPENAI_API_KEY")
if not openai.api_key:
    raise ValueError("API key not found. Please set the OPENAI_API_KEY environment variable.")

# Set up logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Function to classify a single tweet using OpenAI's ChatGPT
def classify_tweet(content):
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a classifier for emergency-related tweets. Please classify the following tweet into one of these categories: Medical assistance needed, Shelter request, Supplies needed, Evacuation alert, Rescue teams, Animals in danger, No emergency."},
                {"role": "user", "content": f"{content}"}
            ]
        )
        label = response['choices'][0]['message']['content'].strip()
        return label
    except Exception as e:
        logging.error(f"Error classifying tweet: {e}")
        return "Error"


# Function to classify tweets in a DataFrame
def classify_tweets(df):
    labels = []
    for index, row in df.iterrows():
        content = f"{row['Title']} {row['Snippet']}"
        label = classify_tweet(content)
        labels.append(label)
        time.sleep(1)  # Adding a delay to avoid rate limiting
    df["Label"] = labels
    return df

def main(input_file, output_file):
    # Read the CSV file
    try:
        df = pd.read_csv(input_file)
    except FileNotFoundError:
        logging.error("Input CSV file not found.")
        return

    # Classify tweets
    logging.info("Classifying tweets...")
    df = classify_tweets(df)

    # Save the classified tweets to a new CSV file
    df.to_csv(output_file, index=False)
    logging.info(f"Classified tweets saved to {output_file}")

    # Print summary of classifications
    summary = df["Label"].value_counts()
    print("Classification Summary:")
    print(summary)


In [14]:
# Replace this section in Jupyter or interactive environments
input_file = "test_tweets.csv"
output_file = "classified_test_tweets.csv"
main(input_file, output_file)


2024-10-29 15:10:27,215 - INFO - Classifying tweets...
2024-10-29 15:10:42,450 - INFO - Classified tweets saved to classified_test_tweets.csv


Classification Summary:
Label
No emergency                 6
Supplies needed              1
Evacuation alert             1
Medical assistance needed    1
Evacuation alert.            1
Name: count, dtype: int64


In [12]:
labeled = pd.read_csv('classified_test_tweets.csv')
labeled.head()

Unnamed: 0,Keyword,Title,Snippet,Label
0,disease outbreak in Lebanon,Antigenic drift and subtype interference shape...,Influenza viruses continually evolve new antig...,No emergency
1,Lebanon urgent assistance,'Families ran for their lives': Syria receives...,"The UN High Commissioner for Refugees, Filippo...",Evacuation alert
2,Tyre emergency,The war in Gaza long felt personal for Palesti...,"SEBLINE, Lebanon (AP) — The war in Gaza was al...",No emergency
3,Lebanon flooding,FEMAâs âdisaster equityâ focus means LGB...,When Homeland Security Secretary Alejandro May...,No emergency
4,Lebanon,Walkie-Talkies Explode in Lebanon as Part of a...,Beirut said that the new explosions have kille...,Medical assistance needed
