In [1]:
import os
import time

import openai
import pandas as pd

def _build_prompt(candidate_name, party_name, tweets):
    """Build the classification prompt for one batch of tweets.

    Tweets are numbered from 1 inside the prompt so the reply can be matched
    back to the batch by tweet number.
    """
    tweets_text = "\n".join(f"{idx}: {text}" for idx, text in enumerate(tweets, start=1))
    return (
        f"Classify these tweets of the presidential candidate {candidate_name} of the party {party_name} "
        "with the following scores:\n"
        "1: only if it is self-referential or makes emphasis on the individual qualities of the candidate\n"
        "-1: only if it makes emphasis on the party or political movement the candidate belongs to.\n"
        "0: if it does not belong to any of the previous categories\n\n"
        "Only return the tweet number and classification score. No additional text.\n\n"
        f"{tweets_text}"
    )


def _parse_classifications(response_text, id_batch):
    """Convert the model's reply into result records for one batch.

    A usable line ends in an integer score, normally as ``<tweet number>: <score>``.
    The tweet number (1-based, as written in the prompt) selects the ID; a line
    that carries only a score falls back to its position among the usable lines.
    Blank lines, lines without an integer score, and tweet numbers outside the
    batch are ignored. If a tweet is scored twice, the first score wins.

    Returns:
        list[dict]: ``{"ID": ..., "Classification Score": ...}`` records in batch order.

    Raises:
        ValueError: if not a single line of the reply could be used.
    """
    scores_by_position = {}
    parsed_lines = 0
    for raw_line in response_text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        label, separator, score_text = line.rpartition(":")
        try:
            score = int(score_text.strip())
        except ValueError:
            continue  # header or commentary despite the "No additional text" instruction
        position = parsed_lines  # positional fallback when no tweet number is present
        parsed_lines += 1
        if separator:
            label_tokens = label.split()
            if label_tokens:
                try:
                    position = int(label_tokens[-1]) - 1
                except ValueError:
                    pass
        if 0 <= position < len(id_batch):
            scores_by_position.setdefault(position, score)

    if not scores_by_position:
        raise ValueError("no classification could be parsed from the model response")

    return [
        {"ID": id_batch[position], "Classification Score": scores_by_position[position]}
        for position in sorted(scores_by_position)
    ]


def _save_results(results, output_file):
    """Write the accumulated records to ``output_file`` and return them as a DataFrame."""
    # Explicit columns keep the CSV header (and the frame's schema) even with zero records.
    results_df = pd.DataFrame(results, columns=["ID", "Classification Score"])
    results_df.to_csv(output_file, index=False)
    return results_df


def classify_tweets(api_key, tweets_df, output_file="classified_tweets.csv",
                    batch_size=200, max_retries=5):
    """Classify tweets as candidate-centred (1), party-centred (-1) or neither (0).

    Tweets are grouped by candidate and party, sent to ``gpt-4o-mini`` in batches,
    and the returned scores are matched back to the tweet IDs.

    Args:
        api_key: OpenAI API key; assigned to ``openai.api_key``.
        tweets_df: DataFrame with the columns ``candidate_name``, ``party_name``,
            ``text`` and ``id``.
        output_file: CSV path the results are written to, both on completion and
            when the run stops early.
        batch_size: Number of tweets sent per request (default 200).
        max_retries: Maximum attempts per batch before the run is stopped
            (default 5; values below 1 are treated as 1).

    Returns:
        pandas.DataFrame with the columns ``ID`` and ``Classification Score``.
        If the daily request limit is hit, or a batch keeps failing, the frame
        holds only the batches completed so far (the same data is saved to
        ``output_file``).

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.

    Notes:
        * ``openai.ChatCompletion`` and ``openai.error`` are used as in the
          original code. NOTE(review): these look like the pre-1.0 SDK
          interface; confirm the installed ``openai`` version.
        * NOTE(review): ``groupby`` is called with its defaults, which presumably
          skip rows whose candidate or party is missing; confirm against the data.
        * NOTE(review): ``max_tokens=1000`` may truncate the reply for large
          batches; a warning is printed when a reply covers fewer tweets than sent.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")
    attempts_allowed = max(1, max_retries)

    openai.api_key = api_key
    results = []

    for (candidate_name, party_name), group in tweets_df.groupby(['candidate_name', 'party_name']):
        tweets = group['text'].tolist()
        tweet_ids = group['id'].tolist()

        for start in range(0, len(tweets), batch_size):
            tweet_batch = tweets[start:start + batch_size]
            id_batch = tweet_ids[start:start + batch_size]
            prompt = _build_prompt(candidate_name, party_name, tweet_batch)

            batch_records = None
            for attempt in range(1, attempts_allowed + 1):
                try:
                    response = openai.ChatCompletion.create(
                        model="gpt-4o-mini",
                        messages=[
                            {"role": "system", "content": "You are a helpful assistant."},
                            {"role": "user", "content": prompt}
                        ],
                        temperature=0,  # keep the output as deterministic as possible
                        max_tokens=1000
                    )
                    content = response['choices'][0]['message']['content']
                    # Parse into a local list first: a failure half-way through must not
                    # leave partial rows in `results` that a retry would then duplicate.
                    batch_records = _parse_classifications(content, id_batch)
                    break

                except openai.error.RateLimitError as e:
                    error_message = str(e)
                    # A daily limit cannot be waited out, so stop before announcing a retry.
                    if ("Rate limit reached for gpt-4o-mini in organization" in error_message
                            and "on requests per day" in error_message):
                        print(f"Rate limit reached: {e}.")
                        print("Daily request limit reached. Saving the data and stopping the process.")
                        return _save_results(results, output_file)
                    failure, wait_seconds = f"Rate limit reached: {e}", 60

                except openai.error.APIConnectionError as e:
                    failure, wait_seconds = f"API connection error: {e}", 10

                except openai.error.APIError as e:
                    failure, wait_seconds = f"API error: {e}", 10

                except Exception as e:
                    failure, wait_seconds = f"Unexpected error: {e}", 10

                if attempt == attempts_allowed:
                    print(f"{failure}.")
                    break
                print(f"{failure}. Waiting for {wait_seconds} seconds before retrying...")
                time.sleep(wait_seconds)

            if batch_records is None:
                # Persistent failure (bad key, exhausted quota, malformed replies):
                # keep what has been classified instead of looping forever.
                print(f"Batch failed after {attempts_allowed} attempts. "
                      "Saving the data and stopping the process.")
                return _save_results(results, output_file)

            if len(batch_records) < len(id_batch):
                print(f"Warning: received {len(batch_records)} classifications for "
                      f"{len(id_batch)} tweets of {candidate_name}; the rest are left unclassified.")
            results.extend(batch_records)

    print("Classification process completed successfully.")
    return _save_results(results, output_file)



In [18]:
# Read the key from the environment so the credential never lives in the notebook.
# An empty key would only fail authentication on every request, so stop early instead.
api_key = os.environ.get("OPENAI_API_KEY", "")
if not api_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell.")

# NOTE(review): user-specific absolute paths; consider a configurable data directory.
TWEETS_CSV = "/Users/Fede/Desktop/Twitter_data.csv"
CLASSIFIED_CSV = "/Users/Fede/Desktop/classified_tweets.csv"

tweets_df = pd.read_csv(TWEETS_CSV)

# classify_tweets saves its results to `output_file` itself; pointing it at the
# final destination avoids a second, redundant CSV in the working directory.
result_df = classify_tweets(api_key, tweets_df, output_file=CLASSIFIED_CSV)


Rate limit reached: Rate limit reached for gpt-4o-mini in organization org-HXKpLejFknNaY6q9ck33dmNV on requests per min (RPM): Limit 3, Used 3, Requested 1. Please try again in 20s. Visit https://platform.openai.com/account/rate-limits to learn more. You can increase your rate limit by adding a payment method to your account at https://platform.openai.com/account/billing.. Waiting for 60 seconds before retrying...
Classification process completed successfully.


Unnamed: 0,ID,Classification Score
0,488,0
1,489,1
2,490,0
3,491,0
4,492,1
...,...,...
436,483,-1
437,484,0
438,485,0
439,486,-1
