# Sentiment analysis for ground truth sample using Claude Sonnet 3.5

Setup: install the required packages and import the libraries used below.

In [None]:
!pip install anthropic pandas tqdm

In [None]:
import random
import re
import time

import anthropic
import pandas as pd
from tqdm.notebook import tqdm

Load the Anthropic API key from Colab's user secrets (it must be stored there under the name `ANTHROPIC_API_KEY`) and create the API client.

In [None]:
# Colab-only helper module; this import fails outside Google Colab.
from google.colab import userdata
# Look up the key stored under the secret name 'ANTHROPIC_API_KEY'.
api_key = userdata.get('ANTHROPIC_API_KEY')
# Module-level client; get_sentiment_and_explanation() sends its requests
# through `client.messages.create`.
client = anthropic.Anthropic(api_key=api_key)

Define the sentiment analysis prompt template and the helper functions that query Claude and process the comments.

In [None]:
# Prompt sent to the model for every comment. `{title}` and `{comment}` are
# placeholders filled in with str.format() by get_sentiment_and_explanation().
# The "Output format" section asks for a "Sentiment:" line and an
# "Explanation:" line; the response parser looks for exactly those two labels,
# so keep them in sync if this text is edited.
prompt_template = '''
Analyze the sentiment of the Hacker News comment with the context of the AI-related story title found below under "Input text:" whether the comment is negative (-1), neutral (0), or positive (1) towards AI. First understand the meaning of the story title, then understand the meaning of the comment in the context of the story title, then analyze the sentiment of the comments in the context of the story title whether the comment is negative (-1), neutral (0), or positive (1) towards AI, and conclude the sentiment of the comment to only one best sentiment type.

Sentiment can contain only a single number either:
  -1 (for negative towards AI)
  0 (for neutral towards AI)
  1 (for positive towards AI)

# Input text:
Story title: """
{title}
"""
Comment: """
{comment}
"""

# Output format:
Sentiment: your_sentiment
Explanation: your_explanation_for_the_sentiment
'''

# Patterns for the two labelled fields requested by the prompt's "Output format"
# section. `\**` tolerates markdown bold around the label (e.g. "**Sentiment:** 1").
_SENTIMENT_RE = re.compile(r"Sentiment:\**\s*([-+]?\d+)")
# `\s*` may cross a newline, so an explanation that starts on the line after
# the label is still captured; `.+` then takes the rest of that line, colons
# included.
_EXPLANATION_RE = re.compile(r"Explanation:\**\s*(.+)")


def _parse_sentiment_response(text):
    """Extract ``(sentiment, explanation)`` from the model's reply text.

    Raises:
        ValueError: if either field is missing or the sentiment is not one of
            -1, 0, 1.
    """
    sentiment_match = _SENTIMENT_RE.search(text)
    explanation_match = _EXPLANATION_RE.search(text)
    if sentiment_match is None or explanation_match is None:
        raise ValueError("Invalid response format")

    sentiment = int(sentiment_match.group(1))
    if sentiment not in (-1, 0, 1):
        raise ValueError(f"Invalid sentiment value: {sentiment}")

    return sentiment, explanation_match.group(1).strip()


# Function to get sentiment using Claude's API
def get_sentiment_and_explanation(title, comment,
                                  model="claude-3-sonnet-20240229",
                                  max_attempts=3):
    """Ask Claude for the sentiment of one comment towards AI.

    The module-level ``prompt_template`` is filled with ``title`` and
    ``comment`` and sent through the module-level ``client``. Any failure
    (API error or unparseable reply) is retried after a random 60-120 second
    pause, up to ``max_attempts`` attempts in total.

    Args:
        title: Story title the comment was posted under.
        comment: Text of the comment to classify.
        model: Model ID passed to the Messages API.
            NOTE(review): the default ID is Claude 3 Sonnet, while the
            notebook title mentions "Claude Sonnet 3.5" - confirm which one
            is intended.
        max_attempts: Total number of attempts before giving up.

    Returns:
        ``(sentiment, explanation)`` where ``sentiment`` is -1, 0 or 1 and
        ``explanation`` is a string; ``(None, None)`` if every attempt failed,
        so callers can always unpack the result into two values.
    """
    prompt = prompt_template.format(title=title, comment=comment)

    for attempt in range(1, max_attempts + 1):
        try:
            # Call the Claude API with the prompt
            response = client.messages.create(
                model=model,
                max_tokens=1000,
                messages=[
                    {"role": "user", "content": prompt}
                ]
            )
            return _parse_sentiment_response(response.content[0].text)
        except Exception as e:
            # Deliberately broad: this is a best-effort batch job, and one bad
            # row must not abort the whole run.
            if attempt == max_attempts:
                # No point sleeping when there is no further attempt.
                print(f"API call failed: {e}. Giving up after {max_attempts} attempts.")
                break
            print(f"API call failed: {e}. Pausing for 1-2 minutes...")
            time.sleep(random.randint(60, 120))

    return None, None

# Function to process comments and save periodically, continuing from last processed row
def process_comments(df, output_csv_file, save_interval=20):
    """Label every unlabelled row of ``df`` and checkpoint the result to CSV.

    ``df`` is modified in place. If ``output_csv_file`` already exists, its
    non-missing values are merged into ``df`` first (``DataFrame.update``,
    aligned on the index), so rows labelled in a previous run are skipped.

    Args:
        df: DataFrame with ``title`` and ``comment_text`` columns. The
            ``sentiment`` and ``explanation`` columns are created if absent.
        output_csv_file: Path of the CSV used both for resuming and for
            saving results.
        save_interval: Write a checkpoint after this many newly labelled rows.

    Raises:
        ValueError: if ``save_interval`` is smaller than 1.
    """
    if save_interval < 1:
        raise ValueError("save_interval must be a positive integer")

    for column in ('sentiment', 'explanation'):
        if column not in df.columns:
            df[column] = None

    # Load existing file if exists, continue from where it left off
    try:
        existing_df = pd.read_csv(output_csv_file)
    except FileNotFoundError:
        print("No existing output file found. Starting fresh...")
    else:
        df.update(existing_df)  # Merge with any previously saved data
        print("Resuming from the last saved point...")

    newly_labelled = 0
    failed = 0

    try:
        # The context manager closes the progress bar even if the loop raises.
        with tqdm(total=len(df), desc="Processing comments") as pbar:
            for position, (i, row) in enumerate(df.iterrows(), start=1):
                # Skip rows that already have a sentiment and explanation value
                if pd.notna(row.get('sentiment')) and pd.notna(row.get('explanation')):
                    pbar.update(1)
                    continue

                result = get_sentiment_and_explanation(row['title'], row['comment_text'])
                pbar.update(1)

                # The helper signals "all retries failed" with None (or a
                # tuple of Nones); leave the row blank so a later run retries it.
                if result is None or result[0] is None:
                    failed += 1
                    continue

                sentiment, explanation = result
                df.at[i, 'sentiment'] = sentiment
                df.at[i, 'explanation'] = explanation
                newly_labelled += 1

                # Checkpoint on the number of rows actually labelled, not on
                # the index label, so it works for any index and after resuming.
                if newly_labelled % save_interval == 0:
                    df.to_csv(output_csv_file, index=False)
                    print(f"\nSaved progress at row {position}")
    finally:
        # Always persist what has been labelled so far, including when the
        # run is interrupted or an unexpected error escapes the loop.
        df.to_csv(output_csv_file, index=False)

    if failed:
        print(f"{failed} comment(s) could not be analyzed and were left blank; re-run to retry them.")
    print(f"Final result saved to {output_csv_file}")


In [None]:
# Input sample and destination for the labelled results.
csv_file = 'sentiment_385_sampled_cleanup.csv'  # Replace with the actual path to your CSV file
output_csv_file = 'hackernews_comments_with_sentiment_CLAUDE.csv'

df = pd.read_csv(csv_file)

# Make sure both result columns are present before processing starts.
for result_column in ('sentiment', 'explanation'):
    if result_column not in df.columns:
        df[result_column] = None

# Label the comments, checkpointing to the output file along the way.
process_comments(df, output_csv_file)

print(f'Sentiment analysis completed and saved to {output_csv_file}')