# sentiment analysis using cohere

setup paths and common variables

In [2]:
!pip install cohere pandas

Collecting cohere
  Downloading cohere-5.11.0-py3-none-any.whl.metadata (3.4 kB)
Collecting boto3<2.0.0,>=1.34.0 (from cohere)
  Downloading boto3-1.35.39-py3-none-any.whl.metadata (6.7 kB)
Collecting fastavro<2.0.0,>=1.9.4 (from cohere)
  Downloading fastavro-1.9.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.5 kB)
Collecting httpx>=0.21.2 (from cohere)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx-sse==0.4.0 (from cohere)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting parameterized<0.10.0,>=0.9.0 (from cohere)
  Downloading parameterized-0.9.0-py2.py3-none-any.whl.metadata (18 kB)
Collecting sagemaker<3.0.0,>=2.232.1 (from cohere)
  Downloading sagemaker-2.232.2-py3-none-any.whl.metadata (16 kB)
Collecting types-requests<3.0.0,>=2.0.0 (from cohere)
  Downloading types_requests-2.32.0.20240914-py3-none-any.whl.metadata (1.9 kB)
Collecting botocore<1.36.0,>=1.35.39 (from boto3<2.0.0,>=1.34.0->cohere

In [8]:
import cohere
import csv
import pandas as pd
import time
import random

check whether API key exists (should be in your environment variable)

In [14]:
# Initialize the Cohere client with your API key
from google.colab import userdata
api_key = userdata.get('COHERE_API_KEY')
co = cohere.ClientV2(api_key=api_key)

In [21]:
# Define the sentiment analysis prompt template
prompt_template = '''
Analyze the sentiment of the Hacker News comment with the context of the AI-related story title found below under "Input text:" whether the comment is negative (-1), neutral (0), or positive (1) towards AI. First understand the meaning of the story title, then understand the meaning of the comment in the context of the story title, then analyze the sentiment of the comments in the context of the story title whether the comment is negative (-1), neutral (0), or positive (1) towards AI, and conclude the sentiment of the comment to only one best sentiment type.

Sentiment can contain only a single number either:
  -1 (for negative towards AI)
  0 (for neutral towards AI)
  1 (for positive towards AI)

# Input text:
Story title: """
{title}
"""
Comment: """
{comment}
"""

# Output format:
Sentiment: your_sentiment
Explanation: your_explanation_for_the_sentiment
'''

# Function to get sentiment using Cohere's API
def get_sentiment_and_explanation(title, comment):
    prompt = prompt_template.format(title=title, comment=comment)

    # Repeats for 3 times in case of errors
    for _ in range(3):
        try:
            # Call the Cohere API with the prompt
            response = co.chat(
                model='command-r-plus-08-2024',
                messages=[
                    {
                        "role": "user",
                        "content": prompt,
                    }
                ],
            )
            # Extract the text from the response
            result = response.message.content[0].text.strip()

            # Extract sentiment and explanation from the response text
            sentiment_line = next((line for line in result.split('\n') if "Sentiment:" in line), None)
            explanation_line = next((line for line in result.split('\n') if "Explanation:" in line), None)

            if sentiment_line and explanation_line:
                sentiment = int(sentiment_line.split(":")[1].strip())
                explanation = explanation_line.split(":")[1].strip()
                return sentiment, explanation
            else:
                raise ValueError("Invalid response format")

            # Extract and return the sentiment
            # return int(response.message.content[0].text) # Convert the sentiment label to an integer
        except Exception as e:
            print(f"API call failed: {e}. Pausing for 1-2 minutes...")
            time.sleep(random.randint(60, 120))

# Function to process comments and save periodically
def process_comments(df, output_csv_file, save_interval=20):
    for i, row in df.iterrows():
        # Skip rows that already have a sentiment and explanation value
        if pd.notna(row.get('sentiment')) and pd.notna(row.get('explanation')):
            continue

        # Perform sentiment analysis and get the explanation
        sentiment, explanation = get_sentiment_and_explanation(row['title'], row['comment_text'])
        df.at[i, 'sentiment'] = sentiment
        df.at[i, 'explanation'] = explanation

        # Save progress to CSV after every `save_interval` rows
        if (i + 1) % save_interval == 0:
            df.to_csv(output_csv_file, index=False)
            print(f"Saved progress at row {i + 1}")

    # Final save at the end of processing
    df.to_csv(output_csv_file, index=False)
    print(f"Final result saved to {output_csv_file}")



In [None]:
# Load the CSV file with the Hacker News data
csv_file = 'sentiment_385_sampled_cleanup.csv'  # Replace with the actual path to your CSV file
df = pd.read_csv(csv_file)

# If the 'sentiment' and 'explanation' columns don't exist, add them
if 'sentiment' not in df.columns:
    df['sentiment'] = None
if 'explanation' not in df.columns:
    df['explanation'] = None

# Output file to save the results
output_csv_file = 'hackernews_comments_with_sentiment_COHERE.csv'

# Process the comments and periodically save the results
process_comments(df, output_csv_file)

print(f'Sentiment analysis completed and saved to {output_csv_file}')

API call failed: status_code: 429, body: data=None message="You are using a Trial key, which is limited to 10 API calls / minute. You can continue to use the Trial key for free or upgrade to a Production key with higher rate limits at 'https://dashboard.cohere.com/api-keys'. Contact us on 'https://discord.gg/XW44jPfYJu' or email us at support@cohere.com with any questions". Pausing for 1-2 minutes...
