# Import Modules & Setup

In [None]:
!pip install openai pandas tqdm python-dotenv

In [None]:
import openai
import csv
import pandas as pd
import time
import random
from tqdm.notebook import tqdm
import sys
import os
from dotenv import load_dotenv

In [None]:
# CSV that holds the sampled comments and receives the GPT labels, relative to
# the current working directory. Built with os.path.join so the separator
# matches the host OS (a literal '..\\dataset\\...' only resolves on Windows).
output_csv_file = os.path.join('..', 'dataset', 'sampled_hn_ai_comment_gpt_sentiment.csv')

### Initialize OpenAI API

In [None]:
# Set your OPENAI_API_KEY as an environment variable (or in a local .env file)
# and then load it
load_dotenv()

# Initialize the OpenAI client with your API key
api_key_openai = os.getenv("OPENAI_API_KEY")
client_openai = openai.OpenAI(api_key=api_key_openai)

### Prompt template

In [None]:
# Few-shot prompt sent as the user message by get_sentiment_and_reason_openai.
# It is filled with str.format, so {title_input} and {comment_input} are the
# only placeholders; any other literal brace added here would have to be doubled.
# The reply is parsed by looking for "Comment label:" and "Comment reason:"
# lines, so the last two lines of the template define the expected answer shape.
prompt_template = '''
Please perform Sentiment Classification task.
Given the headline and its comment, assign a sentiment label expressed by the author towards "Artificial Intelligence (AI)" from ['negative': -1, 'neutral': 0, 'positive': 1] to its comment.

Note:
Please ignore any link in this input.
If the comment includes both positive and negative aspects, assign the label corresponding to the predominant sentiment.
Avoid misclassifying neutral advice or factual descriptions.

Return label and reason only based on the following examples.

Headline: Anyone else thinks developing AI is a waste of time? I wish the whole world stop using AI for good.
Comment: I agree with you on this.
Comment label:-1
Comment reason:The comment agrees that AI is a waste of time and wishes the world would stop using AI for good.

Headline: "Cody" an AI coding assistant
Comment: I tried to ask it to code a bouncing ball simulation in a box. The output was great. It's runnable and key elements are nailed; however, there're some unrealistic behavior.
Comment label:1
Comment reason:The comment suggests that the Cody project is good at the bouncing ball simulation in a box problem, despite some imperfection.

Headline: "TEXIE," an AI self-driving car that makes your life easier.
Comment: I haven't tried. But, I think the car design looks funny; I would be embarrassed to drive it.
Comment label:0
Comment reason:The comment critiques the car design but neither supports nor opposes AI self-driving technology.

Headline: {title_input}
Comment: {comment_input}
Comment label:
Comment reason:
'''

# Sentiment from GPT

In [None]:
def _parse_sentiment_reply(result):
    """Locate the label and reason lines in one model reply.

    Returns ``(label_line, reason_line, label_text, reason)`` where
    ``label_text`` is the text after the first colon of the label line with
    whitespace and markdown/quote wrappers removed, or ``None`` when either the
    "comment label:" or the "comment reason:" line is missing.
    """
    lines = result.split('\n')
    label_line = next((ln for ln in lines if "comment label:" in ln.lower()), None)
    reason_line = next((ln for ln in lines if "comment reason:" in ln.lower()), None)
    if label_line is None or reason_line is None:
        return None

    # Tolerate replies such as "**Comment label:** 1" or "Comment label: '1'".
    label_text = label_line.partition(":")[2].strip(" \t\r*`'\"")
    reason = reason_line.partition(":")[2].strip().lstrip("*").strip()
    return label_line, reason_line, label_text, reason


def get_sentiment_and_reason_openai(title, comment):
    """Ask the chat model for the sentiment of ``comment`` towards AI.

    Parameters:
        title: headline text substituted for ``{title_input}`` in ``prompt_template``.
        comment: comment text substituted for ``{comment_input}``.

    Returns:
        ``(sentiment, reason)`` where ``sentiment`` is -1, 0 or 1 and ``reason``
        is the model's explanation. On failure the sentinel -999 is returned:
        ``(-999, "Error parsing sentiment")`` when the reply carries a label
        outside -1/0/1, and ``(-999, "Failed after 3 retries")`` when all three
        attempts either raised or produced a reply without both expected lines.
    """
    prompt = prompt_template.format(title_input=title, comment_input=comment)
    max_attempts = 3

    for attempt in range(1, max_attempts + 1):
        try:
            response = client_openai.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": "You are an expert at analyzing text."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=150,
                temperature=0.2
            )
            # content is optional in the SDK response type; treat None as an empty reply
            result = (response.choices[0].message.content or "").strip()
        except Exception as e:
            # Only back off when another attempt will follow; sleeping before
            # giving up would just delay the caller by 1-2 minutes.
            if attempt < max_attempts:
                print(f"API call failed: {e}. Retrying in 1-2 minutes...")
                time.sleep(random.randint(60, 120))
            else:
                print(f"API call failed: {e}.")
            continue

        parsed = _parse_sentiment_reply(result)
        if parsed is None:
            # A malformed reply is not a rate-limit problem: ask again right away.
            print(f"Invalid response format (attempt {attempt} of {max_attempts})")
            continue

        comment_sentiment_line, comment_reason_line, comment_sentiment_str, comment_reason = parsed

        if comment_sentiment_str not in ('-1', '0', '1', '+1'):
            print(f"Invalid sentiment for comment: {comment}")
            print(f"Raw comment sentiment line: {comment_sentiment_line}")
            print(f"Raw comment reason line: {comment_reason_line}")
            print(f"Warning: Could not parse sentiment: Invalid sentiment value: {comment_sentiment_line}")
            return -999, "Error parsing sentiment"

        return int(comment_sentiment_str), comment_reason

    return -999, "Failed after 3 retries"

In [None]:
"""Function to process story titles and save results"""

def _write_checkpoint(df, output_csv_file):
    """Write ``df`` to ``output_csv_file`` through a temporary file and os.replace.

    The target is only swapped in once the full CSV has been written, so an
    interrupted save does not leave a truncated results file behind.
    """
    tmp_path = f"{output_csv_file}.tmp"
    df.to_csv(tmp_path, index=False)
    os.replace(tmp_path, output_csv_file)


def process_titles(df, output_csv_file, save_interval=20):
    """Label every not-yet-labelled row of ``df`` and checkpoint the result to CSV.

    Parameters:
        df: DataFrame with ``title`` and ``comment_text`` columns. It is modified
            in place: the ``senti_comment_prompt2_1shot_gpt`` and
            ``reason_comment_prompt2_1shot_gpt`` columns are created if missing
            and filled row by row.
        output_csv_file: path of the CSV used both to resume from (values already
            stored there are merged into ``df`` by index) and to save to.
        save_interval: write a checkpoint every this many rows (counted by row
            position). A value of 0 or less disables intermediate checkpoints.

    Rows whose two result columns are both non-null are skipped. The CSV is
    written once more when the loop ends, including when it ends with an
    exception or interrupt, so completed rows are not lost.
    """
    sentiment_col = 'senti_comment_prompt2_1shot_gpt'
    reason_col = 'reason_comment_prompt2_1shot_gpt'

    for col in (sentiment_col, reason_col):
        if col not in df.columns:
            df[col] = None
    # An all-empty reason column read back from CSV is float64; make it object
    # so that string reasons can be stored in it.
    df[reason_col] = df[reason_col].astype(object)

    # Resume from existing file, if available
    try:
        existing_df = pd.read_csv(output_csv_file)
        df.update(existing_df)  # Merge saved progress
        print("Resuming from the last saved point...")
    except (FileNotFoundError, pd.errors.EmptyDataError):
        print("No existing output file found. Starting fresh...")

    pbar = tqdm(total=len(df), desc="Processing story titles")

    try:
        # Count rows by position: index labels need not be 0..n-1 integers.
        for position, (i, row) in enumerate(df.iterrows(), start=1):
            # Skip rows already processed
            if pd.notna(row[sentiment_col]) and pd.notna(row[reason_col]):
                pbar.update(1)
                continue

            # Get sentiment and reason for the comment under this headline
            comment_sentiment, comment_reason = get_sentiment_and_reason_openai(row['title'], row['comment_text'])
            df.at[i, sentiment_col] = comment_sentiment
            df.at[i, reason_col] = comment_reason
            print(f"comment sentiment: {comment_sentiment}")
            print(f"comment reason: {comment_reason}")
            pbar.update(1)

            # Save progress at intervals
            if save_interval and save_interval > 0 and position % save_interval == 0:
                _write_checkpoint(df, output_csv_file)
                print(f"\nSaved progress at row {position}")
    finally:
        pbar.close()
        # Final save (also reached on errors so finished rows are persisted)
        _write_checkpoint(df, output_csv_file)

    print(f"Final result saved to {output_csv_file}")


### Load the CSV file with the HN AI comments and run the classification

In [None]:
# The results file doubles as the input dataset: read it, then label it in place.
df = pd.read_csv(output_csv_file)

# Make sure both result columns exist before processing starts
for result_column in ('senti_comment_prompt2_1shot_gpt', 'reason_comment_prompt2_1shot_gpt'):
    if result_column not in df.columns:
        df[result_column] = None

# Label every row that has no stored result yet, checkpointing to the same file
process_titles(df, output_csv_file)

print(f'Sentiment analysis completed and saved to {output_csv_file}')

# Evaluation

In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix

In [None]:
# Re-read the saved results from disk (rebinds `df`), so the metrics below are
# computed on what was actually persisted to output_csv_file.
df = pd.read_csv(output_csv_file)

In [None]:
# Score the GPT labels against the `comment_consensus` column (passed as y_true).
# No rows are filtered here: any -999 failure sentinel written by the labelling
# step is scored as a label of its own.
gold_labels = df['comment_consensus']
gpt_labels = df['senti_comment_prompt2_1shot_gpt']

accuracy = accuracy_score(gold_labels, gpt_labels)
print(f"Accuracy: {accuracy}")

micro_f1 = f1_score(gold_labels, gpt_labels, average='micro')
print(f"micro f1: {micro_f1}")

macro_f1 = f1_score(gold_labels, gpt_labels, average='macro')
print(f"macro f1: {macro_f1}")

weighted_f1 = f1_score(gold_labels, gpt_labels, average='weighted')
print(f"weighted f1: {weighted_f1}")

Accuracy: 0.7610389610389611
micro f1: 0.7610389610389611
macro f1: 0.7317184802508274
weighted f1: 0.7632091050900491
