In [2]:
import pandas as pd
import json
import os
import openai
from dotenv import load_dotenv
from sklearn.metrics import classification_report, f1_score, precision_score, recall_score

In [3]:
# Pull variables from a local .env file (if one exists) into the process environment
load_dotenv()

key = os.getenv("OPENAI_API_KEY")

# Fail fast: without a key every later API call would raise, and those errors
# are caught per row further down, so the run would quietly yield no predictions.
if not key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to the environment or to a .env file "
        "before running this notebook."
    )

openai.api_key = key


In [6]:
def role_based_knowledge_generation():
    """Ask GPT-4 for background knowledge about sentiment cues.

    Sends one fixed user message that puts the model in the role of a public
    health sentiment expert and asks it to explain sentiment and list common
    linguistic cues in vaccine and mask hesitancy discourse, with examples.

    Returns:
        The content of the first completion choice, exactly as returned.
    """
    instruction = (
        "You are an expert in public health sentiment analysis. "
        "Explain what sentiment means and list common linguistic cues in vaccine and mask hesitancy discourse "
        "(e.g., sarcasm, negative adjectives, strong expressions like 'hate' or 'disgust'). "
        "Provide examples."
    )

    # Single-turn request: only this one user message is sent
    chat_messages = [{"role": "user", "content": instruction}]
    completion = openai.chat.completions.create(
        model="gpt-4",
        messages=chat_messages,
        temperature=0.0,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content

def sentiment_detection(sentence):
    """Ask GPT-4 whether a sentence expresses sentiment about vaccines or masks.

    Args:
        sentence: Text to inspect; it is placed in the prompt inside double quotes.

    Returns:
        The model's reply text as-is. The prompt requests "Yes" or "No", but
        the reply is neither validated nor trimmed here.
    """
    request = {
        "model": "gpt-4",
        "messages": [
            {
                "role": "user",
                "content": f'Does the following sentence express a sentiment regarding vaccines or masks? Answer Yes or No.\nSentence: "{sentence}"',
            }
        ],
        "temperature": 0.0,
    }
    reply = openai.chat.completions.create(**request)
    return reply.choices[0].message.content

def sentiment_classification(sentence):
    """Ask GPT-4 to classify a sentence as Positive, Negative, or Neutral.

    Args:
        sentence: Text to classify; it is placed in the prompt inside double quotes.

    Returns:
        The model's reply text as-is. Nothing here forces the reply to be
        exactly one of the three label words.
    """
    user_message = {
        "role": "user",
        "content": f'Classify the sentiment of the following sentence as Positive, Negative, or Neutral:\n"{sentence}"',
    }
    completion = openai.chat.completions.create(
        messages=[user_message],
        model="gpt-4",
        temperature=0.0,
    )
    top_choice = completion.choices[0]
    return top_choice.message.content

def key_phrase_extraction(sentence):
    """Ask GPT-4 which phrases in a sentence signal its sentiment, and why.

    Args:
        sentence: Text to analyse; it is placed in the prompt inside double quotes.

    Returns:
        The model's free-text reply, exactly as returned.
    """
    question = f'Identify the key phrases or words that indicate the sentiment in the sentence "{sentence}" and briefly explain why.'
    conversation = [dict(role="user", content=question)]

    result = openai.chat.completions.create(
        model="gpt-4",
        temperature=0.0,
        messages=conversation,
    )
    return result.choices[0].message.content

def sentiment_summary(sentence):
    """Ask GPT-4 for a concise summary of the sentiment in a sentence.

    Args:
        sentence: Text to summarise; it is placed in the prompt inside double quotes.

    Returns:
        The model's free-text reply, exactly as returned.
    """
    summary_request = f'Generate a concise summary of the sentiment expressed in the sentence "{sentence}"'

    api_response = openai.chat.completions.create(
        temperature=0.0,
        model="gpt-4",
        messages=[{"role": "user", "content": summary_request}],
    )
    choices = api_response.choices
    return choices[0].message.content

def _parse_sentiment_label(reply):
    """Map a free-form classification reply onto 1, 0 or -1 (None if unclear).

    The reply is lower-cased and every non-letter is treated as a separator, so
    "Negative.", " positive\n" and "Sentiment: Neutral" are all recognised.
    A label is returned only when exactly one of the three label words occurs;
    replies naming none or several of them are ambiguous and give None.
    """
    if not isinstance(reply, str):
        return None

    label_map = {"positive": 1, "neutral": 0, "negative": -1}
    words = "".join(ch if ch.isalpha() else " " for ch in reply.lower()).split()
    found = {label_map[word] for word in words if word in label_map}
    return found.pop() if len(found) == 1 else None


def rbic_sentiment_pipeline(sentence, knowledge=None):
    """Run every sentiment prompt on one sentence and collect the results.

    Args:
        sentence: Text to analyse.
        knowledge: Optional background-knowledge text to reuse. The knowledge
            prompt does not depend on `sentence`, so a caller processing many
            sentences can generate it once and pass it in; when omitted it is
            generated on every call, as before.

    Returns:
        A dict with the keys:
        - "knowledge_base": background-knowledge text, stripped
        - "sentiment_detected": reply to the detection prompt, stripped
        - "sentiment_label": 1 (Positive), 0 (Neutral), -1 (Negative), or None
          when the classification reply cannot be mapped to a single label
        - "key_phrases": reply to the key-phrase prompt, stripped
        - "summary": reply to the summary prompt, stripped
    """
    if knowledge is None:
        knowledge = role_based_knowledge_generation()
    detection = sentiment_detection(sentence)
    key_phrases = key_phrase_extraction(sentence)
    classification = sentiment_classification(sentence)
    summary = sentiment_summary(sentence)

    # Convert the classification reply to a numeric label. The model does not
    # always answer with the bare word ("Negative." or "negative" are common),
    # so an exact dictionary lookup would lose those predictions.
    numeric_label = _parse_sentiment_label(classification)

    output = {
        "knowledge_base": knowledge.strip(),
        "sentiment_detected": detection.strip(),
        "sentiment_label": numeric_label,
        "key_phrases": key_phrases.strip(),
        "summary": summary.strip()
    }
    return output

In [None]:

# Read the annotated posts, then keep only rows that have body text to classify
df = pd.read_csv("annotated_data_set.csv")
has_selftext = df['selftext'].notnull()
df = df[has_selftext]

def get_rbic_prediction(text):
    """Return the numeric sentiment label for one post, or None on failure.

    Best-effort wrapper around `rbic_sentiment_pipeline`: any exception raised
    while processing `text` is printed and turned into None, so that one bad
    row does not abort a whole `DataFrame.apply` run.

    Args:
        text: The post body to classify.

    Returns:
        The pipeline's 'sentiment_label' value, or None if the pipeline raised.
    """
    try:
        result = rbic_sentiment_pipeline(text)
        return result['sentiment_label']  # Only get the numeric label
    except Exception as e:
        # str() keeps the handler itself from raising when a cell is not a
        # string (e.g. a number), which would otherwise defeat the fallback.
        print(f"Error processing text: {str(text)[:30]}... -> {e}")
        return None

print("Running RBIC sentiment pipeline...")
df['predicted_label'] = df['selftext'].apply(get_rbic_prediction)

# A prediction is None when the API call failed or the reply could not be
# mapped to a label. Those rows are excluded, but say how many, so that a
# shrinking evaluation set does not go unnoticed.
n_posts = len(df)
df = df[df['predicted_label'].notnull()]
n_failed = n_posts - len(df)
if n_failed:
    print(f"Warning: {n_failed} of {n_posts} posts had no usable prediction and were dropped")

# The None placeholders can leave the column as float/object; store plain integers
df = df.astype({'predicted_label': int})

# Save the results to a new CSV file while maintaining original order
df.to_csv("posts_with_sentiment.csv", index=False)
print("Sentiment scores saved to posts_with_sentiment.csv")


Running RBIC sentiment pipeline...
Sentiment scores saved to posts_with_sentiment.csv


In [11]:
# Load the CSV file with sentiment scores
df = pd.read_csv("posts_with_sentiment.csv")

# Score only posts that have both a manual annotation and a model prediction.
# Filling a missing manual_label with 0 would count un-annotated posts as
# ground-truth Neutral, which distorts the Neutral support and every metric.
# NOTE(review): this assumes a blank manual_label means "not annotated" rather
# than "Neutral" - confirm against the annotation guidelines.
has_both_labels = df['manual_label'].notnull() & df['predicted_label'].notnull()
n_unscored = int((~has_both_labels).sum())
if n_unscored:
    print(f"Excluding {n_unscored} of {len(df)} posts without a manual or predicted label")

# Cast both label columns to int so the two sides use the same label type
df = df[has_both_labels].astype({'manual_label': int, 'predicted_label': int})


# Print classification report for the three classes (Negative: -1, Neutral: 0, Positive: 1)
print("\nClassification Report:")
print(classification_report(df['manual_label'], df['predicted_label'], labels=[-1, 0, 1], target_names=['Negative', 'Neutral', 'Positive']))

# Calculate and print the weighted F1 score
f1 = f1_score(df['manual_label'], df['predicted_label'], average='weighted')
print(f"\nWeighted F1 Score: {f1:.4f}")





Classification Report:
              precision    recall  f1-score   support

    Negative       0.50      0.80      0.62         5
     Neutral       0.56      1.00      0.72         9
    Positive       0.50      0.08      0.14        12

    accuracy                           0.54        26
   macro avg       0.52      0.63      0.49        26
weighted avg       0.52      0.54      0.43        26


Weighted F1 Score: 0.4335
