In [None]:
import pandas as pd
import json
import os
import openai
from dotenv import load_dotenv
from sklearn.metrics import classification_report, f1_score, precision_score, recall_score

In [46]:
load_dotenv()

key = os.getenv("OPENAI_API_KEY")
openai.api_key = key


In [62]:
def role_based_knowledge_generation():

    prompt = (
        "You are an expert in public health sentiment analysis. Explain what sentiment means "
        "and list common linguistic cues in vaccine and mask hesitancy discourse (e.g., sarcasm, "
        "negative adjectives, strong expressions like 'hate' or 'disgust'). Provide examples."
    )
    response = openai.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.0
    )
    
    return response.choices[0].message.content

def sentiment_detection(sentence):

    prompt = f"Does the following sentence express a sentiment regarding vaccines or masks? Answer Yes or No.\nSentence: \"{sentence}\""
    response = openai.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.0
    )
    return response.choices[0].message.content

def sentiment_classification(sentence):

    prompt = f"Classify the sentiment of the following sentence as Positive, Negative, or Neutral:\n\"{sentence}\""
    response = openai.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.0
    )
    return response.choices[0].message.content

def key_phrase_extraction(sentence):

    prompt = f"Identify the key phrases or words that indicate the sentiment in the sentence \"{sentence}\" and briefly explain why."
    response = openai.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.0
    )
    return response.choices[0].message.content

def sentiment_summary(sentence):

    prompt = f"Generate a concise summary of the sentiment expressed in the sentence \"{sentence}\""
    response = openai.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.0
    )
    return response.choices[0].message.content

def rbic_sentiment_pipeline(sentence):

    knowledge = role_based_knowledge_generation()
    detection = sentiment_detection(sentence)
    classification = sentiment_classification(sentence)
    key_phrases = key_phrase_extraction(sentence)
    summary = sentiment_summary(sentence)
    

    label_map = {
    "Positive": 1,
    "Neutral": 0,
    "Negative": -1
        }

    # Convert classification string to numeric label
    numeric_label = label_map.get(classification.strip(), None)    

    output = {
        "knowledge_base": knowledge.strip(),
        "sentiment_detected": detection.strip(),
        "sentiment_label": numeric_label,
        "key_phrases": key_phrases.strip(),
        "summary": summary.strip()
    }
    return output

In [None]:
if __name__ == "__main__":
    
    df = pd.read_csv("posts_to_label.csv")  # Adjust filename as needed

    df = df[df['selftext'].notnull()]
    
    
    true_labels = df['manual_label'].astype(int)
    predicted_labels = df['predicted_sentiment'].astype(int)

    
    print("Classification Report:\n")
    print(classification_report(true_labels, predicted_labels, labels=[-1, 0, 1], target_names=["Negative", "Neutral", "Positive"]))

    
    f1 = f1_score(true_labels, predicted_labels, average='weighted')
    print(f"Weighted F1 Score: {f1:.4f}")
    

IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer