In [None]:
%pip install transformers

In [None]:
# Build the text-classification pipeline from the
# SamLowe/roberta-base-go_emotions checkpoint.

from transformers import pipeline

# top_k=None asks for a score for every label rather than only the best one;
# truncation=True lets the tokenizer cut inputs that are too long for the model.
classifier = pipeline(
    task="text-classification",
    model="SamLowe/roberta-base-go_emotions",
    top_k=None,
    truncation=True,
)

In [None]:
import kagglehub
import pandas as pd
import os

# Fetch the Kaggle dataset; the returned value is used below as the directory
# that contains the dataset's CSV file.
print("Downloading dataset...")
path = kagglehub.dataset_download(
    "neelgajare/liberals-vs-conservatives-on-reddit-13000-posts"
)

# Read the posts into a DataFrame.
csv_path = os.path.join(path, "file_name.csv")
df = pd.read_csv(filepath_or_buffer=csv_path)

In [None]:
# Helpers: pick the non-neutral sentiment labels whose score meets a threshold
# (falling back to 'neutral'), and attach them to the DataFrame

def get_sentiment(row_results, signal_threshold=0.10):
  """Return the non-neutral labels whose score meets the threshold.

  Args:
    row_results: Iterable of ``{'label': ..., 'score': ...}`` dictionaries
      describing one input text. The entries may be in any order.
    signal_threshold: Minimum score (inclusive) a label needs in order to be
      kept. Defaults to 0.10.

  Returns:
    A list of labels ordered from highest to lowest score. When no
    non-neutral label reaches the threshold (or the input is empty) the
    result is ``['neutral']``.
  """
  # Inspect every entry instead of stopping at the first low score, so a
  # qualifying label is never dropped just because the input is not sorted
  # by descending score. 'neutral' is skipped so it cannot crowd out signal.
  qualifying = [
      sentiment for sentiment in row_results
      if sentiment['label'] != 'neutral'
      and sentiment['score'] >= signal_threshold
  ]

  # Strongest first. list.sort is stable (also with reverse=True), so for
  # input that is already sorted by descending score the order is unchanged.
  qualifying.sort(key=lambda sentiment: sentiment['score'], reverse=True)

  top_sentiments = [sentiment['label'] for sentiment in qualifying]

  # If no sentiments met the threshold, fall back to neutral.
  return top_sentiments or ['neutral']


def add_sentiment(df):
  """Classify every row of ``df`` and store the labels in ``df['Sentiment']``.

  Missing values in the ``Title`` and ``Text`` columns are replaced with
  empty strings in ``df`` itself. The two columns are then joined with a
  single space and the resulting strings are handed to the module-level
  ``classifier``. Each per-row result is reduced to a list of labels by
  ``get_sentiment``.

  The frame is modified in place; nothing is returned.
  """
  print("Filling empty strings in NAN data...")
  for column in ('Title', 'Text'):
    df[column] = df[column].fillna("")

  print("Preparing Title and Text inputs...")
  combined = df["Title"] + " " + df["Text"]
  texts = combined.tolist()

  print(f"Running classifier on {len(texts)} items...")
  predictions = classifier(texts)

  print("Cleaning results...")
  df['Sentiment'] = list(map(get_sentiment, predictions))


In [None]:
# Annotate the Liberal vs Conservative posts: add_sentiment writes a
# 'Sentiment' column (a list of labels per row) into df in place.

add_sentiment(df)

# Save the annotated frame, leaving the row index out of the file.
df.to_csv(
    "sentiment_results.csv",
    index=False,
)

In [None]:
# Count how many posts carry each label. Only the 'Sentiment' column is
# exploded (one row per label), so the other columns are not duplicated
# for every label of a multi-label post.
sentiment_counts = df['Sentiment'].explode().value_counts()

# Label the figure so it can be read on its own when the notebook is skimmed.
ax = sentiment_counts.plot(kind='bar')
ax.set(
    title="Sentiment labels across posts",
    xlabel="Sentiment",
    ylabel="Number of posts",
)
ax