In [None]:
%pip install transformers

In [None]:
# Load classifier using roBERTa

from transformers import pipeline
from google.colab import drive

# Mount Google Drive so the model directory stored there is readable from Colab.
drive.mount('/content/drive')

# Directory used for both the model and the tokenizer below.
model_path = "/content/drive/My Drive/RedditSentimentAnalysis/my_emotion_model"

# Build the text-classification pipeline.
#   truncation / max_length: inputs longer than 512 tokens are truncated.
#   top_k=None: return a score for every label instead of only the best one.
#     (The keyword is `top_k`; the previous spelling `topk` is not a pipeline
#     option and was passed along with the tokenizer keyword arguments.)
emotion_classifier = pipeline(
    "text-classification",
    model=model_path,
    tokenizer=model_path,
    truncation=True,
    max_length=512,
    top_k=None
)

In [None]:
import kagglehub
import pandas as pd
import os

# Fetch the Kaggle dataset through kagglehub; the returned `path` is used below
# as the directory that contains the CSV file.
print("Downloading dataset...")
path = kagglehub.dataset_download("neelgajare/liberals-vs-conservatives-on-reddit-13000-posts")

# NOTE(review): "file_name.csv" reads like a placeholder but is used verbatim —
# confirm it matches the file actually shipped in the dataset.
csv_path = os.path.join(path, "file_name.csv")
# Load the posts; later cells read the 'Title' and 'Text' columns of this frame.
df = pd.read_csv(csv_path)

In [None]:
# Gets list of sentiments that are above threshold

def get_sentiment(row_results, signal_threshold=0.10, neutral_label='neutral'):
  """Return the non-neutral labels whose score meets the threshold.

  The result does not depend on the order of ``row_results``: every entry is
  inspected, and the surviving labels are returned strongest-first. For input
  that is already sorted by descending score this matches the previous
  early-exit behaviour exactly.

  Args:
    row_results: Iterable of ``{'label': ..., 'score': ...}`` dictionaries
      describing one classified text.
    signal_threshold: Minimum score (inclusive) a label needs to be kept.
    neutral_label: Label that is never reported as a signal on its own; it is
      only returned as the fallback when nothing else qualifies.

  Returns:
    A list of labels ordered by descending score, or ``[neutral_label]`` when
    no label reached the threshold.
  """
  # Keep every qualifying entry instead of stopping at the first weak score,
  # so an unsorted input cannot hide a strong label behind a weak one.
  signals = [
      sentiment for sentiment in row_results
      if sentiment['label'] != neutral_label
      and sentiment['score'] >= signal_threshold
  ]

  # Stable sort: ties keep their input order, sorted input is left untouched.
  signals.sort(key=lambda sentiment: sentiment['score'], reverse=True)

  top_sentiments = [sentiment['label'] for sentiment in signals]

  # If no sentiments met the threshold, fall back to neutral.
  return top_sentiments if top_sentiments else [neutral_label]


def add_sentiment(df):
  """Classify every post in ``df`` and store the result in a 'Sentiment' column.

  The frame is modified in place: missing 'Title' and 'Text' values are
  replaced by empty strings, the two columns are joined with a single space,
  the joined texts are sent to ``emotion_classifier`` in batches of 32, and
  each result is reduced with ``get_sentiment``. Nothing is returned.
  """
  print("Filling empty strings in NAN data...")
  for column in ('Title', 'Text'):
    df[column] = df[column].fillna("")

  print("Preparing Title and Text inputs...")
  combined = df["Title"] + " " + df["Text"]
  texts = combined.tolist()

  print(f"Running classifier on {len(texts)} items...")
  # top_k=None requests the scores of all labels for each text.
  raw_results = emotion_classifier(texts, batch_size=32, top_k=None)

  print("Cleaning results...")
  df['Sentiment'] = list(map(get_sentiment, raw_results))


In [None]:
# Add sentiment column to Liberal vs Conservative Dataset.
# add_sentiment modifies `df` in place and returns nothing, so `df` itself is saved.

add_sentiment(df)
# Write the annotated posts to disk without the index column.
# NOTE(review): 'Sentiment' holds Python lists, which to_csv presumably writes
# as their string form — confirm any later reader parses them back.
df.to_csv("sentiment_results.csv", index=False)

In [None]:
# Emotion names, listed in the index order assumed for generic "LABEL_<i>"
# classifier outputs — TODO confirm this order against the model's own config.
GO_EMOTIONS_LABELS = [
    "admiration", "amusement", "anger", "annoyance", "approval", "caring",
    "confusion", "curiosity", "desire", "disappointment", "disapproval",
    "disgust", "embarrassment", "excitement", "fear", "gratitude", "grief",
    "joy", "love", "nervousness", "optimism", "pride", "realization",
    "relief", "remorse", "sadness", "surprise", "neutral"
]

# "LABEL_0" -> "admiration", "LABEL_1" -> "amusement", ...
label_map = {f"LABEL_{i}": label for i, label in enumerate(GO_EMOTIONS_LABELS)}


# One row per (post, sentiment) pair so each label is counted individually.
df_exploded = df.explode('Sentiment')
# Translate "LABEL_<i>" ids to names, but keep any value that is not a key of
# label_map unchanged. A plain .map(label_map) would turn those values into
# NaN — including the literal 'neutral' fallback produced by get_sentiment —
# and value_counts() would then silently leave them out of the chart.
df_exploded['Sentiment_Name'] = df_exploded['Sentiment'].map(
    lambda label: label_map.get(label, label)
)
df_exploded['Sentiment_Name'].value_counts().plot(kind='bar', figsize=(12, 6), title="Reddit Sentiment Analysis")