Sentiment Analysis by PyABSA

In [1]:
# Import libraries
import pandas as pd  # Data manipulation and saving results to CSV
import sys
sys.path.append('../src')
# Import feature engineering functions
from preprocess import clean_text

In [2]:
# load the dataset
df = pd.read_csv('../data/reddit_raw.csv')
df.head()

Unnamed: 0,subreddit,title,text,url,created,keyword,score
0,ukraine,Volunteering in civilian roles,"Hi,\n\nI’m an American. When the war broke out...",https://www.reddit.com/r/ukraine/comments/1m3v...,2025-07-19 14:54:55,refugee,62
1,ukraine,<3,As a Polish person I just came here to tell yo...,https://www.reddit.com/r/ukraine/comments/1lxf...,2025-07-11 21:03:10,refugee,544
2,ukraine,The Angry Ukrainian Syndrome: Injustice and St...,I found this useful for understanding my own b...,https://www.reddit.com/r/ukraine/comments/1lio...,2025-06-23 20:38:06,refugee,230
3,ukraine,I wrote this letter to my representatives in c...,**Find your representatives here:** [**https:/...,https://www.reddit.com/r/ukraine/comments/1lfq...,2025-06-20 02:45:54,refugee,125
4,ukraine,Looking forward - would love to hear ppl's tho...,"Sorry for the vague question, but I only have ...",https://www.reddit.com/r/ukraine/comments/1kv8...,2025-05-25 19:56:51,refugee,13


In [3]:
# Preprocess the text data
df['text'] = df['text'].apply(clean_text)
df['title'] = df['title'].apply(clean_text)
df.head()

Unnamed: 0,subreddit,title,text,url,created,keyword,score
0,ukraine,Volunteering in civilian roles,"Hi, I’m an American. When the war broke out, I...",https://www.reddit.com/r/ukraine/comments/1m3v...,2025-07-19 14:54:55,refugee,62
1,ukraine,<3,As a Polish person I just came here to tell yo...,https://www.reddit.com/r/ukraine/comments/1lxf...,2025-07-11 21:03:10,refugee,544
2,ukraine,The Angry Ukrainian Syndrome: Injustice and St...,I found this useful for understanding my own b...,https://www.reddit.com/r/ukraine/comments/1lio...,2025-06-23 20:38:06,refugee,230
3,ukraine,I wrote this letter to my representatives in c...,Find your representatives here: Subject: Urgen...,https://www.reddit.com/r/ukraine/comments/1lfq...,2025-06-20 02:45:54,refugee,125
4,ukraine,Looking forward - would love to hear ppl's tho...,"Sorry for the vague question, but I only have ...",https://www.reddit.com/r/ukraine/comments/1kv8...,2025-05-25 19:56:51,refugee,13


Supervised Sentiment Classifier with PyABSA

In [None]:
from datasets import load_dataset
import transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
from transformers import Trainer, TrainingArguments
from transformers import DataCollatorWithPadding
import torch

# 1. Load a dataset
dataset = load_dataset("tweet_eval", "sentiment")

# 2. Tokenize
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

def tokenize_function(example):
    return tokenizer(example["text"], truncation=True, padding =True)

tokenized_datasets = dataset.map(tokenize_function, batched=True)

# 3. Load model
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")

# 4. Train
training_args = TrainingArguments(
    output_dir="./results",
    eval_steps=500,  # evaluate every 500 steps
    eval_strategy="steps",
    num_train_epochs=3,
    per_device_train_batch_size=16,
)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    data_collator=data_collator
)

trainer.train()


Map: 100%|██████████| 12284/12284 [00:00<00:00, 47047.61 examples/s]


Step,Training Loss,Validation Loss
500,0.2721,0.434729


In [None]:
sentiment_pipe = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
sentiment_pipe(["I found this Reddit post fantastically discouraging!"])

In [None]:
# Example: sentiment prediction on your Reddit titles or texts
#df['predicted_sentiment'] = df['text'].apply(lambda x: sentiment_pipe(x)[0]['label'])
#df['text','predicted_sentiment'].head()
# Save the results to a CSV file
#df[['title','text','subreddit', 'created', 'keyword', 'score', 'sentiment_pyabsa']].to_csv('../data/reddit_sentiment_predicted.csv', index=False)  
