In [1]:
# --- 1. Setup and Imports ---
import pandas as pd
import numpy as np
import time
import random
import os
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_curve, average_precision_score, accuracy_score
from sklearn.preprocessing import label_binarize

from nltk.sentiment.vader import SentimentIntensityAnalyzer
from textblob import TextBlob
import nltk
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\lingyi\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [2]:
# --- 2. Load 1000 Labeled Evaluation Data ---
print("Loading manually labeled 1000-row dataset...")
base_path = os.path.abspath(os.path.join(os.getcwd(), '..'))
data_path = os.path.join(base_path, "data")

# Load labeled polarity & subjectivity
polarity_file = os.path.join(data_path, "1000_GT_polarity_labels.csv")
subjectivity_file = os.path.join(data_path, "1000_GT_subj_labels.csv")

df_polarity = pd.read_csv(polarity_file)
df_subjectivity = pd.read_csv(subjectivity_file)

# Clean column names
pol_cols = [col.strip().replace(" ", "_").replace(".", "") for col in df_polarity.columns]
subj_cols = [col.strip().replace(" ", "_").replace(".", "") for col in df_subjectivity.columns]
df_polarity.columns = pol_cols
df_subjectivity.columns = subj_cols

# Merge both by cleaned_text
df_eval = pd.merge(
    df_polarity[["cleaned_text", "GT_Polarity_Label"]],
    df_subjectivity[["cleaned_text", "GT_Subjective_Label"]],
    on="cleaned_text", how="inner"
)
print(f"Evaluation dataset ready: {df_eval.shape[0]} rows")

Loading manually labeled 1000-row dataset...
Evaluation dataset ready: 1092 rows


In [3]:
# --- Performance Benchmarking for Q4f ---
print("\n=== Training Time and Inference Benchmarks ===")
benchmark_results = []



=== Training Time and Inference Benchmarks ===


In [4]:
# --- 3. Subjectivity Classification (Random Forest & Naive Bayes) ---
print("\n=== Subjectivity Classification ===")
X_subj = df_eval["cleaned_text"]
y_subj = df_eval["GT_Subjective_Label"]
X_train_subj, X_test_subj, y_train_subj, y_test_subj = train_test_split(X_subj, y_subj, test_size=0.2, random_state=42)

# Random Forest Subjectivity
pipeline_subj_rf = Pipeline([
    ('vectorizer', CountVectorizer()),
    ('clf', RandomForestClassifier(random_state=42))
])
t0 = time.time()
pipeline_subj_rf.fit(X_train_subj, y_train_subj)
t1 = time.time()
y_pred_subj_rf = pipeline_subj_rf.predict(X_test_subj)
t2 = time.time()
benchmark_results.append(["Random Forest (Subjective)", round(t1 - t0, 2), round(t2 - t1, 2), accuracy_score(y_test_subj, y_pred_subj_rf)])

print("Random Forest Subjectivity Report:")
print(classification_report(y_test_subj, y_pred_subj_rf, target_names=["Objective", "Subjective"]))

# Naive Bayes Subjectivity
pipeline_subj_nb = Pipeline([
    ('vectorizer', CountVectorizer()),
    ('clf', MultinomialNB())
])
t0 = time.time()
pipeline_subj_nb.fit(X_train_subj, y_train_subj)
t1 = time.time()
y_pred_subj_nb = pipeline_subj_nb.predict(X_test_subj)
t2 = time.time()
benchmark_results.append(["Naive Bayes (Subjective)", round(t1 - t0, 2), round(t2 - t1, 2), accuracy_score(y_test_subj, y_pred_subj_nb)])

print("Naive Bayes Subjectivity Report:")
print(classification_report(y_test_subj, y_pred_subj_nb, target_names=["Objective", "Subjective"]))


=== Subjectivity Classification ===
Random Forest Subjectivity Report:
              precision    recall  f1-score   support

   Objective       0.78      0.37      0.50       102
  Subjective       0.62      0.91      0.74       117

    accuracy                           0.66       219
   macro avg       0.70      0.64      0.62       219
weighted avg       0.69      0.66      0.63       219

Naive Bayes Subjectivity Report:
              precision    recall  f1-score   support

   Objective       0.76      0.38      0.51       102
  Subjective       0.62      0.90      0.74       117

    accuracy                           0.66       219
   macro avg       0.69      0.64      0.62       219
weighted avg       0.69      0.66      0.63       219



In [5]:

# --- 4. Polarity Classification (Random Forest & Naive Bayes) ---
print("\n=== Polarity Classification ===")
X_pol = df_eval["cleaned_text"]
y_pol = df_eval["GT_Polarity_Label"]
X_train_pol, X_test_pol, y_train_pol, y_test_pol = train_test_split(X_pol, y_pol, test_size=0.2, stratify=y_pol, random_state=42)

# Naive Bayes Polarity
pipeline_nb = Pipeline([
    ('vectorizer', CountVectorizer()),
    ('clf', MultinomialNB())
])
t0 = time.time()
pipeline_nb.fit(X_train_pol, y_train_pol)
t1 = time.time()
y_pred_nb = pipeline_nb.predict(X_test_pol)
t2 = time.time()
benchmark_results.append(["Naive Bayes (Polarity)", round(t1 - t0, 2), round(t2 - t1, 2), accuracy_score(y_test_pol, y_pred_nb)])

# Random Forest Polarity
pipeline_rf = Pipeline([
    ('vectorizer', CountVectorizer()),
    ('clf', RandomForestClassifier(random_state=42))
])
t0 = time.time()
pipeline_rf.fit(X_train_pol, y_train_pol)
t1 = time.time()
y_pred_rf = pipeline_rf.predict(X_test_pol)
t2 = time.time()
benchmark_results.append(["Random Forest (Polarity)", round(t1 - t0, 2), round(t2 - t1, 2), accuracy_score(y_test_pol, y_pred_rf)])

# VADER
sia = SentimentIntensityAnalyzer()
t0 = time.time()
def vader_predict(text):
    score = sia.polarity_scores(text)["compound"]
    return 1 if score >= 0.05 else 0 if score <= -0.05 else 2
y_pred_vader = df_eval["cleaned_text"].apply(vader_predict)
t1 = time.time()
benchmark_results.append(["VADER", 0.00, round(t1 - t0, 2), accuracy_score(y_pol, y_pred_vader)])

# TextBlob
from textblob import TextBlob
t0 = time.time()
def textblob_predict(text):
    polarity = TextBlob(text).sentiment.polarity
    return 1 if polarity > 0.1 else 0 if polarity < -0.1 else 2
y_pred_blob = df_eval["cleaned_text"].apply(textblob_predict)
t1 = time.time()
benchmark_results.append(["TextBlob", 0.00, round(t1 - t0, 2), accuracy_score(y_pol, y_pred_blob)])



=== Polarity Classification ===


In [6]:

# --- Summary Table ---
benchmark_df = pd.DataFrame(benchmark_results, columns=["Model", "Train Time (s)", "Inference Time (s)", "Accuracy"])
print("\n=== Performance Benchmark Table (Q4f) ===")
print(benchmark_df.to_string(index=False))



=== Performance Benchmark Table (Q4f) ===
                     Model  Train Time (s)  Inference Time (s)  Accuracy
Random Forest (Subjective)            1.00                0.02  0.657534
  Naive Bayes (Subjective)            0.02                0.01  0.657534
    Naive Bayes (Polarity)            0.03                0.01  0.561644
  Random Forest (Polarity)            1.14                0.02  0.589041
                     VADER            0.00                0.27  0.542125
                  TextBlob            0.00                0.28  0.489011
