In [5]:
import os

def resolve_roots_from_cwd(cwd):
    """Infer ``(project_root, src_root)`` from a working directory.

    Used as the fallback when ``__file__`` is not defined (e.g. inside a
    Jupyter kernel), where only the current working directory is known.

    Args:
        cwd: Path of the current working directory, as returned by
            ``os.getcwd()``.

    Returns:
        A ``(project_root, src_root)`` tuple of path strings.
    """
    if cwd.endswith("notebooks"):
        # Running inside src/notebooks -> src is one level up
        src = os.path.abspath(os.path.join(cwd, ".."))
        return os.path.dirname(src), src
    if os.path.basename(cwd) == "src":
        # Running inside src/ itself. Without this case the fallback below
        # would treat src/ as the project root and produce <project>/src/src.
        return os.path.dirname(cwd), cwd
    # Otherwise assume the kernel was started from the project root
    return cwd, os.path.join(cwd, "src")


try:
    # Running as normal Python script inside src/
    this_file = os.path.abspath(__file__)
    src_root = os.path.dirname(this_file)                        # EMOTION-PRED/src
    project_root = os.path.dirname(src_root)                    # EMOTION-PRED/
except NameError:
    # Running inside Jupyter: __file__ is undefined, so derive the roots
    # from the working directory (src/notebooks, src/, or the project root)
    cwd = os.getcwd()
    project_root, src_root = resolve_roots_from_cwd(cwd)

# Locations derived from src_root and shared by the rest of the notebook
results_root = os.path.join(src_root, "results")
data_root = os.path.join(src_root, "data", "MAMS-ACSA", "raw", "data_jsonl")

# One summary line per resolved location
print(
    f"ðŸ“‚ Project root: {project_root}"
    f"\nðŸ“‚ Source root: {src_root}"
    f"\nðŸ“‚ Results root: {results_root}"
    f"\nðŸ“‚ Data root: {data_root}"
)

# 3 - JSONL files (all live directly under data_root)
TRAIN_JSONL, VAL_JSONL, TEST_JSONL, SAMPLE_JSONL = (
    os.path.join(data_root, file_name)
    for file_name in ("train.jsonl", "val.jsonl", "test.jsonl", "sample.jsonl")
)
print("Using dataset directory:", data_root)



ðŸ“‚ Project root: /Users/hd/Desktop/EMOTION-PRED
ðŸ“‚ Source root: /Users/hd/Desktop/EMOTION-PRED/src
ðŸ“‚ Results root: /Users/hd/Desktop/EMOTION-PRED/src/results
ðŸ“‚ Data root: /Users/hd/Desktop/EMOTION-PRED/src/data/MAMS-ACSA/raw/data_jsonl
Using dataset directory: /Users/hd/Desktop/EMOTION-PRED/src/data/MAMS-ACSA/raw/data_jsonl


In [6]:
import json
import os
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, f1_score

# -----------------------------
# LOAD DATA (triple-level)
# -----------------------------
PATH = os.path.join(data_root, "cleaned_300.jsonl")

# Read inside a context manager so the file handle is closed as soon as
# parsing finishes. Blank lines (e.g. a trailing newline at EOF) are skipped
# because json.loads would raise on them.
with open(PATH, "r", encoding="utf-8") as jsonl_file:
    rows = [json.loads(line) for line in jsonl_file if line.strip()]

# Convert each triple to ONE training sample:
#   texts[i]  -> review text tagged with the triple's aspect and polarity
#   labels[i] -> the triple's emotion (the prediction target)
texts = []
labels = []

for row in rows:
    review = row["input"]
    for t in row["output"]:
        aspect = t["aspect"]
        polarity = t["polarity"]
        emotion = t["emotion"]

        # Create a composite text feature
        text = f"{review} [ASPECT={aspect}] [POLARITY={polarity}]"

        texts.append(text)
        labels.append(emotion)

# -----------------------------
# BUILD MODEL PIPELINE
# -----------------------------
# Two stages: TF-IDF over unigrams and bigrams (English stop words removed,
# vocabulary capped at 5000 features), then a logistic-regression classifier.
model = Pipeline(steps=[
    ("tfidf", TfidfVectorizer(stop_words="english", ngram_range=(1, 2), max_features=5000)),
    ("clf", LogisticRegression(max_iter=200)),
])

# -----------------------------
# TRAIN / TEST SPLIT
# -----------------------------
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples; the fixed seed keeps the partition repeatable.
# NOTE(review): the split is per sample, and triples built from the same review
# share that review's text, so one review may land on both sides - confirm
# this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    test_size=0.2,
    random_state=42,
)

# -----------------------------
# TRAIN MODEL
# -----------------------------
# Fits the vectorizer and the classifier on the training portion only
model.fit(X_train, y_train)

# -----------------------------
# EVALUATE
# -----------------------------
# Predict emotions for the held-out samples
pred = model.predict(X_test)

print("\n========================")
print("DUMMY BASELINE REPORT")
print("========================\n")
print(classification_report(y_test, pred, zero_division=0))

# zero_division=0 mirrors the report above: classes that are never predicted
# score 0 without raising UndefinedMetricWarning. The resulting numbers are the
# same as with the default setting, which also substitutes 0 but warns.
macro = f1_score(y_test, pred, average="macro", zero_division=0)
micro = f1_score(y_test, pred, average="micro", zero_division=0)

print("Macro-F1:", round(macro, 4))
print("Micro-F1:", round(micro, 4))


DUMMY BASELINE REPORT

                precision    recall  f1-score   support

    Admiration       0.40      0.17      0.24        12
     Annoyance       0.32      0.39      0.35        18
      Approval       0.33      0.09      0.14        11
     Confusion       0.00      0.00      0.00         4
Disappointment       0.14      0.09      0.11        11
   Disapproval       0.00      0.00      0.00         4
   Frustration       0.00      0.00      0.00         4
     Gratitude       0.00      0.00      0.00         2
     Impressed       0.00      0.00      0.00         2
   Indifferent       0.52      1.00      0.68        44
           Joy       0.00      0.00      0.00         4
        Relief       0.00      0.00      0.00         1
  Satisfaction       0.00      0.00      0.00         4
      Surprise       0.00      0.00      0.00         2

      accuracy                           0.45       123
     macro avg       0.12      0.12      0.11       123
  weighted avg       0