Lets use MAMS dataset

In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import os

try:
    # Running as normal Python script inside src/
    this_file = os.path.abspath(__file__)
    src_root = os.path.dirname(this_file)                        # EMOTION-PRED/src
    project_root = os.path.dirname(src_root)                    # EMOTION-PRED/
except NameError:
    # Running inside Jupyter (likely src/notebooks or src/)
    cwd = os.getcwd()

    # If running inside src/notebooks â†’ go up one level
    if cwd.endswith("notebooks"):
        src_root = os.path.abspath(os.path.join(cwd, ".."))
        project_root = os.path.dirname(src_root)
    else:
        # Running from project root directly
        project_root = cwd
        src_root = os.path.join(project_root, "src")

# Final unified paths
results_root = os.path.join(src_root, "results")
data_root = os.path.join(src_root, "data")
print(f"ðŸ“‚ Project root: {project_root}"
      f"\nðŸ“‚ Source root: {src_root}"
      f"\nðŸ“‚ Results root: {results_root}"
      f"\nðŸ“‚ Data root: {data_root}")

ðŸ“‚ Project root: /Users/hd/Desktop/EMOTION-PRED
ðŸ“‚ Source root: /Users/hd/Desktop/EMOTION-PRED/src
ðŸ“‚ Results root: /Users/hd/Desktop/EMOTION-PRED/src/results
ðŸ“‚ Data root: /Users/hd/Desktop/EMOTION-PRED/src/data


In [3]:
from datasets import load_dataset

# load the dataset
ds = load_dataset("NEUDM/mams")

# print dataset info
print(ds)

Repo card metadata block was not found. Setting CardData to empty.


DatasetDict({
    train: Dataset({
        features: ['task_type', 'dataset', 'input', 'output', 'situation', 'label', 'extra', 'instruction'],
        num_rows: 7446
    })
    validation: Dataset({
        features: ['task_type', 'dataset', 'input', 'output', 'situation', 'label', 'extra', 'instruction'],
        num_rows: 900
    })
    test: Dataset({
        features: ['task_type', 'dataset', 'input', 'output', 'situation', 'label', 'extra', 'instruction'],
        num_rows: 900
    })
})


In [4]:
train_ds = ds["train"]
train_ds = train_ds.to_pandas()


# inspect row and column count
print(f"Number of rows: {train_ds.shape[0]}")
print(f"Number of columns: {train_ds.shape[1]}")

# inspect column names
print("Column names:", train_ds.columns.tolist())

Number of rows: 7446
Number of columns: 8
Column names: ['task_type', 'dataset', 'input', 'output', 'situation', 'label', 'extra', 'instruction']


In [5]:
# Columns to keep
keep_cols = ["input", "output"]
clean_df = train_ds[keep_cols].copy()


In [6]:
import ast
import pandas as pd

# Take 50 rows
df_head = clean_df["input"].head(50).to_frame()

# 1) Convert "['text']" â†’ "text"
def clean_list_string(x):
    try:
        parsed = ast.literal_eval(x)   # safely parse list-string
        if isinstance(parsed, list) and len(parsed) > 0:
            return parsed[0]
        return x
    except:
        return x

df_head["sentence"] = df_head["input"].apply(clean_list_string)

# 2) Add empty aspect column (required by pipeline)
df_head["aspect_term"] = ""

# 3) Save in the correct 2-column format
input_csv = os.path.join(data_root, "temp_50.csv")
df_head[["sentence", "aspect_term"]].to_csv(input_csv, index=False)


In [7]:
import sys, os

# Add src/ to Python path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

In [8]:
from emotion_pipeline import run_full_emotion_pipeline

run_full_emotion_pipeline(
    input_csv=input_csv,
    dataset_name="sample50",
    results_root=results_root,
)

Project root: /Users/hd/Desktop/EMOTION-PRED
Source root: /Users/hd/Desktop/EMOTION-PRED/src
Results root: /Users/hd/Desktop/EMOTION-PRED/src/results

Starting full emotion pipeline

Saving outputs to: /Users/hd/Desktop/EMOTION-PRED/src/results/emotion_sample50

Annotating with: j-hartmann/emotion-english-distilroberta-base
  Model type: roberta | arch=['RobertaForSequenceClassification']
   Saved â†’ /Users/hd/Desktop/EMOTION-PRED/src/results/emotion_sample50/j_hartmann_emotion_english_distilroberta_base_annotated.csv

Annotating with: j-hartmann/emotion-english-roberta-large
  Model type: roberta | arch=['RobertaForSequenceClassification']
   Saved â†’ /Users/hd/Desktop/EMOTION-PRED/src/results/emotion_sample50/j_hartmann_emotion_english_roberta_large_annotated.csv

Annotating with: nateraw/bert-base-uncased-emotion
  Model type: bert | arch=['BertForSequenceClassification']
   Saved â†’ /Users/hd/Desktop/EMOTION-PRED/src/results/emotion_sample50/nateraw_bert_base_uncased_emotion_ann

In [9]:
import os
import pandas as pd

RESULT_DIR = os.path.join(results_root, "emotion_sample50")

csv_files = [f for f in os.listdir(RESULT_DIR) if f.endswith(".csv")]

MODEL_NAME_MAP = {
    "j_hartmann_emotion_english_roberta_large": "roberta_large",
    "nateraw_bert_base_uncased_emotion": "bert_base_emotion",
    "j_hartmann_emotion_english_distilroberta_base": "roberta_emotion",
    "joeddav_distilbert_base_uncased_go_emotions_student": "go_emotions",
    "cardiffnlp_twitter_roberta_base_emotion": "tweet_eval",
    "mrm8488_t5_base_finetuned_emotion": "t5_emotion",
    "SamLowe_roberta_base_go_emotions": "go_emotions_roberta",
}

merged = pd.DataFrame()

for f in csv_files:
    full_path = os.path.join(RESULT_DIR, f)
    df = pd.read_csv(full_path)

    base = f.replace("_annotated.csv", "")
    colname = MODEL_NAME_MAP.get(base, base)   # fallback = raw name if missing

    merged[colname] = df["emotion_auto"]

pd.set_option("display.max_colwidth", None)
print(merged.head(20))

   go_emotions_roberta roberta_large bert_base_emotion roberta_emotion  \
0              neutral       neutral               joy         neutral   
1              neutral       neutral               joy         neutral   
2              neutral         anger             anger         neutral   
3              neutral       neutral               joy         neutral   
4              neutral         anger             anger         neutral   
5              neutral       neutral               joy         neutral   
6              neutral      surprise               joy        surprise   
7              neutral       neutral               joy         neutral   
8              neutral         anger             anger            fear   
9              neutral       neutral             anger         neutral   
10             neutral       neutral              love         neutral   
11             neutral       neutral             anger         neutral   
12             neutral       neutral  