In [None]:
import ast
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datasets import load_dataset

# Apply seaborn's "whitegrid" style to every figure drawn after this point.
sns.set(style="whitegrid")

# -----------------------------------------
# 1. Load dataset
#    `load_dataset` is called with the "NEUDM/mams" identifier and the
#    "train" split is converted to a pandas DataFrame for the steps below.
# -----------------------------------------
ds = load_dataset("NEUDM/mams")
train = ds["train"].to_pandas()

# -----------------------------------------
# 2. Extract aspect list from output column
#    output = "[['food','positive'], ['service','negative']]"
# -----------------------------------------

def extract_aspects(output_str):
    """Return the aspect terms encoded in one ``output`` cell.

    Parameters
    ----------
    output_str : str
        Python-literal text of a list of ``[aspect, polarity]`` entries,
        e.g. ``"[['food','positive'], ['service','negative']]"``.

    Returns
    -------
    list
        The first element of every entry, in order. An empty list is
        returned when the text cannot be parsed, when it does not evaluate
        to a list, or when any entry is not a non-empty list/tuple.
    """
    try:
        parsed = ast.literal_eval(output_str)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # Only the errors literal_eval raises for bad input are treated as
        # "unparseable"; KeyboardInterrupt and friends still propagate.
        return []

    if not isinstance(parsed, list):
        return []

    # Reject rows whose entries are not sequences: indexing a bare string
    # such as 'food' would otherwise yield its first character as an "aspect".
    if not all(isinstance(entry, (list, tuple)) and len(entry) >= 1 for entry in parsed):
        return []

    return [entry[0] for entry in parsed]

# Parse every `output` cell into its list of aspect terms.
train["aspect_list"] = train["output"].apply(extract_aspects)

# -----------------------------------------
# 3. Count number of aspects per row
# -----------------------------------------
train["aspect_count"] = train["aspect_list"].apply(len)

# -----------------------------------------
# 4. Show distribution of aspect counts
# -----------------------------------------
print("=== Aspect Count Distribution ===")
print(train["aspect_count"].value_counts().sort_index())

# -----------------------------------------
# 5. Plot bar graph
# -----------------------------------------
fig, ax = plt.subplots(figsize=(6, 4))
# seaborn 0.13 deprecates `palette` without `hue`, so the x variable is also
# assigned to `hue`; `dodge=False` keeps one full-width bar per category.
sns.countplot(
    data=train,
    x="aspect_count",
    hue="aspect_count",
    palette="viridis",
    dodge=False,
    ax=ax,
)
# The hue legend only repeats the x tick labels; drop it if seaborn drew one.
aspect_legend = ax.get_legend()
if aspect_legend is not None:
    aspect_legend.remove()
ax.set_title("Number of Aspects per Row (MAMS Train Set)")
ax.set_xlabel("Aspect Count per Sentence")
ax.set_ylabel("Number of Rows")
fig.tight_layout()
plt.show()

In [None]:
from datasets import load_dataset
import pandas as pd
import ast
import matplotlib.pyplot as plt
import seaborn as sns

import warnings 
# Silence only FutureWarning (library deprecation notices) instead of every
# warning: a blanket "ignore" stays active for the rest of the kernel session
# and would also hide warnings that point at real problems.
warnings.filterwarnings("ignore", category=FutureWarning)
# Apply seaborn's "whitegrid" style to every figure drawn after this point.
sns.set(style="whitegrid")

# ------------------------------------------------
# 1. Load dataset
# ------------------------------------------------
ds = load_dataset("NEUDM/mams")
train = ds["train"].to_pandas()

# ------------------------------------------------
# 2. Extract polarities from output column
#    output = "[['food','positive'], ['service','negative']]"
# ------------------------------------------------

def extract_polarities(output_str):
    """Return the polarity labels encoded in one ``output`` cell.

    Parameters
    ----------
    output_str : str
        Python-literal text of a list of ``[aspect, polarity]`` entries,
        e.g. ``"[['food','positive'], ['service','negative']]"``.

    Returns
    -------
    list
        The second element of every entry, in order. An empty list is
        returned when the text cannot be parsed, when it does not evaluate
        to a list, or when any entry is not a list/tuple with at least two
        elements.
    """
    try:
        parsed = ast.literal_eval(output_str)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # Only the errors literal_eval raises for bad input are treated as
        # "unparseable"; KeyboardInterrupt and friends still propagate.
        return []

    if not isinstance(parsed, list):
        return []

    # Reject rows whose entries are not [aspect, polarity] sequences: indexing
    # a bare string such as 'food' would otherwise yield a single character.
    if not all(isinstance(entry, (list, tuple)) and len(entry) >= 2 for entry in parsed):
        return []

    return [entry[1] for entry in parsed]

# Parse every `output` cell into its list of polarity labels.
train["polarity_list"] = train["output"].apply(extract_polarities)

# ------------------------------------------------
# 3. Flatten into one big list of all polarities
# ------------------------------------------------
all_polarities = [
    polarity
    for row_polarities in train["polarity_list"]
    for polarity in row_polarities
]

# Convert to DataFrame for counting
pol_df = pd.DataFrame({"polarity": all_polarities})

# ------------------------------------------------
# 4. Count and plot
# ------------------------------------------------
counts = pol_df["polarity"].value_counts()
percent = pol_df["polarity"].value_counts(normalize=True) * 100

print("=== Polarity Counts (Aspect-level) ===")
print(counts)
print("\n=== Polarity Percent (%) ===")
print(percent.round(2))

# Bar plot
fig, ax = plt.subplots(figsize=(6, 4))
# seaborn 0.13 deprecates `palette` without `hue`, so the x variable is also
# assigned to `hue`; `dodge=False` keeps one full-width bar per category.
sns.countplot(
    data=pol_df,
    x="polarity",
    hue="polarity",
    palette="viridis",
    dodge=False,
    ax=ax,
)
# The hue legend only repeats the x tick labels; drop it if seaborn drew one.
polarity_legend = ax.get_legend()
if polarity_legend is not None:
    polarity_legend.remove()
# Title previously contained a mis-encoded en dash ("â€“").
ax.set_title("MAMS Training Set – Polarity Distribution (Aspect-level)")
ax.set_xlabel("Polarity")
ax.set_ylabel("Count")
fig.tight_layout()
plt.show()