In [1]:
import os
import csv
import hashlib
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from sklearn.metrics import average_precision_score

from gest.data.gest import GEST
from gest.service.evaluation.graph_matching.graph import GESTGraph
from gest.service.evaluation.graph_matching.similarity import (
    SimilarityService,
    SimilarityEngine,
)
from gest.service.evaluation.graph_matching.solver import SolverType
from gest.service.evaluation.graph_matching.embedding_type_enum import EmbeddingType
from gest.service.other.text_similarity.text_similarity_evaluator import (
    TextSimilarityEvaluator,
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
RESULTS_CSV_PATH = "/workspaces/GEST/notebooks/data/synthetic-val-1_vs_synthetic-val-2_evaluation_graph_matching.csv"
NEG_RESULTS_CSV_PATH = "/workspaces/GEST/notebooks/data/synthetic-val-1_vs_synthetic-val-2_evaluation_graph_matching_negatives.csv"
TEXT_SIMILARITY_CSV_PATH = "/workspaces/GEST/notebooks/data/synthetic-val-1_vs_synthetic-val-2_evaluation_text_similarity.csv"

REQUIRED_COLUMNS = {"dataset", "id", "text", "gest"}
EVAL_NEG_PER_POS = 4

In [3]:
synthetic = pd.read_csv("/workspaces/GEST/data/gest.csv")

In [4]:
def ensure_required_columns(df: pd.DataFrame, name: str, required_columns: set):
    missing = required_columns - set(df.columns)
    if missing:
        raise ValueError(f"{name} is missing required columns: {sorted(missing)}")


def ensure_duplicated_pairs(df: pd.DataFrame, name: str) -> pd.DataFrame:
    dup_mask = df.duplicated(["dataset", "id"], keep=False)
    if not dup_mask.any():
        raise ValueError(f"{name} has no duplicated (dataset, id) pairs.")
    dups = df.loc[dup_mask, ["dataset", "id", "text", "gest"]].copy()

    # Text uniqueness summary
    per_key = dups.groupby(["dataset", "id"])["text"].nunique()
    same_text_keys = (per_key == 1).sum()
    different_text_keys = (per_key > 1).sum()
    print(
        f"{name}: {len(dups)} rows across {len(per_key)} duplicated keys "
        f"(keys with identical text only: {same_text_keys}; keys with > 1 unique texts: {different_text_keys})."
    )

    # Keep only keys that actually have > 1 unique texts as we don't want identical-text pairs
    dups = dups.merge(
        per_key[per_key > 1].rename("n_unique_texts").reset_index(),
        on=["dataset", "id"],
        how="inner",
    ).drop(columns="n_unique_texts")

    if dups.empty:
        raise ValueError(
            "All duplicated keys have identical text only; nothing to pair with different text."
        )
    return dups

In [5]:
ensure_required_columns(synthetic, "synthetic", REQUIRED_COLUMNS)

In [6]:
# Rows that belong to duplicated (dataset,id) keys with ≥2 unique texts
dups = ensure_duplicated_pairs(synthetic, "synthetic")

# Stable row ids to remove self and mirrored pairs later
dups = dups.reset_index(names="row_id")

# Self-merge within key
pairs = dups.merge(
    dups,
    on=["dataset", "id"],
    how="inner",
    suffixes=("_val1", "_val2"),
)

# Keep only unique unordered pairs (i < j)
pairs = pairs[pairs["row_id_val1"] < pairs["row_id_val2"]].copy()

# Drop pairs with identical text and identical gest JSON
pairs = pairs[
    (pairs["text_val1"] != pairs["text_val2"])
    & (pairs["gest_val1"] != pairs["gest_val2"])
].copy()

# Tidy columns for downstream parsing
pairs = pairs.rename(
    columns={
        "gest_val1": "gest_synthetic_val1",
        "gest_val2": "gest_synthetic_val2",
    }
)[["dataset", "id", "gest_synthetic_val1", "gest_synthetic_val2"]]

synthetic: 194 rows across 97 duplicated keys (keys with identical text only: 0; keys with > 1 unique texts: 97).


In [7]:
num_keys = pairs[["dataset", "id"]].drop_duplicates().shape[0]
print(
    f"Found {len(pairs)} unique pairs across {num_keys} duplicated (dataset, id) keys (with differing text)."
)

Found 97 unique pairs across 97 duplicated (dataset, id) keys (with differing text).


In [8]:
tqdm.pandas(desc="Parsing Synthetic Validation 1 GESTs")
pairs["g1"] = pairs["gest_synthetic_val1"].progress_apply(
    lambda s: GESTGraph(gest=GEST.model_validate_json(s))
)

Parsing Synthetic Validation 1 GESTs: 100%|██████████| 97/97 [00:00<00:00, 534.90it/s]


In [9]:
tqdm.pandas(desc="Parsing Synthetic Validation 2 GESTs")
pairs["g2"] = pairs["gest_synthetic_val2"].progress_apply(
    lambda s: GESTGraph(gest=GEST.model_validate_json(s))
)

Parsing Synthetic Validation 2 GESTs: 100%|██████████| 97/97 [00:00<00:00, 2218.53it/s]


In [10]:
configurations = [
    {
        "name": "Spectral_GloVe50",
        "engine_params": {
            "solver_type": SolverType.SPECTRAL,
            "embedding_type": EmbeddingType.GLOVE50,
            "use_edges": True,
        },
    },
    {
        "name": "NGM_GloVe50",
        "engine_params": {
            "solver_type": SolverType.NGM,
            "embedding_type": EmbeddingType.GLOVE50,
            "use_edges": True,
        },
    },
    {
        "name": "Spectral_GloVe50_NoEdges",
        "engine_params": {
            "solver_type": SolverType.SPECTRAL,
            "embedding_type": EmbeddingType.GLOVE50,
            "use_edges": False,
        },
    },
    {
        "name": "Spectral_GloVe300",
        "engine_params": {
            "solver_type": SolverType.SPECTRAL,
            "embedding_type": EmbeddingType.GLOVE300,
            "use_edges": True,
        },
    },
    {
        "name": "NGM_GloVe300",
        "engine_params": {
            "solver_type": SolverType.NGM,
            "embedding_type": EmbeddingType.GLOVE300,
            "use_edges": True,
        },
    },
    {
        "name": "Spectral_GloVe300_NoEdges",
        "engine_params": {
            "solver_type": SolverType.SPECTRAL,
            "embedding_type": EmbeddingType.GLOVE300,
            "use_edges": False,
        },
    },
    {
        "name": "Spectral_W2V300",
        "engine_params": {
            "solver_type": SolverType.SPECTRAL,
            "embedding_type": EmbeddingType.W2V_GOOGLE,
            "use_edges": True,
        },
    },
    {
        "name": "NGM_W2V300",
        "engine_params": {
            "solver_type": SolverType.NGM,
            "embedding_type": EmbeddingType.W2V_GOOGLE,
            "use_edges": True,
        },
    },
    {
        "name": "Spectral_W2V300_NoEdges",
        "engine_params": {
            "solver_type": SolverType.SPECTRAL,
            "embedding_type": EmbeddingType.W2V_GOOGLE,
            "use_edges": False,
        },
    },
]

In [11]:
for config in configurations:
    config_name = config["name"]
    print(f"\nStarting Evaluation for '{config_name}'.")

    processed_pairs = set()
    if os.path.exists(RESULTS_CSV_PATH):
        temp_df = pd.read_csv(RESULTS_CSV_PATH)
        processed_for_config = temp_df[temp_df["configuration"] == config_name]
        processed_pairs = set(
            zip(processed_for_config["dataset"], processed_for_config["id"])
        )

    if processed_pairs:
        print(
            f"Found {len(processed_pairs)} previously computed results for this configuration."
        )

    pairs["is_processed"] = [
        (d, i) in processed_pairs for d, i in zip(pairs["dataset"], pairs["id"])
    ]
    pairs_to_process = pairs[~pairs["is_processed"]].copy()

    if pairs_to_process.empty:
        print(f"All pairs for '{config_name}' are already processed. Skipping.")
        continue

    print(f"Processing {len(pairs_to_process)} new pairs for '{config_name}'.")
    engine = SimilarityEngine(**config["engine_params"])
    similarity_service = SimilarityService(engine=engine)

    def compute_similarity(row) -> float:
        try:
            return similarity_service.graph_similarity_normalized(row["g1"], row["g2"])
        except Exception as e:
            print(
                f"Exception occurred on (dataset={row['dataset']}, id={row['id']}): \n{e}"
            )
            return 0.0

    csv_header = ["dataset", "id", "configuration", "similarity"]
    write_header = not os.path.exists(RESULTS_CSV_PATH)

    with open(RESULTS_CSV_PATH, "a", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=csv_header)
        if write_header:
            writer.writeheader()

        for _, row in tqdm(
            pairs_to_process.iterrows(),
            total=len(pairs_to_process),
            desc=f"Calculating for {config_name}",
        ):
            score = compute_similarity(row)
            writer.writerow(
                {
                    "dataset": row["dataset"],
                    "id": row["id"],
                    "configuration": config_name,
                    "similarity": score,
                }
            )


Starting Evaluation for 'Spectral_GloVe50'.
Found 97 previously computed results for this configuration.
All pairs for 'Spectral_GloVe50' are already processed. Skipping.

Starting Evaluation for 'NGM_GloVe50'.
Found 97 previously computed results for this configuration.
All pairs for 'NGM_GloVe50' are already processed. Skipping.

Starting Evaluation for 'Spectral_GloVe50_NoEdges'.
Found 97 previously computed results for this configuration.
All pairs for 'Spectral_GloVe50_NoEdges' are already processed. Skipping.

Starting Evaluation for 'Spectral_GloVe300'.
Found 97 previously computed results for this configuration.
All pairs for 'Spectral_GloVe300' are already processed. Skipping.

Starting Evaluation for 'NGM_GloVe300'.
Found 97 previously computed results for this configuration.
All pairs for 'NGM_GloVe300' are already processed. Skipping.

Starting Evaluation for 'Spectral_GloVe300_NoEdges'.
Found 97 previously computed results for this configuration.
All pairs for 'Spectral_G

In [12]:
def fisher_score(pos_vals: pd.Series, neg_vals: pd.Series) -> float:
    mu1, mu0 = pos_vals.mean(), neg_vals.mean()
    v1, v0 = pos_vals.var(ddof=1), neg_vals.var(ddof=1)
    return float(((mu1 - mu0) ** 2) / (v1 + v0 + 1e-12))


def pr_auc(scores: pd.Series, labels: pd.Series) -> float:
    return float(average_precision_score(labels, scores))


def top1_accuracy(df: pd.DataFrame, score_col: str) -> float:
    hits = []
    for _, g in df.groupby(["dataset", "id"]):
        if g["label"].sum() == len(g):
            continue
        g = g.sort_values(score_col, ascending=False)
        hits.append(int(g.iloc[0]["label"] == 1))
    return float(np.mean(hits)) if hits else float("nan")


def point_biserial_corr(scores: pd.Series, labels: pd.Series) -> float:
    s = scores.to_numpy(dtype=float)
    y = labels.to_numpy(dtype=float)
    if s.std() < 1e-12 or y.std() < 1e-12:
        return float("nan")
    return float(np.corrcoef(s, y)[0, 1])


def _stable_rng(dataset: str, ex_id: str, seed: int = 0) -> np.random.Generator:
    h = hashlib.sha256(f"{dataset}::{ex_id}::{seed}".encode()).digest()
    return np.random.default_rng(int.from_bytes(h[:4], "big"))

In [13]:
def _neg_pairs_for_keys(
    pairs_df: pd.DataFrame, subset_keys, neg_per_pos: int
) -> pd.DataFrame:
    rows = []
    by_ds = {ds: grp.sort_values("id") for ds, grp in pairs_df.groupby("dataset")}
    for ds, ex_id in subset_keys:
        ex_id = str(ex_id)
        grp = by_ds.get(ds)
        if grp is None or len(grp) < 2:
            continue
        candidates = [str(i) for i in grp["id"].tolist() if str(i) != ex_id]
        if not candidates:
            continue
        rng = _stable_rng(ds, ex_id)
        k = min(neg_per_pos, len(candidates))
        choose = rng.choice(candidates, size=k, replace=len(candidates) < k)
        for neg_id in np.atleast_1d(choose):
            rows.append({"dataset": ds, "id": ex_id, "neg_id": str(neg_id)})
    return pd.DataFrame(rows)

In [14]:
pairs_all = pairs[["dataset", "id", "g1", "g2"]].copy()

In [15]:
def _ensure_negative_scores_cached(
    config_name: str, engine_params: dict, keys
) -> pd.DataFrame:
    """Cache negative scores for desired (dataset,id) keys and return the subset."""
    if os.path.exists(NEG_RESULTS_CSV_PATH):
        neg_cache = pd.read_csv(NEG_RESULTS_CSV_PATH)
        neg_cache["id"] = neg_cache["id"].astype(str)
        neg_cache["neg_id"] = neg_cache["neg_id"].astype(str)
    else:
        neg_cache = pd.DataFrame(
            columns=["dataset", "id", "neg_id", "configuration", "similarity"]
        )

    have = (
        neg_cache[neg_cache["configuration"] == config_name]
        if len(neg_cache)
        else neg_cache
    )
    have_keys = set(zip(have["dataset"], have["id"], have["neg_id"]))

    desired_pairs = _neg_pairs_for_keys(pairs_all, keys, EVAL_NEG_PER_POS)
    if desired_pairs.empty:
        return have

    desired_keys = set(
        zip(desired_pairs["dataset"], desired_pairs["id"], desired_pairs["neg_id"])
    )
    to_compute_keys = desired_keys - have_keys

    if to_compute_keys:
        g1_map = {
            (d, str(i)): g
            for d, i, g in pairs_all[["dataset", "id", "g1"]].itertuples(index=False)
        }
        g2_map = {
            (d, str(i)): g
            for d, i, g in pairs_all[["dataset", "id", "g2"]].itertuples(index=False)
        }

        engine = SimilarityEngine(**engine_params)
        sim = SimilarityService(engine=engine)

        print(
            f"Processing {len(to_compute_keys)} new negative pairs for '{config_name}'."
        )
        rows = []
        for ds, ex_id, neg_id in tqdm(
            list(to_compute_keys),
            total=len(to_compute_keys),
            desc=f"Negatives for {config_name}",
        ):
            try:
                g1 = g1_map[(ds, str(ex_id))]
                g2 = g2_map[(ds, str(neg_id))]
                score = sim.graph_similarity_normalized(g1, g2)
            except Exception:
                score = 0.0
            rows.append(
                {
                    "dataset": ds,
                    "id": str(ex_id),
                    "neg_id": str(neg_id),
                    "configuration": config_name,
                    "similarity": score,
                }
            )

        write_header = not os.path.exists(NEG_RESULTS_CSV_PATH)
        with open(NEG_RESULTS_CSV_PATH, "a", newline="") as f:
            w = csv.DictWriter(
                f, fieldnames=["dataset", "id", "neg_id", "configuration", "similarity"]
            )
            if write_header:
                w.writeheader()
            for r in rows:
                w.writerow(r)

        neg_cache = pd.read_csv(NEG_RESULTS_CSV_PATH)
        neg_cache["id"] = neg_cache["id"].astype(str)
        neg_cache["neg_id"] = neg_cache["neg_id"].astype(str)

    neg_sub = neg_cache[neg_cache["configuration"] == config_name]
    return neg_sub.merge(desired_pairs, on=["dataset", "id", "neg_id"], how="right")

In [16]:
results_df = pd.read_csv(RESULTS_CSV_PATH)
summary_rows = []

for config in configurations:
    name = config["name"]

    pos_scores_df = results_df[results_df["configuration"] == name][
        ["dataset", "id", "similarity"]
    ].copy()
    if pos_scores_df.empty:
        print(f"\nStarting Evaluation for '{name}'.")
        print("Found 0 previously computed results for this configuration.")
        print(f"All pairs for '{name}' are already processed. Skipping.")
        continue

    print(f"\nStarting Evaluation for '{name}' (negatives & metrics).")
    keys = set(zip(pos_scores_df["dataset"], pos_scores_df["id"].astype(str)))

    existing = (
        pd.read_csv(NEG_RESULTS_CSV_PATH)
        if os.path.exists(NEG_RESULTS_CSV_PATH)
        else pd.DataFrame(
            columns=["dataset", "id", "neg_id", "configuration", "similarity"]
        )
    )
    existing = existing[(existing["configuration"] == name)]
    existing["id"] = existing["id"].astype(str)
    existing["neg_id"] = existing["neg_id"].astype(str)
    already = existing.merge(
        _neg_pairs_for_keys(pairs_all, keys, EVAL_NEG_PER_POS),
        on=["dataset", "id", "neg_id"],
        how="inner",
    )
    print(
        f"Found {len(already)} previously computed negative results for this configuration."
    )

    neg_cached = _ensure_negative_scores_cached(name, config["engine_params"], keys)

    pos_scores_df = pos_scores_df.rename(columns={"similarity": "score"}).copy()
    pos_scores_df["id"] = pos_scores_df["id"].astype(str)
    pos_scores_df["label"] = 1

    neg_scores_df = (
        neg_cached[["dataset", "id", "neg_id", "similarity"]]
        .rename(columns={"similarity": "score"})
        .copy()
    )
    neg_scores_df["id"] = neg_scores_df["id"].astype(str)
    neg_scores_df["label"] = 0

    eval_df = pd.concat(
        [
            pos_scores_df[["dataset", "id", "label", "score"]],
            neg_scores_df[["dataset", "id", "label", "score"]],
        ],
        ignore_index=True,
    )

    corr = 100.0 * point_biserial_corr(eval_df["score"], eval_df["label"])
    acc = 100.0 * top1_accuracy(eval_df, "score")
    fsc = fisher_score(pos_scores_df["score"], neg_scores_df["score"])
    auc = 100.0 * pr_auc(eval_df["score"], eval_df["label"])

    summary_rows.append(
        {"Configuration": name, "Corr": corr, "Acc": acc, "F": fsc, "AUC": auc}
    )



Starting Evaluation for 'Spectral_GloVe50' (negatives & metrics).
Found 388 previously computed negative results for this configuration.

Starting Evaluation for 'NGM_GloVe50' (negatives & metrics).
Found 388 previously computed negative results for this configuration.

Starting Evaluation for 'Spectral_GloVe50_NoEdges' (negatives & metrics).
Found 388 previously computed negative results for this configuration.

Starting Evaluation for 'Spectral_GloVe300' (negatives & metrics).
Found 388 previously computed negative results for this configuration.

Starting Evaluation for 'NGM_GloVe300' (negatives & metrics).
Found 388 previously computed negative results for this configuration.

Starting Evaluation for 'Spectral_GloVe300_NoEdges' (negatives & metrics).
Found 388 previously computed negative results for this configuration.

Starting Evaluation for 'Spectral_W2V300' (negatives & metrics).
Found 388 previously computed negative results for this configuration.

Starting Evaluation for '

In [17]:
eval_summary = (
    pd.DataFrame(summary_rows).sort_values("Configuration").set_index("Configuration")
)
pd.set_option("display.precision", 3)
print("\nEvaluation metrics for each experiment:")
print("------------------------------------")
print(eval_summary.to_string())


Evaluation metrics for each experiment:
------------------------------------
                             Corr     Acc      F     AUC
Configuration                                           
NGM_GloVe300               26.098  41.237  0.156  38.240
NGM_GloVe50                16.916  29.897  0.077  30.323
NGM_W2V300                  9.945  23.711  0.031  24.235
Spectral_GloVe300          24.770  43.299  0.188  34.724
Spectral_GloVe300_NoEdges  38.293  58.763  0.506  49.409
Spectral_GloVe50           16.115  32.990  0.082  29.936
Spectral_GloVe50_NoEdges   28.369  51.546  0.269  43.011
Spectral_W2V300             9.688  28.866  0.029  24.768
Spectral_W2V300_NoEdges     6.983  55.670  0.018  22.876


In [18]:
def compute_text_similarity_for_pairs(
    pairs_df: pd.DataFrame,
    dups_df: pd.DataFrame,
    out_csv_path: str = TEXT_SIMILARITY_CSV_PATH,
    evaluator=TextSimilarityEvaluator(),
):
    left_text = dups_df.rename(
        columns={"gest": "gest_synthetic_val1", "text": "text_val1"}
    )[["dataset", "id", "gest_synthetic_val1", "text_val1"]]
    right_text = dups_df.rename(
        columns={"gest": "gest_synthetic_val2", "text": "text_val2"}
    )[["dataset", "id", "gest_synthetic_val2", "text_val2"]]

    pairs_with_text = (
        pairs_df.merge(
            left_text, on=["dataset", "id", "gest_synthetic_val1"], how="left"
        )
        .merge(right_text, on=["dataset", "id", "gest_synthetic_val2"], how="left")
        .copy()
    )

    sims = []
    for t1, t2 in tqdm(
        zip(pairs_with_text["text_val1"], pairs_with_text["text_val2"]),
        total=len(pairs_with_text),
        desc="Text similarity",
    ):
        sims.append(float(evaluator.compute_text_similarity(str(t1), str(t2))))

    pairs_with_text["text_similarity"] = sims
    mean_score = float(np.mean(sims)) if sims else float("nan")

    cols_to_save = ["dataset", "id", "text_val1", "text_val2", "text_similarity"]
    pairs_with_text[cols_to_save].to_csv(out_csv_path, index=False)

    print("\nText similarity evaluation:")
    print("------------------------------------")
    print(
        f"Computed text similarity for {len(pairs_with_text)} pairs; mean = {mean_score:.6f}"
    )
    print(f"Saved pairwise text similarity to: {out_csv_path}")

    return pairs_with_text, mean_score

2025-08-26 10:00:52,206 sentence_transformers.SentenceTransformer [INFO] Use pytorch device_name: cpu
2025-08-26 10:00:52,207 sentence_transformers.SentenceTransformer [INFO] Load pretrained SentenceTransformer: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2


In [19]:
compute_text_similarity_for_pairs(pairs, dups)

Batches: 100%|██████████| 1/1 [00:00<00:00,  2.59it/s]]
Batches: 100%|██████████| 1/1 [00:00<00:00,  9.48it/s]2.56it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  7.13it/s]4.43it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  6.29it/s]5.31it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 12.48it/s]5.60it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  8.97it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  9.23it/s]7.29it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  7.96it/s]7.67it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 10.17it/s]7.68it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  8.50it/s]8.19it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  6.94it/s] 8.19it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  9.59it/s] 7.70it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  8.06it/s] 8.09it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  6.85it/s] 8.00it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  5.80it/s] 7.54it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  4.88it/


Text similarity evaluation:
------------------------------------
Computed text similarity for 97 pairs; mean = 0.735890
Saved pairwise text similarity to: /workspaces/GEST/notebooks/data/synthetic-val-1_vs_synthetic-val-2_evaluation_text_similarity.csv





(                 dataset             id  \
 0   ActivityNet Captions  v_IQGg87yZZjs   
 1   ActivityNet Captions  v_ZJ6BFrKcRe0   
 2   ActivityNet Captions  v_J3DxJ8gI95U   
 3   ActivityNet Captions  v_KRGiJIHSd9E   
 4   ActivityNet Captions  v_Iq9cAZxki9Y   
 ..                   ...            ...   
 92  ActivityNet Captions  v_UgSLUt8X1Lc   
 93  ActivityNet Captions  v_fgoXpih2Kws   
 94  ActivityNet Captions  v_tznMNEWglxY   
 95  ActivityNet Captions  v_bmoS216hsoc   
 96  ActivityNet Captions  v_fOuFF7dGPtI   
 
                                   gest_synthetic_val1  \
 0   {"actor0":{"action":"Exists","entities":["acto...   
 1   {"actor0":{"action":"Exists","entities":["acto...   
 2   {"actor0":{"action":"Exists","entities":["acto...   
 3   {"actor0":{"action":"Exists","entities":["acto...   
 4   {"actor0":{"action":"Exists","entities":["acto...   
 ..                                                ...   
 92  {"actor0":{"action":"Exists","entities":["acto...   
 93  {