In [122]:
from typing import Literal
import os
import umap
import numpy as np
import pandas
import pickle
import json
import re
import ast
import os

import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import colors as mcolors, cm, lines as mlines
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.cm import ScalarMappable
from scipy.spatial import distance

from scipy.spatial import distance

# from google.colab import userdata
# from google.colab import files

import google.generativeai as genai
# from google.genai import types
# client = genai.Client(api_key=userdata.get('GOOGLE_API_KEY'))
GOOGLE_GENAI_KEY = os.getenv("GOOGLE_AI_API_KEY")
genai.configure(api_key=GOOGLE_GENAI_KEY)

DATA_PATH = os.path.join(os.getenv("DATA_PATH"), "archive")
DATA_PATH

'/Users/tig1/Programming/TOPOL/data/archive'

In [123]:
def generate_narrative_comparison_prompt(text1, text2):
    return f"""
Role: You are a semantic narrative analyst. Your task is to identify all strong, directional narrative polarity dimensions that differentiate text1 from text2. These dimensions represent conceptual shifts in discourse (e.g., tone, stance, topic, moral framing) and must be grounded in explicit sentence-level evidence from both texts.

Each dimension should reflect a transition from a pole **dominant in text1** to a different pole **dominant in text2**. Only report a dimension if it meets all of the following criteria:

1. It is clearly directional (text1 pole → text2 pole). Symmetric or bidirectional contrasts are invalid.
2. The dimension poles, text1 pole and text2 pole, need to be supported by at least a 20 of the sentences in text1 and 20 from text2 respectively. Each "sentence" refers to a distinct, declarative statement that ends in a punctuation sign.
3. The keywords and evidence sentences must clearly match the claimed direction of the shift. For completeness, to the extent of possible, provide keywords different from those contained in evidence sentences.
4. All dimensions must use **non-overlapping evidence** — no shared sentence can be used for more than one dimension.
5. Extract all keywords directly as quoted substrings from the texts. Do not infer, generalize, or paraphrase.
6. Return at most 5 dimensions. If more are possible, prioritize those with highest evidence and directional clarity.
7. If fewer than 25% of text1 or text2 sentence-aligned differences exist for a dimension, or if polarity is weak or ambiguous, do not report it. Return an empty list instead. Do not invent plausible but unsupported contrasts.
8. Avoid including dimensions that differ only in phrasing but share the same semantic direction and evidence.
9. All output must follow the structure in the example and be formatted as a valid JSON list. Do not include commentary or free-text explanations.

To help you understand what a narrative dimension might look like, here are a few **illustrative examples only**:
- "Skepticism to Confidence"
- "Risk Taking to Risk Aversion"
- "Criticism to Trust"
- "Dissapointment to Enjoyment"
- "Sarcasm to Genuineness"

These are for inspiration only — do not anchor your analysis to these labels. Let the dimensions emerge from the texts.

--- Output Format (JSON list) ---

For each dimension, return the following fields:

- "dimension_title": A label summarizing the semantic shift, the lest verbose possible (e.g., "Uncertainty to Optimism").
- "text1_sentence_count": Number of sentences from text1 that represent the text1 pole and how many sentences exist in total in text1.
- "text2_sentence_count": Number of sentences from text2 that represent the text2 pole and how many sentences exist in total in text2.
- "confidence_label_text1": One of ["High", "Medium", "Low"], based on clarity and alignment for the text1 pole.
- "confidence_label_text2": One of ["High", "Medium", "Low"], based on clarity and alignment for the text2 pole.
- "representation_text1": One of ["Representative", "Ambiguous"].
   After selecting 20% sentences in "text1_evidence_sentences", you must still review the **entire remaining text1**.
   If other 20%  **non-selected sentence** expresses an idea, stance, tone, or framing that **clearly contradicts** the polarity of the dimension title (e.g., expresses the opposite pole, or undermines the claimed semantic shift), report "Ambiguous".
   Otherwise, report "Representative".
- "representation_text2": One of ["Representative", "Ambiguous"].
   After selecting 20% sentences in "text2_evidence_sentences", you must still review the **entire remaining text2**.
   If other 20%  **non-selected sentence** expresses an idea, stance, tone, or framing that **clearly contradicts** the polarity of the dimension title (e.g., expresses the opposite pole, or undermines the claimed semantic shift), report "Ambiguous".
   Otherwise, report "Representative".
   In both cases, contradiction includes:
   - Explicit reversal (e.g., tone of trust vs. tone of suspicion)
   - Strong sentiment or intent misalignment
   - Neutralizing statements that refute or weaken the polarity distinction
   Be cautious and conservative: if contradiction is possible, prefer "Ambiguous".
- "text1_number_contradicting_sentences": Number of sentences from text1 that contradict the text1 pole.
- "text2_number_contradicting_sentences": Number of sentences from text2 that contradict the text2 pole.
- "text1_evidence_sentences": literal quotes from text1 illustrating the text1 pole.
- "text2_evidence_sentences": literal quotes from text2 illustrating the text2 pole.
- "text1_keywords": Quoted keywords or phrases typical of the text1 pole (from text1).
- "text2_keywords": Quoted keywords or phrases typical of the text2 pole (from text2).


--- Example Output Format ---

[
  {{
    "dimension_title": "Mistrust to Trust",
    "text1_sentence_count": "24 out of 100",
    "text2_sentence_count": "23 out of 100",
    "confidence_label_text1": "High",
    "confidence_label_text2": "Medium",
    "representation_text1":"Representative",
    "representation_text2":"Ambiguous",
    - "text1_number_contradicting_sentences":"5 out of 100",
    - "text1_number_contradicting_sentences":"30 out of 100",
    "text1_evidence_sentences": [
      "The agency has consistently failed to provide transparent updates.",
      "There is little reason to believe the data hasn't been manipulated.",
      "Public skepticism is justified given the repeated delays."
    ],
    "text2_evidence_sentences": [
      "Recent communications have been clear and consistent.",
      "The public has responded positively to the new transparency measures.",
      "Trust in the agency has notably increased according to the latest survey."
    ],
    "text1_keywords": ["failed to provide", "manipulated", "public skepticism"],
    "text2_keywords": ["transparency", "clear and consistent", "trust"]
  }}
]

--- Begin Text Analysis ---

text1:
{text1}

text2:
{text2}
"""

In [124]:
def safe_json_load(raw_response_text):
    # Strip leading/trailing whitespace and remove non-JSON "explanation" text if any
    raw_text = raw_response_text.strip()

    # Attempt quick fix: if it starts/ends with JSON brackets
    if not raw_text.startswith('[') and '[' in raw_text:
        raw_text = raw_text[raw_text.index('['):]
    if not raw_text.endswith(']') and ']' in raw_text:
        raw_text = raw_text[:raw_text.rindex(']') + 1]

    # Remove or escape invalid escape characters
    def escape_invalid_escapes(s):
        # Fix invalid escape sequences: \x, \u (if malformed), or backslashes not part of valid escape
        s = re.sub(r'\\(?!["\\/bfnrtu])', r'\\\\', s)  # Replace lone backslashes
        return s

    try:
        return json.loads(escape_invalid_escapes(raw_text))
    except json.JSONDecodeError as e:
        try:
            # Try ast.literal_eval as fallback (tolerates single quotes, trailing commas)
            return ast.literal_eval(raw_text)
        except Exception as fallback_error:
            print("⚠️ JSON parsing failed.")
            print("JSON error:", e)
            print("Fallback error:", fallback_error)
            return None

In [220]:
def analyze_xAI_files(clusters: list, folder_name: str, cb_type: Literal["default", "random"]):
    folder_path = os.path.join(DATA_PATH, "xAI", folder_name)
    fpath_base = os.path.join(folder_path, f"response_{cb_type}_cluster_CLUSTER_ID.json")

    dimensions = []
    avg_ratio_repr_sentences_t1 = 0
    avg_ratio_repr_sentences_t2 = 0
    avg_ratio_contr_sentences_t1 = 0
    avg_ratio_contr_sentences_t2 = 0
    for cluster in clusters:
        file_path = fpath_base.replace("CLUSTER_ID", str(cluster))
        with open(file_path, "r") as f:
            xai_data = safe_json_load(f.read())

        if xai_data is None:
            dimensions.append(None)
            continue

        nb_dimensions = len(xai_data)
        dimensions.append(nb_dimensions)
        if nb_dimensions > 0:
            nb_repr_sentences_t1 = sum( float(dim["text1_sentence_count"].split(" out of ")[0]) for dim in xai_data )
            nb_repr_sentences_t2 = sum( float(dim["text2_sentence_count"].split(" out of ")[0]) for dim in xai_data )

            nb_contr_sentences_t1 = sum( float(dim["text1_number_contradicting_sentences"].split(" out of ")[0]) for dim in xai_data )
            nb_contr_sentences_t2 = sum( float(dim["text2_number_contradicting_sentences"].split(" out of ")[0]) for dim in xai_data )

            nb_total_sentences_t1 = sum( float(dim["text1_sentence_count"].split(" out of ")[1]) for dim in xai_data )
            nb_total_sentences_t2 = sum( float(dim["text2_sentence_count"].split(" out of ")[1]) for dim in xai_data )

            avg_ratio_repr_sentences_t1 += nb_repr_sentences_t1 / nb_total_sentences_t1 if nb_total_sentences_t1 > 0 else 0
            avg_ratio_repr_sentences_t2 += nb_repr_sentences_t2 / nb_total_sentences_t2 if nb_total_sentences_t2 > 0 else 0
            avg_ratio_contr_sentences_t1 += nb_contr_sentences_t1 / nb_total_sentences_t1 if nb_total_sentences_t1 > 0 else 0
            avg_ratio_contr_sentences_t2 += nb_contr_sentences_t2 / nb_total_sentences_t2 if nb_total_sentences_t2 > 0 else 0

    nb_not_null_dims = sum(1 for dim in dimensions if dim is not None and dim > 0)
    avg_ratio_repr_sentences_t1 /= nb_not_null_dims
    avg_ratio_contr_sentences_t1 /= nb_not_null_dims
    avg_ratio_repr_sentences_t2 /= nb_not_null_dims
    avg_ratio_contr_sentences_t2 /= nb_not_null_dims

    return dimensions, avg_ratio_repr_sentences_t1, avg_ratio_repr_sentences_t2, avg_ratio_contr_sentences_t1, avg_ratio_contr_sentences_t2

---

# U.S. Central Banker Speech Transcripts

### Getting Data

In [126]:
# # upload US df
# files.upload()

In [127]:
with open(os.path.join(DATA_PATH, 'us_speeches_full.pkl'), 'rb') as f:
  us = pickle.load(f)

In [128]:
us.head()

Unnamed: 0,date,text,label,embedding,sentiment_finbert,sentiment_score_finbert,sentiment_distilbert,sentiment_score_distilbert,random_label,reduced_embedding,2D_embedding,cluster,cluster_prob
0,2004-01-03,Ben S Bernanke: Fedspeak Speech by Mr Ben S Be...,0.0,"[0.022694535553455353, -0.017962483689188957, ...","{'negative': 0.04969792491333051, 'neutral': 0...",0.068529,"{'negative': 0.10539953891808788, 'neutral': 0...",0.009538,1.0,"[0.0, 0.1841525137424469, 0.0, 0.0, 0.0, 0.0, ...","[1.028314471244812, 6.176698207855225]",0,0.831043
1,2004-01-03,Ben S Bernanke: Conducting monetary policy at ...,0.0,"[-0.0076704127714037895, -0.01307708490639925,...","{'negative': 0.1097596339467499, 'neutral': 0....",0.0256,"{'negative': 0.18130193762481212, 'neutral': 0...",-0.060588,1.0,"[0.1841525137424469, 0.0, 0.008662130683660507...","[0.7446209788322449, 6.365582466125488]",0,0.777273
2,2004-01-03,Alan Greenspan: Risk and uncertainty in moneta...,0.0,"[0.005307245068252087, 0.0005030583706684411, ...","{'negative': 0.21432017348706722, 'neutral': 0...",0.001264,"{'negative': 0.21410453199808085, 'neutral': 0...",-0.034231,1.0,"[0.0, 0.008662130683660507, 0.0, 1.0, 0.065177...","[-0.16121439635753632, 6.351457118988037]",0,0.471836
3,2004-01-04,Ben S Bernanke: Monetary policy and the econom...,0.0,"[0.021182149648666382, 0.0045381151139736176, ...","{'negative': 0.4849258614704013, 'neutral': 0....",-0.2587,"{'negative': 0.29698414709419013, 'neutral': 0...",-0.119471,0.0,"[0.0, 0.005270513705909252, 1.0, 0.0, 0.106106...","[-0.8581175208091736, 6.496980667114258]",1,0.58148
4,2004-01-04,"Roger W Ferguson, Jr: Lessons from past produc...",0.0,"[0.012355098500847816, 0.03962729126214981, 0....","{'negative': 0.04435151101400455, 'neutral': 0...",0.26832,"{'negative': 0.09881303588358256, 'neutral': 0...",0.137185,0.0,"[0.0, 0.0, 0.0651770681142807, 0.1061060279607...","[-1.4032648801803589, 6.207282066345215]",2,0.562939


In [129]:
cluster_centroids = us.groupby('cluster')['reduced_embedding'].apply(lambda x: np.mean(x.tolist(), axis=0)).to_dict()

In [130]:
def calculate_distance_to_centroid(row):
  cluster_label = row['cluster']
  embedding = row['reduced_embedding']
  centroid = cluster_centroids[cluster_label]
  return distance.euclidean(embedding, centroid)

us['distance_to_centroid'] = us.apply(calculate_distance_to_centroid, axis=1)
us.head()

Unnamed: 0,date,text,label,embedding,sentiment_finbert,sentiment_score_finbert,sentiment_distilbert,sentiment_score_distilbert,random_label,reduced_embedding,2D_embedding,cluster,cluster_prob,distance_to_centroid
0,2004-01-03,Ben S Bernanke: Fedspeak Speech by Mr Ben S Be...,0.0,"[0.022694535553455353, -0.017962483689188957, ...","{'negative': 0.04969792491333051, 'neutral': 0...",0.068529,"{'negative': 0.10539953891808788, 'neutral': 0...",0.009538,1.0,"[0.0, 0.1841525137424469, 0.0, 0.0, 0.0, 0.0, ...","[1.028314471244812, 6.176698207855225]",0,0.831043,1.861178
1,2004-01-03,Ben S Bernanke: Conducting monetary policy at ...,0.0,"[-0.0076704127714037895, -0.01307708490639925,...","{'negative': 0.1097596339467499, 'neutral': 0....",0.0256,"{'negative': 0.18130193762481212, 'neutral': 0...",-0.060588,1.0,"[0.1841525137424469, 0.0, 0.008662130683660507...","[0.7446209788322449, 6.365582466125488]",0,0.777273,1.737437
2,2004-01-03,Alan Greenspan: Risk and uncertainty in moneta...,0.0,"[0.005307245068252087, 0.0005030583706684411, ...","{'negative': 0.21432017348706722, 'neutral': 0...",0.001264,"{'negative': 0.21410453199808085, 'neutral': 0...",-0.034231,1.0,"[0.0, 0.008662130683660507, 0.0, 1.0, 0.065177...","[-0.16121439635753632, 6.351457118988037]",0,0.471836,2.312165
3,2004-01-04,Ben S Bernanke: Monetary policy and the econom...,0.0,"[0.021182149648666382, 0.0045381151139736176, ...","{'negative': 0.4849258614704013, 'neutral': 0....",-0.2587,"{'negative': 0.29698414709419013, 'neutral': 0...",-0.119471,0.0,"[0.0, 0.005270513705909252, 1.0, 0.0, 0.106106...","[-0.8581175208091736, 6.496980667114258]",1,0.58148,2.085039
4,2004-01-04,"Roger W Ferguson, Jr: Lessons from past produc...",0.0,"[0.012355098500847816, 0.03962729126214981, 0....","{'negative': 0.04435151101400455, 'neutral': 0...",0.26832,"{'negative': 0.09881303588358256, 'neutral': 0...",0.137185,0.0,"[0.0, 0.0, 0.0651770681142807, 0.1061060279607...","[-1.4032648801803589, 6.207282066345215]",2,0.562939,1.748426


In [131]:
us.to_pickle(os.path.join(DATA_PATH, 'us.pkl'))
us.to_csv(os.path.join(DATA_PATH, 'us.csv'), index=False)
# files.download(os.path.join(DATA_PATH, 'us.pkl'))
# files.download(os.path.join(DATA_PATH, 'us.csv'))

In [132]:
# filter dataframe to keep 5 texts closer to centroid for each label (CB)

closest_rows = []
# Loop over each unique cluster
for cluster_label in us['cluster'].unique():
    # For each cluster, loop over both labels 0.0 and 1.0
    for label_value in [0.0, 1.0]:
        # Filter by cluster and label
        subset_df = us[(us['cluster'] == cluster_label) & (us['label'] == label_value)]
        # Sort by distance to centroid
        sorted_subset = subset_df.sort_values(by='distance_to_centroid', ascending=True)
        # Take the top 5 closest rows
        closest_5 = sorted_subset.head(5)
        # Add to list
        closest_rows.append(closest_5)
# Combine all rows into a single DataFrame
filtered_us = pandas.concat(closest_rows)
# Display the result
display(filtered_us)

Unnamed: 0,date,text,label,embedding,sentiment_finbert,sentiment_score_finbert,sentiment_distilbert,sentiment_score_distilbert,random_label,reduced_embedding,2D_embedding,cluster,cluster_prob,distance_to_centroid
161,2005-05-26,Edward M Gramlich: The politics of inflation t...,0.0,"[-0.020591001957654953, 0.020150553435087204, ...","{'negative': 0.09860219596885145, 'neutral': 0...",-0.023256,"{'negative': 0.13185844177173245, 'neutral': 0...",0.014417,0.0,"[0.016250040382146835, 0.09567361325025558, 0....","[0.42924734950065613, 5.620391845703125]",0,0.831675,1.061897
11,2004-02-20,Ben S Bernanke: The great moderation Remarks b...,0.0,"[-0.004189498256891966, 0.013443516567349434, ...","{'negative': 0.2875483182187264, 'neutral': 0....",-0.145993,"{'negative': 0.23801955221486942, 'neutral': 0...",-0.132325,1.0,"[0.0, 0.015892913565039635, 0.2394208163022995...","[0.1797330677509308, 5.976470947265625]",0,0.779681,1.151026
35,2004-04-15,Ben S Bernanke: What policymakers can learn fr...,0.0,"[0.002098260447382927, -0.025254737585783005, ...","{'negative': 0.11883163198151371, 'neutral': 0...",-0.013191,"{'negative': 0.08300253236666322, 'neutral': 0...",0.084695,1.0,"[0.0, 0.045815788209438324, 0.0496621392667293...","[0.02742317132651806, 5.9463605880737305]",0,0.698276,1.189319
295,2006-12-01,Ben S Bernanke: Brief overview of the 4th Conf...,0.0,"[-0.010214265435934067, -0.0215043593198061, 0...","{'negative': 0.012262188829481602, 'neutral': ...",0.097884,"{'negative': 0.03792516700923443, 'neutral': 0...",0.456636,0.0,"[0.049073927104473114, 0.12329620867967606, 0....","[0.7557456493377686, 6.492964744567871]",0,0.566235,1.227458
84,2004-10-08,Ben S Bernanke: Panel discussion: what have we...,0.0,"[0.007946697995066643, -0.02205614186823368, 0...","{'negative': 0.06663311976525518, 'neutral': 0...",0.090765,"{'negative': 0.15161310844123363, 'neutral': 0...",0.037049,1.0,"[0.10983461141586304, 0.07033784687519073, 0.0...","[0.461911141872406, 5.6987433433532715]",0,0.895513,1.233190
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
512,2009-04-14,Ben S Bernanke: Four questions about the finan...,1.0,"[-0.004457445815205574, -0.0033750219736248255...","{'negative': 0.3620061131871559, 'neutral': 0....",-0.177755,"{'negative': 0.3788314346562732, 'neutral': 0....",-0.152529,1.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.7012724280357361, 7.857920169830322]",5,0.487129,1.217249
490,2009-01-13,Donald L Kohn: Troubled Asset Relief Program T...,1.0,"[-0.010106954723596573, 0.011569179594516754, ...","{'negative': 0.1767692668363452, 'neutral': 0....",0.062340,"{'negative': 0.18935054428875447, 'neutral': 0...",0.041279,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[1.261473298072815, 8.466416358947754]",5,0.565671,1.229574
470,2008-10-14,Ben S Bernanke: Remarks Speech by Mr Ben S Ber...,1.0,"[0.012028912082314491, 0.0008171815425157547, ...","{'negative': 0.018208741210401058, 'neutral': ...",0.686682,"{'negative': 0.07237351033836603, 'neutral': 0...",0.605042,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[1.305116057395935, 8.274698257446289]",5,0.593020,1.266192
594,2010-03-24,Donald L Kohn: Homework assignments for moneta...,1.0,"[-0.02829328551888466, 0.032252129167318344, 0...","{'negative': 0.18509347144175659, 'neutral': 0...",0.016158,"{'negative': 0.2809141305430482, 'neutral': 0....",-0.155301,0.0,"[0.011706608347594738, 0.017629574984312057, 0...","[1.1180145740509033, 7.837385177612305]",5,0.530905,1.309504


In [133]:
filtered_us.shape

(104, 14)

In [134]:
filtered_us.to_pickle(os.path.join(DATA_PATH, 'filtered_us.pkl'))
filtered_us.to_csv(os.path.join(DATA_PATH, 'filtered_us.csv'), index=False)
# files.download(os.path.join(DATA_PATH, 'filtered_us.pkl'))
# files.download(os.path.join(DATA_PATH, 'filtered_us.csv'))

In [135]:
with open(os.path.join(DATA_PATH, 'filtered_us.pkl'), 'rb') as f:
    filtered_us = pickle.load(f)

### xAI on default CB

In [136]:
cluster_xAI_us_default = {}
for cluster in np.sort(filtered_us['cluster'].unique()):

    text1 = filtered_us[(filtered_us['cluster'] == cluster) & (filtered_us['label'] == 0.0)]['text'].tolist()
    text2 = filtered_us[(filtered_us['cluster'] == cluster) & (filtered_us['label'] == 1.0)]['text'].tolist()

    prompt = generate_narrative_comparison_prompt(text1, text2)
    model = genai.GenerativeModel(model_name="gemini-2.5-flash")
    response = model.generate_content(prompt)
    # print(response.text)
    cluster_xAI_us_default[cluster] = response.text

    parsed_llm_out = safe_json_load(response.text)
    with open(os.path.join(DATA_PATH, f"xAI/us/response_default_cluster_{cluster}.json"), "w", encoding="utf-8") as f:
        json.dump(parsed_llm_out, f, ensure_ascii=False, indent=2)

⚠️ JSON parsing failed.
JSON error: Expecting value: line 1 column 1 (char 0)
Fallback error: invalid syntax (<unknown>, line 0)
⚠️ JSON parsing failed.
JSON error: Expecting value: line 1 column 1 (char 0)
Fallback error: invalid syntax (<unknown>, line 0)
⚠️ JSON parsing failed.
JSON error: Expecting value: line 1 column 1 (char 0)
Fallback error: invalid syntax (<unknown>, line 0)
⚠️ JSON parsing failed.
JSON error: Expecting ',' delimiter: line 200 column 41 (char 24607)
Fallback error: unterminated string literal (detected at line 208) (<unknown>, line 208)


In [225]:
with open(os.path.join(DATA_PATH, 'xAI/us/response_default.pkl'), 'wb') as f:
    pickle.dump(cluster_xAI_us_default, f)

cluster_xAI_us_default

{np.int32(0): '```json\n[\n  {\n    "dimension_title": "Amorphous Debate to Empirically Grounded Practice",\n    "text1_sentence_count": "30 out of 91",\n    "text2_sentence_count": "100 out of 247",\n    "confidence_label_text1": "High",\n    "confidence_label_text2": "High",\n    "representation_text1": "Representative",\n    "representation_text2": "Representative",\n    "text1_number_contradicting_sentences": "9 out of 91",\n    "text2_number_contradicting_sentences": "25 out of 247",\n    "text1_evidence_sentences": [\n      "The question of whether a country should adopt inflation targeting raises several issues.",\n      "At the theoretical level, inflation targeting is alleged to promote central bank transparency, clarify communication, and establish a central bank commitment to price stability.",\n      "These advantages are offset by the fact that inflation targeting could lead to rigid or formulaic policies that limit flexibility.",\n      "Given the impossibility of rewriti

In [221]:
clusters = np.sort(filtered_us['cluster'].unique())
analyze_xAI_files(clusters=clusters, folder_name="us", cb_type="default")

([1, None, 1, 5, None, 5, None, 1, 1, 4, 3],
 0.20954720883301167,
 0.2774653167433868,
 0.028034189652694427,
 0.02314168817783679)

### xAI on random CB

In [139]:
# filter dataframe to keep 5 texts closer to centroid for each random_label (CB)

closest_rows = []
# Loop over each unique cluster
for cluster_label in us['cluster'].unique():
    # For each cluster, loop over both labels 0.0 and 1.0
    for label_value in [0.0, 1.0]:
        # Filter by cluster and label
        subset_df = us[(us['cluster'] == cluster_label) & (us['random_label'] == label_value)]
        # Sort by distance to centroid
        sorted_subset = subset_df.sort_values(by='distance_to_centroid', ascending=True)
        # Take the top 5 closest rows
        closest_5 = sorted_subset.head(5)
        # Add to list
        closest_rows.append(closest_5)
# Combine all rows into a single DataFrame
filtered_us_random = pandas.concat(closest_rows)
# Display the result
display(filtered_us_random)

Unnamed: 0,date,text,label,embedding,sentiment_finbert,sentiment_score_finbert,sentiment_distilbert,sentiment_score_distilbert,random_label,reduced_embedding,2D_embedding,cluster,cluster_prob,distance_to_centroid
161,2005-05-26,Edward M Gramlich: The politics of inflation t...,0.0,"[-0.020591001957654953, 0.020150553435087204, ...","{'negative': 0.09860219596885145, 'neutral': 0...",-0.023256,"{'negative': 0.13185844177173245, 'neutral': 0...",0.014417,0.0,"[0.016250040382146835, 0.09567361325025558, 0....","[0.42924734950065613, 5.620391845703125]",0,0.831675,1.061897
295,2006-12-01,Ben S Bernanke: Brief overview of the 4th Conf...,0.0,"[-0.010214265435934067, -0.0215043593198061, 0...","{'negative': 0.012262188829481602, 'neutral': ...",0.097884,"{'negative': 0.03792516700923443, 'neutral': 0...",0.456636,0.0,"[0.049073927104473114, 0.12329620867967606, 0....","[0.7557456493377686, 6.492964744567871]",0,0.566235,1.227458
176,2005-07-21,Donald L Kohn: Monetary policy perspectives on...,0.0,"[0.021056881174445152, 1.1835028089990374e-05,...","{'negative': 0.154878918081522, 'neutral': 0.7...",-0.039476,"{'negative': 0.21137002695884025, 'neutral': 0...",-0.082202,0.0,"[0.0, 0.0, 0.0287750493735075, 0.0, 0.0, 0.0, ...","[0.5301817655563354, 6.945554256439209]",0,0.561632,1.260545
352,2007-07-10,Ben S Bernanke: Inflation expectations and inf...,1.0,"[0.015156310051679611, -0.022968273609876633, ...","{'negative': 0.09718265470403892, 'neutral': 0...",-0.034712,"{'negative': 0.1287059046328068, 'neutral': 0....",0.018403,0.0,"[0.1452760547399521, 0.04346074163913727, 0.04...","[0.46661144495010376, 5.850642681121826]",0,0.898706,1.297223
373,2007-10-12,Donald L Kohn: John Taylor rules Speech by Mr ...,1.0,"[-0.020805181935429573, 0.018746621906757355, ...","{'negative': 0.09884607845119067, 'neutral': 0...",-0.025070,"{'negative': 0.13671114615031651, 'neutral': 0...",0.095145,0.0,"[0.04108520597219467, 0.07672012597322464, 0.0...","[0.540225625038147, 6.207091331481934]",0,0.843821,1.298741
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
512,2009-04-14,Ben S Bernanke: Four questions about the finan...,1.0,"[-0.004457445815205574, -0.0033750219736248255...","{'negative': 0.3620061131871559, 'neutral': 0....",-0.177755,"{'negative': 0.3788314346562732, 'neutral': 0....",-0.152529,1.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.7012724280357361, 7.857920169830322]",5,0.487129,1.217249
422,2008-04-03,Ben S Bernanke: Developments in the financial ...,1.0,"[0.01590459793806076, -0.005630761384963989, 0...","{'negative': 0.4993251341705521, 'neutral': 0....",-0.256105,"{'negative': 0.26336493032673997, 'neutral': 0...",-0.108778,1.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[1.2699601650238037, 8.305346488952637]",5,0.615227,1.361545
72,2004-09-08,Mark W Olson: Protecting the financial infrast...,0.0,"[-0.02123187482357025, 0.034155625849962234, 0...","{'negative': 0.12080634917531695, 'neutral': 0...",0.126813,"{'negative': 0.13732708897441626, 'neutral': 0...",0.240906,1.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[1.3766469955444336, 8.021245002746582]",5,0.573744,1.426613
510,2009-04-03,Ben S Bernanke: The Federal Reserve's balance ...,1.0,"[-0.005673880223184824, 0.011421490460634232, ...","{'negative': 0.10627077892422676, 'neutral': 0...",0.185564,"{'negative': 0.162744838998399, 'neutral': 0.5...",0.080252,1.0,"[0.0, 0.0461076982319355, 0.0, 0.0, 0.0, 0.0, ...","[1.0520986318588257, 7.670193672180176]",5,0.648054,1.505205


In [140]:
filtered_us_random.to_pickle(os.path.join(DATA_PATH, 'filtered_us_random.pkl'))
filtered_us_random.to_csv(os.path.join(DATA_PATH, 'filtered_us_random.csv'), index=False)
# files.download(os.path.join(DATA_PATH, 'filtered_us_random.pkl'))
# files.download(os.path.join(DATA_PATH, 'filtered_us_random.csv'))

In [141]:
with open(os.path.join(DATA_PATH, 'filtered_us_random.pkl'), 'rb') as f:
    filtered_us_random = pickle.load(f)

In [142]:
cluster_xAI_us_random = {}
for cluster in np.sort(filtered_us_random['cluster'].unique()):

    text1_random = filtered_us_random[(filtered_us_random['cluster'] == cluster) & (filtered_us_random['random_label'] == 0.0)]['text'].tolist()
    text2_random = filtered_us_random[(filtered_us_random['cluster'] == cluster) & (filtered_us_random['random_label'] == 1.0)]['text'].tolist()

    prompt = generate_narrative_comparison_prompt(text1=text1_random, text2=text2_random)
    model = genai.GenerativeModel(model_name="gemini-2.5-flash")
    response_random = model.generate_content(prompt)
    # print(response_random.text)
    cluster_xAI_us_random[cluster] = response_random.text

    parsed_llm_out = safe_json_load(response_random.text)
    with open(os.path.join(DATA_PATH, f"xAI/us/response_random_cluster_{cluster}.json"), "w", encoding="utf-8") as f:
        json.dump(parsed_llm_out, f, ensure_ascii=False, indent=2)

⚠️ JSON parsing failed.
JSON error: Expecting ',' delimiter: line 74 column 6 (char 6766)
Fallback error: '{' was never closed (<unknown>, line 2)
⚠️ JSON parsing failed.
JSON error: Expecting value: line 84 column 175 (char 12560)
Fallback error: '[' was never closed (<unknown>, line 12)
⚠️ JSON parsing failed.
JSON error: Expecting property name enclosed in double quotes: line 10 column 60 (char 386)
Fallback error: '{' was never closed (<unknown>, line 2)
⚠️ JSON parsing failed.
JSON error: Expecting value: line 1 column 1 (char 0)
Fallback error: invalid syntax (<unknown>, line 0)
⚠️ JSON parsing failed.
JSON error: Expecting ',' delimiter: line 128 column 82 (char 22363)
Fallback error: invalid syntax. Perhaps you forgot a comma? (<unknown>, line 128)
⚠️ JSON parsing failed.
JSON error: Expecting ',' delimiter: line 544 column 6 (char 66118)
Fallback error: '{' was never closed (<unknown>, line 378)


In [226]:
with open(os.path.join(DATA_PATH, 'xAI/us/response_random.pkl'), 'wb') as f:
    pickle.dump(cluster_xAI_us_random, f)

cluster_xAI_us_random

{np.int32(0): '```json\n[\n  {\n    "dimension_title": "Policy Uncertainty to Policy Effectiveness",\n    "text1_sentence_count": "20 out of 636",\n    "text2_sentence_count": "20 out of 546",\n    "confidence_label_text1": "High",\n    "confidence_label_text2": "High",\n    "representation_text1": "Representative",\n    "representation_text2": "Representative",\n    "text1_number_contradicting_sentences": "15 out of 636",\n    "text2_number_contradicting_sentences": "15 out of 546",\n    "text1_evidence_sentences": [\n      "The question of whether a country should adopt inflation targeting raises several issues.",\n      "These advantages are offset by the fact that inflation targeting could lead to rigid or formulaic policies that limit flexibility.",\n      "Given the impossibility of rewriting history - trying to determine either what an inflation-targeting central bank would have done in response to some economic shock had it been a nontargeter or what a nontargeting central bank

In [222]:
clusters = np.sort(filtered_us['cluster'].unique())
analyze_xAI_files(clusters=clusters, folder_name="us", cb_type="random")

([1, 0, None, None, 5, None, 1, 3, 3, 2, 5],
 0.16527703811670777,
 0.26208080201884454,
 0.018909924339254376,
 0.015437366574252678)

---

# Amazon Reviews

### Getting data

In [145]:
with open(os.path.join(DATA_PATH, 'amazon_reviews_full.pkl'), 'rb') as f:
  ama = pickle.load(f)

In [146]:
cluster_centroids = ama.groupby('cluster')['reduced_embedding'].apply(lambda x: np.mean(x.tolist(), axis=0)).to_dict()

def calculate_distance_to_centroid(row):
  cluster_label = row['cluster']
  embedding = row['reduced_embedding']
  centroid = cluster_centroids[cluster_label]
  return distance.euclidean(embedding, centroid)

ama['distance_to_centroid'] = ama.apply(calculate_distance_to_centroid, axis=1)
ama.head()

Unnamed: 0,text,label,embedding,sentiment_finbert,sentiment_score_finbert,sentiment_distilbert,sentiment_score_distilbert,random_label,reduced_embedding,2D_embedding,cluster,cluster_prob,distance_to_centroid
0,Perfect Mantra I've been looking for this mant...,1,"[0.007807049434632063, 0.030451875180006027, -...","{'negative': 0.014095892198383808, 'neutral': ...",0.115722,"{'negative': 0.03922457434237003, 'neutral': 0...",0.882025,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.48859506845474243, 4.569870471954346]",14,0.616196,1.798089
1,Quality I'm just a little disapointed with the...,1,"[0.039840616285800934, 0.024724069982767105, -...","{'negative': 0.3398418128490448, 'neutral': 0....",-0.294519,"{'negative': 0.6277590095996857, 'neutral': 0....",-0.508282,1.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[4.481623649597168, 6.564354419708252]",6,0.340284,2.106874
2,Exceptional murder-mystery writer scores big. ...,1,"[0.0034261085093021393, 0.015346183441579342, ...","{'negative': 0.01553837489336729, 'neutral': 0...",0.147671,"{'negative': 0.03976318798959255, 'neutral': 0...",0.853715,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-2.7307851314544678, 5.000671863555908]",2,0.895587,2.394373
3,all the prodding questions you never wanted to...,1,"[-0.0014260949101299047, 0.02775280550122261, ...","{'negative': 0.07864289730787277, 'neutral': 0...",-0.038792,"{'negative': 0.05024813301861286, 'neutral': 0...",0.831368,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-2.088693141937256, 4.060979843139648]",2,0.634214,1.903023
4,Best Wok ever I am so glad I bought this wok. ...,1,"[-0.006502537056803703, -0.06767161935567856, ...","{'negative': 0.032922014594078064, 'neutral': ...",0.025279,"{'negative': 0.04409042187035084, 'neutral': 0...",0.891844,1.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[6.155905246734619, 5.463229656219482]",7,0.945293,2.263716


In [147]:
ama.to_pickle(os.path.join(DATA_PATH, 'ama.pkl'))
ama.to_csv(os.path.join(DATA_PATH, 'ama.csv'), index=False)

In [None]:
# filter dataframe to keep 5 texts closer to centroid for each label (CB)

closest_rows = []
# Loop over each unique cluster
for cluster_label in ama['cluster'].unique():
    # For each cluster, loop over both labels 0.0 and 1.0
    for label_value in [0.0, 1.0]:
        # Filter by cluster and label
        subset_df = ama[(ama['cluster'] == cluster_label) & (ama['label'] == label_value)]
        # Sort by distance to centroid
        sorted_subset = subset_df.sort_values(by='distance_to_centroid', ascending=True)
        # Take the top 20 closest rows
        closest_20 = sorted_subset.head(20)
        # Add to list
        closest_rows.append(closest_20)
# Combine all rows into a single DataFrame
filtered_ama = pandas.concat(closest_rows)
# Display the result
display(filtered_ama)

Unnamed: 0,text,label,embedding,sentiment_finbert,sentiment_score_finbert,sentiment_distilbert,sentiment_score_distilbert,random_label,reduced_embedding,2D_embedding,cluster,cluster_prob,distance_to_centroid
7301,Agggh I've done other power yoga videos and li...,0,"[-0.019162150099873543, 0.053017064929008484, ...","{'negative': 0.3332855701446533, 'neutral': 0....",-0.305439,"{'negative': 0.5407126396894455, 'neutral': 0....",-0.330073,1.0,"[0.031794119626283646, 0.0, 0.0, 0.0, 0.0, 0.0...","[0.636360764503479, 4.51371955871582]",14,0.883163,1.353908
9628,Being a resident of Chicago ... Being a reside...,0,"[0.0030176916625350714, 0.03323870897293091, -...","{'negative': 0.020796045660972595, 'neutral': ...",0.178883,"{'negative': 0.23477348685264587, 'neutral': 0...",0.220389,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.47295692563056946, 4.468923568725586]",14,0.353870,1.358502
7828,Disappointing It is a bit disappointing. I am ...,0,"[0.0189848430454731, 0.028111610561609268, -0....","{'negative': 0.10270992666482925, 'neutral': 0...",-0.032657,"{'negative': 0.4646166246384382, 'neutral': 0....",-0.195348,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.6561423540115356, 4.542280197143555]",14,0.494276,1.536280
9488,This is NOT Walk Away The Pounds! These videos...,0,"[0.0033417053055018187, 0.0167968962341547, -0...","{'negative': 0.09457153081893921, 'neutral': 0...",-0.063634,"{'negative': 0.4083065390586853, 'neutral': 0....",-0.050857,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.6596367955207825, 4.516697406768799]",14,0.761211,1.537489
8749,Too Expensive to be this Bad There was very li...,0,"[-0.01316519733518362, 0.0342794805765152, -0....","{'negative': 0.14763221144676208, 'neutral': 0...",-0.114138,"{'negative': 0.7387669682502747, 'neutral': 0....",-0.660461,1.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-1.2867707014083862, 7.992288589477539]",14,0.227459,1.556176
...,...,...,...,...,...,...,...,...,...,...,...,...,...
189,Great fun for the family This is a great produ...,1,"[0.07040414214134216, 0.04351133480668068, -0....","{'negative': 0.5203864574432373, 'neutral': 0....",-0.491451,"{'negative': 0.3270013853907585, 'neutral': 0....",-0.069696,1.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[3.884082794189453, 6.503241539001465]",12,0.199362,1.294788
4699,Zippo It is a Zippo lighter. That should be en...,1,"[0.03679865971207619, 0.040332991629838943, -0...","{'negative': 0.014785508625209332, 'neutral': ...",0.050489,"{'negative': 0.06711530312895775, 'neutral': 0...",0.113204,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[5.325911045074463, 6.569398880004883]",12,0.139713,1.339185
1651,Excellent book Fortunate are those to have had...,1,"[-0.005665568634867668, -0.03156633675098419, ...","{'negative': 0.009948411956429482, 'neutral': ...",0.179583,"{'negative': 0.08257834240794182, 'neutral': 0...",0.714408,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-1.6717902421951294, 4.163718223571777]",12,0.221224,1.489687
3072,Christmas Kindle We bought the Kindle for our ...,1,"[0.04128319397568703, 0.05310608074069023, -0....","{'negative': 0.012201996520161629, 'neutral': ...",0.131136,"{'negative': 0.04868211783468723, 'neutral': 0...",0.818670,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.3331199288368225, 3.6129608154296875]",12,0.665226,1.537129


In [149]:
filtered_ama.to_pickle(os.path.join(DATA_PATH, 'filtered_ama.pkl'))
filtered_ama.to_csv(os.path.join(DATA_PATH, 'filtered_ama.csv'), index=False)

### xAI on default CB

In [150]:
cluster_xAI_ama_default = {}
for cluster in np.sort(filtered_ama['cluster'].unique()):

    text1 = filtered_ama[(filtered_ama['cluster'] == cluster) & (filtered_ama['label'] == 0.0)]['text'].tolist()
    text2 = filtered_ama[(filtered_ama['cluster'] == cluster) & (filtered_ama['label'] == 1.0)]['text'].tolist()

    prompt = generate_narrative_comparison_prompt(text1, text2)
    model = genai.GenerativeModel(model_name="gemini-2.5-flash")
    response = model.generate_content(prompt)
    # print(response.text)
    cluster_xAI_ama_default[cluster] = response_random.text

    parsed_llm_out = safe_json_load(response.text)
    with open(os.path.join(DATA_PATH, f"xAI/ama/response_default_cluster_{cluster}.json"), "w", encoding="utf-8") as f:
            json.dump(parsed_llm_out, f, ensure_ascii=False, indent=2)

In [227]:
with open(os.path.join(DATA_PATH, 'xAI/ama/response_default.pkl'), 'wb') as f:
    pickle.dump(cluster_xAI_ama_default, f)

cluster_xAI_ama_default

{np.int32(0): '```json\n[\n  {\n    "dimension_title": "Specific Policy Proposals to Holistic Economic Strategy",\n    "text1_sentence_count": "24 out of 289",\n    "text2_sentence_count": "33 out of 597",\n    "confidence_label_text1": "High",\n    "confidence_label_text2": "High",\n    "representation_text1": "Representative",\n    "representation_text2": "Representative",\n    "text1_number_contradicting_sentences": "0 out of 289",\n    "text2_number_contradicting_sentences": "0 out of 597",\n    "text1_evidence_sentences": [\n      "I will propose a joint approach that treats both programs alike and applies to them the same retirement ages and tax arrangements.",\n      "This joint proposal fully corrects the long-term actuarial deficit for Social Security and would also make a start on a solution for Medicare.",\n      "One of my goals is to standardize treatment across the programs, and I would do that by removing the $NUM,NUM cap on wages and salaries that are taxable for Social

In [223]:
clusters = np.sort(filtered_us['cluster'].unique())
analyze_xAI_files(clusters=clusters, folder_name="ama", cb_type="default")

([1, 0, 1, 0, 0, 0, 2, 2, 0, 0, 0],
 0.5806998939554613,
 0.5914373814041746,
 0.03719512195121952,
 0.041186635944700456)

### xAI on random CB

In [None]:
# filter dataframe to keep 5 texts closer to centroid for each random_label (CB)

closest_rows = []
# Loop over each unique cluster
for cluster_label in ama['cluster'].unique():
    # For each cluster, loop over both labels 0.0 and 1.0
    for label_value in [0.0, 1.0]:
        # Filter by cluster and label
        subset_df = ama[(ama['cluster'] == cluster_label) & (ama['random_label'] == label_value)]
        # Sort by distance to centroid
        sorted_subset = subset_df.sort_values(by='distance_to_centroid', ascending=True)
        # Take the top 20 closest rows
        closest_20 = sorted_subset.head(20)
        # Add to list
        closest_rows.append(closest_20)
# Combine all rows into a single DataFrame
filtered_ama_random = pandas.concat(closest_rows)

In [154]:
filtered_ama_random.to_pickle(os.path.join(DATA_PATH, 'filtered_ama_random.pkl'))
filtered_ama_random.to_csv(os.path.join(DATA_PATH, 'filtered_ama_random.csv'), index=False)

In [155]:
cluster_xAI_ama_random = {}
for cluster in np.sort(filtered_ama_random['cluster'].unique()):

        text1_random = filtered_ama_random[(filtered_ama_random['cluster'] == cluster) & (filtered_ama_random['random_label'] == 0.0)]['text'].tolist()
        text2_random = filtered_ama_random[(filtered_ama_random['cluster'] == cluster) & (filtered_ama_random['random_label'] == 1.0)]['text'].tolist()

        prompt = generate_narrative_comparison_prompt(text1=text1_random, text2=text2_random)
        model = genai.GenerativeModel(model_name="gemini-2.5-flash")
        response_random = model.generate_content(prompt)
        # print(response_random.text)
        cluster_xAI_ama_random[cluster] = response_random.text

        parsed_llm_out = safe_json_load(response_random.text)
        with open(os.path.join(DATA_PATH, f"xAI/ama/response_random_cluster_{cluster}.json"), "w", encoding="utf-8") as f:
                json.dump(parsed_llm_out, f, ensure_ascii=False, indent=2)

In [228]:
with open(os.path.join(DATA_PATH, 'xAI/ama/response_random.pkl'), 'wb') as f:
    pickle.dump(cluster_xAI_ama_random, f)

cluster_xAI_ama_random

{np.int32(0): '[]',
 np.int32(1): '```json\n[\n  {\n    "dimension_title": "Subjective Aesthetic Judgment to Objective Explanatory Detail",\n    "text1_sentence_count": "12 out of 20",\n    "text2_sentence_count": "9 out of 17",\n    "confidence_label_text1": "High",\n    "confidence_label_text2": "High",\n    "representation_text1": "Representative",\n    "representation_text2": "Representative",\n    "text1_number_contradicting_sentences": "0 out of 20",\n    "text2_number_contradicting_sentences": "0 out of 17",\n    "text1_evidence_sentences": [\n      "Pass on this one This movie started off funny and then d r a g g e d.",\n      "I didn\'t even finish it! Pass.",\n      "Magnificent movie !!",\n      "Magnífica película !!",\n      "Superbe film !!",\n      "Los actores son muy impresionante en The Green Mile y la cualidad de las imagenes son espléndida.",\n      "Una película que es necesario comprar !!!",\n      "too much dramatization and unbelieveable from time to time this m

In [224]:
clusters = np.sort(filtered_us['cluster'].unique())
analyze_xAI_files(clusters=clusters, folder_name="ama", cb_type="random")

([0, 2, 1, 0, 2, 1, 0, 0, 2, 0, 1],
 0.44898538961038953,
 0.4608447745478301,
 0.05175865800865801,
 0.023477812177502583)