# visualize surprisal development


In [25]:

import json
import typing as t
from collections import Counter
from pathlib import Path

import numpy as np

class JsonProcessor:
    """Helpers for writing and reading JSON files that may contain NumPy values.

    ``convert_numpy_types`` normalises a nested structure into plain Python
    containers and scalars, ``save_json`` applies it before ``json.dump`` and
    ``load_json`` reads a file back with ``json.load``.
    """

    @staticmethod
    def _as_hashable(value: t.Any) -> t.Any:
        """Turn (nested) lists back into tuples so the value can be a dict key."""
        if isinstance(value, list):
            return tuple(JsonProcessor._as_hashable(item) for item in value)
        return value

    @staticmethod
    def convert_numpy_types(obj: t.Any) -> t.Any:
        """Recursively convert NumPy types and custom objects to standard Python types.

        Rules are tried in this order:

        * ``None`` is returned unchanged.
        * An object whose class is named ``SearchResult`` becomes a dict with
          ``neurons``, ``delta_loss`` and, when present, ``is_target_size``.
        * NumPy arrays become (nested) lists; NumPy floating, integer, boolean
          and complex scalars become ``float``, ``int``, ``bool`` and
          ``complex``.
        * ``Path`` objects become strings.
        * Dicts are rebuilt with converted keys and values; lists, tuples and
          sets become lists.
        * Objects with a callable ``to_dict`` are converted via its result;
          other non-class objects with ``__dict__`` via that mapping.
        * Anything else is returned unchanged.

        Args:
            obj: Arbitrary (possibly nested) value.

        Returns:
            The converted structure. Complex values are returned as Python
            ``complex``, which ``json.dump`` cannot encode.
        """
        convert = JsonProcessor.convert_numpy_types

        # Handle None
        if obj is None:
            return None

        # Handle SearchResult objects (matched by class name, not by import)
        if hasattr(obj, "__class__") and obj.__class__.__name__ == "SearchResult":
            result_dict = {
                "neurons": convert(obj.neurons),
                "delta_loss": convert(obj.delta_loss),
            }
            if hasattr(obj, "is_target_size"):
                # Converted as well so a NumPy boolean flag does not reach json.dump.
                result_dict["is_target_size"] = convert(obj.is_target_size)
            return result_dict

        # Handle NumPy arrays
        if isinstance(obj, np.ndarray):
            return convert(obj.tolist())

        # Handle NumPy scalars; the abstract base classes cover every width.
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.complexfloating):
            return complex(obj)

        # Handle Path objects
        if isinstance(obj, Path):
            return str(obj)

        # Handle dictionaries
        if isinstance(obj, dict):
            # Keys must stay hashable: a tuple key is converted element-wise
            # and then turned back into a tuple instead of a list.
            return {JsonProcessor._as_hashable(convert(k)): convert(v) for k, v in obj.items()}

        # Handle lists, tuples and sets (all become lists)
        if isinstance(obj, (list, tuple, set)):
            return [convert(item) for item in obj]

        # Handle objects with a to_dict method
        if hasattr(obj, "to_dict") and callable(obj.to_dict):
            return convert(obj.to_dict())

        # Handle objects with __dict__ attribute (instances, not classes)
        if hasattr(obj, "__dict__") and not isinstance(obj, type):
            return convert(obj.__dict__)

        # Return the object as is if no conversion is needed
        return obj

    @classmethod
    def save_json(cls, data: dict, filepath: t.Union[str, Path]) -> None:
        """Convert ``data`` with ``convert_numpy_types`` and write it as indented JSON.

        Missing parent directories of ``filepath`` are created first.

        Args:
            data: Nested structure to serialise.
            filepath: Destination file; a plain string path is accepted too.
        """
        filepath = Path(filepath)
        filepath.parent.mkdir(parents=True, exist_ok=True)
        converted_data = cls.convert_numpy_types(data)
        # Explicit UTF-8 so the file is written with the encoding load_json reads.
        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(converted_data, f, indent=2)

    @staticmethod
    def load_json(filepath: Path) -> dict:
        """Load a UTF-8 encoded JSON file and return the parsed content."""
        with open(filepath, encoding="utf-8") as f:
            return json.load(f)


In [36]:
# JSON file loaded by the next cell; hard-coded absolute path, adjust per machine.
data_path = Path("/Users/jliu/workspace/RAG/results/classify/70.json")


In [37]:
# Load the JSON at data_path, then record every key of "neuron_features"
# together with the length of the value stored under it.
data = JsonProcessor.load_json(data_path)
neuron_features = data["neuron_features"]
index_lst = list(neuron_features)
len_lst = [len(neuron_features[key]) for key in index_lst]

# Displayed cell value: how many entries have each length.
dict(Counter(len_lst))

{112: 1660, 448: 2, 336: 16, 224: 166}

In [38]:
# Show the top-level keys of the loaded JSON content.
data.keys()

dict_keys(['step_num', 'neuron_features', 'delta_losses', 'metadata'])

In [28]:
import pandas as pd

In [29]:
# CSV file read with pandas in the next cell; hard-coded absolute path, adjust per machine.
df_path = "/Users/jliu/workspace/RAG/results/ablations/entropy_df.csv"

In [30]:
# Read only the first 100 rows (nrows=100), which is enough to inspect the columns.
# NOTE(review): the name `data` is also assigned by the JSON-loading cell, so
# whichever of the two cells ran last determines what `data` refers to.
data = pd.read_csv(df_path, nrows=100)

In [33]:
# Print each column label of `data` on its own line.
for column_name in data.columns:
    print(column_name)

Unnamed: 0
str_tokens
unique_token
context
batch
pos
label
token_id
entropy
top_logit
pred
loss
top_logp
ln_final_scale
rank_of_correct_token
correct_token_rank
pred_in_top1
pred_in_top5
5.0_activation
5.1_activation
5.2_activation
5.3_activation
5.4_activation
5.5_activation
5.6_activation
5.7_activation
5.8_activation
5.9_activation
5.10_activation
5.11_activation
5.12_activation
5.13_activation
5.14_activation
5.15_activation
5.16_activation
5.17_activation
5.18_activation
5.19_activation
5.20_activation
5.21_activation
5.22_activation
5.23_activation
5.24_activation
5.25_activation
5.26_activation
5.27_activation
5.28_activation
5.29_activation
5.30_activation
5.31_activation
5.32_activation
5.33_activation
5.34_activation
5.35_activation
5.36_activation
5.37_activation
5.38_activation
5.39_activation
5.40_activation
5.41_activation
5.42_activation
5.43_activation
5.44_activation
5.45_activation
5.46_activation
5.47_activation
5.48_activation
5.49_activation
5.50_activation
5.51_ac