## Visualize Blob in a Single Protein

This notebook extracts the blob composition from the result file and creates ChimeraX commands for visualization.

In [1]:
import json

In [3]:
# Location of the interpretability results file to inspect.
inter_json_path = "outputs/codebook-resume/enzymecommission/structure/2025-09-19-12-42-17/interpretability/partoken_final.json"
# Read through a context manager so the file handle is closed as soon as
# parsing finishes instead of lingering for the lifetime of the kernel.
with open(inter_json_path, "r") as inter_file:
    inter_dict = json.load(inter_file)
print(inter_dict.keys())

dict_keys(['protein_analyses', 'aggregated_stats', 'metadata'])


### Aggregated Results

In [8]:
# Print each aggregated statistic as "<name> <value>", one per line.
aggregated_stats = inter_dict['aggregated_stats']
for stat_name in aggregated_stats:
    print(stat_name, aggregated_stats[stat_name])

total_proteins 1556
accuracy 0.6902313624678663
avg_confidence 0.6735339567907979
avg_clusters_per_protein 4.987146529562982
avg_importance_concentration 0.33428449563289575
avg_coverage 0.347267196728633
avg_cluster_size 14.575454785273624
avg_max_importance 0.4698982443194837
correct_vs_incorrect {'correct': {'count': 1074, 'avg_confidence': 0.7069598569827816, 'avg_concentration': 0.335618876538543}, 'incorrect': {'count': 482, 'avg_confidence': 0.5990538389356305, 'avg_concentration': 0.33131120705890166}}


### Single Protein Result 

In [12]:
# Show every field recorded for the first protein in the test set.
first_protein = inter_dict['protein_analyses'][0]
for field_name in first_protein:
    print(field_name, first_protein[field_name])

protein_idx 0
prediction 2
true_label 2
probabilities [0.0259123332798481, 0.11287898570299149, 0.7317851185798645, 0.06568598747253418, 0.04295209050178528, 0.00814878847450018, 0.01263667456805706]
confidence 0.7317851185798645
is_correct True
num_valid_clusters 5
cluster_sizes [15.0, 15.0, 15.0, 15.0, 8.0]
importance_scores [0.2417275756597519, 0.25, 0.2582629919052124, 0.25, 9.436298569198698e-06]
importance_concentration 0.1387512445609932
all_clusters {'indices': [2, 1, 3, 0, 4], 'importance_scores': [0.2582629919052124, 0.25, 0.25, 0.2417275756597519, 9.436298569198698e-06], 'sizes': [15.0, 15.0, 15.0, 15.0, 8.0]}
cluster_composition {'2': [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 96, 99, 100, 101], '1': [62, 63, 64, 65, 66, 67, 71, 72, 73, 74, 75, 76, 77, 78, 79], '3': [41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 57], '0': [1, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40], '4': [58, 59, 60, 61, 68, 69, 70, 80]}
batch_idx 0
global_protein_idx 0


## Visualizing Blob Partition

In [9]:
# Hex colors used to paint the clusters; looked up by integer cluster id.
color_list = [
    '#ff4500',  # orange-red
    '#90ee90',  # light green
    '#40e0d0',  # turquoise
    '#daa520',  # goldenrod
    '#ff69b4',  # hot pink
]

def generate_chimerax_select_commands(cluster_composition, model_id=1, chain='A', residue_offset=0):
    """
    Generate ChimeraX select commands for each cluster based on residue lists.

    Each command has the form ``select #<model_id>/<chain>:<r1>,<r2>,...`` with
    the residue numbers written in ascending order.

    Args:
        cluster_composition (dict): Dictionary with cluster names as keys and lists of residue numbers as values.
        model_id (int): Model number in ChimeraX (default 1).
        chain (str): Chain identifier (default 'A').
        residue_offset (int): Value added to every residue number before it is
            written into the command (default 0, i.e. numbers are used as given).
            NOTE(review): the residue lists in the result file look like 0-based
            positions (residue 0 occurs), whereas ChimeraX selects by the
            structure's own residue numbering; pass e.g. ``residue_offset=1`` if
            the structure is numbered from 1 -- confirm against the loaded model.

    Returns:
        dict: Dictionary with cluster names as keys and select commands as values.
            Clusters whose residue list is empty are omitted, because a residue
            specifier with nothing after the ':' is not a usable selection.
    """
    commands = {}
    for cluster, residues in cluster_composition.items():
        # An empty cluster would yield the incomplete specifier "#m/c:"; skip it.
        if not residues:
            continue
        shifted_sorted = sorted(residue + residue_offset for residue in residues)
        residue_str = ','.join(map(str, shifted_sorted))
        commands[cluster] = f"select #{model_id}/{chain}:{residue_str}"
    return commands


In [13]:
# Index (within protein_analyses) of the protein to visualize.
item_index = 2
protein_analysis = inter_dict['protein_analyses']
# Index with item_index (not a literal) so changing it above really switches
# the protein being visualized.
protein = protein_analysis[item_index]
# Show the fields of the protein actually selected in this cell.
print(protein.keys())
# cluster composition
print(protein['cluster_composition'])
cluster_composition = protein['cluster_composition']
# NOTE(review): the protein index is also passed as the ChimeraX model id;
# confirm it matches the id of the model opened in ChimeraX.
commands = generate_chimerax_select_commands(cluster_composition, item_index)
# Paint the whole model gray first so only clustered residues stand out.
print(f"color #{item_index} gray")
# Cluster keys are numeric strings; sort them numerically, not lexically.
for cluster, cmd in sorted(commands.items(), key=lambda x: int(x[0])):
    # Wrap around the palette so cluster ids beyond its length cannot raise
    # IndexError (colors repeat in that case).
    color = color_list[int(cluster) % len(color_list)]
    print(f"Cluster {cluster}:\n {cmd}")
    print(f"color sel {color}")

dict_keys(['protein_idx', 'prediction', 'true_label', 'probabilities', 'confidence', 'is_correct', 'num_valid_clusters', 'cluster_sizes', 'importance_scores', 'importance_concentration', 'all_clusters', 'cluster_composition', 'batch_idx', 'global_protein_idx'])
{'2': [11, 12, 29, 30, 33, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53], '4': [123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137], '0': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 40, 41, 42, 43], '1': [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 57], '3': [60, 61, 62, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 93]}
color #2 gray
Cluster 0:
 select #2/A:0,1,2,3,4,5,6,7,8,9,10,40,41,42,43
color sel #ff4500
Cluster 1:
 select #2/A:13,14,15,16,17,18,19,20,21,22,23,24,25,26,57
color sel #90ee90
Cluster 2:
 select #2/A:11,12,29,30,33,44,45,46,47,48,49,50,51,52,53
color sel #40e0d0
Cluster 3:
 select #2/A:60,61,62,67,68,69,70,71,72,73,74,75,76,77,93
color sel #daa520
Cluster 4:
 select #2/A:123,124,125,126,127,1