In [2]:
import json
import sys

file_path = "visualization_analysis/enhanced_gallery.json"

# Read the JSON file. The encoding is pinned to UTF-8 so decoding does not
# depend on the platform's default locale encoding.
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)
print(f"Loaded data from {file_path}")

# Summarise the top-level structure, whichever container type was loaded.
if isinstance(data, dict):
    print("Top-level keys:", list(data.keys()))
elif isinstance(data, list):
    print(f"Loaded list with {len(data)} items")
    # Guard against indexing into an empty list.
    print("First item preview:", json.dumps(data[0], indent=2) if data else "List is empty")


Loaded data from visualization_analysis/enhanced_gallery.json
Top-level keys: ['gallery', 'track_to_person', 'person_counter', 'statistics', 'metadata']


In [2]:
# Report which container type holds the gallery, then preview its contents.
gallery = data['gallery']
print("Type of gallery:", type(gallery))
if isinstance(gallery, dict):
    # Show at most the first five keys.
    first_keys = list(gallery.keys())[:5]
    print("Gallery keys:", first_keys)
elif isinstance(gallery, list):
    print("Gallery size:", len(gallery))
    print("First item:", json.dumps(gallery[0], indent=2))


Type of gallery: <class 'dict'>
Gallery keys: ['Prachit', 'Ashutosh', 'Nayan']


In [3]:
# Preview at most the first five track -> person assignments.
print("Sample track_to_person mapping:")
sample_pairs = list(data['track_to_person'].items())[:5]
for track_id, person_id in sample_pairs:
    print(f"Track {track_id} → Person {person_id}")


Sample track_to_person mapping:
Track 1 → Person A
Track 2 → Person B
Track 3 → Person C
Track 4 → Person A
Track 5 → Person B


In [4]:
# Show the value stored under the 'person_counter' key.
person_counter = data['person_counter']
print("Person counter:", person_counter)


Person counter: 1


In [5]:
# Pretty-print the 'statistics' section as indented JSON.
statistics_json = json.dumps(data['statistics'], indent=2)
print("Statistics overview:", statistics_json)


Statistics overview: {
  "total_embeddings_stored": 47,
  "contexts_captured": [
    "forward_front",
    "left_front",
    "right_front",
    "steady_front",
    "backward_front"
  ]
}


In [6]:
# Pretty-print the 'metadata' section as indented JSON.
metadata_json = json.dumps(data['metadata'], indent=2)
print("Metadata:", metadata_json)


Metadata: {
  "max_embeddings_per_context": 20,
  "similarity_threshold": 0.6,
  "min_confidence": 0.3,
  "save_timestamp": "2025-07-09T17:58:11.777116"
}


In [7]:
# Walk every top-level section and print a short, type-aware preview of it.
for section in data:
    content = data[section]
    print(f"\n===== {section.upper()} =====")

    if isinstance(content, dict):
        print(f"{section} has {len(content)} keys")
        # Only the first three entries are shown, each cut to 100 characters.
        for entry_key, entry_value in list(content.items())[:3]:
            print(f"{entry_key}: {str(entry_value)[:100]}")
        continue

    if isinstance(content, list):
        print(f"{section} is a list with {len(content)} items")
        if content:
            print("First item:", json.dumps(content[0], indent=2))
        else:
            print("First item:", "Empty list")
        continue

    # Anything that is neither a dict nor a list is printed as-is.
    print(f"{section} = {content}")



===== GALLERY =====
gallery has 3 keys
A: {'person_name': 'A', 'embeddings_by_context': {'backward_front': [{'embedding': [0.06711713969707489
B: {'person_name': 'B', 'embeddings_by_context': {'right_front': [{'embedding': [-0.02666701190173626, 
C: {'person_name': 'C', 'embeddings_by_context': {'steady_front': [{'embedding': [-0.000404779188102111

===== TRACK_TO_PERSON =====
track_to_person has 5 keys
1: A
2: B
3: C

===== PERSON_COUNTER =====
person_counter = 1

===== STATISTICS =====
statistics has 2 keys
total_embeddings_stored: 47
contexts_captured: ['forward_front', 'left_front', 'right_front', 'steady_front', 'backward_front']

===== METADATA =====
metadata has 4 keys
max_embeddings_per_context: 20
similarity_threshold: 0.6
min_confidence: 0.3


In [8]:
from collections import defaultdict

# Pool every stored embedding vector under the context key it is filed under,
# across all people in the gallery.
embeddings_by_context = defaultdict(list)

for person_data in data['gallery'].values():
    per_context = person_data.get('embeddings_by_context', {})
    for context, records in per_context.items():
        # Keep only records that actually carry a non-empty 'embedding'.
        vectors = [record.get('embedding') for record in records]
        vectors = [vector for vector in vectors if vector]
        # A context is only registered once it has at least one vector.
        if vectors:
            embeddings_by_context[context].extend(vectors)

# Freeze into a plain dict so later lookups of unknown keys raise KeyError.
embeddings_by_context = dict(embeddings_by_context)

# One summary line per context.
for context in embeddings_by_context:
    print(f"{context}: {len(embeddings_by_context[context])} embeddings")


backward_front: 5 embeddings
forward_front: 3 embeddings
right_front: 27 embeddings
left_front: 11 embeddings
steady_front: 1 embeddings


In [15]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def get_person_embeddings(person_id):
    """Return the 'embeddings_by_context' entry stored for one gallery person.

    Args:
        person_id: Key of the person inside ``data['gallery']``.

    Returns:
        Whatever is stored under that person's 'embeddings_by_context' key
        (the calling code treats it as a mapping of context -> records).

    Note:
        Reads the notebook-level ``data`` object; a missing key propagates
        as the lookup error raised by the underlying container.
    """
    person_entry = data['gallery'][person_id]
    return person_entry['embeddings_by_context']

def compute_average_similarity(person_a_embeddings, person_b_embeddings):
    """Average pairwise cosine similarity between two people, per shared context.

    Args:
        person_a_embeddings: Mapping of context name -> list of records, where
            each record is a dict holding a vector under the 'embedding' key.
        person_b_embeddings: Same structure for the second person.

    Returns:
        dict mapping each context that both people have (with at least one
        embedding on each side) to the mean of the full pairwise
        cosine-similarity matrix for that context. Keys follow the order in
        which the contexts appear in ``person_a_embeddings``. The dict is
        empty when nothing usable is shared.
    """
    similarities = {}

    # Walk person A's keys in order instead of iterating a set intersection,
    # so the order of the result is the same on every run.
    shared_contexts = [ctx for ctx in person_a_embeddings if ctx in person_b_embeddings]

    for context in shared_contexts:
        a_vecs = [e['embedding'] for e in person_a_embeddings[context]]
        b_vecs = [e['embedding'] for e in person_b_embeddings[context]]

        # An empty list would become a 1-D empty array, which
        # cosine_similarity cannot accept; there is nothing to compare anyway.
        if not a_vecs or not b_vecs:
            continue

        # Rows of the matrix are A's vectors, columns are B's vectors.
        sim_matrix = cosine_similarity(np.array(a_vecs), np.array(b_vecs))
        similarities[context] = sim_matrix.mean()

    return similarities

# Fetch the per-context embeddings of the two people being compared.
person_a_embeddings = get_person_embeddings('Prachit')
person_b_embeddings = get_person_embeddings('Ashutosh')

# Per-context average similarity between them.
similarity_scores = compute_average_similarity(person_a_embeddings, person_b_embeddings)

# Report one line per shared context, or a notice when there is none.
if similarity_scores:
    for context in similarity_scores:
        score = similarity_scores[context]
        print(f"Context '{context}': Average Cosine Similarity = {score:.4f}")
else:
    print("No shared contexts between Person A and Person B.")


Context 'backward_front': Average Cosine Similarity = 0.8965
Context 'right_front': Average Cosine Similarity = 0.8372
Context 'left_front': Average Cosine Similarity = 0.8942


In [3]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import itertools

def get_person_embeddings(person_id):
    """Look up one person's 'embeddings_by_context' entry in the gallery.

    Args:
        person_id: Key of the person inside ``data['gallery']``.

    Returns:
        The object stored under that person's 'embeddings_by_context' key.

    Note:
        Depends on the notebook-level ``data`` object being loaded; lookup
        errors for unknown keys are not caught here.
    """
    gallery = data['gallery']
    return gallery[person_id]['embeddings_by_context']

def compute_avg_context_similarity(embeddings_a, embeddings_b):
    """Collapse per-context cosine similarities into a single score.

    For every context present in both mappings, the mean of the full pairwise
    cosine-similarity matrix is taken; the returned value is the unweighted
    mean of those per-context means.

    Args:
        embeddings_a: Mapping of context name -> list of records, each a dict
            with a vector under the 'embedding' key.
        embeddings_b: Same structure for the other person.

    Returns:
        The mean of the per-context averages, or None when no shared context
        has embeddings on both sides.
    """
    per_context_means = []

    for ctx in set(embeddings_a.keys()) & set(embeddings_b.keys()):
        left = [record['embedding'] for record in embeddings_a[ctx]]
        right = [record['embedding'] for record in embeddings_b[ctx]]

        # Only contexts with vectors on both sides contribute to the score.
        if left and right:
            pairwise = cosine_similarity(np.array(left), np.array(right))
            per_context_means.append(pairwise.mean())

    # None signals that there was nothing to compare.
    return np.mean(per_context_means) if per_context_means else None

# Get all person IDs
person_ids = list(data['gallery'].keys())

# Build the pairwise similarity matrix as a nested dict:
# similarity_matrix[person1][person2] -> rounded score, or None when the two
# people share no usable context.
# NOTE(review): the diagonal compares a person's embeddings with themselves,
# so each vector is also paired with itself in that average.
similarity_matrix = {}

for person1 in person_ids:
    # Looked up once per row instead of once per cell.
    emb1 = get_person_embeddings(person1)
    similarity_matrix[person1] = {}
    for person2 in person_ids:
        emb2 = get_person_embeddings(person2)
        sim = compute_avg_context_similarity(emb1, emb2)
        similarity_matrix[person1][person2] = round(sim, 4) if sim is not None else None

# Print matrix. The column width is derived from the longest name (and is at
# least wide enough for a value such as "0.1234") so that headers, row labels
# and cells stay aligned regardless of how long the names are.
cell_width = max([len(str(pid)) for pid in person_ids] + [6])

print("\nSimilarity Matrix:")
header_cells = [f"{'':>{cell_width}}"] + [f"{str(pid):>{cell_width}}" for pid in person_ids]
print("  ".join(header_cells))
for pid1 in person_ids:
    row_cells = [f"{str(pid1):>{cell_width}}"]
    for pid2 in person_ids:
        val = similarity_matrix[pid1][pid2]
        # Always show four decimals so e.g. 0.819 is rendered as 0.8190.
        text = "N/A" if val is None else f"{val:.4f}"
        row_cells.append(f"{text:>{cell_width}}")
    print("  ".join(row_cells))



Similarity Matrix:
      Prachit  Ashutosh  Nayan
Prachit  0.9188  0.819  0.8517  
Ashutosh  0.819  0.9378  0.8361  
Nayan  0.8517  0.8361  0.9233  
