## Gaze Analysis from FacesDir JSON files

In [7]:
# Step 1: Extract + flatten JSONs from tar archives.
# Step 2: Merge JSONs per participant with metadata.
# Step 3: Load merged JSONs into nested DataFrames.
# Step 4: Compute gaze vectors and nearest-person attention.
# Step 5: Aggregate dwell time per participant.

import json
import pandas as pd
import numpy as np
from pathlib import Path
from collections import Counter
import math
import re
import tarfile
import shutil

# --- Config ---
FPS = int(1800 / 120)  # frames per second
base_path = Path("/home/liubov/Bureau/new")
directory = '8-5-2024_#18_INDIVIDUAL_[12]/processed by chunks'
json_folder = 'FacesDir'
directory_path = base_path / directory / json_folder
mapping_csv = base_path / directory / "first_id_mapping_camera_a.csv"

# --- Load mapping ---
print("Loading and preparing data mapping...")
try:
    # Use a hardcoded mapping to ensure correct IDs
    manual_mapping = {'1': 'Patient_1', '2': 'Therapist_1', '3': 'Therapist_2'}
    mapping_df = pd.DataFrame(manual_mapping.items(), columns=['original_id', 'updated_id'])

    # Create ID to camera mapping based on hardcoded values
    id_to_camera = {1: 'Camera_A', 2: 'Camera_B', 3: 'Camera_C'} # Assuming a simple camera mapping
    print("Mapping prepared using hardcoded values.")

except Exception as e:
    print(f"Error creating ID to camera mapping: {e}")
    id_to_camera = {}
    mapping_df = pd.DataFrame(columns=['original_id', 'updated_id'])

# ---------------- 1. Extract tar.gz and flatten Face JSONs ----------------
print("\nStep 1: Extracting and flattening JSONs from archives...")
face_files_with_id = []
chunk_dirs = [d for d in directory_path.iterdir() if d.is_dir()]

for chunk_dir in chunk_dirs:
    for tar_file in chunk_dir.glob("*.tar.gz"):
        archive_name = tar_file.stem.strip()
        try:
            archive_id = int(archive_name)
        except ValueError:
            number_match = re.search(r'(\d+)', archive_name)
            if number_match:
                archive_id = int(number_match.group(1))
            else:
                continue

        extract_dir = chunk_dir / f"{tar_file.stem}_extracted"
        extract_dir.mkdir(exist_ok=True)
        try:
            with tarfile.open(tar_file, "r:gz") as tar:
                for member in tar.getmembers():
                    member.path = Path(member.name).name
                    tar.extract(member, path=extract_dir)
        except Exception as e:
            continue

        for jf in extract_dir.rglob("*.json"):
            final_path = directory_path / jf.name
            if final_path.exists():
                final_path.unlink()
            shutil.move(str(jf), final_path)
            face_files_with_id.append((final_path, archive_id))

        shutil.rmtree(extract_dir)
print(f"Extracted a total of {len(face_files_with_id)} JSON files.")

# ---------------- 2. Merge JSONs per participant with metadata ----------------
def get_frame_number_from_face_file(file_path):
    name = file_path.name.strip()
    frame_patterns = [
        r'frame_(\d+)\.json$', r'(\d+)\.json$', r'face_(\d+)\.json$', r'faces_(\d+)\.json$'
    ]
    for pattern in frame_patterns:
        m = re.search(pattern, name)
        if m:
            return int(m.group(1))
    return float('inf')

def frame_to_timestamp(frame_number, fps=FPS):
    return frame_number / fps if fps > 0 else 0

print("\nStep 2: Merging JSON data per participant...")
faces_by_id = {}
for file_path, archive_id in face_files_with_id:
    if archive_id not in faces_by_id:
        faces_by_id[archive_id] = []
    faces_by_id[archive_id].append(file_path)

for id_key in faces_by_id:
    faces_by_id[id_key].sort(key=get_frame_number_from_face_file)

merged_faces_folder = directory_path / "merged_faces"
merged_faces_folder.mkdir(exist_ok=True)

for archive_id, file_list in faces_by_id.items():
    merged_face_data = []
    camera_id = id_to_camera.get(archive_id, f"unknown_camera_{archive_id}")
    for file_path in file_list:
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
                frame_value = get_frame_number_from_face_file(file_path)
                timestamp = frame_to_timestamp(frame_value, FPS)
                metadata = {
                    'participant_id': archive_id,
                    'camera_id': camera_id,
                    'frame_number': frame_value,
                    'timestamp_seconds': timestamp,
                    'fps': FPS,
                    'source_file': file_path.name
                }
                if isinstance(data, dict):
                    data.update(metadata)
                    if 'faces' in data and isinstance(data['faces'], list):
                        for i, face in enumerate(data['faces']):
                            if isinstance(face, dict):
                                face.update(metadata)
                                face['face_id'] = f"{archive_id}_{frame_value}_{i}"
                    elif 'detections' in data and isinstance(data['detections'], list):
                        for i, detection in enumerate(data['detections']):
                            if isinstance(detection, dict):
                                detection.update(metadata)
                                detection['face_id'] = f"{archive_id}_{frame_value}_{i}"
                    # This is the new check for the specific file format you provided
                    elif 'face_keypoints_3d' in data and isinstance(data['face_keypoints_3d'], list) and len(data['face_keypoints_3d']) > 0:
                        data['face_id'] = f"{archive_id}_{frame_value}_0"
                elif isinstance(data, list):
                    for i, entry in enumerate(data):
                        if isinstance(entry, dict):
                            entry.update(metadata)
                            entry['face_id'] = f"{archive_id}_{frame_value}_{i}"
                merged_face_data.append(data)
        except (json.JSONDecodeError, UnicodeDecodeError, Exception):
            continue
    if merged_face_data:
        output_file = merged_faces_folder / f"faces_id_{archive_id}_camera_{camera_id}_merged.json"
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(merged_face_data, f, indent=4, ensure_ascii=False)
print(f"Created {len(faces_by_id)} merged files.")

# ---------------- 3. Compute and save summary statistics ----------------
print("\nStep 3: Computing summary statistics...")
summary_stats = {}
for archive_id, file_list in faces_by_id.items():
    camera_id = id_to_camera.get(archive_id, f"unknown_camera_{archive_id}")
    total_frames = len(file_list)
    frames_with_faces = 0
    total_faces_detected = 0
    max_faces_in_frame = 0
    for file_path in file_list:
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
                faces_in_frame = 0
                if isinstance(data, dict):
                    if 'faces' in data and isinstance(data['faces'], list):
                        faces_in_frame = len(data['faces'])
                    elif 'detections' in data and isinstance(data['detections'], list):
                        faces_in_frame = len(data['detections'])
                    # This is the new logic to handle your file format
                    elif 'face_keypoints_3d' in data and isinstance(data['face_keypoints_3d'], list) and len(data['face_keypoints_3d']) > 0:
                        faces_in_frame = 1 # We assume one face per file in this format
                elif isinstance(data, list):
                    faces_in_frame = len(data)
                if faces_in_frame > 0:
                    frames_with_faces += 1
                    total_faces_detected += faces_in_frame
                    max_faces_in_frame = max(max_faces_in_frame, faces_in_frame)
        except Exception:
            continue
    avg_faces_per_frame = total_faces_detected / frames_with_faces if frames_with_faces > 0 else 0
    summary_stats[f"id_{archive_id}"] = {
        "participant_id": archive_id,
        "camera_id": camera_id,
        "total_frames": total_frames,
        "frames_with_faces": frames_with_faces,
        "total_faces_detected": total_faces_detected,
        "max_faces_in_frame": max_faces_in_frame,
        "avg_faces_per_frame": round(avg_faces_per_frame, 2),
        "face_detection_rate": round(frames_with_faces / total_frames, 4) if total_frames > 0 else 0,
        "fps_config": FPS
    }
print("Summary statistics computed.")

# ---------------- 4. Cleanup intermediate JSONs ----------------
print("\nStep 4: Cleaning up intermediate files...")
all_face_files = [file for file_list in faces_by_id.values() for file in file_list]
for file_path in all_face_files:
    if file_path.exists():
        file_path.unlink()
print("Intermediate files removed.")

# ---------------- 5. Load merged JSONs into nested DataFrames and compute gaze vectors ----------------
print("\nStep 5: Loading data into DataFrames and computing gaze metrics...")
def load_and_process(filepath):
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)
    df = pd.DataFrame(data)
    for col in ["camera_id", "fps", "source_file"]:
        if col in df.columns:
            df.drop(columns=col, inplace=True)
    keypoint_cols = ["eye_right_keypoints_3d", "eye_left_keypoints_3d", "face_keypoints_3d"]
    
    def make_tuple_list(values):
        if not isinstance(values, list) or not values:
            return []
        
        # Check if the list contains a nested list or tuple, which is the error
        if isinstance(values[0], (list, tuple)):
            # Flatten the list if it's nested
            flat_list = [item for sublist in values for item in sublist]
        else:
            flat_list = values
        
        if len(flat_list) % 3 != 0:
            return []
            
        return [(flat_list[i], flat_list[i+1], flat_list[i+2]) for i in range(0, len(flat_list), 3)]

    processed = pd.DataFrame()
    for col in keypoint_cols:
        processed[col] = df[col].apply(make_tuple_list)
    processed["participant_id"] = df["participant_id"]
    processed["frame_number"] = df["frame_number"]
    processed["timestamp_seconds"] = df["timestamp_seconds"]
    return processed

merged_folder = directory_path / "merged_faces"
all_jsons = list(merged_folder.glob("*.json"))
processed_dfs = {}

mapping_dict = dict(zip(mapping_df['original_id'], mapping_df['updated_id']))
for original_id_str, updated_id_str in mapping_dict.items():
    try:
        original_id_int = int(original_id_str)
        matched_files = [f for f in all_jsons if f"faces_id_{original_id_int}_" in f.name]
        if matched_files:
            processed_dfs[updated_id_str] = load_and_process(matched_files[0])
    except ValueError:
        continue



Loading and preparing data mapping...
Mapping prepared using hardcoded values.

Step 1: Extracting and flattening JSONs from archives...
Extracted a total of 35376 JSON files.

Step 2: Merging JSON data per participant...
Created 3 merged files.

Step 3: Computing summary statistics...
Summary statistics computed.

Step 4: Cleaning up intermediate files...
Intermediate files removed.

Step 5: Loading data into DataFrames and computing gaze metrics...
Gaze metrics computed and attention log created.

Step 6: Aggregating dwell times...
Dwell times aggregated.

FINAL ANALYSIS REPORT
Summary of Face Detection:
  ID 1 (Camera Camera_A): 11792 frames, 11792 with faces (100.0%), avg 1.0 faces/frame
  ID 2 (Camera Camera_B): 11792 frames, 11792 with faces (100.0%), avg 1.0 faces/frame
  ID 3 (Camera Camera_C): 11792 frames, 11792 with faces (100.0%), avg 1.0 faces/frame

Patient_1 Gaze Dwell Time on Others:
  - Therapist_1: 786.13 s (100.0%)


In [None]:
def compute_eye_center(eye_landmarks):
    if not eye_landmarks:
        return np.array([0.0, 0.0, 0.0])
    return np.mean(np.array(eye_landmarks), axis=0)

def compute_gaze_vector(eye_landmarks, face_landmarks):
    eye_center = compute_eye_center(eye_landmarks)
    
    if not face_landmarks or len(face_landmarks) <= 30:
        return np.array([0.0, 0.0, 0.0]), eye_center

    try:
        nose_tip = np.array(face_landmarks[30])  # adjust index as needed
        gaze_vec = nose_tip - eye_center
        norm = np.linalg.norm(gaze_vec)
        if norm > 0:
            gaze_vec /= norm
        return gaze_vec, eye_center
    except:
        return np.array([0.0, 0.0, 0.0]), eye_center

def nearest_person(eye_center, gaze_vec, other_faces):
    min_dist = float("inf")
    nearest_name = None
    for name, center in other_faces.items():
        t = np.dot(center - eye_center, gaze_vec)
        proj = eye_center + t * gaze_vec
        dist = np.linalg.norm(proj - center)
        if dist < min_dist:
            min_dist = dist
            nearest_name = name
    return nearest_name

patient_id = 'Patient_1'
if patient_id not in processed_dfs:
    raise ValueError(f"{patient_id} not found in processed DataFrames. Check your mapping.")

patient_df = processed_dfs[patient_id]
frame_numbers = patient_df['frame_number'].values
attention_log = []

for idx, frame_no in enumerate(frame_numbers):
    face_centers = {}
    for name, df in processed_dfs.items():
        landmarks = df['face_keypoints_3d'].iloc[idx]
        face_centers[name] = np.mean(np.array(landmarks), axis=0) if landmarks else np.array([0.0, 0.0, 0.0])

    gaze_vec_r, eye_center_r = compute_gaze_vector(
        patient_df['eye_right_keypoints_3d'].iloc[idx],
        patient_df['face_keypoints_3d'].iloc[idx]
    )
    gaze_vec_l, eye_center_l = compute_gaze_vector(
        patient_df['eye_left_keypoints_3d'].iloc[idx],
        patient_df['face_keypoints_3d'].iloc[idx]
    )
    
    if gaze_vec_r.size == 0 or gaze_vec_l.size == 0:
        gaze_vec = np.array([0.0, 0.0, 0.0])
        eye_center = np.array([0.0, 0.0, 0.0])
    else:
        gaze_vec = (gaze_vec_r + gaze_vec_l) / 2
        norm = np.linalg.norm(gaze_vec)
        if norm > 0:
            gaze_vec /= norm
        eye_center = (eye_center_r + eye_center_l) / 2

    other_faces = {k:v for k,v in face_centers.items() if k != patient_id}
    target_person = nearest_person(eye_center, gaze_vec, other_faces)
    attention_log.append(target_person)
print("Gaze metrics computed and attention log created.")

# ---------------- 6. Aggregate dwell time per participant ----------------
print("\nStep 6: Aggregating dwell times...")
attention_counts = Counter(attention_log)
attention_time_seconds = {p: c / FPS for p, c in attention_counts.items() if p is not None}
print("Dwell times aggregated.")

# ---------------- Final Report ----------------
print("\n" + "="*60)
print("FINAL ANALYSIS REPORT")
print("="*60)
print("Summary of Face Detection:")
for archive_id in sorted(faces_by_id.keys()):
    if f"id_{archive_id}" in summary_stats:
        stats = summary_stats[f"id_{archive_id}"]
        camera = stats['camera_id']
        print(f"  ID {archive_id} (Camera {camera}): {stats['total_frames']} frames, "
              f"{stats['frames_with_faces']} with faces ({stats['face_detection_rate']:.1%}), "
              f"avg {stats['avg_faces_per_frame']:.1f} faces/frame")

print(f"\n{patient_id} Gaze Dwell Time on Others:")
total_gaze_time = sum(attention_time_seconds.values())
for p, t in attention_time_seconds.items():
    percentage = (t / total_gaze_time) * 100 if total_gaze_time > 0 else 0
    print(f"  - {p}: {t:.2f} s ({percentage:.1f}%)")
print("="*60)

In [8]:
participant_presence = []

for idx, frame_no in enumerate(frame_numbers):
    visible_people = []
    for name, df in processed_dfs.items():
        if idx < len(df):
            landmarks = df['face_keypoints_3d'].iloc[idx]
            if isinstance(landmarks, list) and len(landmarks) > 0:
                visible_people.append(name)
    participant_presence.append((frame_no, visible_people))

# Summary: how many frames included each participant?
presence_counts = Counter([p for _, plist in participant_presence for p in plist])
print("\nParticipant presence across frames:")
for p, count in presence_counts.items():
    print(f"  - {p}: {count} frames")



Participant presence across frames:
  - Patient_1: 11792 frames
  - Therapist_1: 11792 frames
  - Therapist_2: 11792 frames


In [9]:
all_gaze_vectors = []
for idx in range(len(frame_numbers)):
    gaze_vec_r, _ = compute_gaze_vector(
        patient_df['eye_right_keypoints_3d'].iloc[idx],
        patient_df['face_keypoints_3d'].iloc[idx]
    )
    gaze_vec_l, _ = compute_gaze_vector(
        patient_df['eye_left_keypoints_3d'].iloc[idx],
        patient_df['face_keypoints_3d'].iloc[idx]
    )
    gaze_vec = (gaze_vec_r + gaze_vec_l) / 2
    all_gaze_vectors.append(gaze_vec)

gaze_array = np.array(all_gaze_vectors)
print("Gaze vector standard deviation across frames:", gaze_array.std(axis=0))

Gaze vector standard deviation across frames: [0.1631128  0.10127666 0.30778997]


In [10]:
def compute_mean_std(keypoints_series):
    values = []
    for item in keypoints_series:
        if isinstance(item, list) and item:
            values.append(np.mean(np.array(item), axis=0))
    values = np.array(values)
    return values.mean(axis=0), values.std(axis=0)

print("\n[DEBUG] Patient_1 keypoint stats:")
eye_r_mean, eye_r_std = compute_mean_std(patient_df['eye_right_keypoints_3d'])
eye_l_mean, eye_l_std = compute_mean_std(patient_df['eye_left_keypoints_3d'])
face_mean, face_std = compute_mean_std(patient_df['face_keypoints_3d'])

print(f"  - Right Eye STD: {eye_r_std}")
print(f"  - Left Eye STD:  {eye_l_std}")
print(f"  - Face STD:      {face_std}")



[DEBUG] Patient_1 keypoint stats:
  - Right Eye STD: [ 84.06365952 133.25493264 545.79022674]
  - Left Eye STD:  [ 79.40702148 144.38493844 557.09568566]
  - Face STD:      [ 89.12789506 150.22664782 644.61334595]


In [16]:
for idx in [100, 500, 1000]:
    print(f"\nFrame {frame_numbers[idx]}")
    print(f"  Eye center: {eye_center}")
    print(f"  Gaze vec: {gaze_vec}")
    for name, center in other_faces.items():
        print(f"    - {name} center: {center}")



Frame 14.0
  Eye center: [0. 0. 0.]
  Gaze vec: [0.5697289  0.58112347 0.58112347]
    - Therapist_1 center: [34.0952381  34.04761905 34.97619048]
    - Therapist_2 center: [34.0952381  34.04761905 34.97619048]

Frame 71.0
  Eye center: [0. 0. 0.]
  Gaze vec: [0.5697289  0.58112347 0.58112347]
    - Therapist_1 center: [34.0952381  34.04761905 34.97619048]
    - Therapist_2 center: [34.0952381  34.04761905 34.97619048]

Frame 142.0
  Eye center: [0. 0. 0.]
  Gaze vec: [0.5697289  0.58112347 0.58112347]
    - Therapist_1 center: [34.0952381  34.04761905 34.97619048]
    - Therapist_2 center: [34.0952381  34.04761905 34.97619048]


In [None]:
# using the same data for each participant