# Data Exploration

This notebook explores the GestureFlow dataset and visualizes gesture trajectories.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import json
from pathlib import Path

%matplotlib inline

## Load Data

In [None]:
# Load the generated gesture samples from the processed-data JSON file.
data_path = Path('../data/processed/gestures_en.json')

# Bind `data` up front so the cells below still run (and simply show nothing)
# when the file has not been generated yet, instead of failing with NameError.
data = []

if data_path.exists():
    # Be explicit about the encoding rather than relying on the platform default.
    with open(data_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    print(f"Loaded {len(data)} samples")
else:
    print("Data file not found. Please generate data first.")

## Visualize Gesture Trajectories

In [None]:
# Plot up to six sample trajectories on a 2x3 grid, one sample per panel.
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.flatten()

for idx, ax in enumerate(axes):
    if idx >= len(data):
        # Fewer samples than panels: hide the leftover panel rather than
        # leaving an empty framed axes in the figure.
        ax.set_axis_off()
        continue

    sample = data[idx]
    trajectory = np.array(sample['trajectory'])

    # Columns 0 and 1 of the trajectory are drawn as the X and Y coordinates.
    ax.plot(trajectory[:, 0], trajectory[:, 1], 'b-', alpha=0.6)
    # Mark the first and last points so the drawing direction is visible.
    ax.scatter(trajectory[0, 0], trajectory[0, 1], c='green', s=100, label='Start')
    ax.scatter(trajectory[-1, 0], trajectory[-1, 1], c='red', s=100, label='End')
    ax.set_title(f"Word: {sample['word']}")
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Dataset Statistics

In [None]:
# Summarize the number of points per trajectory, then plot their distribution.
# Nothing is printed or drawn when `data` is empty.
if data:
    lengths = np.array([len(entry['trajectory']) for entry in data])

    summary_lines = (
        f"Total samples: {len(data)}",
        f"Average trajectory length: {lengths.mean():.2f}",
        f"Min trajectory length: {lengths.min()}",
        f"Max trajectory length: {lengths.max()}",
    )
    for line in summary_lines:
        print(line)

    # Histogram of trajectory lengths across all samples.
    plt.figure(figsize=(10, 6))
    plt.hist(lengths, bins=30, edgecolor='black')
    plt.title('Distribution of Trajectory Lengths')
    plt.xlabel('Trajectory Length')
    plt.ylabel('Frequency')
    plt.grid(True, alpha=0.3)
    plt.show()