# Match Trajectory Export

Generate per-participant position trajectories for every match timeline JSON and persist them as PNG plots.

In [2]:
from pathlib import Path
import json
import math

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

plt.switch_backend('Agg')  # ensure compatibility when running headless

DATA_DIR = Path('.')
TIMELINE_DIR = DATA_DIR / 'timeline'
OUTPUT_ROOT = DATA_DIR / 'trajectories'
OUTPUT_ROOT.mkdir(exist_ok=True)

AXISLESS_ROOT = DATA_DIR / 'trajectories_axisless'
AXISLESS_ROOT.mkdir(exist_ok=True)

MINUTE_ROOT = DATA_DIR / 'image_dataset'
MINUTE_ROOT.mkdir(exist_ok=True)

timeline_paths = sorted(TIMELINE_DIR.glob('*.json'))
print(f'Timeline files found: {len(timeline_paths):,}')
print(f'Trajectory output root: {OUTPUT_ROOT.resolve()}')
print(f'Axis-free trajectory output: {AXISLESS_ROOT.resolve()}')

MAX_MATCHES = None  # Set to an integer to limit the number of matches processed during testing


Timeline files found: 9,998
Trajectory output root: D:\current\stats302\group project\code\trajectories
Axis-free trajectory output: D:\current\stats302\group project\code\trajectories_axisless


In [3]:
def extract_positions(frames):
    """Return a DataFrame with timestamped x/y positions per participant."""
    records = []
    for frame in frames:
        timestamp = frame.get('timestamp')
        participant_frames = frame.get('participantFrames', {})
        if not participant_frames:
            continue
        for participant_id, participant_frame in participant_frames.items():
            position = participant_frame.get('position')
            if not position:
                continue
            x = position.get('x')
            y = position.get('y')
            if x is None or y is None:
                continue
            records.append({
                'timestamp': timestamp,
                'participant_id': int(participant_id),
                'x': x,
                'y': y,
            })
    if not records:
        return pd.DataFrame(columns=['timestamp', 'participant_id', 'x', 'y'])
    df = pd.DataFrame(records)
    return df.sort_values(['participant_id', 'timestamp']).reset_index(drop=True)


def save_participant_plots(positions_df, match_id, output_root):
    """Save one trajectory plot per participant for a single match."""
    if positions_df.empty:
        return {'status': 'no_positions', 'participants_plotted': 0}

    participants = sorted(positions_df['participant_id'].unique())
    match_dir = output_root / match_id
    match_dir.mkdir(parents=True, exist_ok=True)

    legend_handles = None
    legend_labels = None

    for participant_id in participants:
        group = positions_df[positions_df['participant_id'] == participant_id]
        if group.empty:
            continue

        fig, ax = plt.subplots(figsize=(4, 4))
        ax.plot(group['x'], group['y'], color='tab:blue', linewidth=1.2)
        ax.scatter(group['x'].iloc[0], group['y'].iloc[0], s=35, color='green', label='start')
        ax.scatter(group['x'].iloc[-1], group['y'].iloc[-1], s=35, color='red', label='end')
        ax.set_title(f'Participant {participant_id}')
        ax.set_xlabel('X position')
        ax.set_ylabel('Y position')
        ax.set_aspect('equal', adjustable='box')
        ax.tick_params(labelsize=8)

        handles, labels = ax.get_legend_handles_labels()
        if handles and legend_handles is None:
            legend_handles, legend_labels = handles, labels

        fig.tight_layout()
        output_path = match_dir / f'participant_{participant_id:02d}.png'
        fig.savefig(output_path, dpi=150)
        plt.close(fig)

    if legend_handles:
        legend_fig, legend_ax = plt.subplots(figsize=(2, 2))
        legend_ax.axis('off')
        legend_fig.legend(legend_handles, legend_labels, loc='center', frameon=False)
        legend_fig.savefig(match_dir / 'legend.png', dpi=150, bbox_inches='tight')
        plt.close(legend_fig)

    return {
        'status': 'ok',
        'participants_plotted': len(participants),
        'match_dir': match_dir,
    }


def save_axisless_participant_plots(positions_df, match_id, output_root):
    """Save one axis-free trajectory plot per participant for a single match."""
    if positions_df.empty:
        return {'status': 'no_positions', 'participants_plotted': 0}

    participants = sorted(positions_df['participant_id'].unique())
    match_dir = output_root / match_id
    match_dir.mkdir(parents=True, exist_ok=True)

    legend_handles = None
    legend_labels = None

    for participant_id in participants:
        group = positions_df[positions_df['participant_id'] == participant_id]
        if group.empty:
            continue

        fig, ax = plt.subplots(figsize=(4, 4), dpi=150)
        ax.plot(group['x'], group['y'], color='black', linewidth=1.4)
        ax.scatter(group['x'].iloc[0], group['y'].iloc[0], s=30, color='green', label='start')
        ax.scatter(group['x'].iloc[-1], group['y'].iloc[-1], s=30, color='red', label='end')

        ax.set_aspect('equal', adjustable='box')
        ax.margins(0.05)
        ax.axis('off')  # Remove axes, ticks, and frame for CNN-ready images.
        fig.patch.set_facecolor('white')
        ax.set_facecolor('white')
        fig.subplots_adjust(0, 0, 1, 1)

        handles, labels = ax.get_legend_handles_labels()
        if handles and legend_handles is None:
            legend_handles, legend_labels = handles, labels

        output_path = match_dir / f'participant_{participant_id:02d}.png'
        fig.savefig(output_path, dpi=150, bbox_inches='tight', pad_inches=0)
        plt.close(fig)

    if legend_handles:
        legend_fig, legend_ax = plt.subplots(figsize=(1.6, 1.6), dpi=150)
        legend_ax.axis('off')
        legend_fig.legend(legend_handles, legend_labels, loc='center', frameon=False)
        legend_fig.savefig(match_dir / 'legend.png', dpi=150, bbox_inches='tight', pad_inches=0)
        plt.close(legend_fig)

    return {
        'status': 'ok',
        'participants_plotted': len(participants),
        'match_dir': match_dir,
    }


def save_minute_axisless_participant_plots(positions_df, match_id, output_root):
    """Save axis-free trajectories per participant for each minute snapshot."""
    if positions_df.empty:
        return {'status': 'no_positions', 'minutes_processed': 0, 'participants': 0, 'plots_written': 0}

    work_df = positions_df.copy()
    work_df['minute'] = (work_df['timestamp'] // 60000).astype(int)
    minutes = sorted(work_df['minute'].unique())
    participants = sorted(work_df['participant_id'].unique())
    match_root = output_root / match_id

    total_plots = 0
    for minute in minutes:
        minute_dir = match_root / f'min{minute + 1:02d}'
        minute_dir.mkdir(parents=True, exist_ok=True)
        for existing in minute_dir.glob('*.png'):
            existing.unlink()

        minute_df = work_df[work_df['minute'] <= minute]
        for participant_id in participants:
            group = minute_df[minute_df['participant_id'] == participant_id]
            if group.empty:
                continue

            fig, ax = plt.subplots(figsize=(4, 4), dpi=150)
            ax.plot(group['x'], group['y'], color='black', linewidth=1.4)
            ax.scatter(group['x'].iloc[0], group['y'].iloc[0], s=30, color='green', label='start')
            ax.scatter(group['x'].iloc[-1], group['y'].iloc[-1], s=30, color='red', label='end')
            ax.set_aspect('equal', adjustable='box')
            ax.margins(0.05)
            ax.axis('off')
            fig.patch.set_facecolor('white')
            ax.set_facecolor('white')
            fig.subplots_adjust(0, 0, 1, 1)

            output_path = minute_dir / f'{match_id}_min{minute + 1:02d}_{participant_id}.png'
            fig.savefig(output_path, dpi=150, bbox_inches='tight', pad_inches=0)
            plt.close(fig)
            total_plots += 1

    return {
        'status': 'ok',
        'minutes_processed': len(minutes),
        'participants': len(participants),
        'plots_written': total_plots,
    }


In [4]:
def process_match(path):
    """Load a timeline JSON, extract positions, and persist participant trajectories."""
    match_id = path.stem
    try:
        with path.open(encoding='utf-8') as f:
            timeline = json.load(f)
    except Exception as exc:
        return {
            'match_id': match_id,
            'status': 'load_failed',
            'error': str(exc),
        }

    frames = timeline.get('info', {}).get('frames', [])
    positions_df = extract_positions(frames)
    standard_result = save_participant_plots(positions_df, match_id, OUTPUT_ROOT)
    axisless_result = save_axisless_participant_plots(positions_df, match_id, AXISLESS_ROOT)
    minute_result = save_minute_axisless_participant_plots(positions_df, match_id, MINUTE_ROOT)

    summary = {
        'match_id': match_id,
        'frames': len(frames),
        'position_rows': len(positions_df),
        'participants_plotted': standard_result.get('participants_plotted', 0),
        'status': standard_result.get('status', 'unknown'),
        'axisless_status': axisless_result.get('status', 'unknown'),
        'minute_status': minute_result.get('status', 'unknown'),
        'minutes_processed': minute_result.get('minutes_processed', 0),
        'minute_plots': minute_result.get('plots_written', 0),
    }
    if standard_result.get('status') != 'ok':
        summary['detail'] = standard_result.get('detail')
    if axisless_result.get('status') != 'ok':
        summary['axisless_detail'] = axisless_result.get('detail')
    if minute_result.get('status') != 'ok':
        summary['minute_detail'] = minute_result.get('detail')
    return summary


In [5]:
process_paths = timeline_paths if MAX_MATCHES is None else timeline_paths[:MAX_MATCHES]
print(f'Matches to process: {len(process_paths):,}')

summaries = []
for idx, path in enumerate(process_paths, start=1):
    summary = process_match(path)
    summaries.append(summary)
    if idx % 100 == 0 or summary.get('status') != 'ok':
        print(f"Processed {idx}/{len(process_paths)} matches - {summary['match_id']} status: {summary['status']} | axisless: {summary.get('axisless_status')} | minute: {summary.get('minute_status')}")

summary_df = pd.DataFrame(summaries)
summary_df.head()


Matches to process: 9,998
Processed 100/9998 matches - KR_7690613577 status: ok | axisless: ok | minute: ok
Processed 200/9998 matches - KR_7694864475 status: ok | axisless: ok | minute: ok
Processed 300/9998 matches - KR_7699898590 status: ok | axisless: ok | minute: ok
Processed 400/9998 matches - KR_7705360186 status: ok | axisless: ok | minute: ok
Processed 500/9998 matches - KR_7709078287 status: ok | axisless: ok | minute: ok
Processed 600/9998 matches - KR_7713205360 status: ok | axisless: ok | minute: ok
Processed 700/9998 matches - KR_7715564289 status: ok | axisless: ok | minute: ok
Processed 800/9998 matches - KR_7717320972 status: ok | axisless: ok | minute: ok
Processed 900/9998 matches - KR_7719923088 status: ok | axisless: ok | minute: ok
Processed 1000/9998 matches - KR_7723092478 status: ok | axisless: ok | minute: ok
Processed 1100/9998 matches - KR_7725401024 status: ok | axisless: ok | minute: ok
Processed 1200/9998 matches - KR_7727113754 status: ok | axisless: ok 

SystemError: tile cannot extend outside image

In [6]:
summary_df['status'].value_counts()
summary_df['axisless_status'].value_counts()


NameError: name 'summary_df' is not defined

In [7]:
# Investigate any matches that did not produce trajectories
summary_df[summary_df['status'] != 'ok'].head(20)

Unnamed: 0,match_id,frames,position_rows,participants_plotted,status,axisless_status,minute_status,minutes_processed,minute_plots


In [8]:
# Ensure each match directory contains ten participant plots
from pathlib import Path

def count_pngs(match_dir):
    return len(list(Path(match_dir).glob('participant_*.png')))

if summary_df.empty:
    print('No matches processed yet.')
else:
    checks = []
    for match_id in summary_df['match_id']:
        match_dir = OUTPUT_ROOT / match_id
        png_count = count_pngs(match_dir)
        checks.append({'match_id': match_id, 'png_count': png_count})
    check_df = pd.DataFrame(checks)
    check_df.head()


Run the processing cell above to generate all trajectories. After completion you should find one subdirectory per match inside `trajectories/`, each containing ten participant trajectory images (`participant_01.png` … `participant_10.png`) and a legend image.

In [9]:
# Ensure each match directory contains ten axis-free participant plots
from pathlib import Path

def count_axisless_pngs(match_dir):
    return len(list(Path(match_dir).glob('participant_*.png')))

if summary_df.empty:
    print('No matches processed yet.')
else:
    axisless_checks = []
    for match_id in summary_df['match_id']:
        match_dir = AXISLESS_ROOT / match_id
        png_count = count_axisless_pngs(match_dir)
        axisless_checks.append({'match_id': match_id, 'png_count': png_count})
    pd.DataFrame(axisless_checks).head()


---
Axis-free trajectories are generated alongside the original plots and saved in `trajectories_axisless`.
