# ⚽ Football Player Tracker — Exploratory Analysis

End-to-end walkthrough of both tracking files: data loading, cleaning, GPS validation, IMU conversion, and all analytics steps.

Run all cells from top to bottom — later cells reuse variables created by earlier ones. Figures are interactive Plotly charts.

In [None]:
# Notebook setup: make the project's `src` package importable, configure
# Plotly rendering, and import every loader/analytics/plot helper used below.
import sys, os
# Put the parent of the current working directory first on sys.path so the
# `src.*` imports below can resolve. This must run before those imports.
# NOTE(review): assumes the notebook is started from a direct subdirectory of
# the project root — confirm.
sys.path.insert(0, os.path.join(os.getcwd(), '..'))

import pandas as pd
import numpy as np
import plotly.io as pio
# Use Plotly's 'notebook' renderer as the default for every `.show()` call.
pio.renderers.default = 'notebook'

# Project-local helpers: file loading, GPS analytics, IMU analytics, plotting.
from src.loader import load_file
from src.gps_analytics import (
    haversine_distance, gps_speed_validation, total_distance,
    speed_zone_distribution, goalkeeper_clustering
)
from src.imu_analytics import (
    detect_outliers, detect_special_movements, asymmetry_analysis,
    detect_action_events, estimate_footedness, fatigue_analysis
)
from src.plots import (
    plot_speed_timeseries, plot_speed_scatter, plot_speed_error_histogram,
    plot_position_heatmap, plot_speed_zone_bar, plot_speed_trajectory,
    plot_acc_trajectory, plot_movement_events, plot_action_events, plot_fatigue
)

# Simple confirmation that every import above succeeded.
print('Imports OK')

## 1. Load Files

In [None]:
# File 1 holds both GPS and IMU data.
df1, meta1 = load_file('../data/new_player_data_2026_02_06_174048.csv')

# File 2 is IMU-only: pass the session start (16.02.2026 19:30 CET) and a
# 500 ms sample interval so its timestamps can be reconstructed.
imu_only_options = {
    'imu_only_start': '16.02.2026 19:30',
    'imu_only_freq_ms': 500,
}
df2, meta2 = load_file(
    '../data/player_activity_imu_2026_02_16.csv',
    **imu_only_options,
)

# Show both metadata records, separated by one blank line.
print('File 1 meta:', meta1, end='\n\n')
print('File 2 meta:', meta2)

## 2. Exploratory Analysis

In [None]:
# Overview of File 1: banner, shape and column names, one item per line.
print(
    '=== FILE 1 ===',
    f'Shape: {df1.shape}',
    f'Columns: {df1.columns.tolist()}',
    sep='\n',
)
# Last expression of the cell: the notebook renders the first three rows.
df1.head(n=3)

In [None]:
# Summary statistics for File 1's numeric columns, leaving out every column
# whose name ends in '_raw'.
numeric_columns = df1.select_dtypes(include='number').columns
num_cols = [name for name in numeric_columns if not name.endswith('_raw')]

# One row per column, rounded to four decimals; rendered as the cell output.
summary = df1[num_cols].describe()
summary.T.round(4)

In [None]:
# Overview of File 2: banner, shape, and the loader's
# 'timestamp_reconstructed' metadata entry, one item per line.
print(
    '=== FILE 2 ===',
    f'Shape: {df2.shape}',
    f'Timestamp reconstructed: {meta2["timestamp_reconstructed"]}',
    sep='\n',
)
# Last expression of the cell: the notebook renders the first three rows.
df2.head(n=3)

## 3. GPS Speed Validation (File 1 only)

In [None]:
# Validate File 1's GPS speed; keep the metrics and the validated frame,
# which the plotting cells below consume.
metrics, df1_val = gps_speed_validation(df1)

# Print a header followed by one indented "name: value" line per metric.
metric_lines = [f'  {name}: {value}' for name, value in metrics.items()]
print('Validation metrics:', *metric_lines, sep='\n')

In [None]:
# Build the speed time-series figure from the validated File 1 frame and show it.
speed_ts_fig = plot_speed_timeseries(df1_val)
speed_ts_fig.show()

In [None]:
# Build the speed scatter figure from the validated File 1 frame and show it.
speed_scatter_fig = plot_speed_scatter(df1_val)
speed_scatter_fig.show()

In [None]:
# Build the speed-error histogram from the validated File 1 frame and show it.
speed_error_fig = plot_speed_error_histogram(df1_val)
speed_error_fig.show()

## 4. Position Heatmap & Goalkeeper Clustering (File 1)

In [None]:
# Haversine step on File 1, then the total distance covered.
df1_hav = haversine_distance(df1)
dist_m = total_distance(df1_hav)
dist_km = dist_m / 1000
print(f'Total distance: {dist_m:.1f} m  ({dist_km:.3f} km)')

# Cluster the positions into three groups; keep the labelled frame and the
# per-cluster summary for the heatmap and later cells.
df1_cl, cluster_summary = goalkeeper_clustering(df1_hav, n_clusters=3)

print()
print('Cluster summary:')
# Last expression of the cell: the notebook renders the summary.
cluster_summary

In [None]:
# Build the position heatmap from the clustered frame and its summary, then show it.
heatmap_fig = plot_position_heatmap(df1_cl, cluster_summary)
heatmap_fig.show()

In [None]:
# Build the speed trajectory figure from the clustered File 1 frame and show it.
trajectory_fig = plot_speed_trajectory(df1_cl)
trajectory_fig.show()

## 5. Speed Distribution (File 1)

In [None]:
# Speed-zone distribution for File 1: show the table, then the bar chart.
zone_df = speed_zone_distribution(df1_cl)
display(zone_df)

zone_fig = plot_speed_zone_bar(zone_df)
zone_fig.show()

## 6. IMU: Outliers & Special Movements (both files)

In [None]:
# For each file, run outlier detection followed by special-movement detection
# and print how many rows each flag column marks.
flag_captions = (
    ('  Acc outliers: ', 'outlier_acc'),
    ('  Twists: ', 'is_twist'),
    ('  Leans:  ', 'is_lean'),
    ('  Turns:  ', 'is_turn'),
)
for frame, label in ((df1, 'File 1'), (df2, 'File 2')):
    flagged = detect_special_movements(detect_outliers(frame))
    print(f'--- {label} ---')
    for caption, column in flag_captions:
        print(f'{caption}{flagged[column].sum()}')

In [None]:
# Build the File 1 frame used for the movement-events plot.
# The detectors are applied in the same order as in the per-file summary loop
# (outliers first, then special movements) so that the plotted flags come from
# the same pipeline as the printed counts. The original cell applied them in
# the opposite order.
df1_mv = detect_special_movements(detect_outliers(df1))
plot_movement_events(df1_mv).show()

## 7. Action Events: Shots, Passes, Headers (both files)

In [None]:
# For each file, detect action events, estimate footedness from them, and
# print the number of rows flagged per event type.
event_captions = (
    ('  Probable shots:   ', 'event_shot'),
    ('  Probable passes:  ', 'event_pass'),
    ('  Probable headers: ', 'event_header'),
)
for frame, label in ((df1, 'File 1'), (df2, 'File 2')):
    events = detect_action_events(frame)
    dominant_foot = estimate_footedness(events)
    print(f'--- {label} ---')
    for caption, column in event_captions:
        print(f'{caption}{events[column].sum()}')
    # The trailing blank line separates the two files' reports.
    print(f'  Footedness: {dominant_foot}', end='\n\n')

In [None]:
# Detect action events on File 1 and show the corresponding figure.
df1_ev = detect_action_events(df1)
action_fig = plot_action_events(df1_ev)
action_fig.show()

## 8. Asymmetry Analysis (both files)

In [None]:
# For each file, run the asymmetry analysis and print its result as a header,
# one indented "key: value" line per entry, and a trailing blank line.
for frame, label in ((df1, 'File 1'), (df2, 'File 2')):
    report = asymmetry_analysis(frame)
    report_lines = [f'--- {label} ---']
    report_lines.extend(f'  {key}: {value}' for key, value in report.items())
    report_lines.append('')
    print('\n'.join(report_lines))

## 9. Fatigue Analysis (File 1 — GPS session with timestamps)

In [None]:
# Fatigue analysis on File 1 using 10-minute windows: table first, then figure.
fat_df = fatigue_analysis(df1, window_minutes=10)
display(fat_df)

fatigue_fig = plot_fatigue(fat_df)
fatigue_fig.show()

## 10. What is NOT available for File 2

File 2 has **no GPS data** (latitude/longitude/speed are all zero). The following analyses are therefore not available:

- GPS Speed Validation
- Position Heatmap / Goalkeeper Clustering
- Speed Distribution (device speed is zero)
- Speed Trajectory
- Total Distance

All **IMU-based** analyses (outliers, special movements, action events, asymmetry) work normally for File 2. Fatigue analysis is run for File 1 only in this notebook.