# In [1]:
# Swing Mechanics Analysis — Graduate-Style Notebook (Patched)
# Author: Beija Richardson
# This notebook loads five batter CSVs, cleans them, detects swing events, computes metrics,
# and saves outputs locally on Windows.

import os
import glob
import warnings
from pathlib import Path
from textwrap import dedent

import numpy as np
import pandas as pd


# Keep numpy RuntimeWarning messages out of the notebook output.
warnings.filterwarnings(
    action="ignore",
    category=RuntimeWarning,
    module="numpy",
)


def load_and_clean_data(pattern=r"C:\Users\Beija\Downloads\Data\Data\swing_mechanics\data\batter*.csv"):
    """Load every CSV matching ``pattern`` and return the cleaned tables.

    Cleaning steps, applied to each file in this order:
      - Drop fully-empty columns
      - Linearly interpolate gaps in all numeric columns (in file row order,
        also filling leading/trailing gaps)
      - Use the first column whose name contains "time" as the time axis,
        drop rows without a timestamp and cast the column to float
      - Sort by time (stable) and keep the first row of every duplicated
        timestamp, which leaves the time axis strictly increasing

    Args:
        pattern: Glob pattern of the CSV files to load. A bare file pattern
            such as ``"batter*.csv"`` is resolved against the current
            working directory.

    Returns:
        dict mapping each file stem (e.g. ``"batter1"``) to its cleaned
        ``DataFrame``, re-indexed 0..n-1 in time order.

    Raises:
        FileNotFoundError: The folder does not exist or no file matches.
        PermissionError: A matching file cannot be read.
        KeyError: A file has no column whose name contains "time".
    """
    # dirname() is "" for a bare pattern like "batter*.csv"; treat that as the
    # current directory instead of reporting a missing folder.
    folder = os.path.dirname(pattern) or "."
    if not os.path.exists(folder):
        raise FileNotFoundError(
            f"⚠️ Folder not found: {folder}\n"
            "Confirm the data folder, e.g. C:\\Users\\Beija\\Downloads\\Data\\Data\\swing_mechanics\\data"
        )
    files = sorted(glob.glob(pattern))
    if not files:
        raise FileNotFoundError(f"⚠️ No CSV files matching pattern: {pattern}")

    print(f"✅ Found {len(files)} files:")
    for f in files:
        print("   ", f)

    data = {}
    for file in files:
        try:
            df = pd.read_csv(file)
        except PermissionError as err:
            # Chain explicitly so the original OS error stays attached.
            raise PermissionError(
                f"🚫 Permission denied when reading: {file}\n"
                "Close Excel/other apps or run Jupyter/VS Code as Administrator."
            ) from err

        df = df.dropna(axis=1, how='all')

        num_cols = df.select_dtypes(include=[np.number]).columns.tolist()
        if num_cols:
            # NOTE(review): gaps are filled in file row order, before the sort
            # below, and a numeric time column is interpolated too. That is
            # only meaningful if the rows are already stored chronologically
            # -- confirm against the raw exports.
            df[num_cols] = df[num_cols].interpolate(limit_direction='both')

        time_cols = [c for c in df.columns if 'time' in c.lower()]
        if not time_cols:
            raise KeyError(f"No time column found in {file}")
        tcol = time_cols[0]

        # Cast to float *before* sorting/de-duplicating so that ordering and
        # duplicate detection both work on the final numeric timestamps.
        df = df.loc[df[tcol].notna()].copy()
        df[tcol] = df[tcol].astype(float)

        # A stable sort keeps duplicated timestamps in file order, so
        # drop_duplicates() deterministically retains the first row read.
        # Distinct, sorted timestamps are strictly increasing, so no extra
        # tie-breaking is required afterwards.
        df = (
            df.sort_values(by=tcol, kind="mergesort")
            .drop_duplicates(subset=tcol)
            .reset_index(drop=True)
        )

        key = Path(file).stem  # e.g., 'batter1'
        data[key] = df

    print("✅ Data loaded and cleaned successfully.")
    return data

# Load and clean every batter recording once, using the default glob pattern.
# DATA maps each file stem (e.g. 'batter1') to its cleaned DataFrame.
DATA = load_and_clean_data()

def get_time_col(df):
    """Return the first column name containing "time" (case-insensitive).

    Raises IndexError when no column name contains "time".
    """
    time_like = filter(lambda label: 'time' in label.lower(), df.columns)
    candidates = list(time_like)
    return candidates[0]

def pick_column(df, contains):
    """Return the first column whose lowercased name contains every fragment.

    ``contains`` is an iterable of substrings; ``None`` is returned when no
    column name includes all of them.
    """
    def _has_all_fragments(label):
        for fragment in contains:
            if fragment not in label.lower():
                return False
        return True

    hits = list(filter(_has_all_fragments, df.columns))
    if not hits:
        return None
    return hits[0]

def identify_events(df, threshold=10.0, window=5, contact_offset=0.05):
    """Detect swing start, peak-speed time and contact time from the bat signal.

    The bat signal is, in order of preference: the first column whose name
    contains "bat" and "angular_velocity"; the first containing "barrel" and
    "angular_velocity"; the row-wise mean of every "angular_velocity" column;
    or all zeros when no such column exists. It is smoothed with a centered
    moving average before the events are located.

    Args:
        df: One recording; must have a column whose name contains "time".
        threshold: Smoothed signal value that must be exceeded to mark the
            swing start.
        window: Width, in samples, of the centered moving average.
        contact_offset: Amount added to the peak time to estimate contact when
            no "contact" column with data is present.

    Returns:
        Tuple ``(swing_start, t_peak, contact, bat_w)``: the time value at the
        first above-threshold sample (NaN if the threshold is never exceeded),
        the time value at the smoothed maximum (NaN if the signal is empty or
        all-NaN), the contact value, and the unsmoothed bat signal as a Series.
    """
    tcol = get_time_col(df)
    bat_col = pick_column(df, ['bat', 'angular_velocity']) or pick_column(df, ['barrel', 'angular_velocity'])
    if bat_col is not None:
        bat_w = df[bat_col]
    else:
        wcols = [c for c in df.columns if 'angular_velocity' in c.lower()]
        if wcols:
            bat_w = df[wcols].mean(axis=1)
        else:
            # Share df's index so the labels found below are valid for df.loc
            # even when df does not carry a default 0..n-1 index.
            bat_w = pd.Series(np.zeros(len(df)), index=df.index)

    smooth = bat_w.rolling(window, min_periods=1, center=True).mean()

    # NOTE(review): only values above +threshold count, so a signal recorded
    # with the opposite sign would never register a swing start -- confirm the
    # sign convention of the source data.
    above = smooth[smooth > threshold]
    swing_start = df.loc[above.index.min(), tcol] if not above.empty else np.nan

    # idxmax() has no valid answer for an empty or all-NaN signal (it raises
    # or returns NaN depending on the pandas version), so report NaN instead.
    if smooth.notna().any():
        t_peak = df.loc[smooth.idxmax(), tcol]
    else:
        t_peak = np.nan

    contact_col = pick_column(df, ['contact'])
    if contact_col and df[contact_col].notna().any():
        # NOTE(review): assumes this column stores the contact time on the
        # same clock as the time column -- confirm against the CSV schema.
        contact = df[contact_col].dropna().iloc[0]
    else:
        contact = t_peak + contact_offset

    return swing_start, t_peak, contact, bat_w


def compute_metrics(df):
    """Summarise one recording as a dict of swing metrics.

    Uses ``identify_events`` for the event times and bat signal, then derives:
    the peak of the 5-sample smoothed bat signal, contact time minus swing
    start, the 95th percentile of |mean shoulder angle - mean hip angle|, the
    largest absolute sample-to-sample rate of change of the bat signal, and
    the angle (in degrees) of the mean ``cog_velo_z`` / ``cog_velo_x`` vector.
    Metrics whose inputs are missing are reported as NaN.
    """
    time_name = get_time_col(df)
    time_series = df[time_name]
    swing_start, _t_peak, t_contact, bat_w = identify_events(df)

    # Peak of the centered moving average of the bat signal.
    smoothed = bat_w.rolling(5, min_periods=1, center=True).mean()
    peak_speed = smoothed.max()

    if pd.notna(swing_start):
        ttc = t_contact - swing_start
    else:
        ttc = np.nan

    def _angle_columns(body_part):
        # Columns whose lowercased name mentions both the body part and "angle".
        return [
            label for label in df.columns
            if (body_part in label.lower() and 'angle' in label.lower())
        ]

    hip_cols = _angle_columns('hip')
    sh_cols = _angle_columns('shoulder')
    separation = np.nan
    if hip_cols and sh_cols:
        gap = df[sh_cols].mean(axis=1) - df[hip_cols].mean(axis=1)
        separation = gap.abs().quantile(0.95)

    times = time_series.to_numpy(dtype=float)
    omega = bat_w.to_numpy(dtype=float)

    # Backward differences; prepending the first sample keeps the arrays the
    # same length as the input and makes the first step zero.
    d_omega = np.diff(omega, prepend=omega[0])
    d_time = np.diff(times, prepend=times[0])
    # Non-positive time steps cannot yield a rate, so blank them out.
    d_time = np.where(d_time > 0, d_time, np.nan)

    with np.errstate(divide='ignore', invalid='ignore'):
        rate = d_omega / d_time

    bat_accel = np.nanmax(np.abs(rate))

    has_cog_velocity = all(col in df.columns for col in ('cog_velo_x', 'cog_velo_z'))
    if has_cog_velocity:
        mean_vz = df['cog_velo_z'].mean()
        mean_vx = df['cog_velo_x'].mean()
        attack_angle = np.degrees(np.arctan2(mean_vz, mean_vx))
    else:
        attack_angle = np.nan

    def _float_or_nan(value):
        return float(value) if pd.notna(value) else np.nan

    metrics = {}
    metrics['Swing Start (s)'] = swing_start
    metrics['Max Speed (°/s)'] = float(peak_speed)
    metrics['Time to Contact (s)'] = _float_or_nan(ttc)
    metrics['Hip–Shoulder Separation (°)'] = _float_or_nan(separation)
    metrics['Bat Acceleration (°/s²)'] = float(bat_accel)
    metrics['Attack Angle (°)'] = _float_or_nan(attack_angle)
    return metrics

# Build one metrics record per batter, tagged with the batter's key in DATA.
records = []
for name, df in DATA.items():
    m = {**compute_metrics(df), 'Batter': name}
    records.append(m)

# Tabulate the records with the 'Batter' label as the leading column.
summary_df = pd.DataFrame(records)
metric_columns = [col for col in summary_df.columns if col != 'Batter']
summary_df = summary_df.loc[:, ['Batter', *metric_columns]]


# Results are written to an "outputs" folder next to the input CSVs.
input_glob = r"C:\Users\Beija\Downloads\Data\Data\swing_mechanics\data\batter*.csv"
OUTPUT_DIR = Path(input_glob).with_name("outputs")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

summary_csv_path = OUTPUT_DIR.joinpath("swing_metrics_summary.csv")
notes_md_path = OUTPUT_DIR.joinpath("swing_coach_notes.md")


def make_coach_notes(df):
    """Render the summary table as plain-text notes, one bullet per row.

    Each row must provide 'Batter', 'Max Speed (°/s)', 'Time to Contact (s)',
    'Hip–Shoulder Separation (°)' and 'Attack Angle (°)'. The result is a title
    line followed by one formatted entry per row, joined with newlines.
    """
    def _entry(row):
        # Pull the values out first so the template below stays readable.
        batter = row['Batter']
        peak_speed = row['Max Speed (°/s)']
        time_to_contact = row['Time to Contact (s)']
        separation = row['Hip–Shoulder Separation (°)']
        attack_angle = row['Attack Angle (°)']
        return dedent(f"""
        • {batter}: Peak Speed {peak_speed:.1f} °/s; Time‑to‑Contact {time_to_contact:.3f} s;
          Separation ≈ {separation:.1f}°; Attack Angle ≈ {attack_angle:.1f}°.
          Interpretation: Speed & separation indicate power/sequencing; time‑to‑contact reflects efficiency; attack angle around 5–15° supports line‑drive trajectories.
        """)

    title = 'Results & Notes (Coach‑Friendly)\n'
    entries = [_entry(row) for _, row in df.iterrows()]
    return "\n".join([title] + entries)

# Render the notes from a copy so the summary table itself is left untouched.
notes_text = make_coach_notes(summary_df.copy())

# Persist the metrics table first, then the notes file.
summary_df.to_csv(summary_csv_path, index=False)
notes_md_path.write_text(notes_text, encoding="utf-8")

for saved_path in (summary_csv_path, notes_md_path):
    print("✅ Completed. Saved:", saved_path)


# Output:
# ✅ Found 5 files:
#     C:\Users\Beija\Downloads\Data\Data\swing_mechanics\data\batter1.csv
#     C:\Users\Beija\Downloads\Data\Data\swing_mechanics\data\batter2.csv
#     C:\Users\Beija\Downloads\Data\Data\swing_mechanics\data\batter3.csv
#     C:\Users\Beija\Downloads\Data\Data\swing_mechanics\data\batter4.csv
#     C:\Users\Beija\Downloads\Data\Data\swing_mechanics\data\batter5.csv
# ✅ Data loaded and cleaned successfully.
# ✅ Completed. Saved: C:\Users\Beija\Downloads\Data\Data\swing_mechanics\data\outputs\swing_metrics_summary.csv
# ✅ Completed. Saved: C:\Users\Beija\Downloads\Data\Data\swing_mechanics\data\outputs\swing_coach_notes.md
