In [21]:
import sqlite3
import os
import re

# Path to the folder containing your txt files
folder_path = 'D:/Athletic Screen 2.0/Output Files/'
db_path = 'D:/Athletic Screen 2.0/Output Files/movement_database_v2.db'

# Delete the database file if it exists to start fresh
if os.path.exists(db_path):
    os.remove(db_path)
    print(f"Deleted existing database at {db_path}")

# Connect to the SQLite database (or create it if it doesn't exist)
conn = sqlite3.connect(db_path)
cursor = conn.cursor()

# Define the corrected table schemas for each movement
table_schemas = {
    'CMJ': '''CREATE TABLE IF NOT EXISTS CMJ (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                date TEXT,
                trial_name TEXT,
                JH_IN REAL,
                Peak_Power REAL,
                PP_FORCEPLATE REAL,
                Force_at_PP REAL,
                Vel_at_PP REAL,
                PP_W_per_kg REAL
              )''',

    'PPU': '''CREATE TABLE IF NOT EXISTS PPU (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        name TEXT,
        date TEXT,
        trial_name TEXT,
        JH_IN REAL,
        Peak_Power REAL,
        PP_FORCEPLATE REAL,
        Force_at_PP REAL,
        Vel_at_PP REAL,
        PP_W_per_kg REAL
    )''',
    
    'DJ':  '''CREATE TABLE IF NOT EXISTS DJ (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                date TEXT,
                trial_name TEXT,
                JH_IN REAL,
                Peak_Power REAL,
                PP_FORCEPLATE REAL,
                Force_at_PP REAL,
                Vel_at_PP REAL,
                PP_W_per_kg REAL,
                CT REAL,
                RSI REAL
              )''',

    'SLV': '''CREATE TABLE IF NOT EXISTS SLV (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                date TEXT, 
                trial_name TEXT,
                side TEXT,
                JH_IN REAL,
                PP_FORCEPLATE REAL,
                Force_at_PP REAL,
                Vel_at_PP REAL,
                PP_W_per_kg REAL
              )''',
    'NMT': '''CREATE TABLE IF NOT EXISTS NMT (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                date TEXT, 
                trial_name TEXT,
                NUM_TAPS_10s REAL,
                NUM_TAPS_20s REAL,
                NUM_TAPS_30s REAL,
                NUM_TAPS REAL
              )'''
}

# Create the tables in the database (if they don't exist)
for schema in table_schemas.values():
    cursor.execute(schema)

# Function to extract the client's name from the first line of the file
def extract_name(line):
    match = re.search(r'Data\\(.*?)[_\\]', line)
    if match:
        return match.group(1)
    return None

def extract_date(line):
    """
    Looks for a segment like 2025-05-22_ in the first-line path returned by Cortex.
    Returns '2025-05-22' or None if not found.
    """
    m = re.search(r'\\(\d{4}-\d{2}-\d{2})_', line)
    return m.group(1) if m else None
# ─────────────────────────────────────────────────────────────────────────────

# Function to insert data into the appropriate table
def insert_data_into_table(table_name, name, trial_name, variables):
    """
    `variables` still contains the leading dummy “1”.
    After we drop that each txt gives:
        CMJ : 8 numbers
        DJ  : 8 numbers
        SLV : 6 numbers
        NMT : 4 numbers
    We pick only the columns we store.
    """
    v = variables[1:]          # drop the leading “1”

    if table_name == 'CMJ':
        # keep indices 0,1,4,5,6,7  (→ six values)
        vals = [v[i] for i in (0, 1, 4, 5, 6, 7)]
        cursor.execute(
            """INSERT INTO CMJ
               (name, date, trial_name,
                JH_IN, Peak_Power,
                PP_FORCEPLATE, Force_at_PP, Vel_at_PP,
                PP_W_per_kg)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (name, date, trial_name, *vals)
        )
    
    elif table_name == 'PPU':
        # mirror CMJ selection/order
        vals = [v[i] for i in (0, 1, 4, 5, 6, 7)]
        cursor.execute(
            """INSERT INTO PPU
               (name, date, trial_name,
                JH_IN, Peak_Power,
                PP_FORCEPLATE, Force_at_PP, Vel_at_PP,
                PP_W_per_kg)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (name, date, trial_name, *vals)
        )

    elif table_name == 'DJ':
        # keep every value (8 numbers)
        cursor.execute(
            """INSERT INTO DJ
               (name, date, trial_name,
                JH_IN, Peak_Power,
                PP_FORCEPLATE, Force_at_PP, Vel_at_PP,
                CT, RSI, PP_W_per_kg)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (name, date, trial_name, *v)        # 8 numbers + 2 strings = 10
        )

    elif table_name == 'SLV':
        side = 'Left' if 'SLVL' in trial_name else 'Right'
        # keep indices 0,2,3,4,5  (→ five values)
        vals = [v[i] for i in (0, 2, 3, 4, 5)]
        cursor.execute(
            """INSERT INTO SLV
               (name, date, trial_name, side,
                JH_IN,
                PP_FORCEPLATE, Force_at_PP, Vel_at_PP,
                PP_W_per_kg)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (name, date, trial_name, side, *vals)
        )

    elif table_name == 'NMT':
        cursor.execute(
            """INSERT INTO NMT
               (name, date, trial_name,
                NUM_TAPS_10s, NUM_TAPS_20s, NUM_TAPS_30s, NUM_TAPS)
               VALUES (?, ?, ?, ?, ?, ?, ?)""",
            (name, date, trial_name, *v)
        )
# Loop through the txt files in the folder
for file_name in os.listdir(folder_path):
    if file_name.endswith('.txt'):
        trial_name = os.path.splitext(file_name)[0]
        
        # Determine which table the file belongs to
        if 'CMJ' in trial_name:
            table_name = 'CMJ'
        elif 'PPU' in trial_name:
            table_name = 'PPU'
        elif 'DJ' in trial_name:
            table_name = 'DJ'
        elif 'SLVL' in trial_name or 'SLVR' in trial_name:
            table_name = 'SLV'
        elif 'NMT' in trial_name:
            table_name = 'NMT'
        else:
            continue

        # Load the data from the txt file
        file_path = os.path.join(folder_path, file_name)
        try:
            with open(file_path, 'r') as f:
                # Extract the name from the first line
                first_line = f.readline().strip()
                name = extract_name(first_line)
                date = extract_date(first_line)

                # Print the extracted name to verify
                print(f"File: {file_name}, Extracted Name: {name}")

                if not name:
                    print(f"Name extraction failed for {file_name}, skipping.")
                    continue

                # Read all lines until we find the line with the actual numeric data
                # --- replace the old for-loop (line_num, line) with this: -------------------
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                
                    if re.match(r'^[-+]?\d', line):          # first real numeric row
                        variables = [float(v) for v in line.split()]
                        print(f"Processing file: {file_name}, Variables: {variables}")
                        insert_data_into_table(table_name, name, trial_name, variables)
                        break


        except Exception as e:
            print(f"Unexpected error with file {file_name}: {e}")

# Commit the changes and close the connection
conn.commit()
conn.close()

print("Data successfully inserted into the database.")


Deleted existing database at D:/Athletic Screen 2.0/Output Files/movement_database_v2.db
File: CMJ1.txt, Extracted Name: Zach, Vennaro
Processing file: CMJ1.txt, Variables: [1.0, 15.5, 516.0, 2.27, 228.0, 1196.5, 2318.52, 516.07, 14.96]
File: SLVL1.txt, Extracted Name: Zach, Vennaro
Processing file: SLVL1.txt, Variables: [1.0, 9.6, 6216.0, 944.8, 2044.4, 462.1, 11.81]
File: CMJ2.txt, Extracted Name: Zach, Vennaro
Processing file: CMJ2.txt, Variables: [1.0, 16.6, 523.0, 2.38, 219.0, 1207.8, 2307.87, 523.32, 15.1]
File: CMJ3.txt, Extracted Name: Zach, Vennaro
Processing file: CMJ3.txt, Variables: [1.0, 15.5, 516.0, 2.27, 228.0, 1196.5, 2318.52, 516.07, 14.96]
File: DJ1.txt, Extracted Name: Zach, Vennaro
Processing file: DJ1.txt, Variables: [1.0, 19.8, 2187.0, 1894.7, 2686.37, 705.3, 0.63, 1.6, 23.68]
File: DJ2.txt, Extracted Name: Zach, Vennaro
Processing file: DJ2.txt, Variables: [1.0, 18.2, 1750.0, 1871.3, 2507.22, 746.35, 0.67, 1.38, 23.39]
File: DJ3.txt, Extracted Name: Zach, Vennaro

In [2]:
# Creates full report for age group comparison

import sqlite3
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
from datetime import date
import tempfile
import docx2txt
from PIL import Image, ImageDraw, ImageFont
import os, re
from scipy import integrate, stats
import glob as globmod

# -------- style to match your dark report --------
plt.rcParams.update({
    "figure.facecolor": "#181818",
    "axes.facecolor"  : "#303030",
    "axes.edgecolor"  : "white",
    "axes.labelcolor" : "slategrey",
    "xtick.color"     : "lightgrey",
    "ytick.color"     : "lightgrey",
    "grid.color"      : "dimgrey",
    "text.color"      : "white",
})
# Corrected file paths with raw strings to handle backslashes properly
client_db_path = r'D:\Athletic Screen 2.0\Output Files\movement_database_v2.db'
reference_db_path = r'D:\Athletic Screen 2.0\Output Files\Athletic_Screen_Pro_data_v2.db'

# Ensure the paths are valid and accessible
if not os.path.exists(client_db_path):
    print(f"Client database not found at {client_db_path}")
if not os.path.exists(reference_db_path):
    print(f"Reference database not found at {reference_db_path}")

# Connect to the client and reference databases
client_conn = sqlite3.connect(client_db_path)
reference_conn = sqlite3.connect(reference_db_path)
client_cursor = client_conn.cursor()
reference_cursor = reference_conn.cursor()

print("Databases opened successfully.")

# Fetch the client's name from the database (assuming the 'name' column is in all tables)
client_cursor.execute("SELECT DISTINCT name FROM CMJ")  # Change table if necessary
client_name = client_cursor.fetchone()[0]  # Get the first row and first column
print(f"Client Name: {client_name}")

# ---------- build unique export paths (date-stamped, no overwrite) -----
client_cursor.execute("SELECT MAX(date) FROM CMJ WHERE name = ?", (client_name,))
assessment_date = client_cursor.fetchone()[0]          # e.g. '2025-05-22'
if not assessment_date:                                # fallback to today
    assessment_date = date.today().strftime("%Y-%m-%d")

reports_dir = r'G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports'
os.makedirs(reports_dir, exist_ok=True)

parts = client_name.split(', ')
client_name_rev = f"{parts[1]}_{parts[0]}" if len(parts) == 2 else client_name
base_name = f"Athletic_Report_{client_name_rev}_{assessment_date}"

output_filename = os.path.join(reports_dir, base_name + ".docx")
img_output_directory = os.path.join(reports_dir, "Images", base_name)
os.makedirs(img_output_directory, exist_ok=True)

# auto-increment if the same file already exists
counter = 1
while os.path.exists(output_filename):
    output_filename = os.path.join(
        reports_dir, f"{base_name}_{counter}.docx"
    )
    img_output_directory = os.path.join(
        reports_dir, "Images", f"{base_name}_{counter}"
    )
    os.makedirs(img_output_directory, exist_ok=True)
    counter += 1
# ----------------------------------------------------------------------

# Helper function to calculate percentile
def calculate_percentile(value, reference_data):
    return stats.percentileofscore(reference_data, value)

# ─── UPDATED generate_bar_graph ────────────────────────────────────────────────
def generate_bar_graph(variable, client_value, reference_data, title, tmpdirname):
    """
    Blue bars  = reference distribution
    ─ red      = client MAX (best trial **within the same movement table**)
    ─ violet   = client MEAN (average of those trials)

    • No code outside this function needs to change.
    • If the caller still passes one score, that is fine; this function looks
      up any matching trials on the same assessment day and combines them.
    • RSI histograms use 0.25-wide bins so you see bars at 1.25, 1.50, 1.75 …
    """
    import numpy as np, matplotlib.pyplot as plt, os, sqlite3, re

    # ───────────── lookup: which movement table are we dealing with? ─────────
    column_table_map = {"CMJ": ["CMJ"], "PPU": ["PPU"], "DJ": ["DJ"], "SLV": ["SLV"], "NMT": ["NMT"]}
    table_guess = None
    for tbl in column_table_map:       
        try:
            reference_cursor.execute(f"SELECT COUNT(*) FROM {tbl}")
            if reference_cursor.fetchone()[0] == len(reference_data):
                table_guess = tbl
                break
        except sqlite3.OperationalError:
            continue
    if table_guess is None:               # fallback
        for tbl in column_table_map:
            try:
                client_cursor.execute(f"SELECT 1 FROM {tbl} LIMIT 1")
                table_guess = tbl
                break
            except sqlite3.OperationalError:
                continue

    # ───────────── gather all trials for this athlete / table / day ──────────
    scores = []
    if table_guess:
        # Some tables (e.g., NMT) may not have trial_name. If so, fall back safely.
        try:
            client_cursor.execute(
                f"SELECT trial_name FROM {table_guess} WHERE name=? LIMIT 1",
                (client_name,),
            )
            row = client_cursor.fetchone()
            date_prefix = None
            if row and row[0]:
                m = re.match(r"(\d{4}[-_]\d{2}[-_]\d{2})", row[0])
                date_prefix = m.group(1) if m else None
    
            if date_prefix:
                q = f"SELECT {variable} FROM {table_guess} WHERE name=? AND trial_name LIKE ?"
                client_cursor.execute(q, (client_name, f"{date_prefix}%"))
            else:
                q = f"SELECT {variable} FROM {table_guess} WHERE name=?"
                client_cursor.execute(q, (client_name,))
    
            scores = [r[0] for r in client_cursor.fetchall() if r[0] is not None]
        except sqlite3.OperationalError:
            # No trial_name column (likely NMT) → just use the single value
            scores = [client_value]
    
    # fallback if still empty
    if not scores:
        scores = [client_value]


    scores = np.asarray(scores, dtype=float)
    c_mean = scores.mean()
    
    # --- choose the extreme we draw as the red line ---------------------------
    if variable.upper() == "CT":        # Contact-Time → use the LOWEST value
        c_extreme = scores.min()
        extreme_label = "Client Min"
    else:                               # every other metric → highest value
        c_extreme = scores.max()
        extreme_label = "Client Max"

    perc_mean = calculate_percentile(c_mean, reference_data)

    # ──────────────────────────── plotting begins ────────────────────────────
    plt.figure(facecolor="#181818")
    ax = plt.subplot(111, facecolor="#303030")

    reference_plotted = False  # flag to avoid double-plotting

    # ---------- RSI special case: 0.25-wide bars & custom ticks -------------
    if variable.upper() == "RSI":
        lo = np.floor(reference_data.min() / 0.25) * 0.25
        hi = np.ceil(reference_data.max() / 0.25) * 0.25
        bins = np.arange(lo, hi + 0.25, 0.25)      # bin edges
        centers = bins[:-1]                         # bar positions

        counts, _ = np.histogram(reference_data, bins=bins)
        ax.bar(
            centers,
            counts,
            width=0.25,
            align="edge",
            color="cornflowerblue",
            alpha=0.7,
            edgecolor="white",
            label="Reference",
        )
        # after counts, _ = np.histogram(...)
        for x, h in zip(centers, counts):
            if h == 0:                        # empty bin → draw thin outline
                ax.bar(x, 1e-6, width=.25, align='edge',
                       color='none', edgecolor='#404040', linewidth=.5)

        ax.set_xticks(centers)
        ax.set_xticklabels([f"{x:.2f}" for x in centers], color="lightgrey")

        reference_plotted = True  # we already drew the reference bars
    else:
        bins = 20  # default bin count

    # ---------- draw reference histogram when not plotted above -------------
    if not reference_plotted:
        ax.hist(
            reference_data,
            bins=bins,
            color="cornflowerblue",
            alpha=0.7,
            edgecolor="white",
            label="Reference",
        )

    # ---------- client mean / max lines -------------------------------------
    ax.axvline(c_extreme, color="red", ls="--", lw=2, label="Client Max")
    ax.axvline(c_mean, color="violet", ls="--", lw=2, label="Client Mean")

    # ---------- cosmetics ----------------------------------------------------
    ax.set_xlabel(variable.replace("_", " "), color="slategrey")
    ax.set_ylabel("Frequency", color="slategrey")
    ax.tick_params(axis="x", colors="lightgrey")
    ax.tick_params(axis="y", colors="lightgrey")
    ax.grid(color="dimgrey")

    txt = (
        f"Percentile (mean): {perc_mean:.1f}%"
        f"\nMean: {c_mean:.2f}"
        f"\nMax:  {c_extreme:.2f}"
    )
    plt.text(
        0.95,
        0.05,
        txt,
        ha="right",
        va="bottom",
        transform=ax.transAxes,
        color="white",
        fontsize=9,
        backgroundcolor="#181818",
    )

    ax.legend(facecolor="black", edgecolor="grey", prop={"size": "small"}, labelcolor="grey")

    # ---------- save ---------------------------------------------------------
    out_path = os.path.join(tmpdirname, f"{variable}_histogram.png")
    plt.savefig(out_path, bbox_inches="tight", facecolor="#181818")
    plt.close()
    return out_path

# Function to generate scatter plot for CMJ
def generate_scatter_plot(client_data, reference_data, x_var, y_var, title, tmpdirname):
    plt.figure(facecolor='#181818', figsize=(6, 6))
    ax = plt.subplot(111, facecolor='#303030')

    # Create scatter plot for reference data (cornflower blue)
    ax.scatter(reference_data[x_var], reference_data[y_var], label='Reference', alpha=0.5, color='cornflowerblue')

    # Create scatter plot for client data (red)
    ax.scatter(client_data[x_var], client_data[y_var], label='Client', color='red', edgecolors='black', s=100)

    # Set axis labels, replacing underscores with spaces
    ax.set_xlabel(x_var.replace('_', ' '), color='slategrey')
    ax.set_ylabel(y_var.replace('_', ' '), color='slategrey')

    # Dynamically set ticks and numbers to light grey
    ax.tick_params(axis='x', colors='lightgrey')
    ax.tick_params(axis='y', colors='lightgrey')

    # Add vertical and horizontal reference lines (light grey)
    ax.axvline(x=np.mean(reference_data[x_var]), color='lightgrey', linestyle='--', linewidth=1)
    ax.axhline(y=np.mean(reference_data[y_var]), color='lightgrey', linestyle='--', linewidth=1)

    # Customize grid style
    ax.grid(color='dimgrey')

    # Add legend
    ax.legend(facecolor='black', edgecolor='grey', prop={'size': 'small'}, labelcolor='grey')

    # Save scatter plot to file
    scatter_filename = os.path.join(tmpdirname, 'cmj_scatter.png')
    plt.savefig(scatter_filename, bbox_inches='tight', facecolor='#181818')
    plt.close()

    return scatter_filename


def load_power_txt(txt_path: str) -> pd.Series:
    """
    Parse exported power file (like your example). Returns a pandas Series of power.
    Assumes: header lines, then a line starting with 'ITEM', then data rows:
             <index>\t<value>
    Skips rows without a numeric second field.
    """
    power_vals = []
    in_data = False
    with open(txt_path, "r", encoding="utf-8", errors="ignore") as f:
        for raw in f:
            line = raw.strip()
            if not in_data:
                if line.startswith("ITEM"):
                    in_data = True
                continue
            # from here on, try to parse the last field as a float
            if not line:
                continue
            parts = re.split(r"\t+", line)
            if len(parts) < 2:
                # sometimes a blank power cell exists on the first data row — skip it
                continue
            try:
                val = float(parts[-1])
                power_vals.append(val)
            except ValueError:
                # non-numeric tail → ignore
                continue
    if not power_vals:
        raise ValueError(f"No power values parsed from {txt_path}")
    return pd.Series(power_vals, name="Power")

def analyze_power_curve(power: pd.Series, fs_hz: float = 1000.0) -> dict:
    """
    Compute useful shape/temporal features.
    power: Series of power (W). fs_hz: sampling rate (Hz). If unknown, 1000 Hz is typical.
    Returns a dict of metrics.
    """
    p = np.asarray(power, dtype=float)
    n = p.size
    t = np.arange(n) / fs_hz

    # basic
    p_peak_idx = int(np.nanargmax(p))
    p_peak     = float(p[p_peak_idx])
    t_peak     = float(t[p_peak_idx])

    # onset/offset via % of peak (robust to baseline drift)
    thr10 = 0.10 * p_peak
    thr50 = 0.50 * p_peak
    thr90 = 0.90 * p_peak

    # first index above 10% of peak
    try:
        onset_idx = int(np.argmax(p >= thr10))
    except ValueError:
        onset_idx = 0
    # first index after peak that falls below 10% (or end)
    post = p[p_peak_idx:]
    off_rel = np.argmax(post < thr10) if np.any(post < thr10) else (post.size - 1)
    offset_idx = p_peak_idx + int(off_rel)

    # 10–90% rise time on rising limb
    rising = p[:p_peak_idx+1]
    try:
        i10 = int(np.argmax(rising >= thr10))
        i90 = int(np.argmax(rising >= thr90))
        rise_time = (i90 - i10) / fs_hz if i90 > i10 else np.nan
        rise_slope = (0.8 * p_peak) / rise_time if rise_time and rise_time > 0 else np.nan
    except ValueError:
        i10 = i90 = None
        rise_time = np.nan
        rise_slope = np.nan

    # FWHM (50% of peak) width
    # left crossing
    try:
        left_idx  = int(np.argmax(rising >= thr50))
    except ValueError:
        left_idx = p_peak_idx
    # right crossing
    falling = p[p_peak_idx:]
    try:
        right_rel = int(np.argmax(falling <= thr50))
        right_idx = p_peak_idx + right_rel
    except ValueError:
        right_idx = p_peak_idx
    fwhm_sec = (right_idx - left_idx) / fs_hz if right_idx > left_idx else np.nan

    # Work/impulse of power (area under curve) over the active window
    a = max(0, onset_idx)
    b = min(n - 1, max(offset_idx, p_peak_idx))
    auc_joules = float(np.trapezoid(np.nan_to_num(p[a:b+1], nan=0.0), dx=1.0/fs_hz))

    # timing “balance”: center of mass of power curve (0..1)
    # (earlier vs. later power concentration)
    weights = p[a:b+1].clip(min=0)
    if weights.sum() > 0:
        t_window = t[a:b+1]
        t_com = float(np.sum(t_window * weights) / np.sum(weights))
        t_com_norm = (t_com - t[a]) / max(1e-9, (t[b] - t[a]))
    else:
        t_com = np.nan
        t_com_norm = np.nan

    # variability around peak (local coefficient of variation in ±50 ms)
    w = int(0.05 * fs_hz)
    lo = max(0, p_peak_idx - w)
    hi = min(n, p_peak_idx + w + 1)
    local = p[lo:hi]
    cv_local = float(np.std(local) / np.mean(local)) if np.mean(local) > 0 else np.nan

    return {
        "n_samples": n,
        "fs_hz": fs_hz,
        "peak_power_w": p_peak,
        "time_to_peak_s": t_peak,
        "rise_time_10_90_s": float(rise_time),
        "rise_slope_w_per_s": float(rise_slope),
        "fwhm_s": float(fwhm_sec),
        "auc_j": auc_joules,
        "onset_idx": a,
        "offset_idx": b,
        "peak_idx": p_peak_idx,
        "t_com_s": t_com,
        "t_com_norm_0to1": t_com_norm,
        "cv_local_peak": cv_local,
        "i10_idx": int(i10) if isinstance(i10, int) else None,
        "i90_idx": int(i90) if isinstance(i90, int) else None,
        "left50_idx": left_idx,
        "right50_idx": right_idx,
    }

def plot_power_curve(power: pd.Series,
                     metrics: dict,
                     out_path: str,
                     title: str = "Power Curve",
                     annotate: bool = True):
    """
    Plot power vs. time with annotations (peak, 10–90 rise, FWHM, AUC window).
    """
    p = np.asarray(power, dtype=float)
    t = np.arange(p.size) / metrics["fs_hz"]

    fig, ax = plt.subplots(figsize=(6, 3.6))
    ax.plot(t, p, lw=2, label="Power")
    ax.grid(True)
    ax.set_xlabel("Time (s)")
    ax.set_ylabel("Power (W)")
    ax.set_title(title, pad=8, color="white")

    # annotate regions
    a, b = metrics["onset_idx"], metrics["offset_idx"]
    ax.axvspan(t[a], t[b], color="white", alpha=0.07, label="active window")

    # peak
    pk = metrics["peak_idx"]
    ax.plot([t[pk]], [p[pk]], "o", ms=6, color="tomato", label="Peak")
    ax.axhline(p[pk]*0.5, ls="--", lw=1, color="grey")
    ax.vlines([t[metrics["left50_idx"]], t[metrics["right50_idx"]]],
              ymin=0, ymax=p[pk]*0.5, linestyles="--", colors="grey", lw=1)

    # 10–90 rise (if available)
    if metrics["i10_idx"] is not None and metrics["i90_idx"] is not None:
        ax.plot([t[metrics["i10_idx"]], t[metrics["i90_idx"]]],
                [p[metrics["i10_idx"]], p[metrics["i90_idx"]]],
                lw=3, color="deepskyblue", label="10–90% rise")

    if annotate:
        txt = (f"Peak: {metrics['peak_power_w']:.1f} W @ {metrics['time_to_peak_s']:.3f} s"
               f"\nRise 10–90: {metrics['rise_time_10_90_s']:.3f} s"
               f"\nFWHM: {metrics['fwhm_s']:.3f} s"
               f"\nWork (AUC): {metrics['auc_j']:.1f} J"
               f"\nTiming COM: {metrics['t_com_norm_0to1']:.2f} (0 early…1 late)")
        ax.text(0.99, 0.02, txt, ha="right", va="bottom",
                transform=ax.transAxes, fontsize=9, color="white",
                bbox=dict(boxstyle="round,pad=0.25", facecolor="#181818", edgecolor="#444"))

    ax.legend(facecolor="black", edgecolor="grey", prop={"size": "small"}, labelcolor="grey")
    fig.tight_layout()
    fig.savefig(out_path, bbox_inches="tight")
    plt.close(fig)

def overlay_power_trials(traces: list[pd.Series],
                         fs_hz: float,
                         out_path: str,
                         title: str = "Power (all trials)",
                         align: str = "peak",          # "peak", "onset10", or "none"
                         window_s: tuple[float, float] | None = (0.30, 0.40),
                         show_mean: bool = True):
    """
    Overlay multiple power traces and align them in time.

    align:
      - "peak"    → align each trial's max power to t=0
      - "onset10" → align first sample ≥ 10% of that trial's peak to t=0
      - "none"    → no alignment; left edges at t=0

    window_s: (pre, post) seconds to show around t=0 (None to show full extent)
    """

    # --- convert input to arrays; find alignment index per trial --------------
    arrs = [np.asarray(s, dtype=float) for s in traces if len(s) > 0]
    if not arrs:
        raise ValueError("overlay_power_trials: no non-empty traces provided")

    def _align_index(x: np.ndarray) -> int:
        if align == "none":
            return 0
        # guard against NaNs
        if not np.any(np.isfinite(x)):
            return 0
        # peak index
        try:
            pk = int(np.nanargmax(x))
        except ValueError:
            pk = 0
        if align == "peak":
            return pk
        elif align == "onset10":
            thr = 0.10 * (x[pk] if np.isfinite(x[pk]) else np.nanmax(x))
            # first index >= 10% of that trial's peak
            idx = int(np.argmax(x >= thr)) if np.any(x >= thr) else 0
            return idx
        else:
            return 0

    align_idx = [ _align_index(x) for x in arrs ]

    # --- build a padded matrix so all alignment points land at the same column
    max_left  = max(align_idx)                                  # largest left padding needed
    right_len = [len(x) - i for x, i in zip(arrs, align_idx)]   # samples from align idx to end
    max_right = max(right_len)
    L = max_left + max_right                                     # total aligned length

    aligned = np.full((len(arrs), L), np.nan)
    for r, (x, i0) in enumerate(zip(arrs, align_idx)):
        start = max_left - i0
        aligned[r, start:start+len(x)] = x

    # --- time vector: t=0 at the common alignment column ---------------------
    t = (np.arange(L) - max_left) / fs_hz

    # --- optional cropping around t=0 ----------------------------------------
    if window_s is not None:
        pre, post = window_s
        i_lo = max(0, int(np.floor((-pre)  * fs_hz)) + max_left)
        i_hi = min(L, int(np.ceil( (post) * fs_hz)) + max_left)
        aligned = aligned[:, i_lo:i_hi]
        t = t[i_lo:i_hi]
        
    # --- plot ----------------------------------------------------------------
    fig, ax = plt.subplots(figsize=(6, 3.6))
    
    # consistent y-limits (same scale across trials)
    y_max = np.nanmax(aligned)
    y_min = np.nanmin(aligned)
    
    # NEW: mask-based plotting so left/right NaNs don't truncate the curve visually
    coverage_flags = []  # we'll use this to warn about very short traces
    for i in range(aligned.shape[0]):
        row = aligned[i]
        mask = np.isfinite(row)
        cov = np.count_nonzero(mask) / max(1, len(row))
        coverage_flags.append(cov)
        if np.count_nonzero(mask) >= 2:
            ax.plot(t[mask], row[mask], lw=1.2, alpha=0.6)
    
    # optional: highlight which trials are very short (<50% of the window)
    short = [idx for idx, c in enumerate(coverage_flags, start=1) if c < 0.5]
    if short:
        print(f"overlay_power_trials: {len(short)} trial(s) with <50% coverage in the window: {short}. "
              f"Consider changing window_s or checking the export files.")
    
    if show_mean:
        mean_curve = np.nanmean(aligned, axis=0)
        ax.plot(t, mean_curve, lw=2.2, color="cyan", label="Mean")
    
    # vertical line at alignment point
    ax.axvline(0.0, color="grey", lw=1, ls="--",
               label=("Aligned peak" if align == "peak" else "Aligned onset"))
    
    ax.grid(True)
    ax.set_xlabel("Time (s, aligned)")
    ax.set_ylabel("Power (W)")
    ax.set_title(title, pad=8, color="white")
    if show_mean:
        ax.legend(facecolor="black", edgecolor="grey", prop={"size": "small"}, labelcolor="grey")
    
    ax.set_ylim(y_min - 0.05 * abs(y_max - y_min), y_max + 0.05 * abs(y_max - y_min))
    
    fig.tight_layout()
    fig.savefig(out_path, bbox_inches="tight")
    plt.close(fig)

def find_power_files(movement: str,
                     base_dir: str = r'D:\Athletic Screen 2.0\Output Files') -> list[str]:
    """
    Finds exported power files for a movement. Supports:
      CMJ_Power.txt, CMJ1_Power.txt, CMJ2_Power.txt, ...
      DJ_Power.txt,  DJ*_Power.txt
      SLV_Power.txt, SLV*_Power.txt
    Returns a sorted (unique) list of full paths.
    """
    pats = [f"{movement}_Power.txt", f"{movement}*_Power.txt"]
    files = []
    for p in pats:
        files += globmod.glob(os.path.join(base_dir, p))
    return sorted(set(files))

def build_aligned_matrix(traces: list[pd.Series],
                         fs_hz: float,
                         align: str = "peak",
                         window_s: tuple[float, float] | None = (0.30, 0.40)):
    """
    Returns (aligned 2D array [n_trials x T], time vector [T], indices_info[list]).
    Time is centered so the chosen alignment point is at t=0.
    """
    arrs = [np.asarray(s, dtype=float) for s in traces if len(s) > 0]
    if not arrs:
        raise ValueError("build_aligned_matrix: no non-empty traces")

    def _align_idx(x: np.ndarray) -> int:
        if align == "none":
            return 0
        if not np.any(np.isfinite(x)):
            return 0
        pk = int(np.nanargmax(x))
        if align == "peak":
            return pk
        elif align == "onset10":
            thr = 0.10 * (x[pk] if np.isfinite(x[pk]) else np.nanmax(x))
            return int(np.argmax(x >= thr)) if np.any(x >= thr) else 0
        return 0

    aidx = [_align_idx(x) for x in arrs]
    max_left = max(aidx)
    right_len = [len(x) - i for x, i in zip(arrs, aidx)]
    max_right = max(right_len)
    L = max_left + max_right

    aligned = np.full((len(arrs), L), np.nan)
    for r, (x, i0) in enumerate(zip(arrs, aidx)):
        start = max_left - i0
        aligned[r, start:start+len(x)] = x

    t = (np.arange(L) - max_left) / fs_hz

    if window_s is not None:
        pre, post = window_s
        i_lo = max(0, int(np.floor((-pre)  * fs_hz)) + max_left)
        i_hi = min(L, int(np.ceil( (post) * fs_hz)) + max_left)
        return aligned[:, i_lo:i_hi], t[i_lo:i_hi], aidx
    return aligned, t, aidx

def mean_aligned_curve(traces: list[pd.Series],
                       fs_hz: float,
                       align: str = "peak",
                       window_s: tuple[float, float] | None = (0.30, 0.40)) -> pd.Series:
    aligned, t, _ = build_aligned_matrix(traces, fs_hz, align, window_s)
    mean_curve = np.nanmean(aligned, axis=0)
    return pd.Series(mean_curve, index=t, name="Power")

def analyze_power_curve_advanced(power: pd.Series, fs_hz: float = 1000.0) -> dict:
    """
    Extends your analyze_power_curve with additional, practical features:
      • rpd_max (max rate of power development) & time to RPDmax
      • AUC early (pre-peak) / late (post-peak), % early work
      • decay_90_10 on falling limb
      • skewness, kurtosis
      • spectral centroid (how ‘fast’ the curve is in frequency domain)
    """
    base = analyze_power_curve(power, fs_hz)
    p = np.asarray(power, dtype=float)
    n = p.size
    t = np.arange(n) / fs_hz

    # RPD
    dp = np.gradient(p, 1.0/fs_hz)
    rpd_max = float(np.nanmax(dp))
    rpd_idx = int(np.nanargmax(dp))
    base["rpd_max_w_per_s"]   = rpd_max
    base["time_to_rpd_max_s"] = rpd_idx / fs_hz

    # Early/late work around peak (use on/peak/off from base)
    a, b, pk = base["onset_idx"], base["offset_idx"], base["peak_idx"]
    auc_pre  = float(np.trapezoid(np.nan_to_num(p[a:pk+1],  nan=0.0), dx=1.0/fs_hz)) if pk >= a else np.nan
    auc_post = float(np.trapezoid(np.nan_to_num(p[pk:b+1], nan=0.0), dx=1.0/fs_hz)) if b >= pk else np.nan
    total    = (auc_pre if np.isfinite(auc_pre) else 0) + (auc_post if np.isfinite(auc_post) else 0)
    base["auc_pre_j"]      = auc_pre
    base["auc_post_j"]     = auc_post
    base["work_early_pct"] = float(100.0 * auc_pre / total) if total > 0 else np.nan

    # Decay time 90→10% of peak on falling limb
    peak_val = p[pk]
    fall = p[pk:]
    thr90 = 0.90 * peak_val
    thr10 = 0.10 * peak_val
    i90 = int(np.argmax(fall <= thr90)) if np.any(fall <= thr90) else 0
    i10 = int(np.argmax(fall <= thr10)) if np.any(fall <= thr10) else len(fall)-1
    base["decay_90_10_s"] = (i10 - i90) / fs_hz if i10 > i90 else np.nan

    # Shape stats
    finite = np.isfinite(p)
    base["skewness"] = float(stats.skew(p[finite])) if np.any(finite) else np.nan
    base["kurtosis"] = float(stats.kurtosis(p[finite], fisher=True)) if np.any(finite) else np.nan

    # Spectral centroid
    x = p - np.nanmean(p)
    X = np.abs(np.fft.rfft(np.nan_to_num(x)))
    freqs = np.fft.rfftfreq(x.size, d=1.0/fs_hz)
    base["spectral_centroid_hz"] = float(np.sum(freqs * X) / max(1e-12, np.sum(X)))

    return base

def add_power_analysis_section(doc: Document,
                               movement: str,
                               traces: list[pd.Series],
                               fs_hz: float,
                               tmpdirname: str,
                               reference_cursor,
                               reference_table: str):
    """
    1) Overlay (peaks aligned) + mean curve
    2) Annotated mean power curve
    3) Table of per-trial metrics + Mean/SD
    4) Adds reference percentile for peak power (vs PP_FORCEPLATE in ref DB)
    """
    # --- 1) Overlay (aligned at peak) ---------------------------------------
    overlay_png = os.path.join(tmpdirname, f"{movement}_power_overlay.png")
    overlay_power_trials(
        traces, fs_hz=fs_hz, out_path=overlay_png,
        title=f"{movement} Power – All Trials (peaks aligned)",
        align="peak", window_s=(0.30, 0.40), show_mean=True
    )
    doc.add_paragraph("Power Curves (aligned at peak)", style="Heading 2")
    doc.add_picture(overlay_png, width=Inches(6))

    # --- 2) Mean curve (aligned) + annotated plot ---------------------------
    mean_series = mean_aligned_curve(traces, fs_hz, align="peak", window_s=(0.30, 0.40))
    mean_metrics = analyze_power_curve_advanced(mean_series, fs_hz=fs_hz)

    mean_png = os.path.join(tmpdirname, f"{movement}_power_mean_annotated.png")
    plot_power_curve(mean_series, mean_metrics, mean_png,
                     title=f"{movement} – Mean Power (aligned)")
    doc.add_paragraph("Mean Power Curve (annotated)", style="Heading 3")
    doc.add_picture(mean_png, width=Inches(6))

    # --- 3) Per-trial metrics & summary -------------------------------------
    per = [analyze_power_curve_advanced(s, fs_hz=fs_hz) for s in traces]
    df  = pd.DataFrame(per)

    # Pick the important rows for the document (order here = row order in table)
    metric_rows = [
        ("Peak Power (W)",            "peak_power_w",        "{:.0f}"),
        ("Time to Peak (s)",          "time_to_peak_s",      "{:.3f}"),
        ("RPD max (W/s)",             "rpd_max_w_per_s",     "{:.0f}"),
        ("Time to RPD max (s)",       "time_to_rpd_max_s",   "{:.3f}"),
        ("Rise 10–90% (s)",           "rise_time_10_90_s",   "{:.3f}"),
        ("FWHM (s)",                  "fwhm_s",              "{:.3f}"),
        ("Work (AUC, J)",             "auc_j",               "{:.0f}"),
        ("Early work (%)",            "work_early_pct",      "{:.1f}"),
        ("Decay 90→10% (s)",          "decay_90_10_s",       "{:.3f}"),
        ("Timing CoM (0…1)",          "t_com_norm_0to1",     "{:.2f}"),
        ("Skewness",                  "skewness",            "{:.2f}"),
        ("Kurtosis",                  "kurtosis",            "{:.2f}"),
        ("Spectral centroid (Hz)",    "spectral_centroid_hz","{:.2f}"),
    ]

    # table: Metric | Trial1 | Trial2 | ... | Mean | SD
    tbl = doc.add_table(rows=1 + len(metric_rows), cols=2 + len(traces))
    tbl.style = "Light List" if "Light List" in [s.name for s in doc.styles] else tbl.style
    # header
    hdr = tbl.rows[0].cells
    hdr[0].text = "Metric"
    for i in range(len(traces)):
        hdr[1+i].text = f"Trial {i+1}"
    hdr[-1].text = "Mean ± SD"

    # body
    for r, (label, key, fmt) in enumerate(metric_rows, start=1):
        row_cells = tbl.rows[r].cells
        row_cells[0].text = label
        vals = df.get(key, pd.Series([np.nan]*len(traces))).values
        for i, v in enumerate(vals):
            row_cells[1+i].text = (fmt.format(v) if np.isfinite(v) else "—")
        mu = np.nanmean(vals)
        sd = np.nanstd(vals, ddof=1) if np.count_nonzero(np.isfinite(vals)) > 1 else np.nan
        row_cells[-1].text = (f"{fmt.format(mu)} ± {fmt.format(sd)}"
                              if np.isfinite(mu) and np.isfinite(sd) else
                              (fmt.format(mu) if np.isfinite(mu) else "—"))

    # small spacer
    doc.add_paragraph("")

    # --- 4) Reference percentile for peak power (vs. DB PP_FORCEPLATE) ------
    try:
        reference_cursor.execute(f"SELECT PP_FORCEPLATE FROM {reference_table} WHERE PP_FORCEPLATE IS NOT NULL")
        ref_pp = np.array([r[0] for r in reference_cursor.fetchall()], dtype=float)
    except sqlite3.OperationalError:
        ref_pp = np.array([])

    if ref_pp.size:
        # use the highest peak among trials (or mean peak if you prefer)
        trial_peaks = df["peak_power_w"].values
        best_peak   = float(np.nanmax(trial_peaks)) if trial_peaks.size else np.nan
        pctl        = percentile_vs_reference(best_peak, ref_pp) if np.isfinite(best_peak) else np.nan
        doc.add_paragraph(f"Reference percentile (peak power): {pctl:.1f}%", style="Intense Quote")

def percentile_vs_reference(value: float, reference_values: np.ndarray) -> float:
    """
    Wrapper for percentile (1..99) using scipy-like methodology.
    """
    return stats.percentileofscore(reference_values, value)

# Modified function to generate a histogram comparing left and right leg data
def generate_slv_histogram(variable, left_value, right_value,
                           reference_data, title, tmpdirname):
    """
    Blue bars  = reference distribution
    ─ green    = client LEFT (latest trial value you passed in)
    ─ orange   = client RIGHT

    Text box shows, for each side:
        • mean across all trials from the same assessment day
        • max across those trials
        • percentile of that mean vs. the reference distribution
    """
    import numpy as np, matplotlib.pyplot as plt, os, re, sqlite3

    # ── helper – get all trials for a given side on the same assessment day ──
    def _fetch_side_vals(side):
        # detect yyyy-mm-dd prefix in the first trial_name for this side
        client_cursor.execute(
            "SELECT trial_name FROM SLV WHERE name=? AND side=? LIMIT 1",
            (client_name, side)
        )
        row = client_cursor.fetchone()
        date_prefix = None
        if row and row[0]:
            m = re.match(r'(\d{4}[-_]\d{2}[-_]\d{2})', row[0])
            date_prefix = m.group(1) if m else None

        if date_prefix:
            q = f"SELECT {variable} FROM SLV WHERE name=? AND side=? AND trial_name LIKE ?"
            client_cursor.execute(q, (client_name, side, f'{date_prefix}%'))
        else:
            q = f"SELECT {variable} FROM SLV WHERE name=? AND side=?"
            client_cursor.execute(q, (client_name, side))

        return [r[0] for r in client_cursor.fetchall() if r[0] is not None]

    # pull all trials for each side; fall back to the single value passed in
    left_vals  = np.asarray(_fetch_side_vals('Left')  or [left_value],  dtype=float)
    right_vals = np.asarray(_fetch_side_vals('Right') or [right_value], dtype=float)

    left_mean,  left_max  = left_vals.mean(),  left_vals.max()
    right_mean, right_max = right_vals.mean(), right_vals.max()

    left_pct  = calculate_percentile(left_mean,  reference_data)
    right_pct = calculate_percentile(right_mean, reference_data)

    # ── plot ────────────────────────────────────────────────────────────────
    plt.figure(facecolor='#181818')
    ax = plt.subplot(111, facecolor='#303030')

    ax.hist(reference_data, bins=20, color='cornflowerblue',
            alpha=0.7, edgecolor='white', label='Reference')

    ax.axvline(left_value,  color='green',  ls='--', lw=2, label='Left (latest)')
    ax.axvline(right_value, color='orange', ls='--', lw=2, label='Right (latest)')

    ax.set_xlabel(variable.replace('_', ' '), color='slategrey')
    ax.set_ylabel('Frequency',               color='slategrey')
    ax.tick_params(axis='x', colors='lightgrey')
    ax.tick_params(axis='y', colors='lightgrey')
    ax.grid(color='dimgrey')

    txt = (
        f'LEFT  – mean: {left_mean:.2f}\n'
        f'        max:  {left_max:.2f}\n'
        f'        %ile: {left_pct:.1f}\n'
        f'RIGHT – mean: {right_mean:.2f}\n'
        f'        max:  {right_max:.2f}\n'
        f'        %ile: {right_pct:.1f}'
    )
    plt.text(0.95, 0.05, txt, ha='right', va='bottom',
             transform=ax.transAxes, color='white', fontsize=9,
             backgroundcolor='#181818')

    ax.legend(facecolor='black', edgecolor='grey',
              prop={'size': 'small'}, labelcolor='grey')

    out_path = os.path.join(tmpdirname, f'{variable}_histogram_slv.png')
    plt.savefig(out_path, bbox_inches='tight', facecolor='#181818')
    plt.close()
    return out_path

# Prepare the document
doc = Document()
doc.add_picture("8ctane Baseball - Black abd Blue BG.jpeg", width=Inches(4.0))  # Replace with your logo path
doc.paragraphs[-1].alignment = WD_ALIGN_PARAGRAPH.CENTER

# Adding player name and date
doc.add_paragraph(f"Player's Name: {client_name}")  # Replace client_name with dynamic value
doc.add_paragraph(f"Date: {date.today().strftime('%B %d, %Y')}")

# Create a temporary directory to store images
with tempfile.TemporaryDirectory() as tmpdirname:
    # List of movements to process
    movements = ['CMJ', 'PPU', 'DJ', 'SLV', 'NMT']
    
    for movement in movements:
        # Add movement title
        doc.add_paragraph(f"{movement} Report", style='Title')
        doc.add_paragraph(f"This section includes percentile reports and comparisons for {movement}.", style='Heading 2')

        if movement == 'CMJ':
            # Fetch CMJ data for the client
            client_cursor.execute("""
                SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg,
                       Force_at_PP, Vel_at_PP
                FROM CMJ WHERE name = ?
            """, (client_name,))
            client_cmj_data = client_cursor.fetchone()
            reference_cursor.execute("""
                SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg,
                       Force_at_PP, Vel_at_PP
                FROM CMJ
            """)
            reference_cmj_data = np.array(reference_cursor.fetchall())
            
            # --- CMJ power files & analysis ---
            cmj_files = find_power_files("CMJ")
            if cmj_files:
                cmj_traces = [load_power_txt(pf) for pf in cmj_files]
                add_power_analysis_section(
                    doc, movement="CMJ", traces=cmj_traces, fs_hz=1000,
                    tmpdirname=tmpdirname, reference_cursor=reference_cursor,
                    reference_table="CMJ"
                )

            # Ensure data exists before proceeding
            if client_cmj_data and reference_cmj_data.size > 0:
                # Generate bar graphs for each variable in CMJ
                variables = ['JH_IN', 'PP_FORCEPLATE', 'PP_W_per_kg', 'Force_at_PP', 'Vel_at_PP']
                for i, var in enumerate(variables):
                    # Format the variable name by removing underscores
                    formatted_var = var.replace('_', ' ')
                    
                    # Add variable title before the graph
                    doc.add_paragraph(f"{formatted_var} Comparison", style='Heading 2')
                    
                    # Generate the bar graph and add to document
                    bar_image = generate_bar_graph(var, client_cmj_data[i], reference_cmj_data[:, i], f'{formatted_var} Comparison', tmpdirname)
                    doc.add_picture(bar_image, width=Inches(6))
                
                # Generate scatter plot for CMJ (Force_Peak_Power vs. Velo_Peak_Power)
                client_cmj_dict = {'Force_at_PP': client_cmj_data[3],
                   'Vel_at_PP':   client_cmj_data[4]}
                reference_cmj_dict = pd.DataFrame(reference_cmj_data, columns=variables)
                
                # Add scatter plot title and image
                doc.add_paragraph("Force vs. Velocity Scatter Plot", style='Heading 2')
                scatter_image = generate_scatter_plot(client_cmj_dict, reference_cmj_dict,
                                                      'Force_at_PP', 'Vel_at_PP',
                                                      'CMJ: Force vs. Velocity', tmpdirname)
                doc.add_picture(scatter_image, width=Inches(6))
                
            # ───────────────────────── PPU ─────────────────────────        
            elif movement == 'PPU':
                # Fetch PPU data for the client
                client_cursor.execute("""
                    SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg,
                           Force_at_PP, Vel_at_PP
                    FROM PPU WHERE name = ?
                """, (client_name,))
                client_ppu_data = client_cursor.fetchone()
    
                # Reference pull for PPU
                try:
                    reference_cursor.execute("""
                        SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg,
                               Force_at_PP, Vel_at_PP
                        FROM PPU
                    """)
                    reference_ppu_data = np.array(reference_cursor.fetchall(), dtype=float)
                except sqlite3.OperationalError:
                    reference_ppu_data = np.empty((0, 5), dtype=float)
    
                # --- PPU power files & analysis (if exported similarly to CMJ) ---
                ppu_files = find_power_files("PPU")   # expects PPU_Power*.txt pattern
                if ppu_files:
                    ppu_traces = [load_power_txt(pf) for pf in ppu_files]
                    add_power_analysis_section(
                        doc, movement="PPU", traces=ppu_traces, fs_hz=1000,
                        tmpdirname=tmpdirname, reference_cursor=reference_cursor,
                        reference_table="PPU"
                    )
    
                # Graphs and scatter if data exists
                if client_ppu_data is not None and reference_ppu_data.size > 0:
                    variables = ['JH_IN', 'PP_FORCEPLATE', 'PP_W_per_kg', 'Force_at_PP', 'Vel_at_PP']
                    for i, var in enumerate(variables):
                        formatted_var = var.replace('_', ' ')
                        doc.add_paragraph(f"{formatted_var} Comparison (PPU)", style='Heading 2')
                        bar_image = generate_bar_graph(
                            var,
                            float(client_ppu_data[i]),
                            reference_ppu_data[:, i],
                            f'{formatted_var} Comparison (PPU)',
                            tmpdirname
                        )
                        doc.add_picture(bar_image, width=Inches(6))
    
                    # Force vs Velocity scatter for PPU
                    client_ppu_dict = {'Force_at_PP': client_ppu_data[3],
                                       'Vel_at_PP'  : client_ppu_data[4]}
                    reference_ppu_df = pd.DataFrame(reference_ppu_data, columns=variables)
                    doc.add_paragraph("Force vs. Velocity Scatter Plot (PPU)", style='Heading 2')
                    ppu_scatter = generate_scatter_plot(
                        client_ppu_dict,
                        reference_ppu_df if not reference_ppu_df.empty else pd.DataFrame(columns=variables),
                        'Force_at_PP', 'Vel_at_PP',
                        'PPU: Force vs. Velocity', tmpdirname
                    )
                    doc.add_picture(ppu_scatter, width=Inches(6))
                else:
                    print("PPU: missing client or reference data; skipping PPU figures.")

        # ───────────────────────── DJ ─────────────────────────        
        elif movement == 'DJ':
            client_cursor.execute("""
                SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg, Force_at_PP, Vel_at_PP, CT, RSI
                FROM DJ
                WHERE name = ?
            """, (client_name,))
            client_dj_data = client_cursor.fetchone()

            # ── REFERENCE (robust: pandas + print counts) ─────────────────────
            dj_vars = ['JH_IN', 'PP_FORCEPLATE', 'PP_W_per_kg',
                       'Force_at_PP', 'Vel_at_PP', 'CT', 'RSI']
            ref_sql = f"SELECT {', '.join(dj_vars)} FROM DJ"
            reference_dj_df = pd.read_sql_query(ref_sql, reference_conn)
            print(f"DJ reference rows: {len(reference_dj_df)}")
            reference_dj_data = reference_dj_df.to_numpy(dtype=float, copy=False)

            # ── POWER OVERLAY (files like *DJ*_Power*.txt or DJ_Power*.txt) ───
            power_dir = r"D:\Athletic Screen 2.0\Output Files"
            dj_power_files = (sorted(globmod.glob(os.path.join(power_dir, "*DJ*_Power*.txt"))) or
                  sorted(globmod.glob(os.path.join(power_dir, "DJ_Power*.txt"))))
            # --- DJ power files & analysis ---
            dj_files = find_power_files("DJ")
            if dj_files:
                dj_traces = [load_power_txt(pf) for pf in dj_files]
                add_power_analysis_section(
                    doc, movement="DJ", traces=dj_traces, fs_hz=1000,
                    tmpdirname=tmpdirname, reference_cursor=reference_cursor,
                    reference_table="DJ"
                )

            # ── BAR GRAPHS + SCATTER (with reference) ─────────────────────────
            if client_dj_data:
                for i, var in enumerate(dj_vars):
                    doc.add_paragraph(f"{var.replace('_',' ')} Comparison", style='Heading 2')
                    ref_col = reference_dj_data[:, i] if len(reference_dj_df) else np.array([])
                    bar = generate_bar_graph(var, client_dj_data[i], ref_col,
                                             f'{var} Comparison', tmpdirname)
                    doc.add_picture(bar, width=Inches(6))

                client_dj_dict = {'Force_at_PP': client_dj_data[3],
                                  'Vel_at_PP'  : client_dj_data[4]}
                doc.add_paragraph("Force vs. Velocity Scatter Plot", style='Heading 2')
                dj_scatter = generate_scatter_plot(
                    client_dj_dict,
                    reference_dj_df if not reference_dj_df.empty else pd.DataFrame(columns=dj_vars),
                    'Force_at_PP', 'Vel_at_PP',
                    'DJ: Force vs. Velocity', tmpdirname
                )
                doc.add_picture(dj_scatter, width=Inches(6))
            else:
                print("No DJ client row found.")

        # ───────────────────────── SLV ───────────────────────        
        elif movement == 'SLV':
            # ── CLIENT ROWS (Left/Right) ───────────────────────────────────────
            client_cursor.execute("""
                SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg, Force_at_PP, Vel_at_PP
                FROM SLV WHERE name = ? AND side = 'Left'
            """, (client_name,))
            client_slvl_data = client_cursor.fetchone()

            client_cursor.execute("""
                SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg, Force_at_PP, Vel_at_PP
                FROM SLV WHERE name = ? AND side = 'Right'
            """, (client_name,))
            client_slvr_data = client_cursor.fetchone()

            # ── REFERENCE (keep side to filter/inspect if needed) ─────────────
            slv_vars_no_side = ['JH_IN', 'PP_FORCEPLATE', 'PP_W_per_kg', 'Force_at_PP', 'Vel_at_PP']
            ref_sql = f"SELECT side, {', '.join(slv_vars_no_side)} FROM SLV"
            reference_slv_df = pd.read_sql_query(ref_sql, reference_conn)
            print(f"SLV reference rows: {len(reference_slv_df)}")

            # ── POWER OVERLAYS (Left & Right) ─────────────────────────────────
            # We try common patterns. Adjust to your actual export names if needed.
            power_dir = r"D:\Athletic Screen 2.0\Output Files"

            # Left trials
            slv_left_files = (sorted(globmod.glob(os.path.join(power_dir, "*SLVL*_Power*.txt"))) or
                  sorted(globmod.glob(os.path.join(power_dir, "SLV_Power_Left*.txt"))) or
                  sorted(globmod.glob(os.path.join(power_dir, "SLV_Left*_Power*.txt"))))

            # Right trials
            slv_right_files = (sorted(globmod.glob(os.path.join(power_dir, "*SLVR*_Power*.txt"))) or
                   sorted(globmod.glob(os.path.join(power_dir, "SLV_Power_Right*.txt"))) or
                   sorted(globmod.glob(os.path.join(power_dir, "SLV_Right*_Power*.txt"))))

            # --- SLV power files & analysis ---
            slv_files = find_power_files("SLV")
            if slv_files:
                slv_traces = [load_power_txt(pf) for pf in slv_files]
                add_power_analysis_section(
                    doc, movement="SLV", traces=slv_traces, fs_hz=1000,
                    tmpdirname=tmpdirname, reference_cursor=reference_cursor,
                    reference_table="SLV"
                )

            # ── HISTOGRAMS + SCATTER (with reference) ─────────────────────────
            if client_slvl_data and client_slvr_data and not reference_slv_df.empty:
                # Build name→value dicts so we don't rely on fragile positional indexes
                slv_cols = ['JH_IN', 'PP_FORCEPLATE', 'PP_W_per_kg', 'Force_at_PP', 'Vel_at_PP']
                client_slvl = dict(zip(slv_cols, map(float, client_slvl_data)))
                client_slvr = dict(zip(slv_cols, map(float, client_slvr_data)))
            
                # We want JH, PP, Force@PP (fixed), and Vel@PP
                for var in ['JH_IN', 'PP_FORCEPLATE', 'Force_at_PP', 'Vel_at_PP']:
                    doc.add_paragraph(f"{var.replace('_',' ')} Comparison (Left vs Right)",
                                      style='Heading 2')
            
                    # Reference distribution by column name
                    ref_col = reference_slv_df[var].to_numpy(dtype=float, copy=False)
            
                    # Client values by column name (correct variables now)
                    left_val  = client_slvl.get(var, np.nan)
                    right_val = client_slvr.get(var, np.nan)
            
                    hist = generate_slv_histogram(
                        var,
                        left_val,            # Left
                        right_val,           # Right
                        ref_col,
                        f'{var} Comparison',
                        tmpdirname
                    )
                    doc.add_picture(hist, width=Inches(6))
            
                # Scatter Force vs Velocity (merge both sides)
                client_slv_all = np.array([list(client_slvl.values()),
                                           list(client_slvr.values())], dtype=float)
                client_slv_dict = {
                    'Force_at_PP': [client_slvl['Force_at_PP'], client_slvr['Force_at_PP']],
                    'Vel_at_PP'  : [client_slvl['Vel_at_PP'],   client_slvr['Vel_at_PP']],
                }
                ref_for_scatter = reference_slv_df[['Force_at_PP', 'Vel_at_PP']]
                doc.add_paragraph("Force vs. Velocity Scatter Plot", style='Heading 2')
                slv_scatter = generate_scatter_plot(
                    client_slv_dict,
                    ref_for_scatter,
                    'Force_at_PP', 'Vel_at_PP',
                    'SLV: Force vs. Velocity', tmpdirname
                )
                doc.add_picture(slv_scatter, width=Inches(6))
            else:
                if not (client_slvl_data and client_slvr_data):
                    print("⚠️  Missing SLV client data – skipping SLV graphs.")
                if reference_slv_df.empty:
                    print("⚠️  SLV reference pull returned 0 rows.")
    
        elif movement == 'NMT':
            # Fetch NMT data for the client (10s taps only)
            print("Entering NMT section…")
            client_cursor.execute("SELECT NUM_TAPS_10s FROM NMT WHERE name = ?", (client_name,))
            client_nmt_data = client_cursor.fetchone()
        
            reference_cursor.execute("SELECT NUM_TAPS_10s FROM NMT")
            reference_nmt_data = np.array(reference_cursor.fetchall(), dtype=float)
        
            print(f"NMT client row present: {client_nmt_data is not None}, reference rows: {len(reference_nmt_data)}")
        
            if client_nmt_data and reference_nmt_data.size > 0:
                nmt_var_label = 'NUM TAPS (10s)'
                doc.add_paragraph(f"{nmt_var_label} Comparison", style='Heading 2')
        
                # Pass table_hint="NMT" to avoid mis-inference
                nmt_image = generate_bar_graph(
                    'NUM_TAPS_10s',
                    float(client_nmt_data[0]),
                    reference_nmt_data[:, 0],
                    f'{nmt_var_label} Comparison',
                    tmpdirname          # ← stop here
                )
                doc.add_picture(nmt_image, width=Inches(6))
            else:
                print("NMT: missing client or reference data; skipping figure.")


# Function to convert DOCX to images
def docx_to_images(docx_path, output_dir):
    # Extract text from the DOCX file
    text = docx2txt.process(docx_path)
    
    # Split the text into lines
    lines = text.splitlines()

    # Create a blank image with white background
    img_width, img_height = 1000, 1500
    image = Image.new('RGB', (img_width, img_height), color='white')
    draw = ImageDraw.Draw(image)

    # Use a simple font
    try:
        font = ImageFont.truetype("arial.ttf", 20)
    except IOError:
        font = ImageFont.load_default()

    # Draw the text onto the image
    padding = 20
    y_text = padding
    for line in lines:
        if y_text + padding > img_height:
            # Save the image and start a new one if the text exceeds the page height
            img_path = os.path.join(output_dir, f"page_{int(y_text / img_height)}.png")
            image.save(img_path)
            y_text = padding
            image = Image.new('RGB', (img_width, img_height), color='white')
            draw = ImageDraw.Draw(image)

        # Calculate text size and draw it
        text_bbox = draw.textbbox((0, 0), line, font=font)
        text_width = text_bbox[2] - text_bbox[0]
        text_height = text_bbox[3] - text_bbox[1]

        draw.text((padding, y_text), line, font=font, fill="black")
        y_text += text_height + padding

    # Save the last image
    img_path = os.path.join(output_dir, "final_page.png")
    image.save(img_path)

    return img_path

doc.save(output_filename)          # ← only one final save
print(f"Document saved at: {output_filename}")

# Close connections
client_conn.close()
reference_conn.close()

# Example usage
img_output_directory = r'G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports\Images'
os.makedirs(img_output_directory, exist_ok=True)

# Convert DOCX to images
img_path = docx_to_images(output_filename, img_output_directory)
print(f"Images saved at {img_path}")

Databases opened successfully.
Client Name: Jalen Hollins
Document saved at: G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports\Athletic_Report_Jalen_Hollins.docx
Images saved at G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports\Images\final_page.png


In [3]:


import sqlite3
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
from datetime import date
import tempfile
import docx2txt
from PIL import Image, ImageDraw, ImageFont
import os

# Corrected file paths with raw strings to handle backslashes properly
client_db_path = r'D:\Athletic Screen 2.0\Output Files\movement_database_v2.db'
reference_db_path = r'D:\Athletic Screen 2.0\Output Files\Athletic_Screen_All_data_v2.db'

# Ensure the paths are valid and accessible
if not os.path.exists(client_db_path):
    print(f"Client database not found at {client_db_path}")
if not os.path.exists(reference_db_path):
    print(f"Reference database not found at {reference_db_path}")

# Connect to the client and reference databases
client_conn = sqlite3.connect(client_db_path)
reference_conn = sqlite3.connect(reference_db_path)
client_cursor = client_conn.cursor()
reference_cursor = reference_conn.cursor()

print("Databases opened successfully.")

# Fetch the client's name from the database (assuming the 'name' column is in all tables)
client_cursor.execute("SELECT DISTINCT name FROM CMJ")  # Change table if necessary
client_name = client_cursor.fetchone()[0]  # Get the first row and first column
print(f"Client Name: {client_name}")

# ---------- build unique export paths (date-stamped, no overwrite) -----
client_cursor.execute("SELECT MAX(date) FROM CMJ WHERE name = ?", (client_name,))
assessment_date = client_cursor.fetchone()[0]          # e.g. '2025-05-22'
if not assessment_date:                                # fallback to today
    assessment_date = date.today().strftime("%Y-%m-%d")

reports_dir = r'G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports'
os.makedirs(reports_dir, exist_ok=True)

parts = client_name.split(', ')
client_name_rev = f"{parts[1]}_{parts[0]}" if len(parts) == 2 else client_name
base_name = f"Athletic_Report_{client_name_rev}_{assessment_date}_All Comp"

output_filename = os.path.join(reports_dir, base_name + ".docx")
img_output_directory = os.path.join(reports_dir, "Images", base_name)
os.makedirs(img_output_directory, exist_ok=True)

# auto-increment if the same file already exists
counter = 1
while os.path.exists(output_filename):
    output_filename = os.path.join(
        reports_dir, f"{base_name}_{counter}.docx"
    )
    img_output_directory = os.path.join(
        reports_dir, "Images", f"{base_name}_{counter}"
    )
    os.makedirs(img_output_directory, exist_ok=True)
    counter += 1
# ----------------------------------------------------------------------


# Helper function to calculate percentile
def calculate_percentile(value, reference_data):
    return stats.percentileofscore(reference_data, value)

# ─── UPDATED generate_bar_graph ────────────────────────────────────────────────
def generate_bar_graph(variable, client_value, reference_data, title, tmpdirname):
    """
    Blue bars  = reference distribution
    ─ red      = client MAX (best trial **within the same movement table**)
    ─ violet   = client MEAN (average of those trials)

    • No code outside this function needs to change.
    • If the caller still passes one score, that is fine; this function looks
      up any matching trials on the same assessment day and combines them.
    • RSI histograms use 0.25-wide bins so you see bars at 1.25, 1.50, 1.75 …
    """
    import numpy as np, matplotlib.pyplot as plt, os, sqlite3, re

    # ───────────── lookup: which movement table are we dealing with? ─────────
    column_table_map = {"CMJ": ["CMJ"], "DJ": ["DJ"], "SLV": ["SLV"], "NMT": ["NMT"]}
    table_guess = None
    for tbl in column_table_map:          # CMJ, DJ, SLV, NMT
        try:
            reference_cursor.execute(f"SELECT COUNT(*) FROM {tbl}")
            if reference_cursor.fetchone()[0] == len(reference_data):
                table_guess = tbl
                break
        except sqlite3.OperationalError:
            continue
    if table_guess is None:               # fallback
        for tbl in column_table_map:
            try:
                client_cursor.execute(f"SELECT 1 FROM {tbl} LIMIT 1")
                table_guess = tbl
                break
            except sqlite3.OperationalError:
                continue

    # ───────────── gather all trials for this athlete / table / day ──────────
    scores = []
    if table_guess:
        client_cursor.execute(
            f"SELECT trial_name FROM {table_guess} WHERE name=? LIMIT 1",
            (client_name,),
        )
        row = client_cursor.fetchone()
        date_prefix = None
        if row and row[0]:
            m = re.match(r"(\d{4}[-_]\d{2}[-_]\d{2})", row[0])
            date_prefix = m.group(1) if m else None

        if date_prefix:
            q = f"SELECT {variable} FROM {table_guess} WHERE name=? AND trial_name LIKE ?"
            client_cursor.execute(q, (client_name, f"{date_prefix}%"))
        else:
            q = f"SELECT {variable} FROM {table_guess} WHERE name=?"
            client_cursor.execute(q, (client_name,))

        scores = [r[0] for r in client_cursor.fetchall() if r[0] is not None]

    # fallback if still empty
    if not scores:
        scores = [client_value]

    scores = np.asarray(scores, dtype=float)
    c_mean = scores.mean()
    
    # --- choose the extreme we draw as the red line ---------------------------
    if variable.upper() == "CT":        # Contact-Time → use the LOWEST value
        c_extreme = scores.min()
        extreme_label = "Client Min"
    else:                               # every other metric → highest value
        c_extreme = scores.max()
        extreme_label = "Client Max"

    perc_mean = calculate_percentile(c_mean, reference_data)

    # ──────────────────────────── plotting begins ────────────────────────────
    plt.figure(facecolor="#181818")
    ax = plt.subplot(111, facecolor="#303030")

    reference_plotted = False  # flag to avoid double-plotting

    # ---------- RSI special case: 0.25-wide bars & custom ticks -------------
    if variable.upper() == "RSI":
        lo = np.floor(reference_data.min() / 0.25) * 0.25
        hi = np.ceil(reference_data.max() / 0.25) * 0.25
        bins = np.arange(lo, hi + 0.25, 0.25)      # bin edges
        centers = bins[:-1]                         # bar positions

        counts, _ = np.histogram(reference_data, bins=bins)
        ax.bar(
            centers,
            counts,
            width=0.25,
            align="edge",
            color="cornflowerblue",
            alpha=0.7,
            edgecolor="white",
            label="Reference",
        )
        # after counts, _ = np.histogram(...)
        for x, h in zip(centers, counts):
            if h == 0:                        # empty bin → draw thin outline
                ax.bar(x, 1e-6, width=.25, align='edge',
                       color='none', edgecolor='#404040', linewidth=.5)

        ax.set_xticks(centers)
        ax.set_xticklabels([f"{x:.2f}" for x in centers], color="lightgrey")

        reference_plotted = True  # we already drew the reference bars
    else:
        bins = 20  # default bin count

    # ---------- draw reference histogram when not plotted above -------------
    if not reference_plotted:
        ax.hist(
            reference_data,
            bins=bins,
            color="cornflowerblue",
            alpha=0.7,
            edgecolor="white",
            label="Reference",
        )

    # ---------- client mean / max lines -------------------------------------
    ax.axvline(c_extreme, color="red", ls="--", lw=2, label="Client Max")
    ax.axvline(c_mean, color="violet", ls="--", lw=2, label="Client Mean")

    # ---------- cosmetics ----------------------------------------------------
    ax.set_xlabel(variable.replace("_", " "), color="slategrey")
    ax.set_ylabel("Frequency", color="slategrey")
    ax.tick_params(axis="x", colors="lightgrey")
    ax.tick_params(axis="y", colors="lightgrey")
    ax.grid(color="dimgrey")

    txt = (
        f"Percentile (mean): {perc_mean:.1f}%"
        f"\nMean: {c_mean:.2f}"
        f"\nMax:  {c_extreme:.2f}"
    )
    plt.text(
        0.95,
        0.05,
        txt,
        ha="right",
        va="bottom",
        transform=ax.transAxes,
        color="white",
        fontsize=9,
        backgroundcolor="#181818",
    )

    ax.legend(facecolor="black", edgecolor="grey", prop={"size": "small"}, labelcolor="grey")

    # ---------- save ---------------------------------------------------------
    out_path = os.path.join(tmpdirname, f"{variable}_histogram.png")
    plt.savefig(out_path, bbox_inches="tight", facecolor="#181818")
    plt.close()
    return out_path

# Function to generate scatter plot for CMJ
def generate_scatter_plot(client_data, reference_data, x_var, y_var, title, tmpdirname):
    plt.figure(facecolor='#181818', figsize=(6, 6))
    ax = plt.subplot(111, facecolor='#303030')

    # Create scatter plot for reference data (cornflower blue)
    ax.scatter(reference_data[x_var], reference_data[y_var], label='Reference', alpha=0.5, color='cornflowerblue')

    # Create scatter plot for client data (red)
    ax.scatter(client_data[x_var], client_data[y_var], label='Client', color='red', edgecolors='black', s=100)

    # Set axis labels, replacing underscores with spaces
    ax.set_xlabel(x_var.replace('_', ' '), color='slategrey')
    ax.set_ylabel(y_var.replace('_', ' '), color='slategrey')

    # Dynamically set ticks and numbers to light grey
    ax.tick_params(axis='x', colors='lightgrey')
    ax.tick_params(axis='y', colors='lightgrey')

    # Add vertical and horizontal reference lines (light grey)
    ax.axvline(x=np.mean(reference_data[x_var]), color='lightgrey', linestyle='--', linewidth=1)
    ax.axhline(y=np.mean(reference_data[y_var]), color='lightgrey', linestyle='--', linewidth=1)

    # Customize grid style
    ax.grid(color='dimgrey')

    # Add legend
    ax.legend(facecolor='black', edgecolor='grey', prop={'size': 'small'}, labelcolor='grey')

    # Save scatter plot to file
    scatter_filename = os.path.join(tmpdirname, 'cmj_scatter.png')
    plt.savefig(scatter_filename, bbox_inches='tight', facecolor='#181818')
    plt.close()

    return scatter_filename

# Modified function to generate a histogram comparing left and right leg data
def generate_slv_histogram(variable, left_value, right_value,
                           reference_data, title, tmpdirname):
    """
    Blue bars  = reference distribution
    ─ green    = client LEFT (latest trial value you passed in)
    ─ orange   = client RIGHT

    Text box shows, for each side:
        • mean across all trials from the same assessment day
        • max across those trials
        • percentile of that mean vs. the reference distribution
    """
    import numpy as np, matplotlib.pyplot as plt, os, re, sqlite3

    # ── helper – get all trials for a given side on the same assessment day ──
    def _fetch_side_vals(side):
        # detect yyyy-mm-dd prefix in the first trial_name for this side
        client_cursor.execute(
            "SELECT trial_name FROM SLV WHERE name=? AND side=? LIMIT 1",
            (client_name, side)
        )
        row = client_cursor.fetchone()
        date_prefix = None
        if row and row[0]:
            m = re.match(r'(\d{4}[-_]\d{2}[-_]\d{2})', row[0])
            date_prefix = m.group(1) if m else None

        if date_prefix:
            q = f"SELECT {variable} FROM SLV WHERE name=? AND side=? AND trial_name LIKE ?"
            client_cursor.execute(q, (client_name, side, f'{date_prefix}%'))
        else:
            q = f"SELECT {variable} FROM SLV WHERE name=? AND side=?"
            client_cursor.execute(q, (client_name, side))

        return [r[0] for r in client_cursor.fetchall() if r[0] is not None]

    # pull all trials for each side; fall back to the single value passed in
    left_vals  = np.asarray(_fetch_side_vals('Left')  or [left_value],  dtype=float)
    right_vals = np.asarray(_fetch_side_vals('Right') or [right_value], dtype=float)

    left_mean,  left_max  = left_vals.mean(),  left_vals.max()
    right_mean, right_max = right_vals.mean(), right_vals.max()

    left_pct  = calculate_percentile(left_mean,  reference_data)
    right_pct = calculate_percentile(right_mean, reference_data)

    # ── plot ────────────────────────────────────────────────────────────────
    plt.figure(facecolor='#181818')
    ax = plt.subplot(111, facecolor='#303030')

    ax.hist(reference_data, bins=20, color='cornflowerblue',
            alpha=0.7, edgecolor='white', label='Reference')

    ax.axvline(left_value,  color='green',  ls='--', lw=2, label='Left (latest)')
    ax.axvline(right_value, color='orange', ls='--', lw=2, label='Right (latest)')

    ax.set_xlabel(variable.replace('_', ' '), color='slategrey')
    ax.set_ylabel('Frequency',               color='slategrey')
    ax.tick_params(axis='x', colors='lightgrey')
    ax.tick_params(axis='y', colors='lightgrey')
    ax.grid(color='dimgrey')

    txt = (
        f'LEFT  – mean: {left_mean:.2f}\n'
        f'        max:  {left_max:.2f}\n'
        f'        %ile: {left_pct:.1f}\n'
        f'RIGHT – mean: {right_mean:.2f}\n'
        f'        max:  {right_max:.2f}\n'
        f'        %ile: {right_pct:.1f}'
    )
    plt.text(0.95, 0.05, txt, ha='right', va='bottom',
             transform=ax.transAxes, color='white', fontsize=9,
             backgroundcolor='#181818')

    ax.legend(facecolor='black', edgecolor='grey',
              prop={'size': 'small'}, labelcolor='grey')

    out_path = os.path.join(tmpdirname, f'{variable}_histogram_slv.png')
    plt.savefig(out_path, bbox_inches='tight', facecolor='#181818')
    plt.close()
    return out_path

# Prepare the document
doc = Document()
doc.add_picture("8ctane Baseball - Black abd Blue BG.jpeg", width=Inches(4.0))  # Replace with your logo path
doc.paragraphs[-1].alignment = WD_ALIGN_PARAGRAPH.CENTER

# Adding player name and date
doc.add_paragraph(f"Player's Name: {client_name}")  # Replace client_name with dynamic value
doc.add_paragraph(f"Date: {date.today().strftime('%B %d, %Y')}")

# Create a temporary directory to store images
with tempfile.TemporaryDirectory() as tmpdirname:
    # List of movements to process
    movements = ['CMJ', 'DJ', 'SLV', 'NMT']
    
    for movement in movements:
        # Add movement title
        doc.add_paragraph(f"{movement} Report", style='Title')
        doc.add_paragraph(f"This section includes percentile reports and comparisons for {movement}.", style='Heading 2')

        if movement == 'CMJ':
            # Fetch CMJ data for the client
            client_cursor.execute("""
                SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg,
                       Force_at_PP, Vel_at_PP
                FROM CMJ WHERE name = ?
            """, (client_name,))
            client_cmj_data = client_cursor.fetchone()
            reference_cursor.execute("""
                SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg,
                       Force_at_PP, Vel_at_PP
                FROM CMJ
            """)
            reference_cmj_data = np.array(reference_cursor.fetchall())
            
            # Ensure data exists before proceeding
            if client_cmj_data and reference_cmj_data.size > 0:
                # Generate bar graphs for each variable in CMJ
                variables = ['JH_IN', 'PP_FORCEPLATE', 'PP_W_per_kg', 'Force_at_PP', 'Vel_at_PP']
                for i, var in enumerate(variables):
                    # Format the variable name by removing underscores
                    formatted_var = var.replace('_', ' ')
                    
                    # Add variable title before the graph
                    doc.add_paragraph(f"{formatted_var} Comparison", style='Heading 2')
                    
                    # Generate the bar graph and add to document
                    bar_image = generate_bar_graph(var, client_cmj_data[i], reference_cmj_data[:, i], f'{formatted_var} Comparison', tmpdirname)
                    doc.add_picture(bar_image, width=Inches(6))
                
                # Generate scatter plot for CMJ (Force_Peak_Power vs. Velo_Peak_Power)
                client_cmj_dict = {'Force_at_PP': client_cmj_data[3],
                   'Vel_at_PP':   client_cmj_data[4]}
                reference_cmj_dict = pd.DataFrame(reference_cmj_data, columns=variables)
                
                # Add scatter plot title and image
                doc.add_paragraph("Force vs. Velocity Scatter Plot", style='Heading 2')
                scatter_image = generate_scatter_plot(client_cmj_dict, reference_cmj_dict,
                                                      'Force_at_PP', 'Vel_at_PP',
                                                      'CMJ: Force vs. Velocity', tmpdirname)
                doc.add_picture(scatter_image, width=Inches(6))
    
        # ───────────────────────── DJ ─────────────────────────
        elif movement == 'DJ':
            # ── pull client & reference rows ──────────────────
            client_cursor.execute("""
                SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg, Force_at_PP, Vel_at_PP, CT, RSI
                FROM DJ
                WHERE name = ?
            """, (client_name,))
            client_dj_data = client_cursor.fetchone()
    
            reference_cursor.execute("""
                SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg, Force_at_PP, Vel_at_PP, CT, RSI
                FROM DJ
            """)
            reference_dj_data = np.array(reference_cursor.fetchall())
    
            dj_vars = ['JH_IN', 'PP_FORCEPLATE', 'PP_W_per_kg' ,'Force_at_PP', 'Vel_at_PP', 'CT', 'RSI']
    
            # ── bar graphs ────────────────────────────────────
            if client_dj_data and reference_dj_data.size:
                for i, var in enumerate(dj_vars):
                    doc.add_paragraph(f"{var.replace('_',' ')} Comparison",
                                      style='Heading 2')
                    bar = generate_bar_graph(
                        var,
                        client_dj_data[i],
                        reference_dj_data[:, i],
                        f'{var} Comparison',
                        tmpdirname
                    )
                    doc.add_picture(bar, width=Inches(6))
    
            # OPTIONAL: DJ force–velocity scatter (comment out if unwanted)
            client_dj_dict     = {'Force_at_PP': client_dj_data[3],
                                  'Vel_at_PP'  : client_dj_data[4]}
            reference_dj_dict  = pd.DataFrame(reference_dj_data,
                                              columns=dj_vars)
            doc.add_paragraph("Force vs. Velocity Scatter Plot",
                              style='Heading 2')
            dj_scatter = generate_scatter_plot(
                client_dj_dict, reference_dj_dict,
                'Force_at_PP', 'Vel_at_PP',
                'DJ: Force vs. Velocity', tmpdirname
            )
            doc.add_picture(dj_scatter, width=Inches(6))
    
        # ───────────────────────── SLV ───────────────────────
        elif movement == 'SLV':
            # pull left & right trials
            client_cursor.execute("""
                SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg, Force_at_PP, Vel_at_PP
                FROM SLV WHERE name = ? AND side = 'Left'
            """, (client_name,))
            client_slvl_data = client_cursor.fetchone()
    
            client_cursor.execute("""
                SELECT JH_IN,  PP_FORCEPLATE, PP_W_per_kg, Force_at_PP, Vel_at_PP
                FROM SLV WHERE name = ? AND side = 'Right'
            """, (client_name,))
            client_slvr_data = client_cursor.fetchone()
    
            reference_cursor.execute("""
                SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg, Force_at_PP, Vel_at_PP FROM SLV
            """)
            reference_slv_data = np.array(reference_cursor.fetchall())
    
            slv_vars = ['JH_IN', 'PP_FORCEPLATE', 'PP_W_per_kg',  'Force_at_PP', 'Vel_at_PP']
    
            if client_slvl_data and client_slvr_data and reference_slv_data.size:
                # ── left-vs-right histograms (JH & Force) ─────
                for idx, var in enumerate(['JH_IN', 'Force_at_PP']):
                    doc.add_paragraph(
                        f"{var.replace('_',' ')} Comparison (Left vs Right)",
                        style='Heading 2')
                    hist = generate_slv_histogram(
                        var,
                        client_slvl_data[idx],            # left value
                        client_slvr_data[idx],            # right value
                        reference_slv_data[:, idx],
                        f'{var} Comparison', tmpdirname
                    )
                    doc.add_picture(hist, width=Inches(6))
    
                # ── scatter Force vs Velocity ────────────────
                client_slv_all = np.array([client_slvl_data, client_slvr_data])
                client_slv_dict = {
                    'Force_at_PP': client_slv_all[:, 3],
                    'Vel_at_PP'  : client_slv_all[:, 4],
                }
                reference_slv_dict = pd.DataFrame(reference_slv_data,
                                                  columns=slv_vars)
    
                doc.add_paragraph("Force vs. Velocity Scatter Plot",
                                  style='Heading 2')
                slv_scatter = generate_scatter_plot(
                    client_slv_dict, reference_slv_dict,
                    'Force_at_PP', 'Vel_at_PP',
                    'SLV: Force vs. Velocity', tmpdirname
                )
                doc.add_picture(slv_scatter, width=Inches(6))
            else:
                print("⚠️  Missing SLV data – skipping SLV graphs")

        elif movement == 'NMT':
            # Fetch NMT data for the client (10s taps only)
            client_cursor.execute(f"SELECT NUM_TAPS_10s FROM NMT WHERE name = '{client_name}'")
            client_nmt_data = client_cursor.fetchone()
            reference_cursor.execute("SELECT NUM_TAPS_10s FROM NMT")
            reference_nmt_data = np.array(reference_cursor.fetchall())
            
            # Ensure data exists before proceeding
            if client_nmt_data and reference_nmt_data.size > 0:
                # Format the variable name by removing underscores
                nmt_var = 'NUM_TAPS_10s'.replace('_', ' ')
                
                # Add variable title before the graph
                doc.add_paragraph(f"{nmt_var} Comparison", style='Heading 2')
                
                # Generate histogram for NMT 10s taps
                nmt_image = generate_bar_graph('NUM_TAPS_10s', client_nmt_data[0], reference_nmt_data[:, 0], f'{nmt_var} Comparison', tmpdirname)
                doc.add_picture(nmt_image, width=Inches(6))

# Function to convert DOCX to images
def docx_to_images(docx_path, output_dir):
    # Extract text from the DOCX file
    text = docx2txt.process(docx_path)
    
    # Split the text into lines
    lines = text.splitlines()

    # Create a blank image with white background
    img_width, img_height = 1000, 1500
    image = Image.new('RGB', (img_width, img_height), color='white')
    draw = ImageDraw.Draw(image)

    # Use a simple font
    try:
        font = ImageFont.truetype("arial.ttf", 20)
    except IOError:
        font = ImageFont.load_default()

    # Draw the text onto the image
    padding = 20
    y_text = padding
    for line in lines:
        if y_text + padding > img_height:
            # Save the image and start a new one if the text exceeds the page height
            img_path = os.path.join(output_dir, f"page_{int(y_text / img_height)}.png")
            image.save(img_path)
            y_text = padding
            image = Image.new('RGB', (img_width, img_height), color='white')
            draw = ImageDraw.Draw(image)

        # Calculate text size and draw it
        text_bbox = draw.textbbox((0, 0), line, font=font)
        text_width = text_bbox[2] - text_bbox[0]
        text_height = text_bbox[3] - text_bbox[1]

        draw.text((padding, y_text), line, font=font, fill="black")
        y_text += text_height + padding

    # Save the last image
    img_path = os.path.join(output_dir, "final_page.png")
    image.save(img_path)

    return img_path

doc.save(output_filename)          # ← only one final save
print(f"Document saved at: {output_filename}")

# Close connections
client_conn.close()
reference_conn.close()

# Example usage
img_output_directory = r'G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports\Images'
os.makedirs(img_output_directory, exist_ok=True)

# Convert DOCX to images
img_path = docx_to_images(output_filename, img_output_directory)
print(f"Images saved at {img_path}")

Databases opened successfully.
Client Name: Jalen Hollins
Document saved at: G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports\Athletic_Report_Jalen_Hollins_All_Comp.docx
Images saved at G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports\Images\final_page.png


In [22]:
import sqlite3
import time
import os

# Paths to the source and target databases
source_db_path = 'D:/Athletic Screen 2.0/Output Files/movement_database_v2.db'
output_folder = 'D:/Athletic Screen 2.0/Output Files/'
target_databases = ['Athletic_Screen_Pro_data_v2.db']
all_data_db_path = os.path.join(output_folder, 'Athletic_Screen_All_data_v2.db')

# Retry mechanism for handling the locked database error
def retry_execute(func):
    retries = 5  # Number of retries
    while retries > 0:
        try:
            func()
            break
        except sqlite3.OperationalError as e:
            if 'database is locked' in str(e):
                print("Database is locked, retrying...")
                time.sleep(1)  # Wait for 1 second before retrying
                retries -= 1
            else:
                raise e
        if retries == 0:
            raise Exception("Max retries reached. Database is still locked.")

# Table schemas to create in the target databases and the combined database
table_schemas = {
    'CMJ': '''CREATE TABLE IF NOT EXISTS CMJ (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                date TEXT,
                trial_name TEXT,
                JH_IN REAL,
                Peak_Power REAL,
                PP_FORCEPLATE REAL,
                Force_at_PP REAL,
                Vel_at_PP REAL,
                PP_W_per_kg REAL
              )''',

    'DJ':  '''CREATE TABLE IF NOT EXISTS DJ (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                date TEXT,
                trial_name TEXT,
                JH_IN REAL,
                Peak_Power REAL,
                PP_FORCEPLATE REAL,
                Force_at_PP REAL,
                Vel_at_PP REAL,
                PP_W_per_kg REAL,
                CT REAL,
                RSI REAL
              )''',

    'SLV': '''CREATE TABLE IF NOT EXISTS SLV (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                date TEXT, 
                trial_name TEXT,
                side TEXT,
                JH_IN REAL,
                PP_FORCEPLATE REAL,
                Force_at_PP REAL,
                Vel_at_PP REAL,
                PP_W_per_kg REAL
              )''',
    'NMT': '''CREATE TABLE IF NOT EXISTS NMT (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                date TEXT,
                trial_name TEXT,
                NUM_TAPS_10s REAL,
                NUM_TAPS_20s REAL,
                NUM_TAPS_30s REAL,
                NUM_TAPS REAL
              )'''
}
# Function to create tables in a database connection
def create_tables(conn):
    cursor = conn.cursor()
    for schema in table_schemas.values():
        cursor.execute(schema)
    conn.commit()

# Open the source database
source_conn = sqlite3.connect(source_db_path, timeout=10)
source_cursor = source_conn.cursor()

# Create connections to all target databases and combined database
target_conns = {db_name: sqlite3.connect(os.path.join(output_folder, db_name), timeout=10) for db_name in target_databases}
target_conns['all'] = sqlite3.connect(all_data_db_path, timeout=10)

# Ensure tables exist in each database
for conn in target_conns.values():
    create_tables(conn)

# Function to copy data from one table in the source to target databases
def copy_table_data(table_name):
    # Fetch all data except the 'id' column from the source table
    source_cursor.execute(f"SELECT * FROM {table_name}")
    rows = source_cursor.fetchall()

    # For each row, insert it into each target database
    for db_name, conn in target_conns.items():
        cursor = conn.cursor()
        placeholders = ", ".join(["?"] * len(rows[0][1:]))  # Skips the first 'id' column
        query = f"INSERT INTO {table_name} ({', '.join([desc[0] for desc in source_cursor.description][1:])}) VALUES ({placeholders})"
        retry_execute(lambda: cursor.executemany(query, [row[1:] for row in rows]))  # Exclude 'id' column for insertion
        conn.commit()
        print(f"Copied {len(rows)} rows to {table_name} in {db_name}")

# List of table names to copy data
tables_to_copy = ['CMJ', 'DJ', 'SLV', 'NMT']

# Copy data from each table
for table in tables_to_copy:
    copy_table_data(table)

# Close all connections
source_conn.close()
for conn in target_conns.values():
    conn.close()

print("Data successfully copied to each target and combined database.")

# Path to the folder containing ASCII .txt files
ascii_folder = r"D:/Athletic Screen 2.0/Output Files/"

# Remove all .txt files in the ascii_folder
for filename in os.listdir(ascii_folder):
    if filename.lower().endswith(".txt"):
        file_path = os.path.join(ascii_folder, filename)
        try:
            os.remove(file_path)
            print(f"Deleted: {file_path}")
        except Exception as e:
            print(f"Failed to delete {file_path}: {e}")

print("All ASCII .txt files cleared after ingestion.")


Copied 3 rows to CMJ in Athletic_Screen_Pro_data_v2.db
Copied 3 rows to CMJ in all
Copied 3 rows to DJ in Athletic_Screen_Pro_data_v2.db
Copied 3 rows to DJ in all
Copied 6 rows to SLV in Athletic_Screen_Pro_data_v2.db
Copied 6 rows to SLV in all
Copied 1 rows to NMT in Athletic_Screen_Pro_data_v2.db
Copied 1 rows to NMT in all
Data successfully copied to each target and combined database.


In [1]:
import shutil

# Paths
db_source_paths = [
    r"D:/Athletic Screen 2.0/Output Files/movement_database_v2.db",
    r"D:/Athletic Screen 2.0/Output Files/Athletic_Screen_All_data_v2.db",
    r"D:/Athletic Screen 2.0/Output Files/Athletic_Screen_Pro_data_v2.db"
]
destination_folder = r"G:/My Drive/Data/Athletic Screen Data"

# Copy each DB file to the destination
for db_path in db_source_paths:
    try:
        shutil.copy(db_path, destination_folder)
        print(f"Copied {db_path} → {destination_folder}")
    except Exception as e:
        print(f"Failed to copy {db_path}: {e}")


Copied D:/Athletic Screen 2.0/Output Files/movement_database_v2.db → G:/My Drive/Data/Athletic Screen Data
Copied D:/Athletic Screen 2.0/Output Files/Athletic_Screen_All_data_v2.db → G:/My Drive/Data/Athletic Screen Data
Copied D:/Athletic Screen 2.0/Output Files/Athletic_Screen_Pro_data_v2.db → G:/My Drive/Data/Athletic Screen Data
