In [21]:
import sqlite3
import os
import re

# Path to the folder containing your txt files
folder_path = 'D:/Athletic Screen 2.0/Output Files/'
db_path = 'D:/Athletic Screen 2.0/Output Files/movement_database_v2.db'

# Delete the database file if it exists to start fresh
if os.path.exists(db_path):
    os.remove(db_path)
    print(f"Deleted existing database at {db_path}")

# Connect to the SQLite database (or create it if it doesn't exist)
conn = sqlite3.connect(db_path)
cursor = conn.cursor()

# Define the corrected table schemas for each movement
table_schemas = {
    'CMJ': '''CREATE TABLE IF NOT EXISTS CMJ (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                date TEXT,
                trial_name TEXT,
                JH_IN REAL,
                Peak_Power REAL,
                PP_FORCEPLATE REAL,
                Force_at_PP REAL,
                Vel_at_PP REAL,
                PP_W_per_kg REAL
              )''',

    'DJ':  '''CREATE TABLE IF NOT EXISTS DJ (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                date TEXT,
                trial_name TEXT,
                JH_IN REAL,
                Peak_Power REAL,
                PP_FORCEPLATE REAL,
                Force_at_PP REAL,
                Vel_at_PP REAL,
                PP_W_per_kg REAL,
                CT REAL,
                RSI REAL
              )''',

    'SLV': '''CREATE TABLE IF NOT EXISTS SLV (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                date TEXT, 
                trial_name TEXT,
                side TEXT,
                JH_IN REAL,
                PP_FORCEPLATE REAL,
                Force_at_PP REAL,
                Vel_at_PP REAL,
                PP_W_per_kg REAL
              )''',
    'NMT': '''CREATE TABLE IF NOT EXISTS NMT (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                date TEXT, 
                trial_name TEXT,
                NUM_TAPS_10s REAL,
                NUM_TAPS_20s REAL,
                NUM_TAPS_30s REAL,
                NUM_TAPS REAL
              )'''
}

# Create the tables in the database (if they don't exist)
for schema in table_schemas.values():
    cursor.execute(schema)

# Function to extract the client's name from the first line of the file
def extract_name(line):
    match = re.search(r'Data\\(.*?)[_\\]', line)
    if match:
        return match.group(1)
    return None

def extract_date(line):
    """
    Looks for a segment like 2025-05-22_ in the first-line path returned by Cortex.
    Returns '2025-05-22' or None if not found.
    """
    m = re.search(r'\\(\d{4}-\d{2}-\d{2})_', line)
    return m.group(1) if m else None
# ─────────────────────────────────────────────────────────────────────────────

# Function to insert data into the appropriate table
def insert_data_into_table(table_name, name, trial_name, variables):
    """
    `variables` still contains the leading dummy “1”.
    After we drop that each txt gives:
        CMJ : 8 numbers
        DJ  : 8 numbers
        SLV : 6 numbers
        NMT : 4 numbers
    We pick only the columns we store.
    """
    v = variables[1:]          # drop the leading “1”

    if table_name == 'CMJ':
        # keep indices 0,1,4,5,6,7  (→ six values)
        vals = [v[i] for i in (0, 1, 4, 5, 6, 7)]
        cursor.execute(
            """INSERT INTO CMJ
               (name, date, trial_name,
                JH_IN, Peak_Power,
                PP_FORCEPLATE, Force_at_PP, Vel_at_PP,
                PP_W_per_kg)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (name, date, trial_name, *vals)
        )

    elif table_name == 'DJ':
        # keep every value (8 numbers)
        cursor.execute(
            """INSERT INTO DJ
               (name, date, trial_name,
                JH_IN, Peak_Power,
                PP_FORCEPLATE, Force_at_PP, Vel_at_PP,
                CT, RSI, PP_W_per_kg)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (name, date, trial_name, *v)        # 8 numbers + 2 strings = 10
        )

    elif table_name == 'SLV':
        side = 'Left' if 'SLVL' in trial_name else 'Right'
        # keep indices 0,2,3,4,5  (→ five values)
        vals = [v[i] for i in (0, 2, 3, 4, 5)]
        cursor.execute(
            """INSERT INTO SLV
               (name, date, trial_name, side,
                JH_IN,
                PP_FORCEPLATE, Force_at_PP, Vel_at_PP,
                PP_W_per_kg)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (name, date, trial_name, side, *vals)
        )

    elif table_name == 'NMT':
        cursor.execute(
            """INSERT INTO NMT
               (name, date, trial_name,
                NUM_TAPS_10s, NUM_TAPS_20s, NUM_TAPS_30s, NUM_TAPS)
               VALUES (?, ?, ?, ?, ?, ?, ?)""",
            (name, date, trial_name, *v)
        )
# Loop through the txt files in the folder
for file_name in os.listdir(folder_path):
    if file_name.endswith('.txt'):
        trial_name = os.path.splitext(file_name)[0]
        
        # Determine which table the file belongs to
        if 'CMJ' in trial_name:
            table_name = 'CMJ'
        elif 'DJ' in trial_name:
            table_name = 'DJ'
        elif 'SLVL' in trial_name or 'SLVR' in trial_name:
            table_name = 'SLV'
        elif 'NMT' in trial_name:
            table_name = 'NMT'
        else:
            continue  # Skip any files that don't match the naming pattern

        # Load the data from the txt file
        file_path = os.path.join(folder_path, file_name)
        try:
            with open(file_path, 'r') as f:
                # Extract the name from the first line
                first_line = f.readline().strip()
                name = extract_name(first_line)
                date = extract_date(first_line)

                # Print the extracted name to verify
                print(f"File: {file_name}, Extracted Name: {name}")

                if not name:
                    print(f"Name extraction failed for {file_name}, skipping.")
                    continue

                # Read all lines until we find the line with the actual numeric data
                # --- replace the old for-loop (line_num, line) with this: -------------------
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                
                    if re.match(r'^[-+]?\d', line):          # first real numeric row
                        variables = [float(v) for v in line.split()]
                        print(f"Processing file: {file_name}, Variables: {variables}")
                        insert_data_into_table(table_name, name, trial_name, variables)
                        break


        except Exception as e:
            print(f"Unexpected error with file {file_name}: {e}")

# Commit the changes and close the connection
conn.commit()
conn.close()

print("Data successfully inserted into the database.")


Deleted existing database at D:/Athletic Screen 2.0/Output Files/movement_database_v2.db
File: CMJ1.txt, Extracted Name: Zach, Vennaro
Processing file: CMJ1.txt, Variables: [1.0, 15.5, 516.0, 2.27, 228.0, 1196.5, 2318.52, 516.07, 14.96]
File: SLVL1.txt, Extracted Name: Zach, Vennaro
Processing file: SLVL1.txt, Variables: [1.0, 9.6, 6216.0, 944.8, 2044.4, 462.1, 11.81]
File: CMJ2.txt, Extracted Name: Zach, Vennaro
Processing file: CMJ2.txt, Variables: [1.0, 16.6, 523.0, 2.38, 219.0, 1207.8, 2307.87, 523.32, 15.1]
File: CMJ3.txt, Extracted Name: Zach, Vennaro
Processing file: CMJ3.txt, Variables: [1.0, 15.5, 516.0, 2.27, 228.0, 1196.5, 2318.52, 516.07, 14.96]
File: DJ1.txt, Extracted Name: Zach, Vennaro
Processing file: DJ1.txt, Variables: [1.0, 19.8, 2187.0, 1894.7, 2686.37, 705.3, 0.63, 1.6, 23.68]
File: DJ2.txt, Extracted Name: Zach, Vennaro
Processing file: DJ2.txt, Variables: [1.0, 18.2, 1750.0, 1871.3, 2507.22, 746.35, 0.67, 1.38, 23.39]
File: DJ3.txt, Extracted Name: Zach, Vennaro

In [2]:
import sqlite3
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
from datetime import date
import tempfile
import docx2txt
from PIL import Image, ImageDraw, ImageFont
import os

# Corrected file paths with raw strings to handle backslashes properly
client_db_path = r'D:\Athletic Screen 2.0\Output Files\movement_database_v2.db'
reference_db_path = r'D:\Athletic Screen 2.0\Output Files\Athletic_Screen_Pro_data_v2.db'

# Ensure the paths are valid and accessible
if not os.path.exists(client_db_path):
    print(f"Client database not found at {client_db_path}")
if not os.path.exists(reference_db_path):
    print(f"Reference database not found at {reference_db_path}")

# Connect to the client and reference databases
client_conn = sqlite3.connect(client_db_path)
reference_conn = sqlite3.connect(reference_db_path)
client_cursor = client_conn.cursor()
reference_cursor = reference_conn.cursor()

print("Databases opened successfully.")

# Fetch the client's name from the database (assuming the 'name' column is in all tables)
client_cursor.execute("SELECT DISTINCT name FROM CMJ")  # Change table if necessary
client_name = client_cursor.fetchone()[0]  # Get the first row and first column
print(f"Client Name: {client_name}")

# ---------- build the final DOCX path ---------------------------------
output_dir = r'G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports'
os.makedirs(output_dir, exist_ok=True)

parts = client_name.split(', ')
client_name_rev = f"{parts[1]} {parts[0]}" if len(parts) == 2 else client_name
output_filename = os.path.join(
    output_dir, f"Athletic_Report_{client_name_rev.replace(' ', '_')}.docx"
)
# ----------------------------------------------------------------------

# Helper function to calculate percentile
def calculate_percentile(value, reference_data):
    return stats.percentileofscore(reference_data, value)

# ─── UPDATED generate_bar_graph ────────────────────────────────────────────────
def generate_bar_graph(variable, client_value, reference_data, title, tmpdirname):
    """
    Blue bars  = reference distribution
    ─ red      = client MAX (best trial **within the same movement table**)
    ─ violet   = client MEAN (average of those trials)

    • No code outside this function needs to change.
    • If the caller still passes one score, that is fine; this function looks
      up any matching trials on the same assessment day and combines them.
    • RSI histograms use 0.25-wide bins so you see bars at 1.25, 1.50, 1.75 …
    """
    import numpy as np, matplotlib.pyplot as plt, os, sqlite3, re

    # ───────────── lookup: which movement table are we dealing with? ─────────
    column_table_map = {"CMJ": ["CMJ"], "DJ": ["DJ"], "SLV": ["SLV"], "NMT": ["NMT"]}
    table_guess = None
    for tbl in column_table_map:          # CMJ, DJ, SLV, NMT
        try:
            reference_cursor.execute(f"SELECT COUNT(*) FROM {tbl}")
            if reference_cursor.fetchone()[0] == len(reference_data):
                table_guess = tbl
                break
        except sqlite3.OperationalError:
            continue
    if table_guess is None:               # fallback
        for tbl in column_table_map:
            try:
                client_cursor.execute(f"SELECT 1 FROM {tbl} LIMIT 1")
                table_guess = tbl
                break
            except sqlite3.OperationalError:
                continue

    # ───────────── gather all trials for this athlete / table / day ──────────
    scores = []
    if table_guess:
        client_cursor.execute(
            f"SELECT trial_name FROM {table_guess} WHERE name=? LIMIT 1",
            (client_name,),
        )
        row = client_cursor.fetchone()
        date_prefix = None
        if row and row[0]:
            m = re.match(r"(\d{4}[-_]\d{2}[-_]\d{2})", row[0])
            date_prefix = m.group(1) if m else None

        if date_prefix:
            q = f"SELECT {variable} FROM {table_guess} WHERE name=? AND trial_name LIKE ?"
            client_cursor.execute(q, (client_name, f"{date_prefix}%"))
        else:
            q = f"SELECT {variable} FROM {table_guess} WHERE name=?"
            client_cursor.execute(q, (client_name,))

        scores = [r[0] for r in client_cursor.fetchall() if r[0] is not None]

    # fallback if still empty
    if not scores:
        scores = [client_value]

    scores = np.asarray(scores, dtype=float)
    c_max, c_mean = scores.max(), scores.mean()
    perc_mean = calculate_percentile(c_mean, reference_data)

    # ──────────────────────────── plotting begins ────────────────────────────
    plt.figure(facecolor="#181818")
    ax = plt.subplot(111, facecolor="#303030")

    reference_plotted = False  # flag to avoid double-plotting

    # ---------- RSI special case: 0.25-wide bars & custom ticks -------------
    if variable.upper() == "RSI":
        lo = np.floor(reference_data.min() / 0.25) * 0.25
        hi = np.ceil(reference_data.max() / 0.25) * 0.25
        bins = np.arange(lo, hi + 0.25, 0.25)      # bin edges
        centers = bins[:-1]                         # bar positions

        counts, _ = np.histogram(reference_data, bins=bins)
        ax.bar(
            centers,
            counts,
            width=0.25,
            align="edge",
            color="cornflowerblue",
            alpha=0.7,
            edgecolor="white",
            label="Reference",
        )
        # after counts, _ = np.histogram(...)
        for x, h in zip(centers, counts):
            if h == 0:                        # empty bin → draw thin outline
                ax.bar(x, 1e-6, width=.25, align='edge',
                       color='none', edgecolor='#404040', linewidth=.5)

        ax.set_xticks(centers)
        ax.set_xticklabels([f"{x:.2f}" for x in centers], color="lightgrey")

        reference_plotted = True  # we already drew the reference bars
    else:
        bins = 20  # default bin count

    # ---------- draw reference histogram when not plotted above -------------
    if not reference_plotted:
        ax.hist(
            reference_data,
            bins=bins,
            color="cornflowerblue",
            alpha=0.7,
            edgecolor="white",
            label="Reference",
        )

    # ---------- client mean / max lines -------------------------------------
    ax.axvline(c_max, color="red", ls="--", lw=2, label="Client Max")
    ax.axvline(c_mean, color="violet", ls="--", lw=2, label="Client Mean")

    # ---------- cosmetics ----------------------------------------------------
    ax.set_xlabel(variable.replace("_", " "), color="slategrey")
    ax.set_ylabel("Frequency", color="slategrey")
    ax.tick_params(axis="x", colors="lightgrey")
    ax.tick_params(axis="y", colors="lightgrey")
    ax.grid(color="dimgrey")

    txt = (
        f"Percentile (mean): {perc_mean:.1f}%"
        f"\nMean: {c_mean:.2f}"
        f"\nMax:  {c_max:.2f}"
    )
    plt.text(
        0.95,
        0.05,
        txt,
        ha="right",
        va="bottom",
        transform=ax.transAxes,
        color="white",
        fontsize=9,
        backgroundcolor="#181818",
    )

    ax.legend(facecolor="black", edgecolor="grey", prop={"size": "small"}, labelcolor="grey")

    # ---------- save ---------------------------------------------------------
    out_path = os.path.join(tmpdirname, f"{variable}_histogram.png")
    plt.savefig(out_path, bbox_inches="tight", facecolor="#181818")
    plt.close()
    return out_path

# Function to generate scatter plot for CMJ
def generate_scatter_plot(client_data, reference_data, x_var, y_var, title, tmpdirname):
    plt.figure(facecolor='#181818', figsize=(6, 6))
    ax = plt.subplot(111, facecolor='#303030')

    # Create scatter plot for reference data (cornflower blue)
    ax.scatter(reference_data[x_var], reference_data[y_var], label='Reference', alpha=0.5, color='cornflowerblue')

    # Create scatter plot for client data (red)
    ax.scatter(client_data[x_var], client_data[y_var], label='Client', color='red', edgecolors='black', s=100)

    # Set axis labels, replacing underscores with spaces
    ax.set_xlabel(x_var.replace('_', ' '), color='slategrey')
    ax.set_ylabel(y_var.replace('_', ' '), color='slategrey')

    # Dynamically set ticks and numbers to light grey
    ax.tick_params(axis='x', colors='lightgrey')
    ax.tick_params(axis='y', colors='lightgrey')

    # Add vertical and horizontal reference lines (light grey)
    ax.axvline(x=np.mean(reference_data[x_var]), color='lightgrey', linestyle='--', linewidth=1)
    ax.axhline(y=np.mean(reference_data[y_var]), color='lightgrey', linestyle='--', linewidth=1)

    # Customize grid style
    ax.grid(color='dimgrey')

    # Add legend
    ax.legend(facecolor='black', edgecolor='grey', prop={'size': 'small'}, labelcolor='grey')

    # Save scatter plot to file
    scatter_filename = os.path.join(tmpdirname, 'cmj_scatter.png')
    plt.savefig(scatter_filename, bbox_inches='tight', facecolor='#181818')
    plt.close()

    return scatter_filename

# Modified function to generate a histogram comparing left and right leg data
def generate_slv_histogram(variable, left_value, right_value,
                           reference_data, title, tmpdirname):
    """
    Blue bars  = reference distribution
    ─ green    = client LEFT (latest trial value you passed in)
    ─ orange   = client RIGHT

    Text box shows, for each side:
        • mean across all trials from the same assessment day
        • max across those trials
        • percentile of that mean vs. the reference distribution
    """
    import numpy as np, matplotlib.pyplot as plt, os, re, sqlite3

    # ── helper – get all trials for a given side on the same assessment day ──
    def _fetch_side_vals(side):
        # detect yyyy-mm-dd prefix in the first trial_name for this side
        client_cursor.execute(
            "SELECT trial_name FROM SLV WHERE name=? AND side=? LIMIT 1",
            (client_name, side)
        )
        row = client_cursor.fetchone()
        date_prefix = None
        if row and row[0]:
            m = re.match(r'(\d{4}[-_]\d{2}[-_]\d{2})', row[0])
            date_prefix = m.group(1) if m else None

        if date_prefix:
            q = f"SELECT {variable} FROM SLV WHERE name=? AND side=? AND trial_name LIKE ?"
            client_cursor.execute(q, (client_name, side, f'{date_prefix}%'))
        else:
            q = f"SELECT {variable} FROM SLV WHERE name=? AND side=?"
            client_cursor.execute(q, (client_name, side))

        return [r[0] for r in client_cursor.fetchall() if r[0] is not None]

    # pull all trials for each side; fall back to the single value passed in
    left_vals  = np.asarray(_fetch_side_vals('Left')  or [left_value],  dtype=float)
    right_vals = np.asarray(_fetch_side_vals('Right') or [right_value], dtype=float)

    left_mean,  left_max  = left_vals.mean(),  left_vals.max()
    right_mean, right_max = right_vals.mean(), right_vals.max()

    left_pct  = calculate_percentile(left_mean,  reference_data)
    right_pct = calculate_percentile(right_mean, reference_data)

    # ── plot ────────────────────────────────────────────────────────────────
    plt.figure(facecolor='#181818')
    ax = plt.subplot(111, facecolor='#303030')

    ax.hist(reference_data, bins=20, color='cornflowerblue',
            alpha=0.7, edgecolor='white', label='Reference')

    ax.axvline(left_value,  color='green',  ls='--', lw=2, label='Left (latest)')
    ax.axvline(right_value, color='orange', ls='--', lw=2, label='Right (latest)')

    ax.set_xlabel(variable.replace('_', ' '), color='slategrey')
    ax.set_ylabel('Frequency',               color='slategrey')
    ax.tick_params(axis='x', colors='lightgrey')
    ax.tick_params(axis='y', colors='lightgrey')
    ax.grid(color='dimgrey')

    txt = (
        f'LEFT  – mean: {left_mean:.2f}\n'
        f'        max:  {left_max:.2f}\n'
        f'        %ile: {left_pct:.1f}\n'
        f'RIGHT – mean: {right_mean:.2f}\n'
        f'        max:  {right_max:.2f}\n'
        f'        %ile: {right_pct:.1f}'
    )
    plt.text(0.95, 0.05, txt, ha='right', va='bottom',
             transform=ax.transAxes, color='white', fontsize=9,
             backgroundcolor='#181818')

    ax.legend(facecolor='black', edgecolor='grey',
              prop={'size': 'small'}, labelcolor='grey')

    out_path = os.path.join(tmpdirname, f'{variable}_histogram_slv.png')
    plt.savefig(out_path, bbox_inches='tight', facecolor='#181818')
    plt.close()
    return out_path

# Prepare the document
doc = Document()
doc.add_picture("8ctane Baseball - Black abd Blue BG.jpeg", width=Inches(4.0))  # Replace with your logo path
doc.paragraphs[-1].alignment = WD_ALIGN_PARAGRAPH.CENTER

# Adding player name and date
doc.add_paragraph(f"Player's Name: {client_name}")  # Replace client_name with dynamic value
doc.add_paragraph(f"Date: {date.today().strftime('%B %d, %Y')}")

# Create a temporary directory to store images
with tempfile.TemporaryDirectory() as tmpdirname:
    # List of movements to process
    movements = ['CMJ', 'DJ', 'SLV', 'NMT']
    
    for movement in movements:
        # Add movement title
        doc.add_paragraph(f"{movement} Report", style='Title')
        doc.add_paragraph(f"This section includes percentile reports and comparisons for {movement}.", style='Heading 2')

        if movement == 'CMJ':
            # Fetch CMJ data for the client
            client_cursor.execute("""
                SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg,
                       Force_at_PP, Vel_at_PP
                FROM CMJ WHERE name = ?
            """, (client_name,))
            client_cmj_data = client_cursor.fetchone()
            reference_cursor.execute("""
                SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg,
                       Force_at_PP, Vel_at_PP
                FROM CMJ
            """)
            reference_cmj_data = np.array(reference_cursor.fetchall())
            
            # Ensure data exists before proceeding
            if client_cmj_data and reference_cmj_data.size > 0:
                # Generate bar graphs for each variable in CMJ
                variables = ['JH_IN', 'PP_FORCEPLATE', 'PP_W_per_kg', 'Force_at_PP', 'Vel_at_PP']
                for i, var in enumerate(variables):
                    # Format the variable name by removing underscores
                    formatted_var = var.replace('_', ' ')
                    
                    # Add variable title before the graph
                    doc.add_paragraph(f"{formatted_var} Comparison", style='Heading 2')
                    
                    # Generate the bar graph and add to document
                    bar_image = generate_bar_graph(var, client_cmj_data[i], reference_cmj_data[:, i], f'{formatted_var} Comparison', tmpdirname)
                    doc.add_picture(bar_image, width=Inches(6))
                
                # Generate scatter plot for CMJ (Force_Peak_Power vs. Velo_Peak_Power)
                client_cmj_dict = {'Force_at_PP': client_cmj_data[2],
                   'Vel_at_PP':   client_cmj_data[3]}
                reference_cmj_dict = pd.DataFrame(reference_cmj_data, columns=variables)
                
                # Add scatter plot title and image
                doc.add_paragraph("Force vs. Velocity Scatter Plot", style='Heading 2')
                scatter_image = generate_scatter_plot(client_cmj_dict, reference_cmj_dict,
                                                      'Force_at_PP', 'Vel_at_PP',
                                                      'CMJ: Force vs. Velocity', tmpdirname)
                doc.add_picture(scatter_image, width=Inches(6))
    
        # ───────────────────────── DJ ─────────────────────────
        elif movement == 'DJ':
            # ── pull client & reference rows ──────────────────
            client_cursor.execute("""
                SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg, Force_at_PP, Vel_at_PP, CT, RSI
                FROM DJ
                WHERE name = ?
            """, (client_name,))
            client_dj_data = client_cursor.fetchone()
    
            reference_cursor.execute("""
                SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg, Force_at_PP, Vel_at_PP, CT, RSI
                FROM DJ
            """)
            reference_dj_data = np.array(reference_cursor.fetchall())
    
            dj_vars = ['JH_IN', 'PP_FORCEPLATE', 'PP_W_per_kg' ,'Force_at_PP', 'Vel_at_PP', 'CT', 'RSI']
    
            # ── bar graphs ────────────────────────────────────
            if client_dj_data and reference_dj_data.size:
                for i, var in enumerate(dj_vars):
                    doc.add_paragraph(f"{var.replace('_',' ')} Comparison",
                                      style='Heading 2')
                    bar = generate_bar_graph(
                        var,
                        client_dj_data[i],
                        reference_dj_data[:, i],
                        f'{var} Comparison',
                        tmpdirname
                    )
                    doc.add_picture(bar, width=Inches(6))
    
            # OPTIONAL: DJ force–velocity scatter (comment out if unwanted)
            client_dj_dict     = {'Force_at_PP': client_dj_data[1],
                                  'Vel_at_PP'  : client_dj_data[2]}
            reference_dj_dict  = pd.DataFrame(reference_dj_data,
                                              columns=dj_vars)
            doc.add_paragraph("Force vs. Velocity Scatter Plot",
                              style='Heading 2')
            dj_scatter = generate_scatter_plot(
                client_dj_dict, reference_dj_dict,
                'Force_at_PP', 'Vel_at_PP',
                'DJ: Force vs. Velocity', tmpdirname
            )
            doc.add_picture(dj_scatter, width=Inches(6))
    
        # ───────────────────────── SLV ───────────────────────
        elif movement == 'SLV':
            # pull left & right trials
            client_cursor.execute("""
                SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg, Force_at_PP, Vel_at_PP
                FROM SLV WHERE name = ? AND side = 'Left'
            """, (client_name,))
            client_slvl_data = client_cursor.fetchone()
    
            client_cursor.execute("""
                SELECT JH_IN,  PP_FORCEPLATE, PP_W_per_kg, Force_at_PP, Vel_at_PP
                FROM SLV WHERE name = ? AND side = 'Right'
            """, (client_name,))
            client_slvr_data = client_cursor.fetchone()
    
            reference_cursor.execute("""
                SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg, Force_at_PP, Vel_at_PP FROM SLV
            """)
            reference_slv_data = np.array(reference_cursor.fetchall())
    
            slv_vars = ['JH_IN', 'PP_FORCEPLATE', 'PP_W_per_kg'  'Force_at_PP', 'Vel_at_PP']
    
            if client_slvl_data and client_slvr_data and reference_slv_data.size:
                # ── left-vs-right histograms (JH & Force) ─────
                for idx, var in enumerate(['JH_IN', 'Force_at_PP']):
                    doc.add_paragraph(
                        f"{var.replace('_',' ')} Comparison (Left vs Right)",
                        style='Heading 2')
                    hist = generate_slv_histogram(
                        var,
                        client_slvl_data[idx],            # left value
                        client_slvr_data[idx],            # right value
                        reference_slv_data[:, idx],
                        f'{var} Comparison', tmpdirname
                    )
                    doc.add_picture(hist, width=Inches(6))
    
                # ── scatter Force vs Velocity ────────────────
                client_slv_all = np.array([client_slvl_data, client_slvr_data])
                client_slv_dict = {
                    'Force_at_PP': client_slv_all[:, 1],
                    'Vel_at_PP'  : client_slv_all[:, 2],
                }
                reference_slv_dict = pd.DataFrame(reference_slv_data,
                                                  columns=slv_vars)
    
                doc.add_paragraph("Force vs. Velocity Scatter Plot",
                                  style='Heading 2')
                slv_scatter = generate_scatter_plot(
                    client_slv_dict, reference_slv_dict,
                    'Force_at_PP', 'Vel_at_PP',
                    'SLV: Force vs. Velocity', tmpdirname
                )
                doc.add_picture(slv_scatter, width=Inches(6))
            else:
                print("⚠️  Missing SLV data – skipping SLV graphs")

        elif movement == 'NMT':
            # Fetch NMT data for the client (10s taps only)
            client_cursor.execute(f"SELECT NUM_TAPS_10s FROM NMT WHERE name = '{client_name}'")
            client_nmt_data = client_cursor.fetchone()
            reference_cursor.execute("SELECT NUM_TAPS_10s FROM NMT")
            reference_nmt_data = np.array(reference_cursor.fetchall())
            
            # Ensure data exists before proceeding
            if client_nmt_data and reference_nmt_data.size > 0:
                # Format the variable name by removing underscores
                nmt_var = 'NUM_TAPS_10s'.replace('_', ' ')
                
                # Add variable title before the graph
                doc.add_paragraph(f"{nmt_var} Comparison", style='Heading 2')
                
                # Generate histogram for NMT 10s taps
                nmt_image = generate_bar_graph('NUM_TAPS_10s', client_nmt_data[0], reference_nmt_data[:, 0], f'{nmt_var} Comparison', tmpdirname)
                doc.add_picture(nmt_image, width=Inches(6))

# Function to convert DOCX to images
def docx_to_images(docx_path, output_dir):
    # Extract text from the DOCX file
    text = docx2txt.process(docx_path)
    
    # Split the text into lines
    lines = text.splitlines()

    # Create a blank image with white background
    img_width, img_height = 1000, 1500
    image = Image.new('RGB', (img_width, img_height), color='white')
    draw = ImageDraw.Draw(image)

    # Use a simple font
    try:
        font = ImageFont.truetype("arial.ttf", 20)
    except IOError:
        font = ImageFont.load_default()

    # Draw the text onto the image
    padding = 20
    y_text = padding
    for line in lines:
        if y_text + padding > img_height:
            # Save the image and start a new one if the text exceeds the page height
            img_path = os.path.join(output_dir, f"page_{int(y_text / img_height)}.png")
            image.save(img_path)
            y_text = padding
            image = Image.new('RGB', (img_width, img_height), color='white')
            draw = ImageDraw.Draw(image)

        # Calculate text size and draw it
        text_bbox = draw.textbbox((0, 0), line, font=font)
        text_width = text_bbox[2] - text_bbox[0]
        text_height = text_bbox[3] - text_bbox[1]

        draw.text((padding, y_text), line, font=font, fill="black")
        y_text += text_height + padding

    # Save the last image
    img_path = os.path.join(output_dir, "final_page.png")
    image.save(img_path)

    return img_path

doc.save(output_filename)          # ← only one final save
print(f"Document saved at: {output_filename}")

# Close connections
client_conn.close()
reference_conn.close()

# Example usage
img_output_directory = r'G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports\Images'
os.makedirs(img_output_directory, exist_ok=True)

# Convert DOCX to images
img_path = docx_to_images(output_filename, img_output_directory)
print(f"Images saved at {img_path}")

Databases opened successfully.
Client Name: Jalen Hollins
Document saved at: G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports\Athletic_Report_Jalen_Hollins.docx
Images saved at G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports\Images\final_page.png


In [3]:
import sqlite3
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
from datetime import date
import tempfile
import docx2txt
from PIL import Image, ImageDraw, ImageFont
import os

# Corrected file paths with raw strings to handle backslashes properly
client_db_path = r'D:\Athletic Screen 2.0\Output Files\movement_database_v2.db'
reference_db_path = r'D:\Athletic Screen 2.0\Output Files\Athletic_Screen_All_data_v2.db'

# Ensure the paths are valid and accessible
if not os.path.exists(client_db_path):
    print(f"Client database not found at {client_db_path}")
if not os.path.exists(reference_db_path):
    print(f"Reference database not found at {reference_db_path}")

# Connect to the client and reference databases
client_conn = sqlite3.connect(client_db_path)
reference_conn = sqlite3.connect(reference_db_path)
client_cursor = client_conn.cursor()
reference_cursor = reference_conn.cursor()

print("Databases opened successfully.")

# Fetch the client's name from the database (assuming the 'name' column is in all tables)
client_cursor.execute("SELECT DISTINCT name FROM CMJ")  # Change table if necessary
client_name = client_cursor.fetchone()[0]  # Get the first row and first column
print(f"Client Name: {client_name}")

# ---------- build unique export paths (date-stamped, no overwrite) -----
client_cursor.execute("SELECT MAX(date) FROM CMJ WHERE name = ?", (client_name,))
assessment_date = client_cursor.fetchone()[0]          # e.g. '2025-05-22'
if not assessment_date:                                # fallback to today
    assessment_date = date.today().strftime("%Y-%m-%d")

reports_dir = r'G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports'
os.makedirs(reports_dir, exist_ok=True)

parts = client_name.split(', ')
client_name_rev = f"{parts[1]}_{parts[0]}" if len(parts) == 2 else client_name
base_name = f"Athletic_Report_{client_name_rev}_{assessment_date}"

output_filename = os.path.join(reports_dir, base_name + ".docx")
img_output_directory = os.path.join(reports_dir, "Images", base_name)
os.makedirs(img_output_directory, exist_ok=True)

# auto-increment if the same file already exists
counter = 1
while os.path.exists(output_filename):
    output_filename = os.path.join(
        reports_dir, f"{base_name}_{counter}.docx"
    )
    img_output_directory = os.path.join(
        reports_dir, "Images", f"{base_name}_{counter}"
    )
    os.makedirs(img_output_directory, exist_ok=True)
    counter += 1
# ----------------------------------------------------------------------


# Helper function to calculate percentile
def calculate_percentile(value, reference_data):
    return stats.percentileofscore(reference_data, value)

# ─── UPDATED generate_bar_graph ────────────────────────────────────────────────
def generate_bar_graph(variable, client_value, reference_data, title, tmpdirname):
    """
    Blue bars  = reference distribution
    ─ red      = client MAX (best trial **within the same movement table**)
    ─ violet   = client MEAN (average of those trials)

    • No code outside this function needs to change.
    • If the caller still passes one score, that is fine; this function looks
      up any matching trials on the same assessment day and combines them.
    • RSI histograms use 0.25-wide bins so you see bars at 1.25, 1.50, 1.75 …
    """
    import numpy as np, matplotlib.pyplot as plt, os, sqlite3, re

    # ───────────── lookup: which movement table are we dealing with? ─────────
    column_table_map = {"CMJ": ["CMJ"], "DJ": ["DJ"], "SLV": ["SLV"], "NMT": ["NMT"]}
    table_guess = None
    for tbl in column_table_map:          # CMJ, DJ, SLV, NMT
        try:
            reference_cursor.execute(f"SELECT COUNT(*) FROM {tbl}")
            if reference_cursor.fetchone()[0] == len(reference_data):
                table_guess = tbl
                break
        except sqlite3.OperationalError:
            continue
    if table_guess is None:               # fallback
        for tbl in column_table_map:
            try:
                client_cursor.execute(f"SELECT 1 FROM {tbl} LIMIT 1")
                table_guess = tbl
                break
            except sqlite3.OperationalError:
                continue

    # ───────────── gather all trials for this athlete / table / day ──────────
    scores = []
    if table_guess:
        client_cursor.execute(
            f"SELECT trial_name FROM {table_guess} WHERE name=? LIMIT 1",
            (client_name,),
        )
        row = client_cursor.fetchone()
        date_prefix = None
        if row and row[0]:
            m = re.match(r"(\d{4}[-_]\d{2}[-_]\d{2})", row[0])
            date_prefix = m.group(1) if m else None

        if date_prefix:
            q = f"SELECT {variable} FROM {table_guess} WHERE name=? AND trial_name LIKE ?"
            client_cursor.execute(q, (client_name, f"{date_prefix}%"))
        else:
            q = f"SELECT {variable} FROM {table_guess} WHERE name=?"
            client_cursor.execute(q, (client_name,))

        scores = [r[0] for r in client_cursor.fetchall() if r[0] is not None]

    # fallback if still empty
    if not scores:
        scores = [client_value]

    scores = np.asarray(scores, dtype=float)
    c_max, c_mean = scores.max(), scores.mean()
    perc_mean = calculate_percentile(c_mean, reference_data)

    # ──────────────────────────── plotting begins ────────────────────────────
    plt.figure(facecolor="#181818")
    ax = plt.subplot(111, facecolor="#303030")

    reference_plotted = False  # flag to avoid double-plotting

    # ---------- RSI special case: 0.25-wide bars & custom ticks -------------
    if variable.upper() == "RSI":
        lo = np.floor(reference_data.min() / 0.25) * 0.25
        hi = np.ceil(reference_data.max() / 0.25) * 0.25
        bins = np.arange(lo, hi + 0.25, 0.25)      # bin edges
        centers = bins[:-1]                         # bar positions

        counts, _ = np.histogram(reference_data, bins=bins)
        ax.bar(
            centers,
            counts,
            width=0.25,
            align="edge",
            color="cornflowerblue",
            alpha=0.7,
            edgecolor="white",
            label="Reference",
        )
        # after counts, _ = np.histogram(...)
        for x, h in zip(centers, counts):
            if h == 0:                        # empty bin → draw thin outline
                ax.bar(x, 1e-6, width=.25, align='edge',
                       color='none', edgecolor='#404040', linewidth=.5)

        ax.set_xticks(centers)
        ax.set_xticklabels([f"{x:.2f}" for x in centers], color="lightgrey")

        reference_plotted = True  # we already drew the reference bars
    else:
        bins = 20  # default bin count

    # ---------- draw reference histogram when not plotted above -------------
    if not reference_plotted:
        ax.hist(
            reference_data,
            bins=bins,
            color="cornflowerblue",
            alpha=0.7,
            edgecolor="white",
            label="Reference",
        )

    # ---------- client mean / max lines -------------------------------------
    ax.axvline(c_max, color="red", ls="--", lw=2, label="Client Max")
    ax.axvline(c_mean, color="violet", ls="--", lw=2, label="Client Mean")

    # ---------- cosmetics ----------------------------------------------------
    ax.set_xlabel(variable.replace("_", " "), color="slategrey")
    ax.set_ylabel("Frequency", color="slategrey")
    ax.tick_params(axis="x", colors="lightgrey")
    ax.tick_params(axis="y", colors="lightgrey")
    ax.grid(color="dimgrey")

    txt = (
        f"Percentile (mean): {perc_mean:.1f}%"
        f"\nMean: {c_mean:.2f}"
        f"\nMax:  {c_max:.2f}"
    )
    plt.text(
        0.95,
        0.05,
        txt,
        ha="right",
        va="bottom",
        transform=ax.transAxes,
        color="white",
        fontsize=9,
        backgroundcolor="#181818",
    )

    ax.legend(facecolor="black", edgecolor="grey", prop={"size": "small"}, labelcolor="grey")

    # ---------- save ---------------------------------------------------------
    out_path = os.path.join(tmpdirname, f"{variable}_histogram.png")
    plt.savefig(out_path, bbox_inches="tight", facecolor="#181818")
    plt.close()
    return out_path

# Function to generate scatter plot for CMJ
def generate_scatter_plot(client_data, reference_data, x_var, y_var, title, tmpdirname):
    plt.figure(facecolor='#181818', figsize=(6, 6))
    ax = plt.subplot(111, facecolor='#303030')

    # Create scatter plot for reference data (cornflower blue)
    ax.scatter(reference_data[x_var], reference_data[y_var], label='Reference', alpha=0.5, color='cornflowerblue')

    # Create scatter plot for client data (red)
    ax.scatter(client_data[x_var], client_data[y_var], label='Client', color='red', edgecolors='black', s=100)

    # Set axis labels, replacing underscores with spaces
    ax.set_xlabel(x_var.replace('_', ' '), color='slategrey')
    ax.set_ylabel(y_var.replace('_', ' '), color='slategrey')

    # Dynamically set ticks and numbers to light grey
    ax.tick_params(axis='x', colors='lightgrey')
    ax.tick_params(axis='y', colors='lightgrey')

    # Add vertical and horizontal reference lines (light grey)
    ax.axvline(x=np.mean(reference_data[x_var]), color='lightgrey', linestyle='--', linewidth=1)
    ax.axhline(y=np.mean(reference_data[y_var]), color='lightgrey', linestyle='--', linewidth=1)

    # Customize grid style
    ax.grid(color='dimgrey')

    # Add legend
    ax.legend(facecolor='black', edgecolor='grey', prop={'size': 'small'}, labelcolor='grey')

    # Save scatter plot to file
    scatter_filename = os.path.join(tmpdirname, 'cmj_scatter.png')
    plt.savefig(scatter_filename, bbox_inches='tight', facecolor='#181818')
    plt.close()

    return scatter_filename

# Modified function to generate a histogram comparing left and right leg data
def generate_slv_histogram(variable, left_value, right_value,
                           reference_data, title, tmpdirname):
    """
    Blue bars  = reference distribution
    ─ green    = client LEFT (latest trial value you passed in)
    ─ orange   = client RIGHT

    Text box shows, for each side:
        • mean across all trials from the same assessment day
        • max across those trials
        • percentile of that mean vs. the reference distribution
    """
    import numpy as np, matplotlib.pyplot as plt, os, re, sqlite3

    # ── helper – get all trials for a given side on the same assessment day ──
    def _fetch_side_vals(side):
        # detect yyyy-mm-dd prefix in the first trial_name for this side
        client_cursor.execute(
            "SELECT trial_name FROM SLV WHERE name=? AND side=? LIMIT 1",
            (client_name, side)
        )
        row = client_cursor.fetchone()
        date_prefix = None
        if row and row[0]:
            m = re.match(r'(\d{4}[-_]\d{2}[-_]\d{2})', row[0])
            date_prefix = m.group(1) if m else None

        if date_prefix:
            q = f"SELECT {variable} FROM SLV WHERE name=? AND side=? AND trial_name LIKE ?"
            client_cursor.execute(q, (client_name, side, f'{date_prefix}%'))
        else:
            q = f"SELECT {variable} FROM SLV WHERE name=? AND side=?"
            client_cursor.execute(q, (client_name, side))

        return [r[0] for r in client_cursor.fetchall() if r[0] is not None]

    # pull all trials for each side; fall back to the single value passed in
    left_vals  = np.asarray(_fetch_side_vals('Left')  or [left_value],  dtype=float)
    right_vals = np.asarray(_fetch_side_vals('Right') or [right_value], dtype=float)

    left_mean,  left_max  = left_vals.mean(),  left_vals.max()
    right_mean, right_max = right_vals.mean(), right_vals.max()

    left_pct  = calculate_percentile(left_mean,  reference_data)
    right_pct = calculate_percentile(right_mean, reference_data)

    # ── plot ────────────────────────────────────────────────────────────────
    plt.figure(facecolor='#181818')
    ax = plt.subplot(111, facecolor='#303030')

    ax.hist(reference_data, bins=20, color='cornflowerblue',
            alpha=0.7, edgecolor='white', label='Reference')

    ax.axvline(left_value,  color='green',  ls='--', lw=2, label='Left (latest)')
    ax.axvline(right_value, color='orange', ls='--', lw=2, label='Right (latest)')

    ax.set_xlabel(variable.replace('_', ' '), color='slategrey')
    ax.set_ylabel('Frequency',               color='slategrey')
    ax.tick_params(axis='x', colors='lightgrey')
    ax.tick_params(axis='y', colors='lightgrey')
    ax.grid(color='dimgrey')

    txt = (
        f'LEFT  – mean: {left_mean:.2f}\n'
        f'        max:  {left_max:.2f}\n'
        f'        %ile: {left_pct:.1f}\n'
        f'RIGHT – mean: {right_mean:.2f}\n'
        f'        max:  {right_max:.2f}\n'
        f'        %ile: {right_pct:.1f}'
    )
    plt.text(0.95, 0.05, txt, ha='right', va='bottom',
             transform=ax.transAxes, color='white', fontsize=9,
             backgroundcolor='#181818')

    ax.legend(facecolor='black', edgecolor='grey',
              prop={'size': 'small'}, labelcolor='grey')

    out_path = os.path.join(tmpdirname, f'{variable}_histogram_slv.png')
    plt.savefig(out_path, bbox_inches='tight', facecolor='#181818')
    plt.close()
    return out_path

# Prepare the document
doc = Document()
doc.add_picture("8ctane Baseball - Black abd Blue BG.jpeg", width=Inches(4.0))  # Replace with your logo path
doc.paragraphs[-1].alignment = WD_ALIGN_PARAGRAPH.CENTER

# Adding player name and date
doc.add_paragraph(f"Player's Name: {client_name}")  # Replace client_name with dynamic value
doc.add_paragraph(f"Date: {date.today().strftime('%B %d, %Y')}")

# Create a temporary directory to store images
with tempfile.TemporaryDirectory() as tmpdirname:
    # List of movements to process
    movements = ['CMJ', 'DJ', 'SLV', 'NMT']
    
    for movement in movements:
        # Add movement title
        doc.add_paragraph(f"{movement} Report", style='Title')
        doc.add_paragraph(f"This section includes percentile reports and comparisons for {movement}.", style='Heading 2')

        if movement == 'CMJ':
            # Fetch CMJ data for the client
            client_cursor.execute("""
                SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg,
                       Force_at_PP, Vel_at_PP
                FROM CMJ WHERE name = ?
            """, (client_name,))
            client_cmj_data = client_cursor.fetchone()
            reference_cursor.execute("""
                SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg,
                       Force_at_PP, Vel_at_PP
                FROM CMJ
            """)
            reference_cmj_data = np.array(reference_cursor.fetchall())
            
            # Ensure data exists before proceeding
            if client_cmj_data and reference_cmj_data.size > 0:
                # Generate bar graphs for each variable in CMJ
                variables = ['JH_IN', 'PP_FORCEPLATE', 'PP_W_per_kg', 'Force_at_PP', 'Vel_at_PP']
                for i, var in enumerate(variables):
                    # Format the variable name by removing underscores
                    formatted_var = var.replace('_', ' ')
                    
                    # Add variable title before the graph
                    doc.add_paragraph(f"{formatted_var} Comparison", style='Heading 2')
                    
                    # Generate the bar graph and add to document
                    bar_image = generate_bar_graph(var, client_cmj_data[i], reference_cmj_data[:, i], f'{formatted_var} Comparison', tmpdirname)
                    doc.add_picture(bar_image, width=Inches(6))
                
                # Generate scatter plot for CMJ (Force_Peak_Power vs. Velo_Peak_Power)
                client_cmj_dict = {'Force_at_PP': client_cmj_data[2],
                   'Vel_at_PP':   client_cmj_data[3]}
                reference_cmj_dict = pd.DataFrame(reference_cmj_data, columns=variables)
                
                # Add scatter plot title and image
                doc.add_paragraph("Force vs. Velocity Scatter Plot", style='Heading 2')
                scatter_image = generate_scatter_plot(client_cmj_dict, reference_cmj_dict,
                                                      'Force_at_PP', 'Vel_at_PP',
                                                      'CMJ: Force vs. Velocity', tmpdirname)
                doc.add_picture(scatter_image, width=Inches(6))
    
        # ───────────────────────── DJ ─────────────────────────
        elif movement == 'DJ':
            # ── pull client & reference rows ──────────────────
            client_cursor.execute("""
                SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg, Force_at_PP, Vel_at_PP, CT, RSI
                FROM DJ
                WHERE name = ?
            """, (client_name,))
            client_dj_data = client_cursor.fetchone()
    
            reference_cursor.execute("""
                SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg, Force_at_PP, Vel_at_PP, CT, RSI
                FROM DJ
            """)
            reference_dj_data = np.array(reference_cursor.fetchall())
    
            dj_vars = ['JH_IN', 'PP_FORCEPLATE', 'PP_W_per_kg' ,'Force_at_PP', 'Vel_at_PP', 'CT', 'RSI']
    
            # ── bar graphs ────────────────────────────────────
            if client_dj_data and reference_dj_data.size:
                for i, var in enumerate(dj_vars):
                    doc.add_paragraph(f"{var.replace('_',' ')} Comparison",
                                      style='Heading 2')
                    bar = generate_bar_graph(
                        var,
                        client_dj_data[i],
                        reference_dj_data[:, i],
                        f'{var} Comparison',
                        tmpdirname
                    )
                    doc.add_picture(bar, width=Inches(6))
    
            # OPTIONAL: DJ force–velocity scatter (comment out if unwanted)
            client_dj_dict     = {'Force_at_PP': client_dj_data[1],
                                  'Vel_at_PP'  : client_dj_data[2]}
            reference_dj_dict  = pd.DataFrame(reference_dj_data,
                                              columns=dj_vars)
            doc.add_paragraph("Force vs. Velocity Scatter Plot",
                              style='Heading 2')
            dj_scatter = generate_scatter_plot(
                client_dj_dict, reference_dj_dict,
                'Force_at_PP', 'Vel_at_PP',
                'DJ: Force vs. Velocity', tmpdirname
            )
            doc.add_picture(dj_scatter, width=Inches(6))
    
        # ───────────────────────── SLV ───────────────────────
        elif movement == 'SLV':
            # pull left & right trials
            client_cursor.execute("""
                SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg, Force_at_PP, Vel_at_PP
                FROM SLV WHERE name = ? AND side = 'Left'
            """, (client_name,))
            client_slvl_data = client_cursor.fetchone()
    
            client_cursor.execute("""
                SELECT JH_IN,  PP_FORCEPLATE, PP_W_per_kg, Force_at_PP, Vel_at_PP
                FROM SLV WHERE name = ? AND side = 'Right'
            """, (client_name,))
            client_slvr_data = client_cursor.fetchone()
    
            reference_cursor.execute("""
                SELECT JH_IN, PP_FORCEPLATE, PP_W_per_kg, Force_at_PP, Vel_at_PP FROM SLV
            """)
            reference_slv_data = np.array(reference_cursor.fetchall())
    
            slv_vars = ['JH_IN', 'PP_FORCEPLATE', 'PP_W_per_kg',  'Force_at_PP', 'Vel_at_PP']
    
            if client_slvl_data and client_slvr_data and reference_slv_data.size:
                # ── left-vs-right histograms (JH & Force) ─────
                for idx, var in enumerate(['JH_IN', 'Force_at_PP']):
                    doc.add_paragraph(
                        f"{var.replace('_',' ')} Comparison (Left vs Right)",
                        style='Heading 2')
                    hist = generate_slv_histogram(
                        var,
                        client_slvl_data[idx],            # left value
                        client_slvr_data[idx],            # right value
                        reference_slv_data[:, idx],
                        f'{var} Comparison', tmpdirname
                    )
                    doc.add_picture(hist, width=Inches(6))
    
                # ── scatter Force vs Velocity ────────────────
                client_slv_all = np.array([client_slvl_data, client_slvr_data])
                client_slv_dict = {
                    'Force_at_PP': client_slv_all[:, 1],
                    'Vel_at_PP'  : client_slv_all[:, 2],
                }
                reference_slv_dict = pd.DataFrame(reference_slv_data,
                                                  columns=slv_vars)
    
                doc.add_paragraph("Force vs. Velocity Scatter Plot",
                                  style='Heading 2')
                slv_scatter = generate_scatter_plot(
                    client_slv_dict, reference_slv_dict,
                    'Force_at_PP', 'Vel_at_PP',
                    'SLV: Force vs. Velocity', tmpdirname
                )
                doc.add_picture(slv_scatter, width=Inches(6))
            else:
                print("⚠️  Missing SLV data – skipping SLV graphs")

        elif movement == 'NMT':
            # Fetch NMT data for the client (10s taps only)
            client_cursor.execute(f"SELECT NUM_TAPS_10s FROM NMT WHERE name = '{client_name}'")
            client_nmt_data = client_cursor.fetchone()
            reference_cursor.execute("SELECT NUM_TAPS_10s FROM NMT")
            reference_nmt_data = np.array(reference_cursor.fetchall())
            
            # Ensure data exists before proceeding
            if client_nmt_data and reference_nmt_data.size > 0:
                # Format the variable name by removing underscores
                nmt_var = 'NUM_TAPS_10s'.replace('_', ' ')
                
                # Add variable title before the graph
                doc.add_paragraph(f"{nmt_var} Comparison", style='Heading 2')
                
                # Generate histogram for NMT 10s taps
                nmt_image = generate_bar_graph('NUM_TAPS_10s', client_nmt_data[0], reference_nmt_data[:, 0], f'{nmt_var} Comparison', tmpdirname)
                doc.add_picture(nmt_image, width=Inches(6))

# Function to convert DOCX to images
def docx_to_images(docx_path, output_dir):
    # Extract text from the DOCX file
    text = docx2txt.process(docx_path)
    
    # Split the text into lines
    lines = text.splitlines()

    # Create a blank image with white background
    img_width, img_height = 1000, 1500
    image = Image.new('RGB', (img_width, img_height), color='white')
    draw = ImageDraw.Draw(image)

    # Use a simple font
    try:
        font = ImageFont.truetype("arial.ttf", 20)
    except IOError:
        font = ImageFont.load_default()

    # Draw the text onto the image
    padding = 20
    y_text = padding
    for line in lines:
        if y_text + padding > img_height:
            # Save the image and start a new one if the text exceeds the page height
            img_path = os.path.join(output_dir, f"page_{int(y_text / img_height)}.png")
            image.save(img_path)
            y_text = padding
            image = Image.new('RGB', (img_width, img_height), color='white')
            draw = ImageDraw.Draw(image)

        # Calculate text size and draw it
        text_bbox = draw.textbbox((0, 0), line, font=font)
        text_width = text_bbox[2] - text_bbox[0]
        text_height = text_bbox[3] - text_bbox[1]

        draw.text((padding, y_text), line, font=font, fill="black")
        y_text += text_height + padding

    # Save the last image
    img_path = os.path.join(output_dir, "final_page.png")
    image.save(img_path)

    return img_path

doc.save(output_filename)          # ← only one final save
print(f"Document saved at: {output_filename}")

# Close connections
client_conn.close()
reference_conn.close()

# Example usage
img_output_directory = r'G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports\Images'
os.makedirs(img_output_directory, exist_ok=True)

# Convert DOCX to images
img_path = docx_to_images(output_filename, img_output_directory)
print(f"Images saved at {img_path}")

Databases opened successfully.
Client Name: Jalen Hollins
Document saved at: G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports\Athletic_Report_Jalen_Hollins_All_Comp.docx
Images saved at G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports\Images\final_page.png


In [22]:
import sqlite3
import time
import os

# Paths to the source and target databases
source_db_path = 'D:/Athletic Screen 2.0/Output Files/movement_database_v2.db'
output_folder = 'D:/Athletic Screen 2.0/Output Files/'
target_databases = ['Athletic_Screen_Pro_data_v2.db']
all_data_db_path = os.path.join(output_folder, 'Athletic_Screen_All_data_v2.db')

# Retry mechanism for handling the locked database error
def retry_execute(func):
    retries = 5  # Number of retries
    while retries > 0:
        try:
            func()
            break
        except sqlite3.OperationalError as e:
            if 'database is locked' in str(e):
                print("Database is locked, retrying...")
                time.sleep(1)  # Wait for 1 second before retrying
                retries -= 1
            else:
                raise e
        if retries == 0:
            raise Exception("Max retries reached. Database is still locked.")

# Table schemas to create in the target databases and the combined database
table_schemas = {
    'CMJ': '''CREATE TABLE IF NOT EXISTS CMJ (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                date TEXT,
                trial_name TEXT,
                JH_IN REAL,
                Peak_Power REAL,
                PP_FORCEPLATE REAL,
                Force_at_PP REAL,
                Vel_at_PP REAL,
                PP_W_per_kg REAL
              )''',

    'DJ':  '''CREATE TABLE IF NOT EXISTS DJ (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                date TEXT,
                trial_name TEXT,
                JH_IN REAL,
                Peak_Power REAL,
                PP_FORCEPLATE REAL,
                Force_at_PP REAL,
                Vel_at_PP REAL,
                PP_W_per_kg REAL,
                CT REAL,
                RSI REAL
              )''',

    'SLV': '''CREATE TABLE IF NOT EXISTS SLV (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                date TEXT, 
                trial_name TEXT,
                side TEXT,
                JH_IN REAL,
                PP_FORCEPLATE REAL,
                Force_at_PP REAL,
                Vel_at_PP REAL,
                PP_W_per_kg REAL
              )''',
    'NMT': '''CREATE TABLE IF NOT EXISTS NMT (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                date TEXT,
                trial_name TEXT,
                NUM_TAPS_10s REAL,
                NUM_TAPS_20s REAL,
                NUM_TAPS_30s REAL,
                NUM_TAPS REAL
              )'''
}
# Function to create tables in a database connection
def create_tables(conn):
    cursor = conn.cursor()
    for schema in table_schemas.values():
        cursor.execute(schema)
    conn.commit()

# Open the source database
source_conn = sqlite3.connect(source_db_path, timeout=10)
source_cursor = source_conn.cursor()

# Create connections to all target databases and combined database
target_conns = {db_name: sqlite3.connect(os.path.join(output_folder, db_name), timeout=10) for db_name in target_databases}
target_conns['all'] = sqlite3.connect(all_data_db_path, timeout=10)

# Ensure tables exist in each database
for conn in target_conns.values():
    create_tables(conn)

# Function to copy data from one table in the source to target databases
def copy_table_data(table_name):
    # Fetch all data except the 'id' column from the source table
    source_cursor.execute(f"SELECT * FROM {table_name}")
    rows = source_cursor.fetchall()

    # For each row, insert it into each target database
    for db_name, conn in target_conns.items():
        cursor = conn.cursor()
        placeholders = ", ".join(["?"] * len(rows[0][1:]))  # Skips the first 'id' column
        query = f"INSERT INTO {table_name} ({', '.join([desc[0] for desc in source_cursor.description][1:])}) VALUES ({placeholders})"
        retry_execute(lambda: cursor.executemany(query, [row[1:] for row in rows]))  # Exclude 'id' column for insertion
        conn.commit()
        print(f"Copied {len(rows)} rows to {table_name} in {db_name}")

# List of table names to copy data
tables_to_copy = ['CMJ', 'DJ', 'SLV', 'NMT']

# Copy data from each table
for table in tables_to_copy:
    copy_table_data(table)

# Close all connections
source_conn.close()
for conn in target_conns.values():
    conn.close()

print("Data successfully copied to each target and combined database.")



Copied 3 rows to CMJ in Athletic_Screen_Pro_data_v2.db
Copied 3 rows to CMJ in all
Copied 3 rows to DJ in Athletic_Screen_Pro_data_v2.db
Copied 3 rows to DJ in all
Copied 6 rows to SLV in Athletic_Screen_Pro_data_v2.db
Copied 6 rows to SLV in all
Copied 1 rows to NMT in Athletic_Screen_Pro_data_v2.db
Copied 1 rows to NMT in all
Data successfully copied to each target and combined database.
