In [1]:
import sqlite3
import os
import re

# Path to the folder containing your txt files
folder_path = 'D:/Athletic Screen 2.0/Output Files/'
# The client database lives in the same folder as the exported txt files
db_path = 'D:/Athletic Screen 2.0/Output Files/movement_database.db'

# Delete the database file if it exists to start fresh
# (every run of this cell therefore rebuilds the database from the txt files)
if os.path.exists(db_path):
    os.remove(db_path)
    print(f"Deleted existing database at {db_path}")

# Connect to the SQLite database (or create it if it doesn't exist)
conn = sqlite3.connect(db_path)
cursor = conn.cursor()

# Define the corrected table schemas for each movement
# Keys are the table names; values are the CREATE TABLE statements executed below.
# Every table carries an auto-incrementing id plus the client name and trial name.
table_schemas = {
    'CMJ': '''CREATE TABLE IF NOT EXISTS CMJ (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                trial_name TEXT,
                JH_IN REAL,
                Peak_Power REAL,
                Force_Peak_Power REAL,
                Velo_Peak_Power REAL
              )''',
    'DJ': '''CREATE TABLE IF NOT EXISTS DJ (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                trial_name TEXT,
                JH_IN REAL,
                LEWIS_PEAK_POWER REAL,
                CT REAL,
                RSI REAL
              )''',
    # NOTE(review): insert_data_into_table stores 'Left' / 'Right' in the side
    # column, not the literal SLVL / SLVR tags mentioned in the SQL comment.
    'SLV': '''CREATE TABLE IF NOT EXISTS SLV (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                trial_name TEXT,
                side TEXT,  -- SLVL or SLVR
                JH_IN REAL,
                LEWIS_PEAK_POWER REAL
              )''',
    'NMT': '''CREATE TABLE IF NOT EXISTS NMT (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                trial_name TEXT,
                NUM_TAPS_10s REAL,
                NUM_TAPS_20s REAL,
                NUM_TAPS_30s REAL,
                NUM_TAPS REAL
              )'''
}

# Create the tables in the database (if they don't exist)
# Only the statements are needed here, so the dictionary keys are ignored.
for schema in table_schemas.values():
    cursor.execute(schema)

# Pull the client's name out of a header line of an export file
def extract_name(line):
    r"""Return the text found between ``Data\`` and the next ``_`` or backslash.

    Args:
        line: A single line of text to search.

    Returns:
        The captured substring, or ``None`` when the line does not contain
        ``Data\`` followed later by an underscore or a backslash.
    """
    found = re.search(r'Data\\(.*?)[_\\]', line)
    return found.group(1) if found else None

# Route one trial's metrics to the INSERT statement of its movement table
def insert_data_into_table(table_name, name, trial_name, variables):
    """Insert a single trial row into ``table_name`` using the module-level cursor.

    Args:
        table_name: One of 'SLV', 'CMJ', 'DJ' or 'NMT'; any other value only
            prints the log line and inserts nothing.
        name: Client name stored in the ``name`` column.
        trial_name: Trial identifier; for SLV it also decides the ``side``
            value ('Left' when it contains 'SLVL', otherwise 'Right').
        variables: Parsed numeric row; its first element is discarded and the
            rest are bound, in order, to the table's metric columns.
    """
    # The first parsed value is the extra "1" and is not a metric
    variables = variables[1:]

    print(f"Inserting data for {name} into {table_name}, Trial: {trial_name}, Variables: {variables}")

    # One fully spelled-out statement per supported table
    statements = {
        'SLV': "INSERT INTO SLV (name, trial_name, side, JH_IN, LEWIS_PEAK_POWER) VALUES (?, ?, ?, ?, ?)",
        'CMJ': "INSERT INTO CMJ (name, trial_name, JH_IN, Peak_Power, Force_Peak_Power, Velo_Peak_Power) VALUES (?, ?, ?, ?, ?, ?)",
        'DJ': "INSERT INTO DJ (name, trial_name, JH_IN, LEWIS_PEAK_POWER, CT, RSI) VALUES (?, ?, ?, ?, ?, ?)",
        'NMT': "INSERT INTO NMT (name, trial_name, NUM_TAPS_10s, NUM_TAPS_20s, NUM_TAPS_30s, NUM_TAPS) VALUES (?, ?, ?, ?, ?, ?)",
    }
    sql = statements.get(table_name)
    if sql is None:
        return

    if table_name == 'SLV':
        # SLV rows carry an extra column saying which leg was tested
        side = 'Left' if 'SLVL' in trial_name else 'Right'
        params = (name, trial_name, side, *variables)
    else:
        params = (name, trial_name, *variables)

    cursor.execute(sql, params)

# Substring markers checked in order; the first one found in the trial name
# decides the destination table.
movement_markers = (
    ('CMJ', 'CMJ'),
    ('DJ', 'DJ'),
    ('SLVL', 'SLV'),
    ('SLVR', 'SLV'),
    ('NMT', 'NMT'),
)

# Walk every txt export in the folder and load its data row into the database
for file_name in os.listdir(folder_path):
    if not file_name.endswith('.txt'):
        continue

    trial_name = os.path.splitext(file_name)[0]

    # Files whose name matches none of the markers are ignored
    table_name = next((table for marker, table in movement_markers if marker in trial_name), None)
    if table_name is None:
        continue

    file_path = os.path.join(folder_path, file_name)
    try:
        with open(file_path, 'r') as f:
            # The client's name is taken from the first line of the file
            first_line = f.readline().strip()
            name = extract_name(first_line)

            # Echo the extracted name so it can be verified in the cell output
            print(f"File: {file_name}, Extracted Name: {name}")

            if not name:
                print(f"Name extraction failed for {file_name}, skipping.")
                continue

            # Numbering restarts at 0 on the line after the name line; the
            # numeric data is expected on the line numbered 4.
            for line_num, line in enumerate(f):
                line = line.strip()

                # Echo each line with its number for debugging
                print(f"Line {line_num} of {file_name}: {line}")

                if line_num != 4:
                    continue

                variables = [float(value) for value in line.split()]

                # Show the parsed values before they are written
                print(f"Processing file: {file_name}, Variables: {variables}")

                insert_data_into_table(table_name, name, trial_name, variables)
                break  # Only the single data line is used

    except Exception as e:
        # A failure in one file is reported and the loop moves on to the next
        print(f"Unexpected error with file {file_name}: {e}")

# Persist the inserted rows and release the database handle
conn.commit()
conn.close()

print("Data successfully inserted into the database.")


Deleted existing database at D:/Athletic Screen 2.0/Output Files/movement_database.db
File: CMJ1.txt, Extracted Name: Victor Vargus
Line 0 of CMJ1.txt: JH_IN	Peak_Power	Force@Peak_Power	Velo@Peak_Power
Line 1 of CMJ1.txt: METRIC	METRIC	METRIC	METRIC
Line 2 of CMJ1.txt: PROCESSED	PROCESSED	PROCESSED	PROCESSED
Line 3 of CMJ1.txt: ITEM	X	X	X	X
Line 4 of CMJ1.txt: 1	18.0	326	2.14	152
Processing file: CMJ1.txt, Variables: [1.0, 18.0, 326.0, 2.14, 152.0]
Inserting data for Victor Vargus into CMJ, Trial: CMJ1, Variables: [18.0, 326.0, 2.14, 152.0]
File: CMJ2.txt, Extracted Name: Victor Vargus
Line 0 of CMJ2.txt: JH_IN	Peak_Power	Force@Peak_Power	Velo@Peak_Power
Line 1 of CMJ2.txt: METRIC	METRIC	METRIC	METRIC
Line 2 of CMJ2.txt: PROCESSED	PROCESSED	PROCESSED	PROCESSED
Line 3 of CMJ2.txt: ITEM	X	X	X	X
Line 4 of CMJ2.txt: 1	17.8	307	1.99	155
Processing file: CMJ2.txt, Variables: [1.0, 17.8, 307.0, 1.99, 155.0]
Inserting data for Victor Vargus into CMJ, Trial: CMJ2, Variables: [17.8, 307.0, 1.99,

In [2]:
import sqlite3
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
from datetime import date
import tempfile
import docx2txt
from PIL import Image, ImageDraw, ImageFont
import os

# Corrected file paths with raw strings to handle backslashes properly
client_db_path = r'D:\Athletic Screen 2.0\Output Files\movement_database.db'
reference_db_path = r'D:\Athletic Screen 2.0\Output Files\Athletic_Screen_Pro_data.db'

# sqlite3.connect() creates an empty database file when the path does not
# exist, so stop here with a clear error instead of failing later on a
# missing table.
if not os.path.exists(client_db_path):
    raise FileNotFoundError(f"Client database not found at {client_db_path}")
if not os.path.exists(reference_db_path):
    raise FileNotFoundError(f"Reference database not found at {reference_db_path}")

# Connect to the client and reference databases
client_conn = sqlite3.connect(client_db_path)
reference_conn = sqlite3.connect(reference_db_path)
client_cursor = client_conn.cursor()
reference_cursor = reference_conn.cursor()

print("Databases opened successfully.")

# Fetch the client's name from the database (assuming the 'name' column is in all tables)
client_cursor.execute("SELECT DISTINCT name FROM CMJ")  # Change table if necessary
client_row = client_cursor.fetchone()
if client_row is None:
    # fetchone() returns None for an empty result; indexing it would raise an
    # unhelpful TypeError.
    raise ValueError("No client name found: the CMJ table of the client database is empty.")
client_name = client_row[0]  # Only the first distinct name is used
print(f"Client Name: {client_name}")

# Percentile rank of one score within a reference sample
def calculate_percentile(value, reference_data):
    """Return the percentile rank of ``value`` relative to ``reference_data``.

    Thin wrapper around ``scipy.stats.percentileofscore`` with its default
    settings.

    Args:
        value: The score to rank.
        reference_data: Sequence of reference scores to rank against.

    Returns:
        The percentile (0-100) reported by SciPy.
    """
    return stats.percentileofscore(a=reference_data, score=value)

# Function to generate bar graphs with the specified layout
def generate_bar_graph(variable, client_value, reference_data, title, tmpdirname):
    """Save a histogram of the reference data with the client's score marked.

    The x ticks are spread evenly across the range of ``reference_data``.
    Previously they were fixed to 0-5, which did not match variables on larger
    scales.

    Args:
        variable: Column name; used (underscores replaced by spaces) as the
            x-axis label and as the prefix of the output file name.
        client_value: The client's score, drawn as a dashed red vertical line.
        reference_data: Non-empty sequence of reference scores to bin.
        title: Accepted for interface compatibility; not used by this function.
        tmpdirname: Directory in which the PNG is written.

    Returns:
        Path of the saved image, ``<tmpdirname>/<variable>_histogram.png``.
    """
    percentile = calculate_percentile(client_value, reference_data)

    # Dark-themed figure with a single axes
    fig = plt.figure(facecolor='#181818')
    ax = fig.add_subplot(111, facecolor='#303030')

    # Plot histogram of reference data
    ax.hist(reference_data, bins=20, color='cornflowerblue', alpha=0.7, edgecolor='white', label='Reference Data')

    # Plot vertical line for client value (individual score)
    ax.axvline(x=client_value, color='red', linestyle='dashed', linewidth=2, label='Individual Score')

    # Set axis labels (removing underscores)
    ax.set_xlabel(variable.replace('_', ' '), color='slategrey')
    ax.set_ylabel('Frequency', color='slategrey')

    # Ten evenly spaced ticks across the reference range, matching
    # generate_slv_histogram
    lowest, highest = min(reference_data), max(reference_data)
    xticks = np.linspace(lowest, highest, num=10)
    ax.set_xticks(xticks)

    # Choose label precision from the width of the reference range
    spread = highest - lowest
    if spread > 1:
        tick_format = '{:.0f}'  # No decimals for large ranges
    elif spread > 0.1:
        tick_format = '{:.2f}'  # Two decimals for moderate ranges
    else:
        tick_format = '{:.4f}'  # Four decimals for small ranges
    ax.set_xticklabels([tick_format.format(x) for x in xticks], color='lightgrey')

    # Dynamically adjust y-tick intervals
    y_max = ax.get_ylim()[1]
    yticks = np.arange(0, y_max, step=max(1, y_max // 5))  # Ensure step of at least 1
    ax.set_yticks(yticks)

    # Set axis ticks and numbers to light grey
    ax.tick_params(axis='x', colors='lightgrey')
    ax.tick_params(axis='y', colors='lightgrey')

    # Set grid style
    ax.grid(color='dimgrey')

    # Add legend
    ax.legend(facecolor='black', edgecolor='grey', prop={'size': 'small'}, labelcolor='grey')

    # Add percentile and value information in the lower-right corner of the axes
    ax.text(0.95, 0.05, f'Percentile: {round(percentile, 2)}%\nValue: {round(client_value, 2)}',
            ha='right', va='bottom', transform=ax.transAxes, color='white', fontsize=10, backgroundcolor='#181818')

    # Save histogram plot to file and release the figure
    histogram_filename = os.path.join(tmpdirname, f'{variable}_histogram.png')
    fig.savefig(histogram_filename, bbox_inches='tight', facecolor='#181818')
    plt.close(fig)

    return histogram_filename

# Scatter of reference points with the client's point highlighted (used for CMJ)
def generate_scatter_plot(client_data, reference_data, x_var, y_var, title, tmpdirname):
    """Save a scatter plot comparing the client against the reference data.

    Args:
        client_data: Mapping indexable by ``x_var`` and ``y_var`` holding the
            client's values.
        reference_data: Object indexable by ``x_var`` and ``y_var`` holding the
            reference values for each axis.
        x_var: Key plotted on the x axis (also the axis label, underscores
            replaced by spaces).
        y_var: Key plotted on the y axis (same labelling rule).
        title: Accepted for interface compatibility; not used by this function.
        tmpdirname: Directory in which the PNG is written.

    Returns:
        Path of the saved image, ``<tmpdirname>/cmj_scatter.png``.
    """
    fig = plt.figure(facecolor='#181818', figsize=(6, 6))
    ax = fig.add_subplot(111, facecolor='#303030')

    ref_x = reference_data[x_var]
    ref_y = reference_data[y_var]

    # Reference points in cornflower blue, client point in red on top
    ax.scatter(ref_x, ref_y, label='Reference', alpha=0.5, color='cornflowerblue')
    ax.scatter(client_data[x_var], client_data[y_var], label='Client', color='red', edgecolors='black', s=100)

    # Axis labels with underscores replaced by spaces
    ax.set_xlabel(x_var.replace('_', ' '), color='slategrey')
    ax.set_ylabel(y_var.replace('_', ' '), color='slategrey')

    # Light grey ticks and tick labels on both axes
    for axis_name in ('x', 'y'):
        ax.tick_params(axis=axis_name, colors='lightgrey')

    # Dashed guide lines at the mean of the reference data on each axis
    ax.axvline(x=np.mean(ref_x), color='lightgrey', linestyle='--', linewidth=1)
    ax.axhline(y=np.mean(ref_y), color='lightgrey', linestyle='--', linewidth=1)

    ax.grid(color='dimgrey')
    ax.legend(facecolor='black', edgecolor='grey', prop={'size': 'small'}, labelcolor='grey')

    # Write the figure to disk and release it
    scatter_filename = os.path.join(tmpdirname, 'cmj_scatter.png')
    fig.savefig(scatter_filename, bbox_inches='tight', facecolor='#181818')
    plt.close(fig)

    return scatter_filename

# Histogram of the reference data with separate markers for each leg
def generate_slv_histogram(variable, left_value, right_value, reference_data, title, tmpdirname):
    """Save a reference histogram with the left- and right-leg scores marked.

    Args:
        variable: Column name; used (underscores replaced by spaces) as the
            x-axis label and as the prefix of the output file name.
        left_value: Left-leg score, drawn as a dashed green vertical line.
        right_value: Right-leg score, drawn as a dashed orange vertical line.
        reference_data: Non-empty sequence of reference scores to bin.
        title: Accepted for interface compatibility; not used by this function.
        tmpdirname: Directory in which the PNG is written.

    Returns:
        Path of the saved image, ``<tmpdirname>/<variable>_histogram_slv.png``.
    """
    fig = plt.figure(facecolor='#181818')
    ax = fig.add_subplot(111, facecolor='#303030')

    # Reference distribution
    ax.hist(reference_data, bins=20, color='cornflowerblue', alpha=0.7, edgecolor='white', label='Reference Data')

    # One dashed marker per leg
    ax.axvline(x=left_value, color='green', linestyle='dashed', linewidth=2, label='Left Leg')
    ax.axvline(x=right_value, color='orange', linestyle='dashed', linewidth=2, label='Right Leg')

    # Axis labels with underscores replaced by spaces
    ax.set_xlabel(variable.replace('_', ' '), color='slategrey')
    ax.set_ylabel('Frequency', color='slategrey')

    # Ten evenly spaced ticks across the reference range
    lowest, highest = min(reference_data), max(reference_data)
    xticks = np.linspace(lowest, highest, num=10)
    ax.set_xticks(xticks)

    # Label precision depends on how wide the reference range is
    spread = highest - lowest
    if spread > 1:
        tick_format = '{:.0f}'  # No decimals for large ranges
    elif spread > 0.1:
        tick_format = '{:.2f}'  # Two decimals for moderate ranges
    else:
        tick_format = '{:.4f}'  # Four decimals for small ranges
    ax.set_xticklabels([tick_format.format(x) for x in xticks], color='lightgrey')

    # Roughly five y ticks, never closer than 1 apart
    y_max = ax.get_ylim()[1]
    yticks = np.arange(0, y_max, step=max(1, y_max // 5))
    ax.set_yticks(yticks)

    # Light grey ticks and tick labels on both axes
    for axis_name in ('x', 'y'):
        ax.tick_params(axis=axis_name, colors='lightgrey')

    ax.grid(color='dimgrey')
    ax.legend(facecolor='black', edgecolor='grey', prop={'size': 'small'}, labelcolor='grey')

    # Write the figure to disk and release it
    histogram_filename = os.path.join(tmpdirname, f'{variable}_histogram_slv.png')
    fig.savefig(histogram_filename, bbox_inches='tight', facecolor='#181818')
    plt.close(fig)

    return histogram_filename

# Prepare the document
doc = Document()
doc.add_picture("8ctane Baseball - Black abd Blue BG.jpeg", width=Inches(4.0))  # Replace with your logo path
doc.paragraphs[-1].alignment = WD_ALIGN_PARAGRAPH.CENTER

# Adding player name and date
doc.add_paragraph(f"Player's Name: {client_name}")
doc.add_paragraph(f"Date: {date.today().strftime('%B %d, %Y')}")

# Create a temporary directory to store images
with tempfile.TemporaryDirectory() as tmpdirname:
    # List of movements to process
    movements = ['CMJ', 'DJ', 'SLV', 'NMT']

    for movement in movements:
        # Add movement title
        doc.add_paragraph(f"{movement} Report", style='Title')
        doc.add_paragraph(f"This section includes percentile reports and comparisons for {movement}.", style='Heading 2')

        # All client queries bind the name with a "?" placeholder so that names
        # containing quotes (e.g. O'Brien) cannot break or alter the SQL.
        # fetchone() keeps only the first matching row for the client.
        if movement == 'CMJ':
            # Fetch CMJ data for the client
            client_cursor.execute(
                "SELECT JH_IN, Peak_Power, Force_Peak_Power, Velo_Peak_Power FROM CMJ WHERE name = ?",
                (client_name,),
            )
            client_cmj_data = client_cursor.fetchone()
            reference_cursor.execute("SELECT JH_IN, Peak_Power, Force_Peak_Power, Velo_Peak_Power FROM CMJ")
            reference_cmj_data = np.array(reference_cursor.fetchall())

            # Ensure data exists before proceeding
            if client_cmj_data and reference_cmj_data.size > 0:
                # Generate bar graphs for each variable in CMJ
                variables = ['JH_IN', 'Peak_Power', 'Force_Peak_Power', 'Velo_Peak_Power']
                for i, var in enumerate(variables):
                    # Format the variable name by removing underscores
                    formatted_var = var.replace('_', ' ')

                    # Add variable title before the graph
                    doc.add_paragraph(f"{formatted_var} Comparison", style='Heading 2')

                    # Generate the bar graph and add to document
                    bar_image = generate_bar_graph(var, client_cmj_data[i], reference_cmj_data[:, i], f'{formatted_var} Comparison', tmpdirname)
                    doc.add_picture(bar_image, width=Inches(6))

                # Generate scatter plot for CMJ (Force_Peak_Power vs. Velo_Peak_Power)
                client_cmj_dict = {'Force_Peak_Power': client_cmj_data[2], 'Velo_Peak_Power': client_cmj_data[3]}
                reference_cmj_dict = pd.DataFrame(reference_cmj_data, columns=variables)

                # Add scatter plot title and image
                doc.add_paragraph("Force vs. Velocity Scatter Plot", style='Heading 2')
                scatter_image = generate_scatter_plot(client_cmj_dict, reference_cmj_dict, 'Force_Peak_Power', 'Velo_Peak_Power', 'CMJ: Force vs. Velocity', tmpdirname)
                doc.add_picture(scatter_image, width=Inches(6))

        elif movement == 'DJ':
            # Fetch DJ data for the client
            client_cursor.execute(
                "SELECT JH_IN, LEWIS_PEAK_POWER, CT, RSI FROM DJ WHERE name = ?",
                (client_name,),
            )
            client_dj_data = client_cursor.fetchone()
            reference_cursor.execute("SELECT JH_IN, LEWIS_PEAK_POWER, CT, RSI FROM DJ")
            reference_dj_data = np.array(reference_cursor.fetchall())

            # Ensure data exists before proceeding
            if client_dj_data and reference_dj_data.size > 0:
                # Generate bar graphs for each variable in DJ
                variables = ['JH_IN', 'LEWIS_PEAK_POWER', 'CT', 'RSI']
                for i, var in enumerate(variables):
                    # Format the variable name by removing underscores
                    formatted_var = var.replace('_', ' ')

                    # Add variable title before the graph
                    doc.add_paragraph(f"{formatted_var} Comparison", style='Heading 2')

                    # Generate the bar graph and add to document
                    bar_image = generate_bar_graph(var, client_dj_data[i], reference_dj_data[:, i], f'{formatted_var} Comparison', tmpdirname)
                    doc.add_picture(bar_image, width=Inches(6))

        elif movement == 'SLV':
            # Fetch the left- and right-leg rows for the client
            slv_query = "SELECT JH_IN, LEWIS_PEAK_POWER FROM SLV WHERE name = ? AND side = ?"
            client_cursor.execute(slv_query, (client_name, 'Left'))
            client_slvl_data = client_cursor.fetchone()
            client_cursor.execute(slv_query, (client_name, 'Right'))
            client_slvr_data = client_cursor.fetchone()
            reference_cursor.execute("SELECT JH_IN, LEWIS_PEAK_POWER FROM SLV")
            reference_slv_data = np.array(reference_cursor.fetchall())

            # Ensure data exists before proceeding
            if client_slvl_data and client_slvr_data and reference_slv_data.size > 0:
                # Combine SLVL and SLVR for Jump Height and Peak Power
                slv_variables = ['JH_IN', 'LEWIS_PEAK_POWER']

                for i, var in enumerate(slv_variables):
                    # Format the variable name by removing underscores
                    formatted_var = var.replace('_', ' ')

                    # Add variable title before the graph
                    doc.add_paragraph(f"{formatted_var} Comparison (Left vs. Right Leg)", style='Heading 2')

                    # Generate histogram for both Left and Right leg data
                    bar_image = generate_slv_histogram(var, client_slvl_data[i], client_slvr_data[i], reference_slv_data[:, i], f'{formatted_var} Comparison', tmpdirname)
                    doc.add_picture(bar_image, width=Inches(6))

        elif movement == 'NMT':
            # Fetch NMT data for the client (10s taps only)
            client_cursor.execute("SELECT NUM_TAPS_10s FROM NMT WHERE name = ?", (client_name,))
            client_nmt_data = client_cursor.fetchone()
            reference_cursor.execute("SELECT NUM_TAPS_10s FROM NMT")
            reference_nmt_data = np.array(reference_cursor.fetchall())

            # Ensure data exists before proceeding
            if client_nmt_data and reference_nmt_data.size > 0:
                # Format the variable name by removing underscores
                nmt_var = 'NUM_TAPS_10s'.replace('_', ' ')

                # Add variable title before the graph
                doc.add_paragraph(f"{nmt_var} Comparison", style='Heading 2')

                # Generate histogram for NMT 10s taps
                nmt_image = generate_bar_graph('NUM_TAPS_10s', client_nmt_data[0], reference_nmt_data[:, 0], f'{nmt_var} Comparison', tmpdirname)
                doc.add_picture(nmt_image, width=Inches(6))

    # The file name does not depend on the movement, so it is computed and the
    # document saved once after all sections are added (previously this ran on
    # every loop iteration, rewriting the same file four times).
    # Extract first and last name (assuming name format is 'Last, First')
    name_parts = client_name.split(', ')
    if len(name_parts) == 2:
        client_name_reversed = f"{name_parts[1]} {name_parts[0]}"
    else:
        client_name_reversed = client_name  # In case the name doesn't follow the expected format

    # Save the document with the reversed client's name in the filename
    output_filename = os.path.join('D:/Athletic Screen 2.0/Output Files', f"Athletic_Report_{client_name_reversed.replace(' ', '_')}.docx")
    doc.save(output_filename)
        
# Fetch the client's name from the database (assuming the 'name' column is in all tables)
client_cursor.execute("SELECT DISTINCT name FROM CMJ")
client_row = client_cursor.fetchone()
if client_row is None:
    # fetchone() returns None for an empty result; indexing it would raise an
    # unhelpful TypeError.
    raise ValueError("No client name found: the CMJ table of the client database is empty.")
client_name = client_row[0]

# Ensure the name is formatted correctly ('Last, First' becomes 'First Last')
name_parts = client_name.split(', ')
if len(name_parts) == 2:
    client_name_reversed = f"{name_parts[1]} {name_parts[0]}"
else:
    client_name_reversed = client_name

# Ensure output path is dynamically created based on client name
output_dir = r'G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports'
# doc.save() does not create missing folders, so make sure the target exists
os.makedirs(output_dir, exist_ok=True)
output_filename = os.path.join(output_dir, f"Athletic_Report_{client_name_reversed.replace(' ', '_')}.docx")

# Save the document to the correct location
doc.save(output_filename)
print(f"Document saved at: {output_filename}")

# Function to convert DOCX to images
def docx_to_images(docx_path, output_dir):
    """Render the text of a DOCX file onto one or more white PNG pages.

    Only the text returned by ``docx2txt.process`` is drawn, one line per row;
    nothing else from the document is rendered. Each full page is saved as
    ``page_<n>.png`` (numbered from 1) and the last page, which may be only
    partly filled, is saved as ``final_page.png``.

    Earlier pages used to be named from ``int(y_text / img_height)``, which is
    always 0 or 1, so multi-page documents overwrote their own pages. A running
    page counter now gives every page a distinct file name.

    Args:
        docx_path: Path of the DOCX file to read.
        output_dir: Existing directory in which the PNG files are written.

    Returns:
        Path of ``final_page.png`` inside ``output_dir``.
    """
    # Extract text from the DOCX file and split it into lines
    text = docx2txt.process(docx_path)
    lines = text.splitlines()

    img_width, img_height = 1000, 1500
    padding = 20

    def _blank_page():
        """Return a fresh white page together with a drawing handle for it."""
        page = Image.new('RGB', (img_width, img_height), color='white')
        return page, ImageDraw.Draw(page)

    # Use Arial when available, otherwise fall back to PIL's built-in font
    try:
        font = ImageFont.truetype("arial.ttf", 20)
    except IOError:
        font = ImageFont.load_default()

    image, draw = _blank_page()
    y_text = padding
    page_number = 1
    for line in lines:
        if y_text + padding > img_height:
            # Page is full: save it under its own number and start a new one
            image.save(os.path.join(output_dir, f"page_{page_number}.png"))
            page_number += 1
            y_text = padding
            image, draw = _blank_page()

        # Measure the line so the next one starts below it
        text_bbox = draw.textbbox((0, 0), line, font=font)
        text_height = text_bbox[3] - text_bbox[1]

        draw.text((padding, y_text), line, font=font, fill="black")
        y_text += text_height + padding

    # Save the last image
    img_path = os.path.join(output_dir, "final_page.png")
    image.save(img_path)

    return img_path

# Release both database handles
for connection in (client_conn, reference_conn):
    connection.close()

# Folder that receives the rendered page images (created on demand)
img_output_directory = r'G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports\Images'
os.makedirs(img_output_directory, exist_ok=True)

# Render the saved DOCX into images and report the path that was returned
img_path = docx_to_images(output_filename, img_output_directory)
print(f"Images saved at {img_path}")

Databases opened successfully.
Client Name: Victor Vargus
Document saved at: G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports\Athletic_Report_Victor_Vargus.docx
Images saved at G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports\Images\final_page.png


In [3]:
import sqlite3
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
from datetime import date
import tempfile
import docx2txt
from PIL import Image, ImageDraw, ImageFont
import os

# Corrected file paths with raw strings to handle backslashes properly
client_db_path = r'D:\Athletic Screen 2.0\Output Files\movement_database.db'
reference_db_path = r'D:\Athletic Screen 2.0\Output Files\Athletic_Screen_All_data.db'

# sqlite3.connect() creates an empty database file when the path does not
# exist, so stop here with a clear error instead of failing later on a
# missing table.
if not os.path.exists(client_db_path):
    raise FileNotFoundError(f"Client database not found at {client_db_path}")
if not os.path.exists(reference_db_path):
    raise FileNotFoundError(f"Reference database not found at {reference_db_path}")

# Connect to the client and reference databases
client_conn = sqlite3.connect(client_db_path)
reference_conn = sqlite3.connect(reference_db_path)
client_cursor = client_conn.cursor()
reference_cursor = reference_conn.cursor()

print("Databases opened successfully.")

# Fetch the client's name from the database (assuming the 'name' column is in all tables)
client_cursor.execute("SELECT DISTINCT name FROM CMJ")  # Change table if necessary
client_row = client_cursor.fetchone()
if client_row is None:
    # fetchone() returns None for an empty result; indexing it would raise an
    # unhelpful TypeError.
    raise ValueError("No client name found: the CMJ table of the client database is empty.")
client_name = client_row[0]  # Only the first distinct name is used
print(f"Client Name: {client_name}")

# Percentile rank of one score within a reference sample
def calculate_percentile(value, reference_data):
    """Return the percentile rank of ``value`` relative to ``reference_data``.

    Delegates to ``scipy.stats.percentileofscore`` with its default settings.

    Args:
        value: The score to rank.
        reference_data: Sequence of reference scores to rank against.

    Returns:
        The percentile (0-100) reported by SciPy.
    """
    percentile = stats.percentileofscore(a=reference_data, score=value)
    return percentile

# Histogram of the reference data with the client's score marked on it
def generate_bar_graph(variable, client_value, reference_data, title, tmpdirname):
    """Save a histogram of the reference data with the client's score marked.

    Args:
        variable: Column name; used (underscores replaced by spaces) as the
            x-axis label and as the prefix of the output file name.
        client_value: The client's score, drawn as a dashed red vertical line.
        reference_data: Non-empty sequence of reference scores to bin.
        title: Accepted for interface compatibility; not used by this function.
        tmpdirname: Directory in which the PNG is written.

    Returns:
        Path of the saved image, ``<tmpdirname>/<variable>_histogram.png``.
    """
    percentile = calculate_percentile(client_value, reference_data)

    # Dark-themed figure with a single axes
    fig = plt.figure(facecolor='#181818')
    ax = fig.add_subplot(111, facecolor='#303030')

    # Reference distribution and the client's marker
    ax.hist(reference_data, bins=20, color='cornflowerblue', alpha=0.7, edgecolor='white', label='Reference Data')
    ax.axvline(x=client_value, color='red', linestyle='dashed', linewidth=2, label='Individual Score')

    # Axis labels with underscores replaced by spaces
    ax.set_xlabel(variable.replace('_', ' '), color='slategrey')
    ax.set_ylabel('Frequency', color='slategrey')

    # Ten evenly spaced ticks across the reference range
    lowest, highest = min(reference_data), max(reference_data)
    xticks = np.linspace(lowest, highest, num=10)
    ax.set_xticks(xticks)

    # Label precision depends on how wide the reference range is
    spread = highest - lowest
    if spread > 1:
        tick_format = '{:.0f}'  # No decimals for large ranges
    elif spread > 0.1:
        tick_format = '{:.2f}'  # Two decimals for moderate ranges
    else:
        tick_format = '{:.4f}'  # Four decimals for small ranges
    ax.set_xticklabels([tick_format.format(x) for x in xticks], color='lightgrey')

    # Roughly five y ticks, never closer than 1 apart
    y_max = ax.get_ylim()[1]
    yticks = np.arange(0, y_max, step=max(1, y_max // 5))
    ax.set_yticks(yticks)

    # Light grey ticks and tick labels on both axes
    for axis_name in ('x', 'y'):
        ax.tick_params(axis=axis_name, colors='lightgrey')

    ax.grid(color='dimgrey')
    ax.legend(facecolor='black', edgecolor='grey', prop={'size': 'small'}, labelcolor='grey')

    # Percentile and raw value in the lower-right corner of the axes
    ax.text(0.95, 0.05, f'Percentile: {round(percentile, 2)}%\nValue: {round(client_value, 2)}',
            ha='right', va='bottom', transform=ax.transAxes, color='white', fontsize=10, backgroundcolor='#181818')

    # Write the figure to disk and release it
    histogram_filename = os.path.join(tmpdirname, f'{variable}_histogram.png')
    fig.savefig(histogram_filename, bbox_inches='tight', facecolor='#181818')
    plt.close(fig)

    return histogram_filename

# Scatter of reference points with the client's point highlighted (used for CMJ)
def generate_scatter_plot(client_data, reference_data, x_var, y_var, title, tmpdirname):
    """Save a scatter plot comparing the client against the reference data.

    Args:
        client_data: Mapping indexable by ``x_var`` and ``y_var`` holding the
            client's values.
        reference_data: Object indexable by ``x_var`` and ``y_var`` holding the
            reference values for each axis.
        x_var: Key plotted on the x axis (also the axis label, underscores
            replaced by spaces).
        y_var: Key plotted on the y axis (same labelling rule).
        title: Accepted for interface compatibility; not used by this function.
        tmpdirname: Directory in which the PNG is written.

    Returns:
        Path of the saved image, ``<tmpdirname>/cmj_scatter.png``.
    """
    fig = plt.figure(facecolor='#181818', figsize=(6, 6))
    ax = fig.add_subplot(111, facecolor='#303030')

    ref_x = reference_data[x_var]
    ref_y = reference_data[y_var]

    # Reference points in cornflower blue, client point in red on top
    ax.scatter(ref_x, ref_y, label='Reference', alpha=0.5, color='cornflowerblue')
    ax.scatter(client_data[x_var], client_data[y_var], label='Client', color='red', edgecolors='black', s=100)

    # Axis labels with underscores replaced by spaces
    ax.set_xlabel(x_var.replace('_', ' '), color='slategrey')
    ax.set_ylabel(y_var.replace('_', ' '), color='slategrey')

    # Light grey ticks and tick labels on both axes
    for axis_name in ('x', 'y'):
        ax.tick_params(axis=axis_name, colors='lightgrey')

    # Dashed guide lines at the mean of the reference data on each axis
    ax.axvline(x=np.mean(ref_x), color='lightgrey', linestyle='--', linewidth=1)
    ax.axhline(y=np.mean(ref_y), color='lightgrey', linestyle='--', linewidth=1)

    ax.grid(color='dimgrey')
    ax.legend(facecolor='black', edgecolor='grey', prop={'size': 'small'}, labelcolor='grey')

    # Write the figure to disk and release it
    scatter_filename = os.path.join(tmpdirname, 'cmj_scatter.png')
    fig.savefig(scatter_filename, bbox_inches='tight', facecolor='#181818')
    plt.close(fig)

    return scatter_filename

# Histogram of the reference data with the client's left and right leg values marked
def generate_slv_histogram(variable, left_value, right_value, reference_data, title, tmpdirname):
    """Plot a reference histogram with left/right leg markers and save it as a PNG.

    Parameters
    ----------
    variable : str
        Variable name; used for the x-axis label (underscores become spaces)
        and as the prefix of the output file name.
    left_value, right_value : number
        X positions of the dashed green (left) and orange (right) lines.
    reference_data : sequence of numbers
        Values that are binned into the histogram and that define the x ticks.
    title : str
        Accepted but not used anywhere in the body.
    tmpdirname : str
        Directory the image is written to.

    Returns
    -------
    str
        Path of the saved ``<variable>_histogram_slv.png`` file.
    """
    fig = plt.figure(facecolor='#181818')
    ax = fig.add_subplot(111, facecolor='#303030')

    # Reference distribution
    ax.hist(reference_data, bins=20, color='cornflowerblue', alpha=0.7, edgecolor='white', label='Reference Data')

    # One dashed marker per leg
    ax.axvline(x=left_value, color='green', linestyle='dashed', linewidth=2, label='Left Leg')
    ax.axvline(x=right_value, color='orange', linestyle='dashed', linewidth=2, label='Right Leg')

    ax.set_xlabel(variable.replace('_', ' '), color='slategrey')
    ax.set_ylabel('Frequency', color='slategrey')

    # Ten evenly spaced x ticks across the reference range
    lowest = min(reference_data)
    highest = max(reference_data)
    xticks = np.linspace(lowest, highest, num=10)
    ax.set_xticks(xticks)

    # Fewer decimals for wide ranges, more for narrow ones
    spread = highest - lowest
    if spread > 1:
        tick_format = '{:.0f}'
    elif spread > 0.1:
        tick_format = '{:.2f}'
    else:
        tick_format = '{:.4f}'
    ax.set_xticklabels([tick_format.format(tick) for tick in xticks], color='lightgrey')

    # Roughly five y ticks, with a step that is never below 1
    y_top = ax.get_ylim()[1]
    ax.set_yticks(np.arange(0, y_top, step=max(1, y_top // 5)))

    # Tick marks and tick labels in light grey on both axes
    ax.tick_params(axis='both', colors='lightgrey')

    ax.grid(color='dimgrey')
    ax.legend(facecolor='black', edgecolor='grey', prop={'size': 'small'}, labelcolor='grey')

    # Write the figure to disk and release it
    histogram_filename = os.path.join(tmpdirname, f'{variable}_histogram_slv.png')
    fig.savefig(histogram_filename, bbox_inches='tight', facecolor='#181818')
    plt.close(fig)

    return histogram_filename

# Start the report: logo first, then the player's name and today's date
doc = Document()
doc.add_picture("8ctane Baseball - Black abd Blue BG.jpeg", width=Inches(4.0))  # relative path: resolved against the working directory
logo_paragraph = doc.paragraphs[-1]  # last paragraph in the document right after the logo is added
logo_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

# Header lines identifying the player and the report date
report_date = date.today().strftime('%B %d, %Y')
doc.add_paragraph(f"Player's Name: {client_name}")
doc.add_paragraph(f"Date: {report_date}")

# Build one report section per movement. Plot images are written to a temporary
# directory that is removed when the `with` block exits.
with tempfile.TemporaryDirectory() as tmpdirname:
    # List of movements to process
    movements = ['CMJ', 'DJ', 'SLV', 'NMT']

    for movement in movements:
        # Add movement title
        doc.add_paragraph(f"{movement} Report", style='Title')
        doc.add_paragraph(f"This section includes percentile reports and comparisons for {movement}.", style='Heading 2')

        if movement == 'CMJ':
            # Fetch CMJ data for the client. The name is bound as a query parameter
            # so that a name containing an apostrophe cannot break (or alter) the SQL.
            client_cursor.execute(
                "SELECT JH_IN, Peak_Power, Force_Peak_Power, Velo_Peak_Power FROM CMJ WHERE name = ?",
                (client_name,),
            )
            client_cmj_data = client_cursor.fetchone()  # only the first matching row is used
            reference_cursor.execute("SELECT JH_IN, Peak_Power, Force_Peak_Power, Velo_Peak_Power FROM CMJ")
            reference_cmj_data = np.array(reference_cursor.fetchall())

            # Ensure data exists before proceeding
            if client_cmj_data and reference_cmj_data.size > 0:
                # Generate bar graphs for each variable in CMJ
                variables = ['JH_IN', 'Peak_Power', 'Force_Peak_Power', 'Velo_Peak_Power']
                for i, var in enumerate(variables):
                    # Format the variable name by removing underscores
                    formatted_var = var.replace('_', ' ')

                    # Add variable title before the graph
                    doc.add_paragraph(f"{formatted_var} Comparison", style='Heading 2')

                    # Generate the bar graph and add to document
                    bar_image = generate_bar_graph(var, client_cmj_data[i], reference_cmj_data[:, i], f'{formatted_var} Comparison', tmpdirname)
                    doc.add_picture(bar_image, width=Inches(6))

                # Generate scatter plot for CMJ (Force_Peak_Power vs. Velo_Peak_Power)
                client_cmj_dict = {'Force_Peak_Power': client_cmj_data[2], 'Velo_Peak_Power': client_cmj_data[3]}
                reference_cmj_dict = pd.DataFrame(reference_cmj_data, columns=variables)

                # Add scatter plot title and image
                doc.add_paragraph("Force vs. Velocity Scatter Plot", style='Heading 2')
                scatter_image = generate_scatter_plot(client_cmj_dict, reference_cmj_dict, 'Force_Peak_Power', 'Velo_Peak_Power', 'CMJ: Force vs. Velocity', tmpdirname)
                doc.add_picture(scatter_image, width=Inches(6))

        elif movement == 'DJ':
            # Fetch DJ data for the client (name bound as a parameter)
            client_cursor.execute(
                "SELECT JH_IN, LEWIS_PEAK_POWER, CT, RSI FROM DJ WHERE name = ?",
                (client_name,),
            )
            client_dj_data = client_cursor.fetchone()  # only the first matching row is used
            reference_cursor.execute("SELECT JH_IN, LEWIS_PEAK_POWER, CT, RSI FROM DJ")
            reference_dj_data = np.array(reference_cursor.fetchall())

            # Ensure data exists before proceeding
            if client_dj_data and reference_dj_data.size > 0:
                # Generate bar graphs for each variable in DJ
                variables = ['JH_IN', 'LEWIS_PEAK_POWER', 'CT', 'RSI']
                for i, var in enumerate(variables):
                    # Format the variable name by removing underscores
                    formatted_var = var.replace('_', ' ')

                    # Add variable title before the graph
                    doc.add_paragraph(f"{formatted_var} Comparison", style='Heading 2')

                    # Generate the bar graph and add to document
                    bar_image = generate_bar_graph(var, client_dj_data[i], reference_dj_data[:, i], f'{formatted_var} Comparison', tmpdirname)
                    doc.add_picture(bar_image, width=Inches(6))

        elif movement == 'SLV':
            # Fetch the client's left-leg and right-leg rows (name bound as a parameter)
            slv_client_query = "SELECT JH_IN, LEWIS_PEAK_POWER FROM SLV WHERE name = ? AND side = ?"
            client_cursor.execute(slv_client_query, (client_name, 'Left'))
            client_slvl_data = client_cursor.fetchone()
            client_cursor.execute(slv_client_query, (client_name, 'Right'))
            client_slvr_data = client_cursor.fetchone()
            # Reference rows are not filtered by side: both legs share one distribution
            reference_cursor.execute("SELECT JH_IN, LEWIS_PEAK_POWER FROM SLV")
            reference_slv_data = np.array(reference_cursor.fetchall())

            # Ensure data exists before proceeding
            if client_slvl_data and client_slvr_data and reference_slv_data.size > 0:
                # Combine SLVL and SLVR for Jump Height and Peak Power
                slv_variables = ['JH_IN', 'LEWIS_PEAK_POWER']

                for i, var in enumerate(slv_variables):
                    # Format the variable name by removing underscores
                    formatted_var = var.replace('_', ' ')

                    # Add variable title before the graph
                    doc.add_paragraph(f"{formatted_var} Comparison (Left vs. Right Leg)", style='Heading 2')

                    # Generate histogram for both Left and Right leg data
                    bar_image = generate_slv_histogram(var, client_slvl_data[i], client_slvr_data[i], reference_slv_data[:, i], f'{formatted_var} Comparison', tmpdirname)
                    doc.add_picture(bar_image, width=Inches(6))

        elif movement == 'NMT':
            # Fetch NMT data for the client (10s taps only; name bound as a parameter)
            client_cursor.execute("SELECT NUM_TAPS_10s FROM NMT WHERE name = ?", (client_name,))
            client_nmt_data = client_cursor.fetchone()
            reference_cursor.execute("SELECT NUM_TAPS_10s FROM NMT")
            reference_nmt_data = np.array(reference_cursor.fetchall())

            # Ensure data exists before proceeding
            if client_nmt_data and reference_nmt_data.size > 0:
                # Format the variable name by removing underscores
                nmt_var = 'NUM_TAPS_10s'.replace('_', ' ')

                # Add variable title before the graph
                doc.add_paragraph(f"{nmt_var} Comparison", style='Heading 2')

                # Generate histogram for NMT 10s taps
                nmt_image = generate_bar_graph('NUM_TAPS_10s', client_nmt_data[0], reference_nmt_data[:, 0], f'{nmt_var} Comparison', tmpdirname)
                doc.add_picture(nmt_image, width=Inches(6))

# Extract first and last name (assuming name format is 'Last, First')
name_parts = client_name.split(', ')
if len(name_parts) == 2:
    client_name_reversed = f"{name_parts[1]} {name_parts[0]}"
else:
    client_name_reversed = client_name  # In case the name doesn't follow the expected format

# Save the document once, after every movement section has been added,
# with the reversed client's name in the filename
output_filename = os.path.join('D:/Athletic Screen 2.0/Output Files', f"Athletic_Report_{client_name_reversed.replace(' ', '_')}_All_Comp.docx")
doc.save(output_filename)

# Fetch the client's name from the database (assuming the 'name' column is in all tables).
# Only the first distinct name in CMJ is used.
client_cursor.execute("SELECT DISTINCT name FROM CMJ")
name_row = client_cursor.fetchone()
if name_row is None:
    # fetchone() returns None for an empty result; fail with a clear message
    # instead of a TypeError from indexing None.
    raise ValueError("No client name found: the CMJ table is empty.")
client_name = name_row[0]

# Ensure the name is formatted correctly ('Last, First' -> 'First Last')
name_parts = client_name.split(', ')
if len(name_parts) == 2:
    client_name_reversed = f"{name_parts[1]} {name_parts[0]}"
else:
    client_name_reversed = client_name

# Ensure output path is dynamically created based on client name.
# The folder is created on first use so that doc.save() does not fail on a missing directory.
output_dir = r'G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports'
os.makedirs(output_dir, exist_ok=True)
output_filename = os.path.join(output_dir, f"Athletic_Report_{client_name_reversed.replace(' ', '_')}_All_Comp.docx")

# Save the document to the correct location
doc.save(output_filename)
print(f"Document saved at: {output_filename}")

# Function to convert DOCX to images
def docx_to_images(docx_path, output_dir):
    """Render the text of a DOCX file onto one or more PNG pages.

    The text is extracted with ``docx2txt.process`` and drawn line by line in
    black on white 1000x1500 pages. Only the extracted text is drawn.

    Parameters
    ----------
    docx_path : str
        Path of the DOCX file to read.
    output_dir : str
        Existing directory the PNG files are written to.

    Returns
    -------
    str
        Path of the last page, which is always saved as ``final_page.png``.
        Earlier pages, if any, are saved as ``page_1.png``, ``page_2.png``, ...
    """
    # Extract text from the DOCX file and split it into lines
    text = docx2txt.process(docx_path)
    lines = text.splitlines()

    img_width, img_height = 1000, 1500
    padding = 20

    # Use a simple font, falling back to the built-in one when Arial is unavailable
    try:
        font = ImageFont.truetype("arial.ttf", 20)
    except IOError:
        font = ImageFont.load_default()

    def _new_page():
        """Return a blank white page and a drawing context bound to it."""
        page = Image.new('RGB', (img_width, img_height), color='white')
        return page, ImageDraw.Draw(page)

    image, draw = _new_page()
    page_number = 1  # running counter so every full page gets its own file name
    y_text = padding

    for line in lines:
        if y_text + padding > img_height:
            # The current page is full: save it and continue on a fresh one
            img_path = os.path.join(output_dir, f"page_{page_number}.png")
            image.save(img_path)
            page_number += 1
            y_text = padding
            image, draw = _new_page()

        # Measure the line so the next one starts below it
        text_bbox = draw.textbbox((0, 0), line, font=font)
        text_height = text_bbox[3] - text_bbox[1]

        draw.text((padding, y_text), line, font=font, fill="black")
        y_text += text_height + padding

    # Save the last image
    img_path = os.path.join(output_dir, "final_page.png")
    image.save(img_path)

    return img_path

# Release both database handles
for connection in (client_conn, reference_conn):
    connection.close()

# Folder that receives the rendered pages; created if it does not exist yet
img_output_directory = r'G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports\Images'
os.makedirs(img_output_directory, exist_ok=True)

# Render the saved report and show where the last page was written
img_path = docx_to_images(output_filename, img_output_directory)
print(f"Images saved at {img_path}")

Databases opened successfully.
Client Name: Victor Vargus
Document saved at: G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports\Athletic_Report_Victor_Vargus_All_Comp.docx
Images saved at G:\My Drive\Athletic Screen 2.0 Reports\Pro Reports\Images\final_page.png


In [4]:
import sqlite3
import time
import os

# Folder that holds the target databases and the combined database
output_folder = 'D:/Athletic Screen 2.0/Output Files/'

# Database the rows are copied from
source_db_path = 'D:/Athletic Screen 2.0/Output Files/movement_database.db'

# Target database file names (resolved inside output_folder) and the combined database path
target_databases = ['Athletic_Screen_Pro_data.db']
all_data_db_path = os.path.join(output_folder, 'Athletic_Screen_All_data.db')

# Retry mechanism for handling the locked database error
def retry_execute(func, retries=5, delay=1):
    """Call ``func`` and retry while SQLite reports "database is locked".

    Parameters
    ----------
    func : callable
        Zero-argument callable that performs the database operation.
    retries : int, optional
        Maximum number of attempts (default 5). Must be at least 1.
    delay : float, optional
        Seconds to wait between attempts (default 1).

    Returns
    -------
    object
        Whatever ``func`` returns on the first successful attempt.

    Raises
    ------
    ValueError
        If ``retries`` is less than 1.
    sqlite3.OperationalError
        Re-raised unchanged when the error is not a "database is locked" error.
    Exception
        When every attempt failed because the database was locked. The last
        ``sqlite3.OperationalError`` is attached as the cause.
    """
    if retries < 1:
        raise ValueError("retries must be at least 1")

    last_error = None
    for attempt in range(1, retries + 1):
        try:
            return func()
        except sqlite3.OperationalError as exc:
            if 'database is locked' not in str(exc):
                raise  # unrelated failure: propagate with its original traceback
            last_error = exc
            print("Database is locked, retrying...")
            # No point in waiting after the final attempt
            if attempt < retries:
                time.sleep(delay)

    raise Exception("Max retries reached. Database is still locked.") from last_error

# Table schemas to create in the target databases and the combined database.
# Maps each table name to its CREATE TABLE statement. Every statement uses
# IF NOT EXISTS, so running it against a database that already has the table
# leaves that table untouched.
# This rebinds the `table_schemas` name defined in the notebook's first cell;
# the column lists are the same in both places.
table_schemas = {
    # In every table, `id` is the first column and is auto-assigned by SQLite.
    'CMJ': '''CREATE TABLE IF NOT EXISTS CMJ (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                trial_name TEXT,
                JH_IN REAL,
                Peak_Power REAL,
                Force_Peak_Power REAL,
                Velo_Peak_Power REAL
              )''',
    'DJ': '''CREATE TABLE IF NOT EXISTS DJ (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                trial_name TEXT,
                JH_IN REAL,
                LEWIS_PEAK_POWER REAL,
                CT REAL,
                RSI REAL
              )''',
    # The report cell filters this table with side = 'Left' / side = 'Right'.
    'SLV': '''CREATE TABLE IF NOT EXISTS SLV (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                trial_name TEXT,
                side TEXT,
                JH_IN REAL,
                LEWIS_PEAK_POWER REAL
              )''',
    'NMT': '''CREATE TABLE IF NOT EXISTS NMT (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                trial_name TEXT,
                NUM_TAPS_10s REAL,
                NUM_TAPS_20s REAL,
                NUM_TAPS_30s REAL,
                NUM_TAPS REAL
              )'''
}

# Create every table listed in table_schemas on the given connection
def create_tables(conn):
    """Execute each CREATE TABLE statement from ``table_schemas`` on ``conn`` and commit.

    Parameters
    ----------
    conn : sqlite3.Connection
        Open connection to the database that should contain the tables.
    """
    db_cursor = conn.cursor()
    for table in table_schemas:
        db_cursor.execute(table_schemas[table])
    conn.commit()

# Connection and cursor for the database the rows are copied from
source_conn = sqlite3.connect(source_db_path, timeout=10)
source_cursor = source_conn.cursor()

# One connection per target file, keyed by file name, plus the combined database under 'all'
target_conns = {}
for db_name in target_databases:
    target_path = os.path.join(output_folder, db_name)
    target_conns[db_name] = sqlite3.connect(target_path, timeout=10)
target_conns['all'] = sqlite3.connect(all_data_db_path, timeout=10)

# Make sure each of those databases has all four tables before copying
for conn in target_conns.values():
    create_tables(conn)

# Function to copy data from one table in the source to target databases
def copy_table_data(table_name):
    """Append every row of ``table_name`` from the source database to each target.

    Rows are read through ``source_cursor`` and inserted into every connection
    in ``target_conns``. The first column of each row is dropped before
    insertion; in ``table_schemas`` that column is the auto-assigned ``id``,
    so each target assigns its own ids.

    Rows are appended, never replaced: calling this again for the same table
    inserts the same rows a second time.

    Parameters
    ----------
    table_name : str
        Name of the table to copy. It is interpolated into the SQL text, so it
        must come from a trusted, fixed list of table names.
    """
    source_cursor.execute(f"SELECT * FROM {table_name}")
    rows = source_cursor.fetchall()

    # An empty source table has no first row to inspect and nothing to insert
    if not rows:
        print(f"No rows to copy from {table_name}")
        return

    # Build the statement and the payload once; they are identical for every target.
    # The leading 'id' column is skipped in both the column list and the values.
    column_names = [desc[0] for desc in source_cursor.description][1:]
    placeholders = ", ".join(["?"] * len(column_names))
    query = f"INSERT INTO {table_name} ({', '.join(column_names)}) VALUES ({placeholders})"
    payload = [row[1:] for row in rows]

    # Insert the rows into each target database
    for db_name, conn in target_conns.items():
        cursor = conn.cursor()
        retry_execute(lambda: cursor.executemany(query, payload))
        conn.commit()
        print(f"Copied {len(rows)} rows to {table_name} in {db_name}")

# Tables whose rows are copied, in this order
tables_to_copy = ['CMJ', 'DJ', 'SLV', 'NMT']

for table in tables_to_copy:
    copy_table_data(table)

# Close the source connection first, then every target connection
for conn in (source_conn, *target_conns.values()):
    conn.close()

print("Data successfully copied to each target and combined database.")




Copied 3 rows to CMJ in Athletic_Screen_Pro_data.db
Copied 3 rows to CMJ in all
Copied 3 rows to DJ in Athletic_Screen_Pro_data.db
Copied 3 rows to DJ in all
Copied 6 rows to SLV in Athletic_Screen_Pro_data.db
Copied 6 rows to SLV in all
Copied 1 rows to NMT in Athletic_Screen_Pro_data.db
Copied 1 rows to NMT in all
Data successfully copied to each target and combined database.
