# In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import numpy as np
from scipy import stats

# Set up the report title
report_title = "Performance Report - Ray Cebulski"

# 1. Load & Combine All CSVs from extracted_files
extracted_path = "extracted_files"
# os.listdir() returns entries in arbitrary order; sort the names so the
# combined frame (and its row index) is the same on every run.
csv_files = sorted(f for f in os.listdir(extracted_path) if f.endswith(".csv"))

# Fail before any parsing when there is nothing to load.
if not csv_files:
    raise ValueError("No CSV files found in extracted_files!")

dataframes = []
for file in csv_files:
    df = pd.read_csv(os.path.join(extracted_path, file))
    df["session_file"] = file  # Track which file each row came from
    dataframes.append(df)

# Stack every file's rows into one frame with a fresh 0..n-1 index.
processed_data = pd.concat(dataframes, ignore_index=True)
print("Final Combined Data Shape:", processed_data.shape)

# 2. Normalize Column Names
# Trim surrounding whitespace, then replace every internal whitespace run with "_".
processed_data.columns = (
    processed_data.columns.str.strip().str.replace(r'\s+', '_', regex=True)
)


# 3. Find Correct Columns
def _first_column_containing(keyword):
    """Return the first column whose lowercased name contains *keyword*, or None."""
    for name in processed_data.columns:
        if keyword in name.lower().replace(" ", "_"):
            return name
    return None


time_to_takeoff_col = _first_column_containing("time_to_takeoff")
mrsi_col = _first_column_containing("mrsi")

# Both columns are required; list what is available before failing.
if not (time_to_takeoff_col and mrsi_col):
    print("Available Columns:", processed_data.columns.tolist())
    raise KeyError("One or more required columns ('Time To Takeoff', 'MRSI') not found!")

print(f"Using Columns - Time to Takeoff: {time_to_takeoff_col}, MRSI: {mrsi_col}")

# 4. Filter to the Most Recent 6 Jumps
# Sort ascending so rows from the newest session files come last, then keep the
# final six. (A descending sort followed by tail() keeps the rows from the
# *oldest* files instead.) Mergesort is stable, so rows keep their original
# order within each file. The index is reset so the kept jumps are numbered
# 0..n-1.
# NOTE(review): recency is inferred from the lexical order of the file names —
# confirm the files are named so that this matches chronological order.
processed_data = (
    processed_data.sort_values(by="session_file", ascending=True, kind="mergesort")
    .tail(6)
    .reset_index(drop=True)
)

# Y-axis limits for each metric: two standard deviations either side of the mean.
def _mean_and_std(column):
    """Return (mean, standard deviation) of *column* in processed_data."""
    column_values = processed_data[column]
    return column_values.mean(), column_values.std()


mrsi_mean, mrsi_std = _mean_and_std(mrsi_col)
mrsi_ymin = mrsi_mean - 2 * mrsi_std
mrsi_ymax = mrsi_mean + 2 * mrsi_std

time_mean, time_std = _mean_and_std(time_to_takeoff_col)
time_ymin = time_mean - 2 * time_std
time_ymax = time_mean + 2 * time_std

# Seed NumPy's global random generator so the simulated data drawn below is
# the same on every run.
np.random.seed(42)

# Apply matplotlib's "default" style sheet to the figures created below.
plt.style.use("default")

# Per-metric inputs for the distribution plots, keyed by metric name.
# Each row below is: metric name followed by the values for _metric_fields.
_metric_fields = (
    "team_mean", "team_std", "athlete_value", "unit", "color", "higher_better",
)
_metric_rows = [
    ("CMJ", 45.2, 3.5, 47.8, "cm", "forestgreen", True),
    ("30 Yard Dash", 4.05, 0.22, 3.88, "sec", "royalblue", False),
    ("3RM Squat", 320, 35, 340, "lbs", "darkorange", True),
    ("Predicted 1RM", 370, 40, 395, "lbs", "purple", True),
]

# Row order is preserved, so the metrics are iterated in the order listed above.
histogram_data = {
    metric_name: dict(zip(_metric_fields, metric_values))
    for metric_name, *metric_values in _metric_rows
}

# Create bell curve histograms for each metric
def _plot_metric_distribution(ax, metric, data, show_ylabel, tick_step):
    """Draw one metric's simulated team distribution with the athlete marked.

    Args:
        ax: Matplotlib axes to draw on.
        metric: Display name of the metric (used in the title and x-label).
        data: Mapping with keys ``team_mean``, ``team_std``,
            ``athlete_value``, ``unit``, ``color`` and ``higher_better``.
        show_ylabel: Whether to label the y-axis of this subplot.
        tick_step: Keep every ``tick_step``-th bin edge as an x tick.
    """
    mean = data["team_mean"]
    std = data["team_std"]
    athlete = data["athlete_value"]
    unit = data["unit"]

    # Simulated team sample drawn from NumPy's global random generator.
    team_data = np.random.normal(mean, std, 1000)

    # Percentile of the athlete within the simulated sample; flipped for
    # metrics where a lower value is better.
    percentile = stats.percentileofscore(team_data, athlete)
    if not data["higher_better"]:
        percentile = 100 - percentile

    # 20 bin edges spanning mean ± 3 std.
    bins = np.linspace(mean - 3 * std, mean + 3 * std, 20)
    ax.hist(team_data, bins=bins, density=True, alpha=0.7, color=data["color"])

    # Normal density curve over mean ± 4 std.
    x = np.linspace(mean - 4 * std, mean + 4 * std, 1000)
    ax.plot(x, stats.norm.pdf(x, mean, std), 'purple', linewidth=2, label='PDF')

    # Reference lines for the team mean and the athlete's value.
    ax.axvline(mean, color='orange', linestyle='--', linewidth=2, label='Team Mean')
    ax.axvline(athlete, color='red', linestyle='--', linewidth=2, label='Ray Cebulski')

    ax.set_title(f"Distribution of {metric}", fontsize=14, fontweight='bold')
    if show_ylabel:
        ax.set_ylabel('Probability', fontsize=12)

    # Which side of the team mean the athlete's value falls on.
    if data["higher_better"]:
        performance_text = "above" if athlete > mean else "below"
    else:
        performance_text = "below" if athlete < mean else "above"

    # One decimal hides the detail of small-scale metrics (a 0.22 sec spread
    # would print as 0.2), so use two decimals when the spread is below 1 unit.
    decimals = 2 if std < 1 else 1

    textbox = (
        f"Team μ = {mean:.{decimals}f} {unit}\n"
        f"Team σ = {std:.{decimals}f}\n"
        f"Ray = {athlete:.{decimals}f} {unit}\n"
        f"Percentile = {percentile:.0f}%\n"
        f"{abs(athlete - mean):.{decimals}f} {unit} {performance_text} avg"
    )
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    ax.text(0.05, 0.95, textbox, transform=ax.transAxes,
            fontsize=10, verticalalignment='top', bbox=props)

    ax.legend(loc='upper right')

    # Label only every tick_step-th bin edge to avoid crowding.
    ticks_pos = bins[::tick_step]
    ax.set_xticks(ticks_pos)
    ax.set_xticklabels([f"{val:.{decimals}f}" for val in ticks_pos], rotation=45)

    unit_text = f"({unit})" if unit else ""
    ax.set_xlabel(f"{metric} {unit_text}", fontsize=12)

    ax.grid(True, linestyle='--', alpha=0.7)


fig, axes = plt.subplots(2, 2, figsize=(14, 12))
axes = axes.flatten()

for i, (metric, data) in enumerate(histogram_data.items()):
    _plot_metric_distribution(
        axes[i],
        metric,
        data,
        show_ylabel=(i % 2 == 0),      # y-label on the left-hand column only
        tick_step=4 if i < 2 else 3,   # top row shows fewer ticks than the bottom row
    )

plt.tight_layout()
plt.savefig("performance_histograms.png", dpi=300, bbox_inches='tight')
plt.show()

# Strength lollipop chart: one row per test, a gray ±10 band with the value
# marked on top. The values are placeholders drawn uniformly from [30, 100).
plt.figure(figsize=(10, 8))
strength_metrics = [
    "Shoulder IR - Left",
    "Shoulder IR - Right",
    "Shoulder ER - Left",
    "Shoulder ER - Right",
    "Hip Abduction - Left",
    "Hip Abduction - Right",
    "Hip Adduction - Left",
    "Hip Adduction - Right",
    "Grip - Left",
    "Grip - Right",
]
values = np.random.uniform(30, 100, len(strength_metrics))
y_positions = np.arange(len(strength_metrics))

ax = plt.subplot()
ax.hlines(
    y_positions,
    values - 10,
    values + 10,
    color="gray",
    linewidth=5,
    alpha=0.5,
    label="Team Distribution",
)

for row, (metric_name, force) in enumerate(zip(strength_metrics, values)):
    ax.scatter(
        force,
        y_positions[row],
        color="green" if "Right" in metric_name else "gold",   # right = green, left = gold
        marker='D' if "Shoulder" in metric_name else 'o',      # diamonds for shoulder tests
        s=100,
        edgecolors="black",
        label=metric_name if row < 3 else "",  # only the first three points get a legend label
    )

ax.set_title("Strength Values")
ax.set_xlabel("Force (lbs)")
ax.set_yticks(y_positions)
ax.set_yticklabels(strength_metrics)
ax.grid(axis="x", linestyle="dashed", alpha=0.7)
ax.legend(loc='lower center', bbox_to_anchor=(0.5, -0.15), ncol=4)

plt.tight_layout()
plt.savefig("strength_lollipop.png", dpi=300, bbox_inches='tight')
plt.show()

# MRSI vs. Time to Takeoff: two traces sharing the x-axis, each on its own y-axis.
# The chart is rendered by Plotly, so no matplotlib figure is opened here.

# Number the jumps by position instead of using the DataFrame index: the frame
# has been sorted and sliced, so its index labels need not be 0..n-1 and would
# not line up with integer tick values.
jump_numbers = list(range(len(processed_data)))

fig_mrsi = go.Figure()
fig_mrsi.add_trace(go.Scatter(
    x=jump_numbers,
    y=processed_data[mrsi_col],
    mode="lines+markers",
    name="MRSI",
    line=dict(color="green", shape="spline", width=3),
    fill="tonexty",
    fillcolor="rgba(0, 128, 0, 0.3)",
    yaxis="y1"  # left-hand axis
))
fig_mrsi.add_trace(go.Scatter(
    x=jump_numbers,
    y=processed_data[time_to_takeoff_col],
    mode="lines+markers",
    name="Time to Takeoff",
    line=dict(color="gold", shape="spline", width=3),
    fill="tonexty",
    fillcolor="rgba(255, 215, 0, 0.3)",
    yaxis="y2"  # right-hand axis, overlaid on the first
))
fig_mrsi.update_layout(
    title="MRSI vs. Time to Takeoff – Last 6 Jumps",
    # One tick per plotted jump, however many rows are actually available.
    xaxis=dict(title="Jump #", tickmode="array", tickvals=jump_numbers),
    # Each y-axis spans mean ± 2 std of its metric (limits computed earlier in the script).
    yaxis=dict(title="MRSI", color="green", side="left", range=[mrsi_ymin, mrsi_ymax]),
    yaxis2=dict(title="Time to Takeoff (s)", color="gold", overlaying="y", side="right", range=[time_ymin, time_ymax]),
    template="plotly_white"
)
fig_mrsi.show()