In [None]:
# Install necessary libraries (if not installed)
%pip install plotly pandas ipywidgets

# Import required libraries
import os
import zipfile
import glob
import pandas as pd
import plotly.graph_objects as go
from ipywidgets import DatePicker, Dropdown, Output, HBox, interact, Layout

# Ensure Plotly renders properly in Jupyter Notebook.
# `pio` (plotly's I/O module) was referenced here without ever being imported,
# which raised NameError on a fresh kernel; import it before configuring it.
import plotly.io as pio

pio.renderers.default = "jupyterlab"  # or "colab" or "iframe"

In [None]:
# Define paths
zip_folder = "csv_files/"  # Where ZIP files are located
extract_folder = "extracted_files/"  # Where CSVs will be extracted


def extract_csv_members(zip_path, target_folder):
    """Extract the CSV members of one ZIP archive directly into ``target_folder``.

    Members are written under their base name only (archive sub-folders are
    dropped), because the rest of the notebook looks for CSVs with a
    non-recursive ``*.csv`` glob on the extraction folder. macOS metadata
    entries (``__MACOSX/...`` and ``._*``) are skipped: they carry a ``.csv``
    suffix but are not CSV data.

    Args:
        zip_path: Path of the ZIP archive to read.
        target_folder: Existing directory that receives the CSV files.

    Returns:
        List of the file paths written, in archive order.

    Raises:
        zipfile.BadZipFile: If ``zip_path`` is not a valid ZIP archive.
        OSError: If the archive cannot be read or a file cannot be written.
    """
    written_paths = []
    with zipfile.ZipFile(zip_path, "r") as archive:
        for member in archive.infolist():
            base_name = os.path.basename(member.filename)
            # Directory entries have an empty base name and fail this check too.
            if not base_name.endswith(".csv"):
                continue
            if base_name.startswith("._") or member.filename.startswith("__MACOSX/"):
                continue
            destination = os.path.join(target_folder, base_name)
            with archive.open(member) as source, open(destination, "wb") as sink:
                sink.write(source.read())
            written_paths.append(destination)
    return written_paths


# Ensure the extraction folder exists
os.makedirs(extract_folder, exist_ok=True)

# Find all ZIP files in the ZIP folder
zip_files = glob.glob(os.path.join(zip_folder, "*.zip"))

if not zip_files:
    print("❌ No ZIP files found in 'csv_files/'. Please check the folder.")

for zip_file in zip_files:
    try:
        for extracted_path in extract_csv_members(zip_file, extract_folder):
            print(f"✅ Extracted: {os.path.basename(extracted_path)} from {zip_file} to {extract_folder}")
    except Exception as e:
        # Best effort: one unreadable archive must not stop the remaining ones.
        print(f"❌ Error extracting {zip_file}: {e}")

# Check if any CSV files exist in extracted_files/
csv_files = glob.glob(os.path.join(extract_folder, "*.csv"))
if csv_files:
    print(f"✅ Successfully extracted {len(csv_files)} CSV files to '{extract_folder}'")
else:
    print("⚠️ No CSV files found after extraction. Verify ZIP contents.")


In [None]:
# Load and combine all extracted CSV files.
# glob returns paths in arbitrary, filesystem-dependent order; sorting keeps the
# row order of `combined_df` reproducible across runs and machines.
csv_files = sorted(glob.glob(os.path.join(extract_folder, "*.csv")))

all_data_frames = []

if not csv_files:
    print("❌ No CSV files found in 'extracted_files/'. Ensure ZIPs contain CSVs.")

for file in csv_files:
    try:
        df = pd.read_csv(file, encoding="utf-8-sig")  # utf-8-sig strips a leading BOM
    except pd.errors.EmptyDataError:
        # A file with no header and no rows makes read_csv raise instead of
        # returning an empty frame; report it the same way as an empty CSV.
        print(f"⚠️ Warning: {file} is empty and was skipped.")
        continue
    except Exception as e:
        # Best effort: one unreadable file must not stop the remaining ones.
        print(f"❌ Error loading {file}: {e}")
        continue

    if df.empty:
        print(f"⚠️ Warning: {file} is empty and was skipped.")
    else:
        print(f"✅ Loaded {file} ({df.shape[0]} rows, {df.shape[1]} columns).")
        all_data_frames.append(df)

# Merge all CSVs into a single DataFrame
if all_data_frames:
    combined_df = pd.concat(all_data_frames, ignore_index=True)
    print(f"✅ Merged {len(all_data_frames)} files into `combined_df` ({combined_df.shape[0]} rows).")
else:
    # Keep `combined_df` defined so later cells can test `.empty`.
    combined_df = pd.DataFrame()
    print("⚠️ No valid data loaded. Check extracted CSV files.")

In [None]:
# NOTE: this cell was a verbatim copy of the ZIP-extraction cell earlier in the
# notebook. `zip_folder`, `extract_folder`, `zip_files` and `csv_files` are
# already defined there, so the archives are not unpacked a second time here.


In [None]:
# NOTE: this cell was a verbatim copy of the CSV-loading cell earlier in the
# notebook. `combined_df` is already built there, so the files are not read
# and concatenated a second time here.


In [None]:
if combined_df.empty:
    # Nothing was loaded upstream, so there are no columns to tidy.
    print("⚠️ `combined_df` is empty. Investigate ZIP or CSV issues.")
else:
    # Normalize headers: trim whitespace, lowercase, spaces -> underscores.
    combined_df.columns = (
        combined_df.columns
        .str.strip()
        .str.lower()
        .str.replace(" ", "_", regex=True)
    )

    # Parse 'date'; values that cannot be parsed become NaT ...
    combined_df["date"] = pd.to_datetime(combined_df["date"], errors="coerce")

    # ... and rows left without a usable date are discarded.
    combined_df = combined_df.dropna(subset=["date"])

    print("✅ Column names standardized and date column processed.")


In [None]:
# Define valid columns that can be used for graphing
valid_columns = [
    "time_to_takeoff", "mrsi", "jump_height", "braking_rfd",
    "countermovement_depth", "peak_velocity", "braking_phase",
    "flight_time", "takeoff_velocity", "peak_braking_velocity",
    "propulsive_phase"
]

# Keep only columns that exist in the dataset AND hold numeric data.
# The per-day averaging later uses mean(numeric_only=True), which silently drops
# non-numeric columns; offering such a column in the dropdowns would end in a
# KeyError when it is selected for plotting.
valid_columns = [
    col for col in valid_columns
    if col in combined_df.columns
    and pd.api.types.is_numeric_dtype(combined_df[col])
]

print(f"✅ Available numeric columns: {valid_columns}")

In [None]:
# Dropdown options for athlete selection.
# Missing names are dropped first: sorted() cannot order NaN (a float) against
# strings and would raise TypeError.
athletes = sorted(combined_df["name"].dropna().unique())

# Date pickers for filtering dataset
min_date = combined_df["date"].min()
max_date = combined_df["date"].max()

start_date_picker = DatePicker(
    description="Start Date:",
    # min() is NaT when no dated rows remain; leave the picker unset in that case.
    value=None if pd.isna(min_date) else min_date,
    layout=Layout(width='40%')
)

end_date_picker = DatePicker(
    description="End Date:",
    value=None if pd.isna(max_date) else max_date,
    layout=Layout(width='40%')
)

# Dropdown for athlete selection
athlete_dropdown = Dropdown(
    options=athletes,
    description="Athlete:",
    layout=Layout(width='50%')
)

# Dropdowns for selecting variables.
# The defaults are guarded so the cell still runs when fewer than two plottable
# metrics are available (previously an IndexError).
var1_dropdown = Dropdown(
    options=valid_columns,
    value=valid_columns[0] if valid_columns else None,
    description="Variable 1:",
    layout=Layout(width='50%')
)

var2_dropdown = Dropdown(
    options=valid_columns,
    # Second metric when there is one, otherwise fall back to the first.
    value=valid_columns[min(1, len(valid_columns) - 1)] if valid_columns else None,
    description="Variable 2:",
    layout=Layout(width='50%')
)

# Output widget for displaying the graph
output = Output()

In [None]:
# Function to process the data (filter top 2 jumps and average them)
def process_top_jumps(data):
    """Average the two highest jumps of each calendar day.

    Rows are grouped by the calendar day of ``date``. If ``date`` carries a
    time of day, grouping on the raw value would put every jump in its own
    group, so the value is normalized to midnight first. Date-only input gives
    the same result as before. The input frame is not modified.

    Args:
        data: DataFrame with a ``date`` column (datetime-like) and a
            ``jump_height`` column, plus any other metric columns.

    Returns:
        DataFrame with one row per day, sorted by ``date``, holding the mean of
        every numeric column over that day's top two jumps by ``jump_height``.
        An empty DataFrame is returned when ``data`` is empty.
    """
    if data.empty:
        return pd.DataFrame()

    # assign() works on a copy, so the caller's frame keeps its original dates.
    daily = data.assign(date=pd.to_datetime(data["date"]).dt.normalize())

    # Get top 2 jumps per day; a stable sort keeps tie-breaking deterministic.
    top_jumps = (
        daily.sort_values(by="jump_height", ascending=False, kind="mergesort")
        .groupby("date")
        .head(2)
    )

    # Compute the mean for numeric columns
    processed_data = top_jumps.groupby("date", as_index=False).mean(numeric_only=True)
    return processed_data

In [None]:
# Function to update the Plotly graph with smooth curves & shading
def _to_day(value):
    """Convert a picker value to a midnight Timestamp; NaT when it is unset."""
    stamp = pd.to_datetime(value)
    return pd.NaT if pd.isna(stamp) else stamp.normalize()


def _axis_range(values):
    """Return ``[low, high]`` for a y-axis, or None to let Plotly autorange."""
    lowest, highest = values.min(), values.max()
    if pd.isna(lowest) or pd.isna(highest):
        # No usable values at all: a NaN bound is not a valid range.
        return None

    lower = values.mean() - 2 * values.std()
    if pd.isna(lower):
        # std() is NaN for a single point; keep 0 as the floor for
        # non-negative data, otherwise sit just below the point.
        lower = 0 if lowest >= 0 else lowest - 0.01
    elif lowest >= 0:
        # Only clamp at 0 when the data is non-negative. Clamping a series that
        # contains negative values can put the lower bound above the upper one.
        lower = max(0, lower)
    return [lower, highest + 0.01]


def update_graph(athlete, var1, var2, start_date, end_date):
    """Redraw the two-metric chart for one athlete inside the ``output`` widget.

    Args:
        athlete: Value matched against the ``name`` column of ``combined_df``.
        var1: Column plotted in red on the left y-axis.
        var2: Column plotted in yellow on the right y-axis.
        start_date: First day to include (inclusive); anything
            ``pd.to_datetime`` accepts, or None.
        end_date: Last day to include (inclusive); same accepted types.

    Returns:
        None. The figure, or a message when nothing matches, is rendered in
        ``output``.
    """
    with output:
        output.clear_output(wait=True)

        # Compare whole days so that rows on the end date are kept even when
        # the 'date' column carries a time of day.
        start_day = _to_day(start_date)
        end_day = _to_day(end_date)
        jump_day = combined_df["date"].dt.normalize()

        # Filter data for the selected athlete and date range
        athlete_data = combined_df[
            (combined_df["name"] == athlete) &
            (jump_day >= start_day) &
            (jump_day <= end_day)
        ]

        if athlete_data.empty:
            print("❌ No data available for the selected range.")
            return

        # Process the top 2 jumps per day
        athlete_data = process_top_jumps(athlete_data)

        if athlete_data.empty:
            print("❌ No valid data after processing top 2 jumps per day.")
            return

        # Set y-axis ranges independently
        var1_range = _axis_range(athlete_data[var1])
        var2_range = _axis_range(athlete_data[var2])

        # Create the figure
        fig = go.Figure()

        # Add trace for Variable 1 (Left y-axis) - RED
        fig.add_trace(go.Scatter(
            x=athlete_data["date"],
            y=athlete_data[var1],
            mode='lines+markers',
            name=f"{var1}",
            line=dict(color='red', shape='spline', width=3),
            fill='tonexty',
            fillcolor='rgba(255, 0, 0, 0.3)'  # Red shading
        ))

        # Add trace for Variable 2 (Right y-axis) - YELLOW
        fig.add_trace(go.Scatter(
            x=athlete_data["date"],
            y=athlete_data[var2],
            mode='lines+markers',
            name=f"{var2}",
            line=dict(color='yellow', shape='spline', width=3),
            fill='tonexty',
            fillcolor='rgba(255, 255, 0, 0.3)',  # Yellow shading
            yaxis="y2"
        ))

        # Set layout with black background and red/yellow styling.
        # Axis title colours go through title.font: the legacy `titlefont`
        # attribute was removed in Plotly 6 and is rejected as invalid there.
        fig.update_layout(
            title=f"Performance Over Time - {athlete}",
            xaxis=dict(title="Date", tickformat="%m-%d", gridcolor="gray", color="white"),
            yaxis=dict(
                title=dict(text=f"{var1}", font=dict(color="red")),
                tickfont=dict(color="red"),
                gridcolor="gray",
                range=var1_range
            ),
            yaxis2=dict(
                title=dict(text=f"{var2}", font=dict(color="yellow")),
                tickfont=dict(color="yellow"),
                overlaying="y",
                side="right",
                gridcolor="gray",
                range=var2_range
            ),
            paper_bgcolor="black",
            plot_bgcolor="black",
            template="plotly_dark",
            showlegend=True
        )

        # Show the graph
        fig.show()

In [None]:
# Bind each control to the matching update_graph parameter; interact calls the
# function once right away and again whenever one of the widgets changes.
interact(
    update_graph,
    **{
        "athlete": athlete_dropdown,
        "var1": var1_dropdown,
        "var2": var2_dropdown,
        "start_date": start_date_picker,
        "end_date": end_date_picker,
    }
)

# The chart is drawn into this Output widget, so it has to be shown as well.
display(output)