# Demo Notebook that Runs an Exploratory Data Analysis GUI for Movement by Movement metrics

__[CoLab Notebook Link](https://githubtocolab.com/RebootMotion/reboot-toolkit/blob/main/examples/RebootMotionAnalysisGUI.ipynb)__

Run the cells in order, and enter your AWS credentials when the AWS Credentials cell prompts for them.

In [None]:
#@title Install Python Package

# Install the Reboot Motion toolkit from GitHub, pinned to the v2.10.2 tag.
# Only pip's stdout is redirected to /dev/null (to keep the cell output short);
# anything pip writes to stderr is still shown.
!pip install git+https://github.com/RebootMotion/reboot-toolkit.git@v2.10.2#egg=reboot_toolkit > /dev/null
# NOTE: this echo is a separate command, so it prints even if the install above failed.
!echo "Done Installing"

In [None]:
#@title Install Visualization Packages

# pygwalker provides the interactive analysis GUI used by the `pyg.walk(...)` cells.
# `--upgrade --pre` lets pip pick the newest release, including pre-releases.
# NOTE(review): neither package is version-pinned, so the installed versions can
# change between runs - consider pinning once known-good versions are identified.
!pip install "pygwalker[notebook]" --upgrade --pre
# sweetviz is used by the comparison report cell further down.
!pip install sweetviz

In [None]:
#@title Import Python Libraries

import reboot_toolkit as rtk
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pygwalker as pyg

from reboot_toolkit import S3Metadata, MocapType, MovementType, Handedness, FileType, PlayerMetadata
from IPython.display import display

In [None]:
#@title AWS Credentials

# Upload your Organization's .env file to the local file system, per https://pypi.org/project/python-dotenv/
# OR input your credentials string generated by the Reboot Dashboard

# NOTE(review): later cells read os.environ['ORG_ID']; presumably the .env file or
# rtk.setup_aws() populates it - confirm against the reboot_toolkit documentation.
boto3_session = rtk.setup_aws()

In [None]:
#@title User Input - No code changes required below this section, just enter information in forms

# Edit the values in this cell to describe the data you want to analyze.
# Typical adjustments:
#
#   * Both Hawk-Eye HFR data from the Stats API and Hawk-Eye Action files (e.g. from the DSP):
#       mocap_types=[MocapType.HAWKEYE_HFR, MocapType.HAWKEYE]
#   * Baseball hitting instead of pitching:
#       movement_type=MovementType.BASEBALL_HITTING
#   * Right-handed players:
#       handedness=Handedness.RIGHT
#   * Data from the momentum and energy files:
#       file_type=FileType.MOMENTUM_ENERGY
#
# See https://docs.rebootmotion.com/ for all available file types and the data in each
mocap_types = [MocapType.HAWKEYE, MocapType.HAWKEYE_HFR]
movement_type = MovementType.BASEBALL_PITCHING
handedness = Handedness.RIGHT
file_type = FileType.METRICS_BASEBALL_PITCHING_V_ALL

# Labels shown in the visuals for each data segment; change them to anything you like
primary_segment_label = 'Primary Segment'
comparison_segment_label = 'Comparison Segment'

# When True, later cells add Stats API columns (like pitch_type and start_speed) to the data
add_stats_api = True  # True or False

# Echo the choice back so it is visible in the cell output
print(
    "Will add data from the Stats API like velo and pitch type"
    if add_stats_api
    else "Will NOT add data from the Stats API like velo and pitch type (set to True above if needed)"
)

In [None]:
#@title Set S3 File Info

# The mocap types, movement type, handedness and file type all come from the
# User Input cell; change them there, for example:
#   * mocap_types=[MocapType.HAWKEYE_HFR, MocapType.HAWKEYE] to analyze both Hawk-Eye HFR
#     data from the Stats API and Hawk-Eye Action files (e.g. from the DSP)
#   * movement_type=MovementType.BASEBALL_HITTING to analyze baseball hitting
#   * handedness=Handedness.RIGHT to analyze right-handed players
#
# See https://docs.rebootmotion.com/ for all available file types and the data in each

# Bundle the selections with the organization id taken from the environment
s3_metadata = S3Metadata(
    file_type=file_type,
    handedness=handedness,
    movement_type=movement_type,
    mocap_types=mocap_types,
    org_id=os.environ['ORG_ID'],
)

# Summary table of the S3 files matching the metadata; later cells filter it per segment
s3_df = rtk.download_s3_summary_df(s3_metadata)

In [None]:
#@title Optional Look Up Player ID by Name

# Name to search for in the S3 summary table
name_to_look_up = "Jacob deGrom"

# The call is the last expression in the cell, so its return value is shown as the cell output.
# NOTE(review): match_threshold and max_results presumably set the minimum match score
# and the maximum number of matches returned - confirm in the reboot_toolkit docs.
rtk.find_player_matches(s3_df, name_to_look_up, match_threshold=50., max_results=5)

In [None]:
#@title Display the Interface for Selecting the Primary Data Segment to Analyze

# Build the selection widget from the S3 summary table and show it.
# Make your selections in the widget before running the next cell, which reads
# them back from `primary_segment_widget.children[1].result`.
primary_segment_widget = rtk.create_interactive_widget(s3_df)
display(primary_segment_widget)

In [None]:
#@title Set Primary Analysis Segment Info

# Read back the selections made in the primary segment widget.
# NOTE(review): assumes children[1] of the widget holds the `.result` dict with the
# keys used below - confirm against rtk.create_interactive_widget.
primary_segment_data = primary_segment_widget.children[1].result
primary_analysis_segment = PlayerMetadata(
    org_player_ids=primary_segment_data["org_player_ids"],
    session_dates=primary_segment_data["session_dates"],
    session_nums=primary_segment_data["session_nums"],
    session_date_start=primary_segment_data["session_date_start"],
    session_date_end=primary_segment_data["session_date_end"],
    year=primary_segment_data["year"],
    org_movement_id=None, # set the play GUID for the skeleton animation; None defaults to the first play
    s3_metadata=s3_metadata,
)

# Keep only the rows of the S3 summary that match the selected segment
primary_segment_summary_df = rtk.filter_s3_summary_df(primary_analysis_segment, s3_df)

# Add Movement Num and S3 Key to Primary DataFrame to Enable Sorting
prim_available_s3_keys = rtk.list_available_s3_keys(os.environ['ORG_ID'], primary_segment_summary_df)
primary_segment_data_df = rtk.load_games_to_df_from_s3_paths(primary_segment_summary_df['s3_path_delivery'].tolist())
# Sort chronologically, exactly as the comparison segment cell does, so both
# segments are ordered the same way wherever they are displayed or analyzed.
primary_segment_data_df = rtk.merge_data_df_with_s3_keys(
    primary_segment_data_df, prim_available_s3_keys
).sort_values(by=['session_date', 'movement_num'])

if add_stats_api:
    print('Adding Stats API data (like pitch speed) to the data df...')
    primary_segment_data_df = rtk.decorate_primary_segment_df_with_stats_api(primary_segment_data_df)
    # List the pitch types so they can be used in the optional filter cell
    print("Available Pitch Types:")
    print(primary_segment_data_df['pitch_type'].unique())

In [None]:
#@title Optional: After adding the Stats API data, uncomment below to filter the data

# # FILTER BY PITCH TYPES
# pitch_types = {'Four-Seam Fastball', 'Curveball'}  # list the pitch types you want to include
# primary_segment_data_df = primary_segment_data_df.loc[
#     primary_segment_data_df['pitch_type'].isin(pitch_types)
# ].copy().reset_index(drop=True)

# # FILTER BY A VELO RANGE
# velo_lo = 90
# velo_hi = 100
# primary_segment_data_df = primary_segment_data_df[
#     (primary_segment_data_df["start_speed"] >= velo_lo) & (primary_segment_data_df["start_speed"] <= velo_hi)
# ].copy().reset_index(drop=True)

# # Uncomment to print number of rows returned by filters
# print('Num available rows:', len(primary_segment_data_df))

In [None]:
#@title Run this Cell to Display a Data Analysis GUI

# To update the domain of an axis:
# 1. Click the Debugging Wrench Icon in the GUI
# 2. Click the three dots that appear next to the Chart and then Open in Vega Editor
# 3. Scroll down to the x and y-axis definitions, and update using the syntax below
#     x: {"scale": {"domain": [85, 100]}}

# Open the interactive pygwalker explorer on the primary segment data
pyg.walk(primary_segment_data_df)

In [None]:
#@title Optional: Uncomment and Run this Cell to Automatically Create Interactive HTML Plots in a Local Folder
#!pip install autoviz
#
# from autoviz import AutoViz_Class
# 
# AV = AutoViz_Class()
# 
# dependent_variable = 'pitch_hand_proj_max'
# 
# dft = AV.AutoViz(
#     "",
#     sep=",",
#     depVar=dependent_variable,
#     dfte=primary_segment_data_df.select_dtypes('number'),
#     header=0,
#     verbose=1,
#     lowess=True,
#     chart_format="html",
#     max_rows_analyzed=150000,
#     max_cols_analyzed=10,
#     save_plot_dir=None
# )

In [None]:
#@title Optional: Uncomment to analyze correlations with a dependent variable

# pitch_type_of_interest = 'Four-Seam Fastball'
# 
# dependent_variable = 'start_speed'
# 
# correlation_threshold = 0.7
# 
# primary_segment_data_df[dependent_variable] = primary_segment_data_df[dependent_variable].astype(float)
# 
# filtered_df = primary_segment_data_df.loc[primary_segment_data_df['pitch_type'] == pitch_type_of_interest].copy().reset_index(drop=True)
# 
# correlations = filtered_df.corrwith(filtered_df[dependent_variable], numeric_only=True)
# 
# hi_corrs = correlations.loc[correlations >= correlation_threshold]
# 
# print('Correlations with', dependent_variable, 'above threshold...')
# print()
# print(hi_corrs)
# print()
# 
# for hi_corr in hi_corrs.index.tolist():
#     if not hi_corr.startswith('.') and hi_corr != dependent_variable:
#         corr_fig = plt.figure()
#         
#         plt.plot(filtered_df[hi_corr], filtered_df[dependent_variable], 'o')
#         
#         plt.plot(
#             np.unique(filtered_df[hi_corr]), 
#             np.poly1d(np.polyfit(filtered_df[hi_corr], filtered_df[dependent_variable], 1))(np.unique(filtered_df[hi_corr])), 
#             linewidth=3
#         )
#         
#         plt.title(f"{dependent_variable} vs {hi_corr}")
#         
#         plt.xlabel(hi_corr)
#         plt.ylabel(dependent_variable)
#         plt.grid()
#         corr_fig.show()

In [None]:
#@title Display the Interface for Selecting the Comparison Data Segment to Analyze

# Build a second selection widget from the same S3 summary table and show it.
# Make your selections in the widget before running the next cell, which reads
# them back from `comparison_segment_widget.children[1].result`.
comparison_segment_widget = rtk.create_interactive_widget(s3_df)
display(comparison_segment_widget)

In [None]:
#@title Set Comparison Analysis Segment Inputs

# Selections made in the comparison segment widget.
# NOTE(review): assumes children[1] of the widget holds the `.result` dict with the
# keys used below - confirm against rtk.create_interactive_widget.
comparison_segment_data = comparison_segment_widget.children[1].result

# The comparison segment reuses the same S3 metadata as the primary segment
comparison_s3_metadata = s3_metadata

comparison_analysis_segment = PlayerMetadata(
    # Copy the widget selections straight through, one keyword per selection
    **{
        field: comparison_segment_data[field]
        for field in (
            "org_player_ids",
            "session_dates",
            "session_nums",
            "session_date_start",
            "session_date_end",
            "year",
        )
    },
    org_movement_id=None,  # set the play GUID for the skeleton animation; None defaults to the first play
    s3_metadata=comparison_s3_metadata,
)

# Keep only the rows of the S3 summary that match the selected segment
comparison_segment_summary_df = rtk.filter_s3_summary_df(comparison_analysis_segment, s3_df)

# Load the segment data, attach Movement Num and S3 Key, then sort chronologically
comp_available_s3_keys = rtk.list_available_s3_keys(os.environ['ORG_ID'], comparison_segment_summary_df)
comparison_segment_data_df = rtk.load_games_to_df_from_s3_paths(
    comparison_segment_summary_df['s3_path_delivery'].tolist()
)
comparison_segment_data_df = rtk.merge_data_df_with_s3_keys(
    comparison_segment_data_df, comp_available_s3_keys
)
comparison_segment_data_df = comparison_segment_data_df.sort_values(by=['session_date', 'movement_num'])

if add_stats_api:
    print('Adding Stats API data (like pitch speed) to the data df...')
    comparison_segment_data_df = rtk.decorate_primary_segment_df_with_stats_api(comparison_segment_data_df)
    # List the pitch types so they can be used in the optional filter cell
    print("Available Pitch Types:")
    print(comparison_segment_data_df['pitch_type'].unique())

In [None]:
#@title Optional: After adding the Stats API data, uncomment below to filter the data

# # FILTER BY PITCH TYPES
# pitch_types = {'Four-Seam Fastball', 'Curveball'}  # list the pitch types you want to include
# comparison_segment_data_df = comparison_segment_data_df.loc[
#     comparison_segment_data_df['pitch_type'].isin(pitch_types)
# ].copy().reset_index(drop=True)

# # FILTER BY A VELO RANGE
# velo_lo = 90
# velo_hi = 100
# comparison_segment_data_df = comparison_segment_data_df[
#     (comparison_segment_data_df["start_speed"] >= velo_lo) & (comparison_segment_data_df["start_speed"] <= velo_hi)
# ].copy().reset_index(drop=True)

# # Uncomment to print number of rows returned by filters
# print('Num available rows:', len(comparison_segment_data_df))

In [None]:
#@title Run this Cell to Automatically Create A Comparison Analysis Report in a Local Folder
import sweetviz as sv

# Column the report treats as the target feature
target_feature = 'start_speed'

# Only pitches of this type are compared
pitch_type_of_interest = 'Four-Seam Fastball'

# This cell filters on 'pitch_type' and targets 'start_speed'; per the User Input cell
# those columns come from the Stats API, so fail early with an actionable message
# instead of a bare pandas KeyError when they were not added.
for segment_df, segment_label in (
    (comparison_segment_data_df, comparison_segment_label),
    (primary_segment_data_df, primary_segment_label),
):
    missing_columns = {'pitch_type', target_feature} - set(segment_df.columns)
    if missing_columns:
        raise KeyError(
            f"'{segment_label}' data is missing column(s) {sorted(missing_columns)}; "
            "set add_stats_api = True in the User Input cell and re-run the segment cells"
        )

filtered_comparison_df = comparison_segment_data_df.loc[comparison_segment_data_df['pitch_type'] == pitch_type_of_interest].copy().reset_index(drop=True)
filtered_primary_df = primary_segment_data_df.loc[primary_segment_data_df['pitch_type'] == pitch_type_of_interest].copy().reset_index(drop=True)

# A segment without any matching pitches cannot be compared; say so explicitly
for segment_df, segment_label in (
    (filtered_comparison_df, comparison_segment_label),
    (filtered_primary_df, primary_segment_label),
):
    if segment_df.empty:
        raise ValueError(
            f"'{segment_label}' has no '{pitch_type_of_interest}' pitches to compare; "
            "choose a pitch type listed under 'Available Pitch Types' for both segments"
        )

comparison_report = sv.compare(
    (filtered_comparison_df, comparison_segment_label),
    (filtered_primary_df, primary_segment_label),
    target_feat=target_feature
)

# To compare all pitch types instead, use the unfiltered data:
# comparison_report = sv.compare(
#     (comparison_segment_data_df, comparison_segment_label),
#     (primary_segment_data_df, primary_segment_label),
#     target_feat=target_feature
# )

# Write the report as HTML
comparison_report.show_html()

In [None]:
#@title Concatenate the Comparison and Primary Data, so they can be analyzed together. No user changes required

# Tag each row with the label of the segment it came from, stack both segments,
# and order the result by session date and movement number with a fresh 0..n-1 index.
# `.assign` tags copies, so the per-segment DataFrames are left untouched and the
# earlier cells that use them give the same result when re-run.
analysis_df = (
    pd.concat(
        [
            comparison_segment_data_df.assign(segment_type=comparison_segment_label),
            primary_segment_data_df.assign(segment_type=primary_segment_label),
        ]
    )
    .sort_values(by=['session_date', 'movement_num'])
    .reset_index(drop=True)
)

In [None]:
#@title Run this Cell to Display a Data Analysis GUI

# Open the interactive pygwalker explorer on the combined data; the 'segment_type'
# column added in the previous cell tells the primary and comparison rows apart.
pyg.walk(analysis_df)

In [None]:
#@title Analyze Movement by Movement Metrics Over Time

# Parse the session dates so they can be formatted uniformly for the date markers below
analysis_df['session_date'] = pd.to_datetime(analysis_df['session_date'])

# To analyze momentum metrics, use these parameters...
body_segments = ['rear_leg', 'lead_leg', 'torso', 'pitch_up_arm', 'pitch_forearm', 'pitch_hand']
metric_categories = ['proj_max']  # 'vert_ang', 'proj_norm_time', 'side_max', 'side_min', 'side_max_percent', 'side_min_percent'
metrics = [f"{body_seg}_{metric_cat}" for body_seg in body_segments for metric_cat in metric_categories]

# # To analyze joint angle metrics, use these parameters...
# body_joints = ['rear_hip_ir', 'lead_knee_flex', 'lumbar_ext', 'spine_rot', 'pitch_shoulder_horz', 'pitch_shoulder_rot', 'pitch_elbow_flex']
# metric_categories = ['range_full_min', 'range_full_max', 'range_min', 'range_max', 'range_norm']
# metrics = [f"{body_joint}_{metric_cat}" for body_joint in body_joints for metric_cat in metric_categories]

# # To analyze momentum transfer metrics, use these parameters...
# metrics = ['lower_half_to_torso_proj', 'torso_to_up_arm_proj', 'up_arm_to_pitch_hand_proj']

comparison_color = 'green'
primary_color = 'red'

# set the window within which the rolling average will be calculated
rolling_window = 5


def _rolling_band(values, window):
    """Return (mean, mean - std, mean + std) of ``values`` over a rolling ``window``.

    The first ``window - 1`` entries of each returned Series are NaN because the
    rolling window is not yet full there.
    """
    rolling = values.rolling(window=window)
    center = rolling.mean()
    spread = rolling.std()
    return center, center - spread, center + spread


# One figure per metric; the next cell saves these to a PDF
figs = []

# The x-axis is the row position in the combined, date-sorted analysis_df,
# so both segments share one axis.
comparison = analysis_df.loc[analysis_df['segment_type'] == comparison_segment_label]
x_comp = comparison.index.tolist()

primary = analysis_df.loc[analysis_df['segment_type'] == primary_segment_label]
x_prim = primary.index.tolist()

# Map the first row index of each session day to its 'YYYY-MM-DD' label.
# Working on the formatted day strings (rather than comparing datetimes to strings)
# also handles timestamps that are not exactly midnight; rows without a date are skipped.
session_days = analysis_df['session_date'].dt.strftime('%Y-%m-%d').dropna()
date_indexes = {row_idx: day for row_idx, day in session_days.drop_duplicates().items()}

for metric in metrics:

    y_comp, y_comp_lo, y_comp_hi = _rolling_band(comparison[metric], rolling_window)
    y_prim, y_prim_lo, y_prim_hi = _rolling_band(primary[metric], rolling_window)

    fig, ax = plt.subplots()

    # Shaded band = rolling mean +/- one rolling standard deviation.
    # Legend entries use the labels configured in the User Input cell.
    ax.fill_between(x_comp, y_comp_lo, y_comp_hi, color=comparison_color, alpha=0.2)
    ax.plot(x_comp, y_comp, color=comparison_color, label=comparison_segment_label)

    ax.fill_between(x_prim, y_prim_lo, y_prim_hi, color=primary_color, alpha=0.2)
    ax.plot(x_prim, y_prim, color=primary_color, label=primary_segment_label)

    lo_edges = np.concatenate([y_comp_lo.to_numpy(dtype=float), y_prim_lo.to_numpy(dtype=float)])
    hi_edges = np.concatenate([y_comp_hi.to_numpy(dtype=float), y_prim_hi.to_numpy(dtype=float)])

    # The bands are all-NaN when a segment has fewer rows than the rolling window;
    # only draw the date markers when there is a vertical range to span.
    if (~np.isnan(lo_edges)).any() and (~np.isnan(hi_edges)).any():
        min_val = np.nanmin(lo_edges)
        max_val = np.nanmax(hi_edges)

        # Vertical line plus rotated date label at the first pitch of each session day
        ax.vlines(x=list(date_indexes), ymin=min_val, ymax=max_val, color='black')

        for k, v in date_indexes.items():
            ax.text(k, max_val, v, rotation=-90, verticalalignment='top')

    ax.set_ylabel('metric value')
    ax.set_xlabel('season pitch count')
    ax.set_title(metric)
    ax.legend()

    ax.grid()

    figs.append(fig)

    plt.show()

In [None]:
from matplotlib.backends.backend_pdf import PdfPages

# Output file for the figures collected in `figs` by the previous cell (one page per figure)
pdf_file_name = 'analysis_pitch_by_pitch.pdf'

# The context manager closes (finalizes) the PDF even if saving a figure raises
with PdfPages(pdf_file_name) as pdf_analysis:
    for fig in figs:
        pdf_analysis.savefig(fig)

print('Saved plots to', pdf_file_name)