# Demo Notebook for Creating an Analysis of Reboot Motion Time Series Data

__[CoLab Notebook Link](https://githubtocolab.com/RebootMotion/reboot-toolkit/blob/main/examples/RebootMotionAnalysis.ipynb)__

Run the cells in order, and enter your AWS credentials in the credentials cell when prompted.

In [None]:
#@title Install Python Package

!pip install git+https://github.com/RebootMotion/reboot-toolkit.git@v2.10.4#egg=reboot_toolkit > /dev/null
!echo "Done Installing"

In [None]:
#@title Import Python Libraries

import awswrangler as wr
import reboot_toolkit as rtk
import matplotlib.pyplot as plt
import os
import pandas as pd

from reboot_toolkit import S3Metadata, MocapType, MovementType, Handedness, FileType, PlayerMetadata
from IPython.display import display

In [None]:
#@title AWS Credentials

# Upload your Organization's .env file to the local file system, per https://pypi.org/project/python-dotenv/
# OR input your credentials string generated by the Reboot Dashboard

# Keep the returned session: later cells pass it to rtk.get_animation and rtk.get_joint_plot.
boto3_session = rtk.setup_aws()

In [None]:
#@title User Input - No code changes required below this section, just enter information in forms

# Update the below info to match your desired analysis information
# Common changes you might want to make:

# To analyze both Hawk-Eye HFR data from the Stats API,
# and also Hawk-Eye Action files (e.g. from the DSP),
#  set mocap_types=[MocapType.HAWKEYE_HFR, MocapType.HAWKEYE]

# To analyze baseball-hitting,
# set movement_type=MovementType.BASEBALL_HITTING

# To analyze right-handed players,
# set handedness=Handedness.RIGHT

# To analyze data from the momentum and energy files,
# set file_type=FileType.MOMENTUM_ENERGY

# See https://docs.rebootmotion.com/ for all available file types and the data in each
# Data selection: these four values are passed to S3Metadata in the "Set S3 File Info" cell
mocap_types = [MocapType.HAWKEYE_HFR, MocapType.HAWKEYE]
movement_type = MovementType.BASEBALL_PITCHING
handedness = Handedness.LEFT
file_type = FileType.INVERSE_KINEMATICS

# Labels shown in the visuals for each segment; change them to anything you like
primary_segment_label = 'Primary Segment'
comparison_segment_label = 'Comparison Segment'

# Switch for adding Stats API columns (e.g. pitch_type, start_speed) when segments are loaded
add_stats_api = False  # True or False

# Echo the choice so it is obvious which mode the following cells will run in
print(
    "Will add data from the Stats API like velo and pitch type"
    if add_stats_api
    else "Will NOT add data from the Stats API like velo and pitch type (set to True above if needed)"
)

In [None]:
#@title Set S3 File Info

# Bundle the selections from the User Input cell into one metadata object.
# ORG_ID must already be present in the environment; os.environ[...] raises KeyError otherwise
# (presumably it is populated by the credentials step above - confirm).
s3_metadata = S3Metadata(
    org_id=os.environ['ORG_ID'],
    mocap_types=mocap_types,
    movement_type=movement_type,
    handedness=handedness,
    file_type=file_type,
)

# Summary table for the selection above; later cells use it for the player lookup,
# the selection widgets, and the per-segment filtering.
s3_df = rtk.download_s3_summary_df(s3_metadata)

In [None]:
#@title Optional Look Up Player ID by Name

# Name to search for in the S3 summary downloaded above
name_to_look_up = "Jacob deGrom"

# The call is the last expression in the cell, so the notebook displays its result.
# NOTE(review): match_threshold looks like a minimum match score and max_results a cap
# on the number of matches returned - confirm against the reboot-toolkit docs.
rtk.find_player_matches(s3_df, name_to_look_up, match_threshold=50., max_results=5)

In [None]:
#@title Display the Interface for Selecting the Primary Data Segment to Analyze

# Run this cell to display the dropdown menus and reset all options to NULL
# The widget is kept in a variable because the next cell reads the selections back from it.
primary_segment_widget = rtk.create_interactive_widget(s3_df)
display(primary_segment_widget)

In [None]:
#@title Set Primary Analysis Segment Info

# Read the selections back from the primary segment widget displayed in the previous cell
primary_segment_data = primary_segment_widget.children[1].result
primary_analysis_segment = PlayerMetadata(
    org_player_ids=primary_segment_data["org_player_ids"],
    session_dates=primary_segment_data["session_dates"],
    session_nums=primary_segment_data["session_nums"],
    session_date_start=primary_segment_data["session_date_start"],
    session_date_end=primary_segment_data["session_date_end"],
    year=primary_segment_data["year"],
    org_movement_id=None, # set the play GUID for the skeleton animation; None defaults to the first play
    s3_metadata=s3_metadata,
)

# Narrow the full S3 summary down to the selected primary segment
primary_segment_summary_df = rtk.filter_s3_summary_df(primary_analysis_segment, s3_df)

# Common Issue:
# If no data files are returned here,
# check that the segment selection widget and the S3 File Info above are set correctly,
# also if the cells were updated after running once, check that they were run again with any new selections.

# List all Available S3 data for the Primary Analysis Segment
# (available_s3_keys is informational only: no later cell in this notebook reads it)
available_s3_keys = rtk.list_available_s3_keys(os.environ['ORG_ID'], primary_segment_summary_df)
# Load the files for the Primary Analysis Segment into a DataFrame
# (the analysis dict with the mean and standard deviation data is built later,
# in the "Load data into format for processing" cell)
primary_segment_data_df = rtk.load_games_to_df_from_s3_paths(primary_segment_summary_df['s3_path_delivery'].tolist())

# Only runs when add_stats_api was set to True in the User Input cell
if add_stats_api:
    print('Adding data from the Stats API...')
    primary_segment_data_df = rtk.decorate_primary_segment_df_with_stats_api(primary_segment_data_df)
    print("Available Pitch Types")
    print(primary_segment_data_df['pitch_type'].unique())

# Common Issue:
# Missing element: data integrity issues in parsing. Majority of data is still likely fine.
# Common metrics to filter by are: start_speed, end_speed, spin_rate, spin_direction, zone, pitch_type

In [None]:
#@title Optional: After adding the Stats API data, uncomment below to filter the data

# # FILTER BY PITCH TYPES
# pitch_types = {'Four-Seam Fastball', 'Curveball'}  # list the pitch types you want to include
# primary_segment_data_df = primary_segment_data_df.loc[
#     primary_segment_data_df['pitch_type'].isin(pitch_types)
# ].copy().reset_index(drop=True)

# # FILTER BY A VELO RANGE
# velo_lo = 90
# velo_hi = 100
# primary_segment_data_df = primary_segment_data_df[
#     (primary_segment_data_df["start_speed"] >= velo_lo) & (primary_segment_data_df["start_speed"] <= velo_hi)
# ].copy().reset_index(drop=True)

# # Uncomment to print number of rows returned by filters
# print('Num available rows:', len(primary_segment_data_df))

In [None]:
#@title Optional: Modify below to add / filter using the MLB Stats API and / or Custom Metadata

# If you do not want to add custom metadata, skip this cell or run it without modification.

# Otherwise, if you do want to add custom metadata...
# 1) Upload a CSV with a pitch ID column that can pair with the MoCap data
# * Note that the play ID with the MoCap data is the play GUID from the Stats API *
# 2) Type the filename for the uploaded CSV
custom_metadata_filename = ''
# 3) Set the name of the column with the pitch ID for merging
metadata_play_id_name = 'play_id'

# To filter on the custom metadata, edit the column name and the values to keep
col_for_filtering = 'pitch_type'  # name of column for filtering
col_values_to_filter_on = ['FA']  # list column vals to filter on

# Surrounding whitespace is ignored; a blank filename means "no custom metadata"
custom_metadata_path = custom_metadata_filename.strip()

if not custom_metadata_path:
    # Leave a None marker so the comparison segment cell knows to skip its own filtering
    custom_metadata_df = None

else:
    print('Adding and filtering with custom metadata...')
    custom_metadata_df = pd.read_csv(custom_metadata_path)

    primary_segment_data_df = rtk.filter_df_on_custom_metadata(
        primary_segment_data_df,
        custom_metadata_df,
        metadata_play_id_name,
        metadata_col=col_for_filtering,
        vals_to_keep=col_values_to_filter_on,
    )

print("Num available rows:", len(primary_segment_data_df))
print('Here is a 5 row sample...')
# Last expression of the cell, so the notebook renders the sample rows
primary_segment_data_df.head()

In [None]:
#@title Load data into format for processing

# Build the analysis dict for the primary segment. The plotting cell below reads its
# 'df_mean', 'df_std' and 'segment_label' entries.
primary_segment_dict = rtk.load_data_into_analysis_dict(primary_analysis_segment, primary_segment_data_df, segment_label=primary_segment_label)

In [None]:
#@title Display the Interface for Selecting the Comparison Data Segment to Analyze

# Second, independent selection widget built from the same S3 summary as the primary one;
# the next cell reads the comparison selections back from it.
comparison_segment_widget = rtk.create_interactive_widget(s3_df)
display(comparison_segment_widget)

In [None]:
#@title Optional - Set Comparison Analysis Segment Inputs

# The comparison segment reuses the primary segment's S3 metadata
# (same mocap types, movement type, handedness and file type)
comparison_s3_metadata = s3_metadata
# Read the selections back from the comparison segment widget displayed in the previous cell
comparison_segment_data = comparison_segment_widget.children[1].result

comparison_analysis_segment = PlayerMetadata(
    org_player_ids=comparison_segment_data["org_player_ids"],
    session_dates=comparison_segment_data["session_dates"],
    session_nums=comparison_segment_data["session_nums"],
    session_date_start=comparison_segment_data["session_date_start"],
    session_date_end=comparison_segment_data["session_date_end"],
    year=comparison_segment_data["year"],
    org_movement_id=None, # set the play GUID for the skeleton animation; None defaults to the first play
    s3_metadata=comparison_s3_metadata,
)

# Narrow the S3 summary to the comparison selection, then load its files into a DataFrame
comparison_segment_summary_df = rtk.filter_s3_summary_df(comparison_analysis_segment, s3_df)
comparison_segment_data_df = rtk.load_games_to_df_from_s3_paths(comparison_segment_summary_df['s3_path_delivery'].tolist())

# Only runs when add_stats_api was set to True in the User Input cell.
# The same toolkit helper used for the primary segment is applied to the comparison DataFrame here.
if add_stats_api:
    print('Adding data from the Stats API...')
    comparison_segment_data_df = rtk.decorate_primary_segment_df_with_stats_api(comparison_segment_data_df)
    print("Available Pitch Types")
    print(comparison_segment_data_df['pitch_type'].unique())

# Common Issue:
# Missing element: data integrity issues in parsing. Majority of data is still likely fine.
# Common metrics to filter by are: start_speed, end_speed, spin_rate, spin_direction, zone, pitch_type

In [None]:
#@title Optional: After adding the Stats API data, uncomment below to filter the data

# # FILTER BY PITCH TYPES
# pitch_types = {'Four-Seam Fastball', 'Curveball'}  # list the pitch types you want to include
# comparison_segment_data_df = comparison_segment_data_df.loc[
#     comparison_segment_data_df['pitch_type'].isin(pitch_types)
# ].copy().reset_index(drop=True)

# # FILTER BY A VELO RANGE
# velo_lo = 90
# velo_hi = 100
# comparison_segment_data_df = comparison_segment_data_df[
#     (comparison_segment_data_df["start_speed"] >= velo_lo) & (comparison_segment_data_df["start_speed"] <= velo_hi)
# ].copy().reset_index(drop=True)

# # Uncomment to print number of rows returned by filters
# print('Num available rows:', len(comparison_segment_data_df))

In [None]:
#@title Optional: Modify below to filter using the Custom Metadata added Above

comp_seg_col_for_filtering = 'pitch_type'  # type name of column for filtering
comp_seg_col_values_to_filter_on = ['SL']  # list column vals to filter on

# custom_metadata_df and metadata_play_id_name come from the primary segment's custom
# metadata cell, so that cell must have been run first. custom_metadata_df is None when
# no CSV filename was entered there, in which case no filtering happens here.
if custom_metadata_df is not None:
    print('Adding custom metadata...')
    comparison_segment_data_df = rtk.filter_df_on_custom_metadata(
        comparison_segment_data_df, custom_metadata_df, metadata_play_id_name,
        metadata_col=comp_seg_col_for_filtering,
        vals_to_keep=comp_seg_col_values_to_filter_on
    )
    
print("Num available rows:", len(comparison_segment_data_df))
print('Here is a 5 row sample...')
# Last expression of the cell, so the notebook renders the sample rows
comparison_segment_data_df.head()

In [None]:
# Build the analysis dict for the comparison segment, mirroring the primary segment's
# "Load data into format for processing" cell
comparison_segment_dict = rtk.load_data_into_analysis_dict(comparison_analysis_segment, comparison_segment_data_df, segment_label=comparison_segment_label)

# Common Issue:
# If no data files are returned here,
# check that the segment selection widget and the S3 File Info above are set correctly,
# also if the cells were updated after running once, check that they were run again with any new selections.

In [None]:
#@title Put One or Two Analysis Dicts into a list to be sent to AWS for Analysis

# If you did not add a comparison segment, uncomment the single-segment line below
# and comment out the two-segment line after it, so the list only includes the primary segment
# analysis_dicts = [primary_segment_dict]

# Every plotting and animation cell below iterates over (or is passed) this list
analysis_dicts = [primary_segment_dict, comparison_segment_dict]

In [None]:
#@title Optional - Create Simple Comparison Plots

# Column used for the x-axis. Available time options include:
# 'time_from_max_hand', 'norm_time', 'rel_frame', 'time'
# (when analyzing basketball, the "time_from..." column is 'time_from_max_height')
x_column = 'time_from_max_hand'

# Columns to plot, one figure per column. The defaults are example joint angles of
# interest for a left-handed pitcher. Any column from the INVERSE KINEMATICS file type
# (see https://docs.rebootmotion.com/) can be used, or columns from the MOMENTUM_ENERGY
# file type if that is the file type selected above.
y_columns = [
    'pelvis_rot', 'pelvis_side',
    'torso_rot', 'torso_side', 'torso_ext',
    'left_shoulder_rot', 'left_shoulder_flex', 'left_shoulder_abd',
    'left_elbow'
]

# y-axis label; keep it appropriate for the y_columns chosen above
y_axis_label = "joint angle (deg)"

# Half-width of the shaded band around the mean, in standard deviations
stand_devs_to_shade = 1.0

# True plots the time derivative of each y_column instead (no shaded band in that mode)
do_plot_velocity = False

# Figures are collected here so the "Save Plots to a PDF" cell can write them out
mpl_figs = []

for y_column in y_columns:

    mpl_fig = plt.figure()

    # One trace (plus optional band) per segment, all drawn on the current figure
    for segment_dict in analysis_dicts:

        mean_df = segment_dict['df_mean']

        if not do_plot_velocity:
            y = mean_df[y_column]

            # Shade mean +/- the requested number of standard deviations
            band = stand_devs_to_shade * segment_dict['df_std'][y_column]
            plt.fill_between(mean_df[x_column], y - band, y + band, alpha=0.4)

        else:
            # Finite-difference derivative of the mean trace with respect to the 'time' column
            y = mean_df[y_column].diff() / mean_df['time'].diff()

        plt.plot(mean_df[x_column], y, label=segment_dict['segment_label'])

    plt.ylabel(y_axis_label)
    # plt.ylim([-180, 180])  # uncomment to limit the height of the y-axis

    plt.xlabel(x_column)
    # plt.xlim([-1, 0.1])  # uncomment to limit the width of the x-axis

    plt.title(y_column)
    plt.legend()
    plt.grid()
    plt.show()

    mpl_figs.append(mpl_fig)


In [None]:
#@title Optional - Save Plots to a PDF

from matplotlib.backends.backend_pdf import PdfPages

# Output file, written to the notebook's current working directory
pdf_file_name = 'analysis.pdf'

# Write one page per figure collected by the comparison plots cell.
# The context manager closes (finalizes) the PDF even if saving one of the figures raises,
# instead of leaving an open, unfinished file behind.
with PdfPages(pdf_file_name) as pdf_analysis:

    for mpl_fig in mpl_figs:

        pdf_analysis.savefig(mpl_fig)

print('Saved plots to', pdf_file_name)

In [None]:
#@title Get Population Inverse Kinematics Data from S3

# The population files live under the prefix exposed by the S3 metadata object
print('Loading data from:', s3_metadata.s3_population_prefix)

# Both CSVs are read with their first column as the index (index_col=[0]).
# The resulting frames are passed to the animation and joint plot cells below.
print('Downloading population mean...')
pop_mean_df = wr.s3.read_csv([f"{s3_metadata.s3_population_prefix}mean_ik.csv"], index_col=[0])

print('Downloading population standard deviation...')
pop_std_df = wr.s3.read_csv([f"{s3_metadata.s3_population_prefix}std_ik.csv"], index_col=[0])

print('Done!')

# Common Issue:
# If no data files are returned here,
# check that the S3 File Info above is set correctly.
# Note that currently population data is only being aggregated for the Hawk-Eye HFR mocap type.

In [None]:
#@title Inspect Available Joint Angle Names

# Names that can be used for joint_angles_to_plot in the next cell; the list is also
# reused by the "Create Joint Angle HTML Plots" cell.
joint_angle_names = rtk.get_available_joint_angles(analysis_dicts)
print(f"Available Joint Angles:\nn={len(joint_angle_names)}\n{joint_angle_names}")

In [None]:
#@title Set Plot Information for Synchronized Animation

# All values in this cell are consumed by the "Create Animation" cell that follows.

time_column_to_plot = 'time_from_max_hand'  # seconds from max dom hand velo

joint_angles_to_plot = ['torso_side', 'left_elbow']  # list of joint angles to plot below, from available angles above

# set to True to plot the mean joint angle trace across the selection,
# set to False to plot the joint angle trace for the play specified by the play_guid in the analysis_dict from that cell above
plot_joint_angle_mean = True

# set to True to write an html file for each joint angle skeleton animation
# (files are named '<joint angle>_animation.html')
write_individual_html = False

# step size between animation frames
frame_step = 25

In [None]:
#@title Create Animation by Sending Plot Information to AWS

# Animations are collected here so the final cell can include them in the HTML report
figs = []

# One animation per requested joint angle, using the settings from the previous cell
for joint_angle_to_plot in joint_angles_to_plot:
    fig = rtk.get_animation(
        boto3_session,
        analysis_dicts,
        pop_mean_df,
        pop_std_df,
        time_column_to_plot,
        joint_angle_to_plot,
        plot_joint_angle_mean,
        frame_step=frame_step,
    )

    # Optional standalone HTML file per joint angle
    if write_individual_html:
        fig.write_html(f'{joint_angle_to_plot}_animation.html', full_html=True, include_plotlyjs='cdn')

    figs.append(fig)

    fig.show()

In [None]:
#@title Create Joint Angle HTML Plots
import numpy as np

# Drop joint angles whose population mean is numerically zero at every sample
joint_angle_names_no_zeros = [
    angle
    for angle in joint_angle_names
    if not np.allclose(pop_mean_df[angle].values, 0.0)
]

# Group the remaining angle names for plotting, one figure per group
# (presumably 3 names per group - confirm against rtk.list_chunks)
joint_angle_lists = rtk.list_chunks(joint_angle_names_no_zeros, 3)

# Color palette; defined here but not passed to any call in this cell
plot_colors = [
    'rgb(31, 119, 180)',
    'rgb(255, 127, 14)',
    'rgb(44, 160, 44)',
    'rgb(214, 39, 40)',
    'rgb(148, 103, 189)',
]

time_label = 'time_from_max_hand'

# Figures are collected here so the final cell can include them in the HTML report
figs_angles = []

for joint_angle_list in joint_angle_lists:

    fig = rtk.get_joint_plot(
        boto3_session,
        analysis_dicts,
        pop_mean_df,
        pop_std_df,
        time_label,
        joint_angle_list,
    )
    fig.show()
    figs_angles.append(fig)

In [None]:
#@title Write Report HTML to Local Folder
# Report contents: the skeleton animations (figs) followed by the joint angle plots (figs_angles),
# so both of the cells that create those lists must have been run first.
rtk.save_figs_to_html(figs+figs_angles)