# String Spec Visualization with Plotly

The purpose of this notebook is to create an interactive scatter plot of tennis strings and their measurable properties (stiffness, spin potential, tension loss %, etc.). Data was scraped from TWU's webpage. 

In [1]:
# Import standard modules
from math import *
import pandas as pd
import numpy as np

# Import Plotly
import plotly as py
import plotly.graph_objs as go
import ipywidgets as widgets

# Switch Plotly into offline notebook mode so figures can be rendered inline.
# NOTE(review): per the Plotly docs, connected=True pulls plotly.js from a CDN
# rather than embedding it in the notebook -- confirm viewers will have network access.
py.offline.init_notebook_mode(connected=True)

In [3]:
"""
    Read in string spec data from CSV files and clean up the data for visualization.
"""

# Load the stiffness file with explicit column names; only its 'brand' column
# is needed here, to build the list of known brand names used when splitting
# combined "brand + string" labels further down.
stiffness_df = pd.read_csv(
    'tennis_string_stiffness.csv',
    names=['brand', 'string', 'stiffness'],
)
brand_list = stiffness_df['brand'].unique().tolist()
# Create function to separate brand and string into 2 columns
def extract_brand_and_string(string, brand_list):
    """
        Split a combined '<brand> <string name>' label into its two parts.

        A brand is either one or two words long. The first two words of the
        label are tried against `brand_list`; if they do not form a known
        brand, the first word alone is taken as the brand.

        Parameters:
            string: combined label, e.g. 'Alien Black Diamond 16'.
            brand_list: collection of known brand names (only membership is tested).

        Returns:
            Tuple (brand, string_name). A one-word label returns that word as
            the brand and '' as the string name; an empty or whitespace-only
            label returns ('', '').
    """

    string_split = string.split()

    # Nothing to split: avoid indexing into an empty list
    if not string_split:
        return ('', '')

    # A two-word brand is only possible when the label has at least two words;
    # slicing (rather than indexing [1]) keeps one-word labels from raising
    two_word_brand = ' '.join(string_split[:2])
    if len(string_split) >= 2 and two_word_brand in brand_list:
        brand_word_count = 2
    else:
        brand_word_count = 1

    brand = ' '.join(string_split[:brand_word_count])
    string = ' '.join(string_split[brand_word_count:])

    return (brand, string)

# Read in string performance datasheet into a dataframe and rename columns
# to snake_case names that are usable as attributes
string_specs_df = pd.read_csv('tennis_string_datasheet.csv').rename(
    columns={'String': 'brand_plus_string',
             'Ref. Ten. (lbs)': 'ref_tension_lbs',
             'Swing Speed': 'swing_speed',
             'Material': 'material',
             'Stiffness (lb/in)': 'stiffness_lbs_per_in',
             'Tension Loss (%)': 'tension_loss_pct',
             'Spin Potential': 'spin_potential'})

# Extract brand and name of string from 'brand_plus_string'.
# Each label is parsed once and the resulting (brand, string) pair feeds both
# new columns, instead of running the parser separately for each column.
brand_string_pairs = [extract_brand_and_string(label, brand_list)
                      for label in string_specs_df['brand_plus_string']]
string_specs_df['brand'] = [pair[0] for pair in brand_string_pairs]
string_specs_df['string'] = [pair[1] for pair in brand_string_pairs]

# Tally the non-null 'ref_tension_lbs' entries for each (brand, string) pair and
# retain only the pairs with a tally of 3, corresponding to the 3 reference
# tensions of 40, 51, and 62 lbs.
string_count_df = (
    string_specs_df
    .groupby(['brand', 'string'], as_index=False)['ref_tension_lbs']
    .count()
    .rename(columns={'ref_tension_lbs': 'num_count'})
)
string_specs_df = pd.merge(string_specs_df, string_count_df,
                           how='inner', on=['brand', 'string'])
has_three_rows = string_specs_df['num_count'] == 3
# Order rows by tension within each string so later steps can rely on it
string_specs_df = (
    string_specs_df[has_three_rows]
    .copy()
    .sort_values(by=['brand', 'string', 'ref_tension_lbs'])
)

# Drop strings whose measured stiffness goes down as the applied tension goes up.
# Rows are read in the dataframe's current order, and the three stiffness values
# per string are compared first-to-second and second-to-third.
stiffness_criteria_list = []
for label in string_specs_df['brand_plus_string'].unique():
    label_rows = string_specs_df['brand_plus_string'] == label
    stiffness = list(string_specs_df.loc[label_rows, 'stiffness_lbs_per_in'])
    first_step = stiffness[1] - stiffness[0]
    second_step = stiffness[2] - stiffness[1]
    # A NaN difference compares False, so strings with missing stiffness fail
    never_decreases = bool((first_step >= 0) and (second_step >= 0))
    stiffness_criteria_list.append({'brand_plus_string': label,
                                    'criteria_pass': never_decreases})

# Attach the pass/fail flag to every row, then keep only the passing strings
string_specs_df = pd.merge(string_specs_df, pd.DataFrame(stiffness_criteria_list),
                           how='inner', on='brand_plus_string')
final_specs_df = string_specs_df[string_specs_df['criteria_pass']].copy()


In [4]:
# Display the cleaned dataframe as this cell's output
final_specs_df

Unnamed: 0,brand_plus_string,ref_tension_lbs,swing_speed,material,stiffness_lbs_per_in,tension_loss_pct,spin_potential,brand,string,num_count,criteria_pass
0,Alien Black Diamond 16,40,Fast,Polyester,152.6,61.3,4.5,Alien,Black Diamond 16,3,True
1,Alien Black Diamond 16,51,Fast,Polyester,190.3,50.7,4.5,Alien,Black Diamond 16,3,True
2,Alien Black Diamond 16,62,Fast,Polyester,223.5,45.8,4.5,Alien,Black Diamond 16,3,True
3,Alpha Gut 2000 16,40,Fast,Nylon,131.4,22.6,3.1,Alpha,Gut 2000 16,3,True
4,Alpha Gut 2000 16,51,Fast,Nylon,159.4,16.5,3.1,Alpha,Gut 2000 16,3,True
5,Alpha Gut 2000 16,62,Fast,Nylon,180.6,14.4,3.1,Alpha,Gut 2000 16,3,True
6,Ashaway Dynamite 17,40,Fast,Nylon/Zyex,108.6,40.9,3.0,Ashaway,Dynamite 17,3,True
7,Ashaway Dynamite 17,51,Fast,Nylon/Zyex,127.4,29.9,3.0,Ashaway,Dynamite 17,3,True
8,Ashaway Dynamite 17,62,Fast,Nylon/Zyex,144.6,24.5,3.0,Ashaway,Dynamite 17,3,True
9,Ashaway Dynamite Soft 17,40,Fast,Nylon/Zyex,102.3,33.6,2.6,Ashaway,Dynamite Soft 17,3,True


# Create Interactive Scatter Plot

In [10]:
# Discard rows of the final dataframe that have no material specified
final_specs_df.dropna(subset=['material'], inplace=True)

# Build a two-level lookup of dataframes: dfs_dict[material][reference tension].
# Only rows with a spin potential above zero are kept in each subset.
# NOTE(review): presumably non-positive spin potential marks a missing
# measurement -- confirm against the source data.
has_positive_spin = final_specs_df['spin_potential'] > 0
dfs_dict = {
    material: {
        ref_ten: final_specs_df[(final_specs_df['material'] == material) &
                                (final_specs_df['ref_tension_lbs'] == ref_ten) &
                                has_positive_spin].copy()
        for ref_ten in (40, 51, 62)
    }
    for material in final_specs_df['material'].unique()
}

# Marker colors per material: every material shares one opacity, so only the
# RGB channels are listed and the 'rgba(...)' strings are assembled from them
_marker_alpha = 0.5
_material_rgb = {
    'Polyester': (17, 28, 242),
    'Nylon': (63, 191, 127),
    'Nylon/Zyex': (242, 137, 17),
    'Nylon/Polyester': (148, 17, 242),
    'Nylon/Polyurethane': (242, 36, 17),
    'Nylon/Polyolefin': (17, 223, 242),
    'Gut': (223, 242, 17),
    'Polyolefin': (247, 17, 228),
}
material_color_dict = {
    material: 'rgba({}, {}, {}, {})'.format(red, green, blue, _marker_alpha)
    for material, (red, green, blue) in _material_rgb.items()
}

# Human-readable axis titles, keyed by dataframe column name
axis_title_dict = {
    'stiffness_lbs_per_in': 'Stiffness (lbs/in)',
    'spin_potential': 'Spin Potential',
    'tension_loss_pct': 'Tension Loss %',
}

# Base layout shared by every figure: both axes use the same tick styling
# (inward ticks, no grid lines) and the legend is laid out horizontally
_axis_style = {'ticks': 'inside',
               'ticklen': 10,
               'showgrid': False}
layout = go.Layout(
    xaxis=dict(_axis_style),
    yaxis=dict(_axis_style),
    legend={'orientation': 'h'},
    width=900,
    height=1000)

In [16]:
def create_scatter_plot(property_x, property_y, ref_tension):
    """
        Render an interactive scatter plot of one string property against
        another at a single reference tension, with one trace per material.

        Parameters:
            property_x: dataframe column plotted on the x axis; must be a key
                of `axis_title_dict`.
            property_y: dataframe column plotted on the y axis; must be a key
                of `axis_title_dict`.
            ref_tension: reference tension used to select the per-material
                subsets from `dfs_dict` (i.e. 40, 51, 62 lbs).

        Returns:
            None. The figure is displayed in the notebook.
    """

    def _trace_for(material):
        # One marker-only trace per material; hovering shows the full string label
        subset = dfs_dict[material][ref_tension]
        return go.Scatter(
            x=subset[property_x],
            y=subset[property_y],
            mode='markers',
            marker={'color': material_color_dict[material],
                    'size': 10,
                    'line': {'width': 1}},
            name=material,
            hovertext=list(subset['brand_plus_string']))

    traces = [_trace_for(material) for material in final_specs_df.material.unique()]

    # Start from the shared base layout, then apply the per-plot font, title
    # and axis titles in a single update
    fig = go.Figure(data=traces, layout=layout)
    fig.update_layout(
        font={'family': "Courier New",
              'size': 20,
              'color': "rgba(0, 0, 0, 1)"},
        title='Ref. Tension {0} lbs'.format(ref_tension),
        xaxis={'title': axis_title_dict[property_x]},
        yaxis={'title': axis_title_dict[property_y]})

    # Show plot
    py.offline.iplot(fig)

## Mouse over scatter points for string info.

In [17]:
# Stiffness (x) vs. spin potential (y) at the 51 lbs reference tension
create_scatter_plot('stiffness_lbs_per_in', 'spin_potential', 51)

Although spin potential is perhaps the most commonly referenced differentiator of playing characteristics when comparing polyester to non-polyester strings, the plot above shows that there is significant overlap in spin potential between nylon and polyester strings. 

In [20]:
# Stiffness (x) vs. tension loss % (y) at the 51 lbs reference tension
create_scatter_plot('stiffness_lbs_per_in', 'tension_loss_pct', 51)

The percentage loss in tension as measured by TWU seems to indicate that tension loss is the stronger differentiator, with poly strings losing ~2x as much tension after the first 100 hits as non-poly strings. 

In [19]:
# Tension loss % (x) vs. spin potential (y) at the 51 lbs reference tension
create_scatter_plot('tension_loss_pct', 'spin_potential', 51)