In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np

pd.set_option('plotting.backend', 'plotly')
from datetime import date, datetime as dt
import os, re
import plotly.colors
import plotly.graph_objects as go, plotly.express as px, plotly.figure_factory as ff
from plotly.offline import init_notebook_mode
from plotly.subplots import make_subplots

from statsmodels.stats.proportion import proportion_confint

from tennis_utils.player import TennisDataLoader, TennisPlayerDataLoader, TennisPlayer
from tennis_utils.scrapers import SackmanScraper


In [2]:

# Player under analysis and the folder (relative to the working directory)
# that holds the parquet inputs.
player_name = 'Roger Federer'
data_path = f'{os.getcwd()}/data'

# Load the complete match and player tables.
tdl = TennisDataLoader(
    f'{data_path}/matches.parquet',
    f'{data_path}/players.parquet',
)
matches_df = tdl.matches
players_df = tdl.players

# Player-specific views exposed by the loader.
# NOTE(review): presumably equivalent to filtering matches_df / players_df on
# player_name == player_name -- confirm against TennisPlayerDataLoader.
tpdl = TennisPlayerDataLoader(player_name, matches_df, players_df)
player_matches = tpdl.player_matches
player_details = tpdl.player_details
player_rank = tpdl.player_rank

# No extra filters are passed to TennisPlayer here (e.g. `rounds=['F']` is left out).
tp = TennisPlayer(player_name, player_matches, player_rank, player_details)
m = tp.selected_matches


In [9]:
# Season-level serve / return / pressure statistics for the selected matches.
#
# Aggregators are given as the string aliases 'sum' / 'std' rather than the
# numpy callables: pandas already dispatches np.sum / np.std to the same
# GroupBy methods, but doing so emits a FutureWarning from pandas 2.1 onwards.
m_by_year = (
    m.groupby('year')
    .agg(
        ace=('ace', 'sum'),
        df=('df', 'sum'),
        svpt=('svpt', 'sum'),
        firstIn=('1stIn', 'sum'),
        firstInStd=('1stIn', 'std'),
        firstWon=('1stWon', 'sum'),
        firstWonStd=('1stWon', 'std'),
        secondWon=('2ndWon', 'sum'),
        secondWonStd=('2ndWon', 'std'),
        returnWon=('returnWon', 'sum'),
        returnWonStd=('returnWon', 'std'),
        returnPlayed=('returnPlayed', 'sum'),
        bpConverted=('bpConverted', 'sum'),
        bpConvertedStd=('bpConverted', 'std'),
        bpTotal=('bpTotal', 'sum'),
        bpSaved=('bpSaved', 'sum'),
        bpSavedStd=('bpSaved', 'std'),
        bpFaced=('bpFaced', 'sum'),
        tbPlayed=('tbPlayed', 'sum'),
        tbWon=('tbWon', 'sum'),
        decidingSetPlayed=('decidingSetPlayed', 'sum'),
        decidingSetWon=('decidingSetWon', 'sum'),
    )
    # One assign is enough: later keyword arguments may refer to columns
    # created by earlier ones (used below for `secondIn`).
    .assign(
        meanFirstIn=lambda x: x['firstIn'] / x['svpt'],
        stdFirstIn=lambda x: x['firstInStd'] / x['firstIn'],
        meanFirstWon=lambda x: x['firstWon'] / x['firstIn'],
        stdFirstWon=lambda x: x['firstWonStd'] / x['firstWon'],
        # Serve points that were not first serves in (svpt - firstIn).
        secondIn=lambda x: x['svpt'] - x['firstIn'],
        meanSecondWon=lambda x: x['secondWon'] / x['secondIn'],
        stdSecondWon=lambda x: x['secondWonStd'] / x['secondWon'],
        meanReturnWon=lambda x: x['returnWon'] / x['returnPlayed'],
        stdReturnWon=lambda x: x['returnWonStd'] / x['returnWon'],
        meanBpConverted=lambda x: x['bpConverted'] / x['bpTotal'],
        stdBpConverted=lambda x: x['bpConvertedStd'] / x['bpConverted'],
        meanBpSaved=lambda x: x['bpSaved'] / x['bpFaced'],
        stdBpSaved=lambda x: x['bpSavedStd'] / x['bpSaved'],
        tbLost=lambda x: x['tbPlayed'] - x['tbWon'],
        percTbWon=lambda x: x['tbWon'] / x['tbPlayed'],
        DecidingSetLost=lambda x: x['decidingSetPlayed'] - x['decidingSetWon'],
        percDecidingSetWon=lambda x: x['decidingSetWon'] / x['decidingSetPlayed'],
    )
    .reset_index()
    # NaN cells (e.g. 0/0 ratios, or a std over a single match) become a small
    # positive placeholder. NOTE(review): this also fills NaN in the raw count
    # columns and does not touch +/-inf produced by x/0 -- confirm that is intended.
    .fillna(0.001)
)

In [13]:
# The two lists are paired by position: success_cols[i] counts successes out of
# total_cols[i] trials (e.g. 'firstWon' out of 'firstIn', 'tbWon' out of 'tbPlayed').
success_cols = [
    'firstIn', 'firstWon', 'secondWon', 'returnWon',
    'bpConverted', 'bpSaved', 'tbWon', 'decidingSetWon',
]
total_cols = [
    'svpt', 'firstIn', 'secondIn', 'returnPlayed',
    'bpTotal', 'bpFaced', 'tbPlayed', 'decidingSetPlayed',
]

# Confidence bounds for every (success, total) pair, using statsmodels'
# default settings (no alpha / method is passed).
lower_df, upper_df = proportion_confint(m_by_year[success_cols], m_by_year[total_cols])

# Relabel the bounds after the success column they belong to.
lower_df.columns = ['lower_' + c for c in success_cols]
upper_df.columns = ['upper_' + c for c in success_cols]

In [14]:
# Show the lower confidence bounds, one `lower_<success column>` column per
# entry of `success_cols`.
lower_df

Unnamed: 0,lower_firstIn,lower_firstWon,lower_secondWon,lower_returnWon,lower_bpConverted,lower_bpSaved,lower_tbWon,lower_decidingSetWon
0,0.571311,0.705812,0.502021,0.348499,0.27994,0.603436,0.293013,0.214891
1,0.582861,0.729512,0.505671,0.372459,0.328976,0.62793,0.434565,0.507606
2,0.586902,0.743521,0.515124,0.374249,0.339284,0.619881,0.388129,0.249173
3,0.582414,0.754729,0.541256,0.405758,0.372665,0.609669,0.448182,0.465047
4,0.603135,0.768403,0.555498,0.402053,0.370419,0.674554,0.643203,0.650024
5,0.61766,0.750097,0.566649,0.405083,0.408954,0.587091,0.576719,0.661168
6,0.616068,0.754997,0.570516,0.408936,0.394127,0.651639,0.603012,0.768522
7,0.605975,0.75892,0.569861,0.396587,0.375983,0.606153,0.608552,0.509545
8,0.629214,0.755946,0.558673,0.389686,0.346965,0.63113,0.494825,0.331968
9,0.612262,0.780571,0.549964,0.370678,0.375728,0.636715,0.555428,0.262583


In [23]:
# Totals of the pressure-situation counters, summed over every row of `m`.
under_pressure_overall = m.loc[
    :,
    [
        'bpFaced', 'bpSaved',
        'bpConverted', 'bpTotal',
        'tbPlayed', 'tbWon',
        'decidingSetPlayed', 'decidingSetWon',
    ],
].sum()
under_pressure_overall

bpFaced               6144.0
bpSaved               4150.0
bpConverted           4788.0
bpTotal              12043.0
tbPlayed               652.0
tbWon                  427.0
decidingSetPlayed      347.0
decidingSetWon         231.0
dtype: float64

In [92]:
def _under_pressure_by_year(matches):
    """Aggregate break-point, tie-break and deciding-set counters per year.

    Returns one row per `year` with the summed counters, the derived ratios
    and the won/lost splits that `plot_under_pressure` draws.
    """
    return (
        matches.groupby('year')
        # String aliases instead of np.sum / np.std: same GroupBy methods,
        # without the FutureWarning pandas >= 2.1 emits for numpy callables.
        .agg(
            bpConverted=('bpConverted', 'sum'),
            bpConvertedStd=('bpConverted', 'std'),
            bpTotal=('bpTotal', 'sum'),
            bpSaved=('bpSaved', 'sum'),
            bpSavedStd=('bpSaved', 'std'),
            bpFaced=('bpFaced', 'sum'),
            tbPlayed=('tbPlayed', 'sum'),
            tbWon=('tbWon', 'sum'),
            decidingSetPlayed=('decidingSetPlayed', 'sum'),
            decidingSetWon=('decidingSetWon', 'sum'),
        )
        .assign(
            meanBpConverted=lambda x: x['bpConverted'] / x['bpTotal'],
            stdBpConverted=lambda x: x['bpConvertedStd'] / x['bpConverted'],
            meanBpSaved=lambda x: x['bpSaved'] / x['bpFaced'],
            stdBpSaved=lambda x: x['bpSavedStd'] / x['bpSaved'],
            tbLost=lambda x: x['tbPlayed'] - x['tbWon'],
            percTbWon=lambda x: x['tbWon'] / x['tbPlayed'],
            DecidingSetLost=lambda x: x['decidingSetPlayed'] - x['decidingSetWon'],
            percDecidingSetWon=lambda x: x['decidingSetWon'] / x['decidingSetPlayed'],
        )
        .reset_index()
        # NaN cells (0/0 ratios, std over a single match) become a small
        # positive placeholder so every year still gets a point.
        .fillna(0.001)
    )


def _add_rate_with_band(fig, x, mean, spread, name, color, row, col):
    """Draw a yearly rate with a shaded mean +/- 2*spread band in one subplot."""
    band_line = dict(color='darksalmon', width=1)

    # Order matters: the lower band fills up to the trace added just before it.
    fig.add_trace(
        go.Scatter(x=x, y=mean + 2 * spread, name='Upper Band',
                   fill=None, mode='lines', line=band_line),
        row=row, col=col
    )
    fig.add_trace(
        go.Scatter(x=x, y=mean - 2 * spread, name='Lower Band',
                   fill='tonexty', mode='lines', line=band_line),
        row=row, col=col
    )
    fig.add_trace(
        go.Scatter(x=x, y=mean, name=name,
                   textposition='top center', mode='lines+markers',
                   connectgaps=True, marker={'color': color}),
        row=row, col=col
    )


def _add_outcome_pie(fig, labels, won, total, row, col):
    """Draw a two-slice pie (won vs. total - won) in one subplot."""
    fig.add_trace(
        go.Pie(
            labels=labels,
            values=[won, total - won],
            marker={'colors': ['seagreen', 'indianred'],
                    'line': {'color': 'white', 'width': 1}}
        ),
        row=row, col=col
    )


def _add_won_lost_bars(fig, x, won, lost, win_rate, label, row, col):
    """Draw stacked won/lost bars plus the win-rate line on the secondary axis."""
    for counts, outcome, color in ((won, 'Won', 'seagreen'), (lost, 'Lost', 'indianred')):
        fig.add_trace(
            go.Bar(
                x=x, y=counts,
                name=f'{label} {outcome}',
                marker={'color': color},
                text=counts,  # annotate each bar with the count it represents
                textposition='inside',
                textfont_size=8,
                opacity=0.8
            ),
            row=row, col=col,
            secondary_y=False
        )

    fig.add_trace(
        go.Scatter(
            x=x, y=win_rate,
            name='Win Rate',
            line={'color': 'midnightblue', 'width': 2},
            mode='lines+text',
            text=win_rate.round(2),
            textposition='top center',
            textfont_size=8
        ),
        row=row, col=col,
        secondary_y=True
    )


def plot_under_pressure(tp):
    """Build the 4x3 "under pressure" figure for a player.

    Each row pairs a by-year chart with an overall pie for one situation:
    break points converted, break points saved, tie-breaks and deciding sets.

    Parameters
    ----------
    tp : object exposing `selected_matches`
        `selected_matches` must be a DataFrame with the columns `year`,
        `bpConverted`, `bpTotal`, `bpSaved`, `bpFaced`, `tbPlayed`, `tbWon`,
        `decidingSetPlayed` and `decidingSetWon`.

    Returns
    -------
    plotly.graph_objects.Figure
        The assembled figure (legend hidden, bars stacked).
    """
    m = tp.selected_matches

    # Everything plotted comes from these two local objects; the function does
    # not rely on any notebook-level frame being defined.
    up_by_year = _under_pressure_by_year(m)
    up_overall = m[['bpFaced', 'bpSaved', 'bpConverted', 'bpTotal',
                    'tbPlayed', 'tbWon', 'decidingSetPlayed', 'decidingSetWon']].sum()

    colors = [
        'rgb(33,113,181)',
        'rgb(217,71,1)',
        ]

    fig = make_subplots(
        cols=3, rows=4,
        specs=[[{'colspan': 2}, None, {'type': 'pie'}],
                [{'type': 'pie'}, {'colspan': 2}, None],
                [{'colspan': 2, 'secondary_y': True}, None, {'type': 'pie'}],
                [{'type': 'pie'}, {'colspan': 2, 'secondary_y': True}, None]],
        shared_xaxes=True,
        subplot_titles=[
                'Percentage BreakPoint Converted by Year',  'Perc. BreakPoint Converted Overall',
                'Perc. BreakPoint Saved Overall',  'Percentage BreakPoint Saved by Year',
                'TieBreak Won by Year',  'TieBreak Won Overall',
                'Deciding Sets Won Overall',  'Deciding Sets Won by Year'
                ],
        vertical_spacing=0.1,
        horizontal_spacing=0.05
    )

    x = up_by_year['year']

    # NOTE(review): the band half-width is 2 * (std of per-match counts / yearly
    # total), which is not a standard error of the proportion -- confirm this is
    # the intended spread measure.

    # Break Point Converted
    _add_rate_with_band(
        fig, x, up_by_year['meanBpConverted'], up_by_year['stdBpConverted'],
        name='Mean Perc BP Converted', color=colors[0], row=1, col=1
    )
    _add_outcome_pie(
        fig, ['BP Converted', 'BP Not Converted'],
        up_overall['bpConverted'], up_overall['bpTotal'], row=1, col=3
    )

    # Break Point Saved
    _add_rate_with_band(
        fig, x, up_by_year['meanBpSaved'], up_by_year['stdBpSaved'],
        name='Mean Perc BP Saved', color=colors[1], row=2, col=2
    )
    _add_outcome_pie(
        fig, ['BP Saved', 'BP Lost'],
        up_overall['bpSaved'], up_overall['bpFaced'], row=2, col=1
    )

    # Tie Break
    _add_won_lost_bars(
        fig, x, up_by_year['tbWon'], up_by_year['tbLost'], up_by_year['percTbWon'],
        label='TieBreak', row=3, col=1
    )
    _add_outcome_pie(
        fig, ['TB Won', 'TB Lost'],
        up_overall['tbWon'], up_overall['tbPlayed'], row=3, col=3
    )

    # Deciding Set (annotated with the deciding-set columns, not the tie-break ones)
    _add_won_lost_bars(
        fig, x, up_by_year['decidingSetWon'], up_by_year['DecidingSetLost'],
        up_by_year['percDecidingSetWon'],
        label='Deciding Sets', row=4, col=2
    )
    _add_outcome_pie(
        fig, ['Deciding Set Won', 'Deciding Set Lost'],
        up_overall['decidingSetWon'], up_overall['decidingSetPlayed'], row=4, col=1
    )

    # Layout
    fig.update_layout(
        barmode='stack',
        height=1000, width=1000,
        xaxis1={'title': 'Year'}, xaxis2={'title': 'Year'},
        xaxis3={'title': 'Year'}, xaxis4={'title': 'Year'},
        yaxis={'title': 'Percentage'},
        yaxis2={'title': 'Percentage', 'side': 'right'},
        yaxis3={'title': 'Num. Tb'}, yaxis4={'title': 'Percentage', 'side': 'right'},
        yaxis5={'title': 'Num. Sets'}, yaxis6={'title': 'Percentage', 'side': 'right'},
        showlegend=False
    )
    return fig

# Build the figure for the player loaded above; as the cell's last expression
# the returned figure is displayed.
plot_under_pressure(tp)