In [17]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np

pd.set_option('plotting.backend', 'plotly')
from datetime import date, datetime as dt
import os, re
import plotly.colors
import plotly.graph_objects as go, plotly.express as px, plotly.figure_factory as ff
from plotly.offline import init_notebook_mode
from plotly.subplots import make_subplots


from tennis_utils.player import TennisDataLoader, TennisPlayerDataLoader, TennisPlayer
from tennis_utils.scrapers import SackmanScraper


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [18]:
# data_path = os.getcwd() + '/data'

# tdl = TennisDataLoader(data_path + '/matches.parquet', data_path + '/players.parquet')
# print(tdl.matches['round'].unique())


# s = SackmanScraper()
# print(s)

# s.save_players(data_path + '/players.parquet')
# s.save_matches(data_path + '/raw_matches.parquet')
# s.save_tournaments(data_path + '/tournaments.parquet')
# s.save_wl_matches(data_path + '/matches.parquet')


In [19]:

# Player analysed in the rest of the notebook, and the folder holding the
# parquet exports (resolved relative to the current working directory).
player_name = 'Roger Federer'
data_path = os.getcwd()+'/data'

# Hand both parquet paths to the loader and take its two tables.
matches_file = data_path+'/matches.parquet'
players_file = data_path+'/players.parquet'
tdl = TennisDataLoader(matches_file, players_file)
matches_df = tdl.matches
players_df = tdl.players

# Per-player views exposed by TennisPlayerDataLoader for `player_name`.
tpdl = TennisPlayerDataLoader(player_name, matches_df, players_df)
player_matches = tpdl.player_matches
player_details = tpdl.player_details
player_rank = tpdl.player_rank

# Arguments are positional: name, matches, rank, details (in that order).
# NOTE(review): an earlier draft passed rounds=['F'] here, so TennisPlayer
# presumably accepts an optional rounds filter - confirm before relying on it.
tp = TennisPlayer(player_name, player_matches, player_rank, player_details)

In [20]:
# Short alias for the matches exposed by the TennisPlayer object; the
# aggregation cells below group this frame by 'year'.
m = tp.selected_matches
# Display the column names available for aggregation.
m.columns

Index(['tourney_id', 'tourney_name', 'surface', 'draw_size', 'tourney_level',
       'tourney_date', 'match_num', 'id', 'seed', 'entry', 'player_name',
       'hand', 'ht', 'ioc', 'age', 'rank', 'rank_points', 'opponent_id',
       'opponent_name', 'opponent_rank', 'score', 'best_of', 'round',
       'minutes', 'ace', 'df', 'svpt', '1stIn', '1stWon', '2ndWon', 'SvGms',
       'bpSaved', 'bpFaced', 'percAce', 'percDf', 'perc1stIn', 'perc1stWon',
       'perc2ndWon', 'percBpSaved', 'SvLost', 'bpTotal', 'bpConverted',
       'percBpConverted', 'percSvLost', 'returnWon', 'percReturnWon',
       'percServePointsWon', 'tbPlayed', 'tbWon', 'decidingSetPlayed',
       'winner', 'result', 'year', 'decidingSetWon'],
      dtype='object')

In [None]:
# Per-year serve, return and pressure statistics for the selected matches.
#
# Aggregators are given as strings ('sum' / 'std'): passing NumPy callables
# such as np.sum / np.std to groupby.agg is deprecated in recent pandas, and
# the string names pin the behaviour the callables were mapped to so far
# (NaN-skipping sum, sample standard deviation).
#
# NOTE(review): 'returnPlayed' does not appear in the `m.columns` listing shown
# earlier in this notebook; if the column is really absent this cell raises
# KeyError - confirm the intended source column.
# NOTE(review): every std* ratio divides the per-match std of a raw count by
# the yearly total of that count, which is not the std of the percentage
# itself - confirm this is the intended spread measure.
m_by_year = (
    m.groupby('year')
    .agg(
        ace=('ace', 'sum'),
        df=('df', 'sum'),
        svpt=('svpt', 'sum'),
        firstIn=('1stIn', 'sum'),
        firstInStd=('1stIn', 'std'),
        firstWon=('1stWon', 'sum'),
        firstWonStd=('1stWon', 'std'),
        secondWon=('2ndWon', 'sum'),
        secondWonStd=('2ndWon', 'std'),
        returnWon=('returnWon', 'sum'),
        returnWonStd=('returnWon', 'std'),
        returnPlayed=('returnPlayed', 'sum'),
        bpConverted=('bpConverted', 'sum'),
        bpConvertedStd=('bpConverted', 'std'),
        bpTotal=('bpTotal', 'sum'),
        bpSaved=('bpSaved', 'sum'),
        bpSavedStd=('bpSaved', 'std'),
        bpFaced=('bpFaced', 'sum'),
        tbPlayed=('tbPlayed', 'sum'),
        tbWon=('tbWon', 'sum'),
        decidingSetPlayed=('decidingSetPlayed', 'sum'),
        decidingSetWon=('decidingSetWon', 'sum'),
    )
    # All derived columns only read the aggregated counters above, so they can
    # be created in a single assign; keyword order fixes the column order.
    .assign(
        meanFirstIn=lambda x: x['firstIn'] / x['svpt'],
        stdFirstIn=lambda x: x['firstInStd'] / x['firstIn'],
        meanFirstWon=lambda x: x['firstWon'] / x['firstIn'],
        stdFirstWon=lambda x: x['firstWonStd'] / x['firstWon'],
        # Second serves played = service points minus first serves in.
        meanSecondWon=lambda x: x['secondWon'] / (x['svpt'] - x['firstIn']),
        stdSecondWon=lambda x: x['secondWonStd'] / x['secondWon'],
        meanReturnWon=lambda x: x['returnWon'] / x['returnPlayed'],
        stdReturnWon=lambda x: x['returnWonStd'] / x['returnWon'],
        meanBpConverted=lambda x: x['bpConverted'] / x['bpTotal'],
        stdBpConverted=lambda x: x['bpConvertedStd'] / x['bpConverted'],
        meanBpSaved=lambda x: x['bpSaved'] / x['bpFaced'],
        stdBpSaved=lambda x: x['bpSavedStd'] / x['bpSaved'],
        tbLost=lambda x: x['tbPlayed'] - x['tbWon'],
        percTbWon=lambda x: x['tbWon'] / x['tbPlayed'],
        DecidingSetLost=lambda x: x['decidingSetPlayed'] - x['decidingSetWon'],
        percDecidingSetWon=lambda x: x['decidingSetWon'] / x['decidingSetPlayed'],
    )
    .reset_index()
)

In [77]:
# Per-year "under pressure" statistics: break points converted / saved,
# tie-breaks and deciding sets.
#
# Aggregators are given as strings ('sum' / 'std'): passing NumPy callables
# such as np.sum / np.std to groupby.agg is deprecated in recent pandas, and
# the string names pin the behaviour the callables were mapped to so far
# (NaN-skipping sum, sample standard deviation).
up_by_year = (
    m.groupby('year')
    .agg(
        bpConverted=('bpConverted', 'sum'),
        bpConvertedStd=('bpConverted', 'std'),
        bpTotal=('bpTotal', 'sum'),
        bpSaved=('bpSaved', 'sum'),
        bpSavedStd=('bpSaved', 'std'),
        bpFaced=('bpFaced', 'sum'),
        tbPlayed=('tbPlayed', 'sum'),
        tbWon=('tbWon', 'sum'),
        decidingSetPlayed=('decidingSetPlayed', 'sum'),
        decidingSetWon=('decidingSetWon', 'sum'),
    )
    # Derived columns only read the aggregated counters, so one assign is
    # enough; keyword order fixes the resulting column order.
    .assign(
        meanBpConverted=lambda x: x['bpConverted'] / x['bpTotal'],
        stdBpConverted=lambda x: x['bpConvertedStd'] / x['bpConverted'],
        meanBpSaved=lambda x: x['bpSaved'] / x['bpFaced'],
        stdBpSaved=lambda x: x['bpSavedStd'] / x['bpSaved'],
        tbLost=lambda x: x['tbPlayed'] - x['tbWon'],
        percTbWon=lambda x: x['tbWon'] / x['tbPlayed'],
        DecidingSetLost=lambda x: x['decidingSetPlayed'] - x['decidingSetWon'],
        percDecidingSetWon=lambda x: x['decidingSetWon'] / x['decidingSetPlayed'],
    )
    .reset_index()
)
up_by_year

Unnamed: 0,year,bpConverted,bpConvertedStd,bpTotal,bpSaved,bpSavedStd,bpFaced,tbPlayed,tbWon,decidingSetPlayed,decidingSetWon,meanBpConverted,stdBpConverted,meanBpSaved,stdBpSaved,tbLost,percTbWon,DecidingSetLost,percDecidingSetWon
0,2000,140,1.559191,432,245,3.494715,376,27,13,18,8.0,0.324074,0.011137,0.651596,0.014264,14,0.481481,10.0,0.444444
1,2001,208,1.799508,564,306,4.029264,456,41,24,23,16.0,0.368794,0.008651,0.671053,0.013168,17,0.585366,7.0,0.695652
2,2002,229,1.839959,606,260,3.035148,390,28,16,19,9.0,0.377888,0.008035,0.666667,0.011674,12,0.571429,10.0,0.473684
3,2003,293,1.579424,717,246,2.968975,374,40,24,21,14.0,0.408647,0.005391,0.657754,0.012069,16,0.6,7.0,0.666667
4,2004,270,1.883108,662,207,2.942274,285,25,20,13,11.0,0.407855,0.006974,0.726316,0.014214,5,0.8,2.0,0.846154
5,2005,324,1.537819,728,209,2.181489,327,39,28,18,15.0,0.445055,0.004746,0.639144,0.010438,11,0.717949,3.0,0.833333
6,2006,376,1.761985,881,278,2.682656,399,51,37,20,18.0,0.426788,0.004686,0.696742,0.00965,14,0.72549,2.0,0.9
7,2007,269,1.87511,650,203,2.735025,308,36,27,15,11.0,0.413846,0.006971,0.659091,0.013473,9,0.75,4.0,0.733333
8,2008,271,1.803609,708,238,3.085183,350,37,24,20,11.0,0.382768,0.006655,0.68,0.012963,13,0.648649,9.0,0.55
9,2009,240,1.799732,577,220,2.82921,320,37,26,21,10.0,0.415945,0.007499,0.6875,0.01286,11,0.702703,11.0,0.47619


In [23]:
# Totals of the break-point, tie-break and deciding-set counters over every
# row of the selected matches.
under_pressure_cols = [
    'bpFaced', 'bpSaved', 'bpConverted', 'bpTotal',
    'tbPlayed', 'tbWon', 'decidingSetPlayed', 'decidingSetWon',
]
under_pressure_overall = m[under_pressure_cols].sum()
under_pressure_overall

bpFaced               6144.0
bpSaved               4150.0
bpConverted           4788.0
bpTotal              12043.0
tbPlayed               652.0
tbWon                  427.0
decidingSetPlayed      347.0
decidingSetWon         231.0
dtype: float64

In [83]:
def _under_pressure_by_year(m):
    """Aggregate break-point, tie-break and deciding-set counters per year."""
    # String aggregators instead of np.sum / np.std: passing NumPy callables to
    # groupby.agg is deprecated in recent pandas; the strings keep the
    # NaN-skipping sum and sample standard deviation used so far.
    return (
        m.groupby('year')
        .agg(
            bpConverted=('bpConverted', 'sum'),
            bpConvertedStd=('bpConverted', 'std'),
            bpTotal=('bpTotal', 'sum'),
            bpSaved=('bpSaved', 'sum'),
            bpSavedStd=('bpSaved', 'std'),
            bpFaced=('bpFaced', 'sum'),
            tbPlayed=('tbPlayed', 'sum'),
            tbWon=('tbWon', 'sum'),
            decidingSetPlayed=('decidingSetPlayed', 'sum'),
            decidingSetWon=('decidingSetWon', 'sum'),
        )
        .assign(
            meanBpConverted=lambda x: x['bpConverted'] / x['bpTotal'],
            stdBpConverted=lambda x: x['bpConvertedStd'] / x['bpConverted'],
            meanBpSaved=lambda x: x['bpSaved'] / x['bpFaced'],
            stdBpSaved=lambda x: x['bpSavedStd'] / x['bpSaved'],
            tbLost=lambda x: x['tbPlayed'] - x['tbWon'],
            percTbWon=lambda x: x['tbWon'] / x['tbPlayed'],
            DecidingSetLost=lambda x: x['decidingSetPlayed'] - x['decidingSetWon'],
            percDecidingSetWon=lambda x: x['decidingSetWon'] / x['decidingSetPlayed'],
        )
        .reset_index()
    )


def _add_rate_with_band(fig, x, mean, std, name, color, row, col):
    """Add a yearly rate line with a shaded mean +/- 2*std band to one subplot."""
    band_line = dict(color='darksalmon', width=1)
    fig.add_trace(
        go.Scatter(
            x=x, y=mean + 2 * std,
            name='Upper Band',
            fill=None,
            mode='lines',
            line=band_line
        ),
        row=row, col=col
    )
    # 'tonexty' shades towards the trace added just before this one, so the
    # lower band must directly follow the upper band.
    fig.add_trace(
        go.Scatter(
            x=x, y=mean - 2 * std,
            name='Lower Band',
            fill='tonexty',
            mode='lines',
            line=band_line
        ),
        row=row, col=col
    )
    fig.add_trace(
        go.Scatter(
            x=x, y=mean,
            name=name,
            textposition='top center',
            mode='lines+markers',
            connectgaps=True,
            marker={'color': color},
        ),
        row=row, col=col
    )


def _add_outcome_pie(fig, labels, won, total, row, col):
    """Add a two-slice pie (won vs. total - won) to one subplot."""
    fig.add_trace(
        go.Pie(
            labels=labels,
            values=[won, total - won],
            marker={'colors': ['seagreen', 'indianred'],
                    'line': {'color': 'white', 'width': 1}}
        ),
        row=row, col=col
    )


def _add_won_lost_bars(fig, x, won, lost, win_rate, won_name, lost_name, row, col):
    """Add stacked won/lost bars plus a win-rate line on the secondary y axis."""
    # Each bar is labelled with its own values, and the line with its own
    # rate, so a panel can never show another panel's numbers.
    for values, name, color in ((won, won_name, 'seagreen'),
                                (lost, lost_name, 'indianred')):
        fig.add_trace(
            go.Bar(
                x=x, y=values,
                name=name,
                marker={'color': color},
                text=values,
                textposition='inside',
                textfont_size=8,
                opacity=0.8
            ),
            row=row, col=col,
            secondary_y=False
        )

    fig.add_trace(
        go.Scatter(
            x=x, y=win_rate,
            name='Win Rate',
            line={'color': 'midnightblue', 'width': 2},
            mode='lines+text',
            text=win_rate.round(2),
            textposition='top center',
            textfont_size=8
        ),
        row=row, col=col,
        secondary_y=True
    )


def plot_under_pressure(tp):
    """Build a 4x3 dashboard of the player's performance under pressure.

    Each row pairs a by-year chart with an overall pie:
    row 1 break points converted, row 2 break points saved,
    row 3 tie-breaks, row 4 deciding sets.

    Parameters
    ----------
    tp : object
        Must expose ``selected_matches``, a DataFrame with the columns
        ``year``, ``bpConverted``, ``bpTotal``, ``bpSaved``, ``bpFaced``,
        ``tbPlayed``, ``tbWon``, ``decidingSetPlayed`` and ``decidingSetWon``.

    Returns
    -------
    plotly.graph_objects.Figure
        The figure returned by ``make_subplots`` with all traces added.
    """
    m = tp.selected_matches

    up_by_year = _under_pressure_by_year(m)
    up_overall = m[['bpFaced', 'bpSaved', 'bpConverted', 'bpTotal',
                    'tbPlayed', 'tbWon', 'decidingSetPlayed', 'decidingSetWon']].sum()

    # Marker colours of the two mean-rate lines.
    colors = [
        'rgb(33,113,181)',
        'rgb(217,71,1)',
    ]

    fig = make_subplots(
        cols=3, rows=4,
        specs=[[{'colspan': 2}, None, {'type': 'pie'}],
               [{'type': 'pie'}, {'colspan': 2}, None],
               [{'colspan': 2, 'secondary_y': True}, None, {'type': 'pie'}],
               [{'type': 'pie'}, {'colspan': 2, 'secondary_y': True}, None]],
        shared_xaxes=True,
        subplot_titles=[
            'Percentage BreakPoint Converted by Year',  'Perc. BreakPoint Converted Overall',
            'Perc. BreakPoint Saved Overall',  'Percentage BreakPoint Saved by Year',
            'TieBreak Won by Year',  'TieBreak Won Overall',
            'Deciding Sets Won Overall',  'Deciding Sets Won by Year'
        ],
        vertical_spacing=0.1,
        horizontal_spacing=0.05
    )

    x = up_by_year['year']

    # Break Point Converted
    _add_rate_with_band(
        fig, x, up_by_year['meanBpConverted'], up_by_year['stdBpConverted'],
        name='Mean Perc BP Converted', color=colors[0], row=1, col=1
    )
    _add_outcome_pie(
        fig, ['BP Converted', 'BP Not Converted'],
        up_overall['bpConverted'], up_overall['bpTotal'], row=1, col=3
    )

    # Break Point Saved
    _add_rate_with_band(
        fig, x, up_by_year['meanBpSaved'], up_by_year['stdBpSaved'],
        name='Mean Perc BP Saved', color=colors[1], row=2, col=2
    )
    _add_outcome_pie(
        fig, ['BP Saved', 'BP Lost'],
        up_overall['bpSaved'], up_overall['bpFaced'], row=2, col=1
    )

    # Tie Break
    _add_won_lost_bars(
        fig, x, up_by_year['tbWon'], up_by_year['tbLost'], up_by_year['percTbWon'],
        won_name='TieBreak Won', lost_name='TieBreak Lost', row=3, col=1
    )
    _add_outcome_pie(
        fig, ['TB Won', 'TB Lost'],
        up_overall['tbWon'], up_overall['tbPlayed'], row=3, col=3
    )

    # Deciding Set
    _add_won_lost_bars(
        fig, x, up_by_year['decidingSetWon'], up_by_year['DecidingSetLost'],
        up_by_year['percDecidingSetWon'],
        won_name='Deciding Sets Won', lost_name='Deciding Sets Lost', row=4, col=2
    )
    _add_outcome_pie(
        fig, ['Deciding Set Won', 'Deciding Set Lost'],
        up_overall['decidingSetWon'], up_overall['decidingSetPlayed'], row=4, col=1
    )

    # Layout
    fig.update_layout(
        barmode='stack',
        height=1000, width=1000,
        xaxis1={'title': 'Year'}, xaxis2={'title': 'Year'},
        xaxis3={'title': 'Year'}, xaxis4={'title': 'Year'},
        yaxis={'title': 'Percentage'},
        yaxis2={'title': 'Percentage', 'side': 'right'},
        yaxis3={'title': 'Num. Tb'}, yaxis4={'title': 'Percentage', 'side': 'right'},
        yaxis5={'title': 'Num. Sets'}, yaxis6={'title': 'Percentage', 'side': 'right'},
        showlegend=False
    )
    return fig

# Build the dashboard for `tp`; as the last expression of the cell, the
# returned figure is what the notebook displays.
plot_under_pressure(tp)

In [64]:
# Scratch layout check: a wide xy panel plus a pie on the first row.
# NOTE(review): `t` and `t1` are not defined in any visible cell, so this cell
# only runs on leftover kernel state - confirm where they come from (or drop
# the cell) before a Restart & Run All.
layout_specs = [
    [{'colspan':2}, None, {'type': 'pie'}],
    [{}, {'colspan':2}, None],
]
fig = make_subplots(
    cols=3, rows=2,
    specs=layout_specs,
    vertical_spacing=0.05,
    horizontal_spacing=0.05
)

# Line of t['percTbWon'] over t's index, in the top-left panel.
tb_rate_line = go.Scatter(x=t.index, y=t['percTbWon'])
fig.add_trace(tb_rate_line, row=1, col=1)

# Pie of t1's values labelled by its index, in the top-right panel.
share_pie = go.Pie(labels=t1.index, values=t1)
fig.add_trace(share_pie, row=1, col=3)

fig

In [50]:
?go.Pie

[1;31mInit signature:[0m
[0mgo[0m[1;33m.[0m[0mPie[0m[1;33m([0m[1;33m
[0m    [0marg[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mautomargin[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mcustomdata[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mcustomdatasrc[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mdirection[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mdlabel[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mdomain[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mhole[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mhoverinfo[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mhoverinfosrc[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mhoverlabel[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mhovertemplate[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mhovertemplatesrc[0m[1;33m=[0m[1;32mNone[0m