In [23]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np

pd.set_option('plotting.backend', 'plotly')
from datetime import date, datetime as dt
import os, re
import plotly.colors
import plotly.graph_objects as go, plotly.express as px, plotly.figure_factory as ff
from plotly.offline import init_notebook_mode
from plotly.subplots import make_subplots

from statsmodels.stats.proportion import proportion_confint

from tennis_utils.player import TennisDataLoader, TennisPlayerDataLoader, TennisPlayer
from tennis_utils.scrapers import SackmanScraper


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [24]:

player_name = 'Roger Federer'
data_path = os.getcwd()+'/data'

tdl = TennisDataLoader(data_path+'/matches.parquet', data_path+'/players.parquet')
matches_df, players_df = tdl.matches, tdl.players

tpdl = TennisPlayerDataLoader(player_name, matches_df, players_df)

# Subset selected player matches data
player_matches = tpdl.player_matches #matches_df[matches_df['player_name'] == player_name]
player_details = tpdl.player_details #players_df[players_df['player_name']==player_name]
player_rank = tpdl.player_rank

tp = TennisPlayer(player_name, player_matches, player_rank, player_details)#, rounds=['F'])
m = tp.selected_matches     


In [25]:
"""
https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
https://www.statsmodels.org/dev/generated/statsmodels.stats.proportion.proportion_confint.html
https://www.ucl.ac.uk/english-usage/staff/sean/resources/binomialpoisson.pdf
"""

'\nhttps://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval\nhttps://www.statsmodels.org/dev/generated/statsmodels.stats.proportion.proportion_confint.html\nhttps://www.ucl.ac.uk/english-usage/staff/sean/resources/binomialpoisson.pdf\n'

In [31]:
stats_by_year = (m.groupby('year')
            .agg(
                matches_played=('winner', np.size),
                matches_won=('winner', np.sum),
                ace = ('ace', np.sum),
                df = ('df', np.sum),
                svpt = ('svpt', np.sum),
                firstIn = ('firstIn', np.sum),
                firstWon = ('firstWon', np.sum),
                secondIn = ('secondIn', np.sum),
                secondWon = ('secondWon', np.sum),
                returnWon = ('returnWon', np.sum),    
                returnPlayed = ('returnPlayed', np.sum), 
                bpConverted = ('bpConverted', np.sum),
                bpTotal = ('bpTotal', np.sum),
                bpSaved = ('bpSaved', np.sum),
                bpFaced = ('bpFaced', np.sum),
                tbPlayed = ('tbPlayed', np.sum),
                tbWon = ('tbWon', np.sum),
                decidingSetPlayed = ('decidingSetPlayed', np.sum),
                decidingSetWon = ('decidingSetWon', np.sum))
            .assign(matches_lost = lambda x: x['matches_played'] - x['matches_won'])
            .assign(win_rate = lambda x: x['matches_won']/x['matches_played'])
            .assign(perc_ace = lambda x: x['ace']/x['svpt'])
            .assign(perc_df = lambda x: x['df']/x['svpt'])
            .assign(perc_firstIn = lambda x: x['firstIn']/x['svpt'])
            .assign(perc_firstWon = lambda x: x['firstWon']/x['firstIn'])
            .assign(perc_secondWon = lambda x: x['secondWon']/x['secondIn'])
            .assign(perc_returnWon = lambda x: x['returnWon']/x['returnPlayed'])
            .assign(perc_bpConverted = lambda x: x['bpConverted']/x['bpTotal'])
            .assign(perc_bpSaved = lambda x: x['bpSaved']/x['bpFaced'])
            .assign(tbLost = lambda x: x['tbPlayed'] - x['tbWon'])
            .assign(perc_tbWon = lambda x: x['tbWon']/x['tbPlayed'])
            .assign(decidingSetLost = lambda x: x['decidingSetPlayed'] - x['decidingSetWon'])
            .assign(perc_decidingSetWon = lambda x: x['decidingSetWon']/x['decidingSetPlayed'])
            .reset_index()
        )

In [32]:
success_cols = ['ace', 'df', 'firstIn', 'firstWon', 'secondWon', 'returnWon', 'bpConverted', 'bpSaved', 'tbWon', 'decidingSetWon']
total_cols = ['svpt', 'svpt', 'svpt', 'firstIn', 'secondIn', 'returnPlayed', 'bpTotal', 'bpFaced', 'tbPlayed', 'decidingSetPlayed']

lower_df, upper_df = proportion_confint(
    stats_by_year[success_cols], 
    stats_by_year[total_cols],
    method='wilson'
    )

lower_df.columns = [f'lower_{c}' for c in success_cols]
upper_df.columns = [f'upper_{c}' for c in success_cols]

In [62]:
m['decidingSetWon']

1165      NaN
1194      NaN
6         NaN
5191      NaN
5267      NaN
         ... 
261978    NaN
262056    1.0
262085    NaN
262095    1.0
262100    NaN
Name: decidingSetWon, Length: 1415, dtype: float64

In [63]:
success_overall = pd.Series(m[success_cols].fillna(0).to_numpy().sum(axis=0), index=[f'success_{c}' for c in success_cols]) 
total_overall = pd.Series(m[total_cols].to_numpy().sum(axis=0), index=[f'total_{c}' for c in success_cols])

In [64]:
success_overall, total_overall

(success_ace               11162.0
 success_df                 2622.0
 success_firstIn           68475.0
 success_firstWon          53054.0
 success_secondWon         23738.0
 success_returnWon         47099.0
 success_bpConverted        4788.0
 success_bpSaved            4150.0
 success_tbWon               427.0
 success_decidingSetWon      231.0
 dtype: float64,
 total_ace               110107
 total_df                110107
 total_firstIn           110107
 total_firstWon           68475
 total_secondWon          39010
 total_returnWon         118206
 total_bpConverted        12043
 total_bpSaved             6144
 total_tbWon                652
 total_decidingSetWon       347
 dtype: int64)

In [65]:
n_rows = 4
specs = [[{'colspan': 2}, None, {'type': 'pie'}] if i%2==0 else [{'type': 'pie'}, {'colspan': 2}, None] 
             for i in range(n_rows)]

specs

[[{'colspan': 2}, None, {'type': 'pie'}],
 [{'type': 'pie'}, {'colspan': 2}, None],
 [{'colspan': 2}, None, {'type': 'pie'}],
 [{'type': 'pie'}, {'colspan': 2}, None]]

In [66]:
def plot_ts_and_pie(success_overall, 
                    total_overall,
                    stats_by_year, 
                    upper_df, 
                    lower_df, 
                    columns, 
                    colors = ['rgb(33,113,181)', 'rgb(217,71,1)', 'rgb(81, 178, 124)', 'rgb(235, 127, 134)']
                    ):

    n_cols, n_rows = 3, len(columns)

    specs = [[{'colspan': 2}, None, {'type': 'pie'}] if i%2==0 else [{'type': 'pie'}, {'colspan': 2}, None] 
             for i in range(n_rows)]

    subplot_titles=[[f'Percentage {c} and 95% CI by year', f'Percentage {c} overall'] if i%2==0 else 
                    [f'Percentage {c} overall', f'Percentage {c} and 95% CI by year'] for i, c in enumerate(columns)]


    fig = make_subplots(
            cols=n_cols,
            rows=n_rows,
            specs=specs,
            shared_xaxes=True,
            vertical_spacing=0.1,
            horizontal_spacing=0.05,
            subplot_titles=sum(subplot_titles, [])
    )


    x = stats_by_year['year']
    
    for i, col in enumerate(columns):

        fig.add_trace(
            go.Scatter(
                x=x, y=upper_df[f'upper_{col}'],
                fill=None,
                mode='lines',
                line=dict(color='darksalmon', width=1)
            ),
            row=i+1, col=1 if i%2==0 else 2
        )

        fig.add_trace(
            go.Scatter(
                x=x, y=lower_df[f'lower_{col}'],
                fill='tonexty', # fill area between trace0 and trace1
                mode='lines',
                line=dict(color='darksalmon', width=1)
            ),
            row=i+1, col=1 if i%2==0 else 2
        )

        fig.add_trace(
            go.Scatter(
                x=x, y=stats_by_year[f'perc_{col}'],
                textposition='top center',
                mode='lines+markers',
                connectgaps=True,
                marker={'color': colors[i]},
            ),
            row=i+1, col=1 if i%2==0 else 2
        )

        fig.add_trace(
            go.Pie(
                values=[success_overall[f'success_{col}'], total_overall[f'total_{col}'] - success_overall[f'success_{col}']],
                marker={'colors': ['seagreen', 'indianred'],
                            'line': {'color':'white', 'width':1}}
            ),
            row=i+1, col=3 if i%2==0 else 1
        )

    # Layout
    fig.update_layout(
        barmode='stack',
            height = 450*n_rows, width=1000, 
            xaxis1={'title': 'Year'}, xaxis2={'title': 'Year'},
            xaxis3={'title': 'Year'}, xaxis4={'title': 'Year'},
            yaxis={'title': 'Percentage'}, 
            yaxis2={'title': 'Percentage', 'side':'right'},
            yaxis3={'title': 'Num. Tb'}, yaxis4={'title': 'Percentage', 'side':'right'},
            yaxis5={'title': 'Num. Sets'}, yaxis6={'title': 'Percentage', 'side':'right'},
            showlegend=False
        )
    return fig

plot_ts_and_pie(success_overall, 
                total_overall, 
                stats_by_year, 
                upper_df, 
                lower_df, 
                columns=['firstIn', 'firstWon', 'secondWon', 'returnWon', 'ace', 'df'], 
                colors=['rgb(33,113,181)', 'rgb(217,71,1)', 'rgb(81, 178, 124)', 'rgb(235, 127, 134)', 'rgb(33,113,181)', 'rgb(217,71,1)',]
                )

In [67]:
plot_ts_and_pie(success_overall, 
                total_overall, 
                stats_by_year, 
                upper_df, 
                lower_df, 
                columns=['bpSaved', 'bpConverted', 'tbWon', 'decidingSetWon'], 
                colors=['rgb(33,113,181)', 'rgb(217,71,1)', 'rgb(81, 178, 124)', 'rgb(235, 127, 134)', 'rgb(33,113,181)', 'rgb(217,71,1)',]
                )

In [68]:
def plot_under_pressure(stats_by_year, upper_df, lower_df, stats_overall):

    colors = [
        'rgb(33,113,181)',
        'rgb(217,71,1)',
        'rgb(81, 178, 124)',
        'rgb(235, 127, 134)'
        ]


    fig = make_subplots(
        cols=3, rows=4,
        specs=[[{'colspan': 2}, None, {'type': 'pie'}],
                [{'type': 'pie'}, {'colspan': 2}, None],
                [{'colspan': 2, 'secondary_y': True}, None, {'type': 'pie'}],
                [{'type': 'pie'}, {'colspan': 2, 'secondary_y': True}, None]],
        
        subplot_titles=[
                'Percentage BreakPoint Converted by Year',  'Perc. BreakPoint Converted Overall',
                'Perc. BreakPoint Saved Overall',  'Percentage BreakPoint Saved by Year',
                'TieBreak Won by Year',  'TieBreak Won Overall',
                'Deciding Sets Won Overall',  'Deciding Sets Won by Year'
                ],
        
    )
        

    x = stats_by_year['year']
    
    # Break Point Converted
    fig.add_trace(
        go.Scatter(
            x=x, y=upper_df['upper_bpConverted'],
            name='Upper Band',
            fill=None,
            mode='lines',
            line=dict(color='darksalmon', width=1)
        ),
        row=1, col=1
    )

    fig.add_trace(
        go.Scatter(
            x=x, y=lower_df['lower_bpConverted'],
            name='Lower Band',
            fill='tonexty', # fill area between trace0 and trace1
            mode='lines',
            line=dict(color='darksalmon', width=1)
        ),
        row=1, col=1
    )

    fig.add_trace(
        go.Scatter(
            x=x, y=stats_by_year['meanBpConverted'],
            name='Mean Perc BP Converted',
            textposition='top center',
            mode='lines+markers',
            connectgaps=True,
            marker={'color': colors[0]},
        ),
        row=1, col=1
    )

    fig.add_trace(
        go.Pie(
            labels=['BP Converted', 'BP Not Converted'],
            values=[stats_overall['bpConverted'], stats_overall['bpTotal'] - stats_overall['bpConverted']],
            marker={'colors': ['seagreen', 'indianred'],
                        'line': {'color':'white', 'width':1}}
        ),
        row=1, col=3
    )


    # Break Point Saved
    fig.add_trace(
        go.Scatter(
            x=x, y=upper_df['upper_bpSaved'], 
            name='Upper Band',
            fill=None,
            mode='lines',
            line=dict(color='darksalmon', width=1)
        ),
        row=2, col=2
    )

    fig.add_trace(
        go.Scatter(
            x=x, y=lower_df['lower_bpSaved'],
            name='Lower Band',
            fill='tonexty', # fill area between trace0 and trace1
            mode='lines',
            line=dict(color='darksalmon', width=1)
        ),
        row=2, col=2
    )

    fig.add_trace(
        go.Scatter(
            x=x, y=stats_by_year['meanBpSaved'],
            name='Mean Perc BP Saved',
            textposition='top center',
            mode='lines+markers',
            connectgaps=True,
            marker={'color': colors[1]},
        ),
        row=2, col=2
    )
    
    fig.add_trace(
        go.Pie(
            labels=['BP Saved', 'BP Lost'],
            values=[stats_overall['bpSaved'], stats_overall['bpFaced'] - stats_overall['bpSaved']],
            marker={'colors': ['seagreen', 'indianred'],
                        'line': {'color':'white', 'width':1}}
        ),
        row=2, col=1
    )
    
    
    
    # Tie Break
    fig.add_trace(
        go.Bar(
            x=x, y=stats_by_year['tbWon'],
            name='TieBreak Won',
            marker={'color': 'seagreen'},
            text=stats_by_year['tbWon'],
            textposition='inside',
            textfont_size=8,
            opacity=0.8
        ),
        row=3, col=1,
        secondary_y=False
    )

    fig.add_trace(
        go.Bar(
            x=x, y=stats_by_year['tbLost'],
            name='TieBreak Lost',
            marker={'color': 'indianred'},
            text=stats_by_year['tbLost'],
            textposition='inside',
            textfont_size=8,
            opacity=0.8
        ),
        row=3, col=1,
        secondary_y=False
    )

    fig.add_trace(
        go.Scatter(
            x=x, y=stats_by_year['percTbWon'],
            name='Win Rate',
            line={'color':'midnightblue', 'width':2},
            mode='lines+text',
            text=stats_by_year['percTbWon'].round(2),
            textposition='top center',
            textfont_size=8
        ),
        row=3, col=1,
        secondary_y=True
    )
   
    fig.add_trace(
        go.Pie(
            labels=['TB Won', 'TP Lost'],
            values=[stats_overall['tbWon'], stats_overall['tbPlayed'] - stats_overall['tbWon']],
            marker={'colors': ['seagreen', 'indianred'],
                        'line': {'color':'white', 'width':1}}
        ),
        row=3, col=3
    )
    
    
    
    # Deciding Set
    fig.add_trace(
        go.Bar(
            x=x, y=stats_by_year['decidingSetWon'],
            name='Deciding Sets Won',
            marker={'color': 'seagreen'},
            text=stats_by_year['tbWon'],
            textposition='inside',
            textfont_size=8,
            opacity=0.8
        ),
        row=4, col=2,
        secondary_y=False
    )

    fig.add_trace(
        go.Bar(
            x=x, y=stats_by_year['DecidingSetLost'],
            name='Deciding Sets Lost',
            marker={'color': 'indianred'},
            text=stats_by_year['tbLost'],
            textposition='inside',
            textfont_size=8,
            opacity=0.8
        ),
        row=4, col=2,
        secondary_y=False
    )
    
    fig.add_trace(
        go.Scatter(
            x=x, y=stats_by_year['percDecidingSetWon'],
            name='Win Rate',
            line={'color':'midnightblue', 'width':2},
            mode='lines+text',
            text=stats_by_year['percDecidingSetWon'].round(2),
            textposition='top center',
            textfont_size=8
        ),
        row=4, col=2,
        secondary_y=True
    )
   
    fig.add_trace(
        go.Pie(
            labels=['Deciding Set Won', 'Deciding Set Lost'],
            values=[stats_overall['decidingSetWon'], stats_overall['decidingSetPlayed'] - stats_overall['decidingSetWon']],
            marker={'colors': ['seagreen', 'indianred'],
                        'line': {'color':'white', 'width':1}}
        ),
        row=4, col=1
    )
    

    
    
    # Layout
    fig.update_layout(
        barmode='stack',
            height = 1000, width=1000, 
            xaxis1={'title': 'Year'}, xaxis2={'title': 'Year'},
            xaxis3={'title': 'Year'}, xaxis4={'title': 'Year'},
            yaxis={'title': 'Percentage'}, 
            yaxis2={'title': 'Percentage', 'side':'right'},
            yaxis3={'title': 'Num. Tb'}, yaxis4={'title': 'Percentage', 'side':'right'},
            yaxis5={'title': 'Num. Sets'}, yaxis6={'title': 'Percentage', 'side':'right'},
            showlegend=False
        )
    return fig

plot_under_pressure(stats_by_year, upper_df, lower_df, stats_overall)

KeyError: 'meanBpConverted'

In [69]:
go.Figure(
    go.Pie(
        values=[0.7, 1-0.7]
))
