# Libraries

In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from ipywidgets import interact

# Data Set Match Scores

#### Load the dataset match scores

In [2]:
# Load the 2017 match-scores CSV; the path is relative to the notebook's working directory.
match_scores = pd.read_csv('match_scores_2017_unindexed_csv.csv')

#### Info about the dataset match scores

This dataset contains matches from different tournaments in 2017.

In [3]:
# Print the column names, non-null counts, and dtypes of the frame.
match_scores.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3830 entries, 0 to 3829
Data columns (total 24 columns):
tourney_year_id           3830 non-null object
tourney_order             3830 non-null int64
tourney_slug              3830 non-null object
tourney_url_suffix        3830 non-null object
tourney_round_name        3830 non-null object
round_order               3830 non-null int64
match_order               3830 non-null int64
winner_name               3830 non-null object
winner_player_id          3830 non-null object
winner_slug               3830 non-null object
loser_name                3830 non-null object
loser_player_id           3830 non-null object
loser_slug                3830 non-null object
winner_seed               2480 non-null object
loser_seed                2066 non-null object
match_score_tiebreaks     3830 non-null object
winner_sets_won           3830 non-null int64
loser_sets_won            3830 non-null int64
winner_games_won          3830 non-null int64
loser_

In [4]:
# Preview the first five rows (the default for head()).
match_scores.head()

Unnamed: 0,tourney_year_id,tourney_order,tourney_slug,tourney_url_suffix,tourney_round_name,round_order,match_order,winner_name,winner_player_id,winner_slug,...,loser_seed,match_score_tiebreaks,winner_sets_won,loser_sets_won,winner_games_won,loser_games_won,winner_tiebreaks_won,loser_tiebreaks_won,match_id,match_stats_url_suffix
0,2017-339,1,brisbane,/en/scores/archive/brisbane/339/2017/results,Finals,1,1,Grigor Dimitrov,d875,grigor-dimitrov,...,3.0,62 26 63,2,1,14,11,0,0,2017-339-d875-n552,/en/scores/2017/339/MS001/match-stats
1,2017-339,1,brisbane,/en/scores/archive/brisbane/339/2017/results,Semi-Finals,2,1,Grigor Dimitrov,d875,grigor-dimitrov,...,1.0,76(7) 62,2,0,13,8,1,0,2017-339-d875-r975,/en/scores/2017/339/MS002/match-stats
2,2017-339,1,brisbane,/en/scores/archive/brisbane/339/2017/results,Semi-Finals,2,2,Kei Nishikori,n552,kei-nishikori,...,2.0,76(3) 63,2,0,13,9,1,0,2017-339-n552-w367,/en/scores/2017/339/MS003/match-stats
3,2017-339,1,brisbane,/en/scores/archive/brisbane/339/2017/results,Quarter-Finals,3,1,Milos Raonic,r975,milos-raonic,...,5.0,46 63 64,2,1,16,13,0,0,2017-339-r975-n409,/en/scores/2017/339/MS004/match-stats
4,2017-339,1,brisbane,/en/scores/archive/brisbane/339/2017/results,Quarter-Finals,3,2,Stan Wawrinka,w367,stan-wawrinka,...,,67(2) 64 64,2,1,18,15,0,1,2017-339-w367-e831,/en/scores/2017/339/MS007/match-stats


In [5]:
# Show every field of the row labelled 0 as a single Series.
match_scores.loc[0]

tourney_year_id                                               2017-339
tourney_order                                                        1
tourney_slug                                                  brisbane
tourney_url_suffix        /en/scores/archive/brisbane/339/2017/results
tourney_round_name                                              Finals
round_order                                                          1
match_order                                                          1
winner_name                                            Grigor Dimitrov
winner_player_id                                                  d875
winner_slug                                            grigor-dimitrov
loser_name                                               Kei Nishikori
loser_player_id                                                   n552
loser_slug                                               kei-nishikori
winner_seed                                                          7
loser_

#### Visualizing the dataset

Here we display the number of matches in each tournament with an interactive widget.

In [6]:
# Count the match ids recorded for each tournament, ordered from the
# tournament with the fewest matches to the one with the most.
tourney_matches = (
    match_scores[['tourney_slug', 'match_id']]
    .groupby('tourney_slug', as_index=False)
    .count()
    .sort_values('match_id')
)


def explore(n):
    """Render the first ``n`` rows of ``tourney_matches`` with the notebook's rich display."""
    preview = tourney_matches.head(n)
    display(preview)

def fig_show(n):
    """Plot a horizontal bar chart of the last ``n`` rows of ``tourney_matches``.

    ``tourney_matches`` is sorted ascending by match count before this
    function is used, so the last ``n`` rows are the tournaments with the
    most matches.

    Parameters
    ----------
    n : int
        Number of tournaments (bars) to show; supplied by the ``interact`` slider.
    """
    # Slice once so both axes are guaranteed to come from the same rows.
    busiest = tourney_matches.tail(n)
    fig = go.Figure(data=[go.Bar(
        x=busiest['match_id'],
        y=busiest['tourney_slug'],
        orientation='h',
    )])
    # The figure should be readable on its own: name the chart and both axes.
    fig.update_layout(
        title='Matches per tournament (2017)',
        xaxis_title='Number of matches',
        yaxis_title='Tournament',
    )
    fig.show()
    
# Slider for the number of bars: from 10 up to one bar per tournament.
# Assigning to `_` keeps the function returned by interact out of the cell output.
max_bars = len(tourney_matches)
_ = interact(fig_show, n=(10, max_bars))

interactive(children=(IntSlider(value=38, description='n', max=67, min=10), Output()), _dom_classes=('widget-i…

Tournaments won by each player who won more than one tournament

In [22]:
# Keep only the 'Finals' rows; the winner of such a row is taken to be the
# tournament champion.
finals_matches = match_scores[match_scores['tourney_round_name'] == 'Finals']
# Count titles per player once, then keep the players with more than one title.
titles_per_player = finals_matches['winner_name'].value_counts()
tourney_finals = titles_per_player[titles_per_player > 1]
labels = tourney_finals.index
values = tourney_finals

fig = go.Figure(data=[go.Pie(labels=labels, values=values)])
fig.update_traces(title={
                    # The threshold in the title must match the `> 1` filter above.
                    'text': "Tournaments won by player 2017 (>1)",
                    'font': {
                        'size': 20
                    },
                    'position': 'top center'
                    },
                  hoverinfo='label+percent',
                  textinfo='value',
                  textfont_size=20)
fig.update_layout(
    autosize=False,
    width=800,
    height=800,
)
fig.show()

Players who played the most finals (more than one)

In [None]:
# Keep only the 'Finals' rows.
finals_matches = match_scores[match_scores['tourney_round_name'] == 'Finals']
# Both players of a final appeared in it, so stack winners and losers before
# counting; counting winner_name alone would give finals *won*, not finals played.
finalists = pd.concat([finals_matches['winner_name'], finals_matches['loser_name']])
finals_per_player = finalists.value_counts()
# Keep the players who reached more than one final.
tourney_finals = finals_per_player[finals_per_player > 1]
labels = tourney_finals.index
values = tourney_finals

fig = go.Figure(data=[go.Pie(labels=labels, values=values)])
fig.update_traces(title={
                    'text': "Times in a final 2017",
                    'font': {
                        'size': 20
                    },
                    'position': 'top center'
                    },
                  hoverinfo='label+percent',
                  textinfo='value',
                  textfont_size=20)
fig.update_layout(
    autosize=False,
    width=800,
    height=800,
)
fig.show()

In [23]:

# Wins per player across all matches in the frame; keep players with more than 35 wins.
wins_per_player = match_scores['winner_name'].value_counts()
most_winner_matches = wins_per_player[wins_per_player > 35]
labels = most_winner_matches.index
values = most_winner_matches

pie_title = dict(
    text="Matches won in 2017 by player",
    font=dict(size=20),
    position='top center',
)
fig = go.Figure(data=[go.Pie(labels=labels, values=values)])
fig.update_traces(
    title=pie_title,
    hoverinfo='label+percent',
    textinfo='value',
    textfont_size=20,
)
fig.update_layout(autosize=False, width=800, height=800)
fig.show()

In [None]:
# Number of matches lost by each player, most frequent first.
match_scores['loser_name'].value_counts()

In [None]:
# Matches won by Rafael Nadal, counted per tournament.
nadal_won = match_scores['winner_name'] == 'Rafael Nadal'
match_scores.loc[nadal_won, 'tourney_slug'].value_counts()

# Data Set Match Stats

In [None]:
# Load the 2017 match-stats CSV; the path is relative to the notebook's working directory.
match_stats = pd.read_csv('match_stats_2017_unindexed_csv.csv')

In [None]:
# Print the column names, non-null counts, and dtypes of the stats frame.
match_stats.info()

In [None]:
# Preview the first five rows (the default for head()).
match_stats.head()

In [None]:
# Count how many rows share each stats URL suffix.
match_stats['match_stats_url_suffix'].value_counts()

In [None]:
# Show every field of the row labelled 0 as a single Series.
match_stats.loc[0]

In [None]:
# Inspect the stats row(s) recorded for one specific match id.
is_target_match = match_stats['match_id'] == '2017-339-n409-d801'
match_stats.loc[is_target_match]

In [None]:
# Inner-join scores and stats on the shared match identifier, then show how
# many distinct match ids the joined frame contains.
df = match_scores.merge(match_stats, on='match_id')
df['match_id'].nunique(dropna=False)

In [None]:
# Number of rows in the stats frame, measured on its match_id column.
match_stats['match_id'].size


In [None]:
# Number of rows in the scores frame, measured on its match_id column.
match_scores['match_id'].size

In [None]:
# Occurrences of each match_id in the stats frame; a count above 1 means the id is repeated.
match_stats['match_id'].value_counts()

In [None]:
# Occurrences of each match_id in the scores frame; a count above 1 means the id is repeated.
match_scores['match_id'].value_counts()

In [None]:
# Inspect the score row(s) recorded for one specific match id.
is_target_match = match_scores['match_id'] == '2017-337-db59-f586'
match_scores.loc[is_target_match]

In [None]:
# Occurrences of each match_id after the merge; a count above 1 means the join produced repeated ids.
df['match_id'].value_counts()

In [None]:
# Print each value of the merged row labelled 15 on its own line.
merged_row = df.loc[15]
for field_value in merged_row:
    print(field_value)

In [None]:
# Number of merged rows per winning player, most frequent first.
df['winner_name'].value_counts()

In [None]:
# Per tournament, count the non-null values in every other column of the merged frame.
df.groupby('tourney_slug').count()