# Introduction to Plotly

1. Until now we did visualisations using Matplotlib, Seaborn and Pandas. All of them produce static image files.

2. Plotly is a company based in Canada, known for its products such as Plotly and Dash.

3. Plotly creates interactive visualisations in the form of HTML files

4. Drawback: Plotly on its own cannot work with a live data source.

5. Dash is used to create live data based dashboards.

In [1]:
import numpy as np 
import pandas as pd 
import plotly.offline as pyo 
import plotly.graph_objs as go

In [2]:
# Read the matches and deliveries CSV files.
match = pd.read_csv('../DataSets/matches.csv')
delivery = pd.read_csv('../DataSets/deliveries.csv')

# Join every delivery row to its match row: deliveries.match_id == matches.id.
ipl = pd.merge(delivery, match, left_on='match_id', right_on='id')

# Display the column names of the merged frame.
ipl.columns

Index(['match_id', 'inning', 'batting_team', 'bowling_team', 'over', 'ball',
       'batsman', 'non_striker', 'bowler', 'is_super_over', 'wide_runs',
       'bye_runs', 'legbye_runs', 'noball_runs', 'penalty_runs',
       'batsman_runs', 'extra_runs', 'total_runs', 'player_dismissed',
       'dismissal_kind', 'fielder', 'id', 'season', 'city', 'date', 'team1',
       'team2', 'toss_winner', 'toss_decision', 'result', 'dl_applied',
       'winner', 'win_by_runs', 'win_by_wickets', 'player_of_match', 'venue',
       'umpire1', 'umpire2', 'umpire3'],
      dtype='object')

## Scatter Plot using Plotly

In [3]:
# Scatter plots are drawn between two continuous variables.
# Q: Draw a scatter plot of batting average (x-axis) against strike rate (y-axis) for the top 50 batsmen in IPL history.

In [4]:
# Pick the 50 batsmen with the highest total of batsman_runs.
career_runs = ipl.groupby('batsman')['batsman_runs'].sum()
ranked_runs = career_runs.sort_values(ascending=False)
top50 = ranked_runs.head(50).index.to_list()

# Keep only the rows whose 'batsman' value is one of those 50 names.
in_top50 = ipl['batsman'].isin(top50)
new_ipl = ipl[in_top50]

In [5]:
# Strike rate per batsman: runs scored per 100 counted deliveries.
runs = new_ipl.groupby('batsman')['batsman_runs'].sum()
# NOTE(review): this counts every non-null 'ball' entry for the batsman,
# including rows where wide_runs > 0. Wides are presumably not balls faced —
# confirm whether they should be excluded from the denominator.
balls = new_ipl.groupby('batsman')['ball'].count()

# Build the DataFrame in one step instead of rebinding the name from a Series
# to a DataFrame. The ratio Series has no name, so reset_index() yields the
# columns 'batsman' and 0.
strikerate = ((runs / balls) * 100).reset_index()
type(strikerate)

pandas.core.frame.DataFrame

In [20]:
# Preview the first rows rather than dumping the whole frame.
strikerate.head(10)

Unnamed: 0,batsman,0
0,AB de Villiers,145.129059
1,AC Gilchrist,133.054662
2,AJ Finch,126.299213
3,AM Rahane,117.486549
4,AT Rayudu,123.014257
5,BB McCullum,126.318203
6,BJ Hodge,121.422376
7,CH Gayle,144.194313
8,DA Miller,137.709251
9,DA Warner,138.318401


In [6]:
# Batting average = total runs / number of dismissals.
# Dismissals: rows whose player_dismissed is one of the top-50 names, counted per player.
out = ipl.loc[ipl['player_dismissed'].isin(top50)]
nout = out['player_dismissed'].value_counts()

# Divide (the two Series align on their index labels), then flatten the result
# into a DataFrame and give its columns readable names.
avg = (
    (runs / nout)
    .reset_index()
    .rename(columns={'index': 'batsman', 0: 'average'})
)

In [22]:
# Combine average and strike rate into one frame keyed on 'batsman'.
# Selecting ['batsman', 'average'] first makes the cell safe to re-run: a
# second execution starts from the same two columns instead of merging onto an
# already-merged frame and producing duplicate 'strikeRate' columns.
# The strike-rate value column is labelled 0, so it is renamed on a copy here;
# the `strikerate` frame itself is left untouched.
avg = avg[['batsman', 'average']].merge(
    strikerate.rename(columns={0: 'strikeRate'}),
    on='batsman',
)
# Preview the first rows rather than dumping the whole frame.
avg.head(10)

Unnamed: 0,batsman,average,strikeRate
0,AB de Villiers,38.307692,145.129059
1,AC Gilchrist,27.223684,133.054662
2,AJ Finch,27.186441,126.299213
3,AM Rahane,33.593407,117.486549
4,AT Rayudu,27.146067,123.014257
5,BB McCullum,28.112245,126.318203
6,BJ Hodge,33.333333,121.422376
7,CH Gayle,41.022472,144.194313
8,DA Miller,34.733333,137.709251
9,DA Warner,40.14,138.318401


In [24]:
# Scatter plot: batting average on x, strike rate on y, one marker per batsman.
trace = go.Scatter(
    x=avg['average'],
    y=avg['strikeRate'],
    mode='markers',
    text=avg['batsman'],  # batsman name attached to each point
)
data = [trace]

layout = go.Layout(
    title='Batsman Average vs Strike Rate',
    xaxis={'title': 'Average'},
    yaxis={'title': 'Strike Rate'},
)

fig = go.Figure(data=data, layout=layout)

# Write the figure to an HTML file; the call returns the file name.
pyo.plot(fig)

'temp-plot.html'