## Plotly - Go

---

#### 1. Import libraries

In [10]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.offline as pyo
pd.set_option('display.max_columns', None)

---

#### 2. Import data

In [7]:
# Folder containing matches.csv and deliveries.csv.
# Set the IPL_DATA_DIR environment variable to run the notebook on another machine;
# the fallback is the folder the notebook was originally written against.
# Named DATA_DIR (not `dir`) so the builtin dir() is not shadowed.
DATA_DIR = Path(os.environ.get(
    "IPL_DATA_DIR",
    r"C:\Users\SESA702302\OneDrive - Schneider Electric\Personal\0.0 Data Science Personal\9.0 Week 9 - Visualization\5.0 Plotly Go",
))

match = pd.read_csv(DATA_DIR / "matches.csv")
delivery = pd.read_csv(DATA_DIR / "deliveries.csv")

# Inner join: every delivery row gains the columns of the match it belongs to
# (match 'id' == delivery 'match_id').
ipl = pd.merge(match, delivery, left_on='id', right_on='match_id')

In [8]:
ipl.head(2)

Unnamed: 0,id,season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,...,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder
0,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,...,0,0,0,0,0,0,0,,,
1,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,...,0,0,0,0,0,0,0,,,


---

#### 3. Scatter plot

Scatter plots show the relationship between two continuous variables.
Problem: Draw a scatter plot of batting average against strike rate for the top 50 run-scorers of all time.

In [24]:
# Names of the 50 highest run-scorers, then every delivery row belonging to them
top50 = (
    ipl.groupby('batsman')['batsman_runs']
    .sum()
    .sort_values(ascending=False)
    .head(50)
    .index
    .tolist()
)
new_ipl = ipl.loc[ipl['batsman'].isin(top50)]

# Strike rate: runs scored per 100 delivery rows
# NOTE(review): every row is counted as a ball faced — confirm whether extras
# such as wides should be excluded.
runs = new_ipl.groupby('batsman')['batsman_runs'].sum()
balls = new_ipl.groupby('batsman')['batsman_runs'].count()
sr = (runs / balls * 100).reset_index()

# Batting average: runs divided by the number of times the batsman was dismissed
out = ipl.loc[ipl['player_dismissed'].isin(top50)]
nouts = out['player_dismissed'].value_counts()
avg = (runs / nouts).reset_index()
avg.columns = ['batsman', 'avg']

# Combined table; the strike rate keeps the column name 'batsman_runs'
avg = avg.merge(sr, on='batsman')
avg

Unnamed: 0,batsman,avg,batsman_runs
0,AB de Villiers,38.307692,145.129059
1,AC Gilchrist,27.223684,133.054662
2,AJ Finch,27.186441,126.299213
3,AM Rahane,33.593407,117.486549
4,AT Rayudu,27.146067,123.014257
5,BB McCullum,28.112245,126.318203
6,BJ Hodge,33.333333,121.422376
7,CH Gayle,41.022472,144.194313
8,DA Miller,34.733333,137.709251
9,DA Warner,40.14,138.318401


In [183]:
# One marker per batsman: batting average on x, strike rate on y.
# The strike-rate values are held in avg['batsman_runs'].
trace = go.Scatter(
    x=avg['avg'],
    y=avg['batsman_runs'],
    mode='markers',
    text=avg['batsman'],  # batsman name shown on hover
    marker={"color": "green", "size": 16},
)
data = [trace]
layout = go.Layout(
    title="Batsman Avg vs SR",
    xaxis={"title": "Batsman Average"},       # spelling of "Batsman" corrected
    yaxis={"title": "Batsman Strike Rate"},
)

fig = go.Figure(data=data, layout=layout)
fig.show()                          # render inline
pyo.plot(fig, filename="IPL.html")  # also write a standalone HTML file

'IPL.html'

---

#### 4. Line Chart

Problem: Compare the season-by-season run totals of two batsmen (V Kohli and MS Dhoni).

In [35]:
# Season-by-season run totals for V Kohli
single = ipl.loc[ipl['batsman'] == "V Kohli"]
performance = single.groupby('season', as_index=False)['batsman_runs'].sum()

# Same aggregation for MS Dhoni (`single` is reused for the filtered rows)
single = ipl.loc[ipl['batsman'] == "MS Dhoni"]
performance1 = single.groupby('season', as_index=False)['batsman_runs'].sum()
performance1

Unnamed: 0,season,batsman_runs
0,2008,414
1,2009,332
2,2010,287
3,2011,392
4,2012,357
5,2013,461
6,2014,371
7,2015,372
8,2016,284
9,2017,290


In [39]:
# One line per batsman: season on x, total runs in that season on y
trace1 = go.Scatter(
    x=performance['season'],
    y=performance['batsman_runs'],
    mode='lines+markers',
    marker=dict(color="red", size=16),
    name='V Kohli',
)
trace2 = go.Scatter(
    x=performance1['season'],
    y=performance1['batsman_runs'],
    mode='lines+markers',
    marker=dict(color="yellow", size=16),
    name='MS Dhoni',
)
data = [trace1, trace2]
layout = go.Layout(
    title="Year by Year performance",
    xaxis=dict(title="Season"),
    yaxis=dict(title="Total Runs"),
)

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename="IPL.html")

'IPL.html'

---

#### 5. Multiple Line Charts

In [40]:
def batsman_comp(*name, filename="year_on_year.html"):
    """Plot season-by-season run totals for any number of batsmen on one line chart.

    Reads the notebook-level ``ipl`` DataFrame and writes the chart to an HTML
    file through ``pyo.plot``. Nothing is returned.

    Parameters
    ----------
    *name : str
        Batsman names, compared for equality against ``ipl['batsman']``.
        A name with no matching rows produces an empty trace.
    filename : str, keyword-only
        Path of the HTML file to write. Defaults to ``"year_on_year.html"``.
    """
    traces = []
    for batsman in name:
        # Total runs per season for this batsman only
        season_runs = (
            ipl[ipl['batsman'] == batsman]
            .groupby(['season'])['batsman_runs']
            .sum()
            .reset_index()
        )
        traces.append(
            go.Scatter(
                x=season_runs['season'],
                y=season_runs['batsman_runs'],
                mode='lines+markers',
                name=batsman,  # legend entry
            )
        )

    layout = go.Layout(
        title="Year by Year performance",
        xaxis={"title": "Season"},
        yaxis={"title": "Total Runs"},
    )
    fig = go.Figure(data=traces, layout=layout)
    pyo.plot(fig, filename=filename)

In [41]:
# Compare four batsmen on one chart; the plot is written by batsman_comp via pyo.plot
batsman_comp("V Kohli", "RG Sharma", "DA Warner", "CH Gayle")

---

#### 6. Bar plot

In [54]:
# Names of the ten highest run-scorers
top10 = (
    ipl.groupby('batsman')['batsman_runs']
    .sum()
    .sort_values(ascending=False)
    .head(10)
    .index
    .tolist()
)

# All delivery rows for those ten batsmen (reused by the innings breakdown)
top10_df = ipl.loc[ipl['batsman'].isin(top10)]

# Total runs per batsman, as a two-column frame ordered by name
top10score = top10_df.groupby('batsman')['batsman_runs'].sum().reset_index()
top10score

Unnamed: 0,batsman,batsman_runs
0,AB de Villiers,3486
1,CH Gayle,3651
2,DA Warner,4014
3,G Gambhir,4132
4,MS Dhoni,3560
5,RG Sharma,4207
6,RV Uthappa,3778
7,S Dhawan,3561
8,SK Raina,4548
9,V Kohli,4423


In [59]:
# One bar per batsman, bar height = total runs
trace = go.Bar(x=top10score['batsman'], y=top10score['batsman_runs'])
data = [trace]
layout = go.Layout(
    title="Top 10 IPL Batsman",
    xaxis={"title": "Batsman"},             # spelling of "Batsman" corrected
    yaxis={"title": "Batsman Total Runs"},
)

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename="IPL.html")

'IPL.html'

---

#### 6.1 Bar Graph - Overlay

In [81]:
# Runs per (batsman, inning) for the top-10 batsmen
iw = (
    top10_df.groupby(['batsman', 'inning'])['batsman_runs']
    .sum()
    .reset_index()
)

# Split into first- and second-innings rows and give the runs column a readable name
one = iw.loc[iw['inning'] == 1].rename(columns={"batsman_runs": "1st Inning"})
two = iw.loc[iw['inning'] == 2].rename(columns={"batsman_runs": "2nd Inning"})

# Side-by-side table: one row per batsman present in both innings
final = one.merge(two, on='batsman')[['batsman', '1st Inning', '2nd Inning']]
final

Unnamed: 0,batsman,1st Inning,2nd Inning
0,AB de Villiers,2128,1345
1,CH Gayle,2003,1623
2,DA Warner,2118,1896
3,G Gambhir,1699,2433
4,MS Dhoni,2232,1328
5,RG Sharma,2344,1863
6,RV Uthappa,1516,2262
7,S Dhawan,2262,1299
8,SK Raina,2647,1893
9,V Kohli,2391,2027


In [84]:
# In overlay mode both traces are drawn at the same x position, so the trace drawn
# last covers the other; reduced opacity keeps both bars visible.
trace1 = go.Bar(
    x=final['batsman'],
    y=final['1st Inning'],
    marker={"color": "#00a65a"},
    opacity=0.6,
    name='1st Inning',
)
trace2 = go.Bar(
    x=final['batsman'],
    y=final['2nd Inning'],
    marker={"color": "#a6a65a"},
    opacity=0.6,
    name='2nd Inning',
)
data = [trace1, trace2]
layout = go.Layout(
    title="Inning wise score",
    xaxis={"title": "Batsman"},  # the x axis holds batsman names, not seasons
    yaxis={"title": "Total Runs"},
    barmode="overlay",
)

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename="IPL.html")

'IPL.html'

#### 6.2 Bar Graph - Stacked

In [83]:
# Stacked bars: 2nd-innings runs sit on top of 1st-innings runs for each batsman
trace1 = go.Bar(
    x=final['batsman'],
    y=final['1st Inning'],
    marker={"color": "#00a65a"},
    name='1st Inning',
)
trace2 = go.Bar(
    x=final['batsman'],
    y=final['2nd Inning'],
    marker={"color": "#a6a65a"},
    name='2nd Inning',
)
data = [trace1, trace2]
layout = go.Layout(
    title="Inning wise score",
    xaxis={"title": "Batsman"},  # the x axis holds batsman names, not seasons
    yaxis={"title": "Total Runs"},
    barmode="stack",
)

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename="IPL.html")

'IPL.html'

#### 6.3 Bar Graph - Grouped

In [None]:
# Grouped bars: the two innings are drawn next to each other for each batsman
trace1 = go.Bar(
    x=final['batsman'],
    y=final['1st Inning'],
    marker={"color": "#00a65a"},
    name='1st Inning',
)
trace2 = go.Bar(
    x=final['batsman'],
    y=final['2nd Inning'],
    marker={"color": "#a6a65a"},
    name='2nd Inning',
)
data = [trace1, trace2]
layout = go.Layout(
    title="Inning wise score",
    xaxis={"title": "Batsman"},  # the x axis holds batsman names, not seasons
    yaxis={"title": "Total Runs"},
    barmode="group",  # Plotly's default, stated explicitly to contrast with overlay/stack
)

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename="IPL.html")

---

#### 7. Bubble Plot

In [114]:
# Keep only the deliveries hit for six (this narrows new_ipl itself)
new_ipl = new_ipl.loc[new_ipl['batsman_runs'] == 6]

# Number of sixes per batsman
six = (
    new_ipl.groupby('batsman')['batsman_runs']
    .count()
    .reset_index()
    .rename(columns={'batsman_runs': "no_of_six"})
)

# Attach the six counts to the average / strike-rate table. The strike rate is stored
# under 'batsman_runs' in `avg`, so it is given a descriptive name here.
x = avg.merge(six, on='batsman').rename(columns={'batsman_runs': "strike_rate"})
x

Unnamed: 0,batsman,avg,strike_rate,no_of_six
0,AB de Villiers,38.307692,145.129059,158
1,AC Gilchrist,27.223684,133.054662,92
2,AJ Finch,27.186441,126.299213,59
3,AM Rahane,33.593407,117.486549,60
4,AT Rayudu,27.146067,123.014257,79
5,BB McCullum,28.112245,126.318203,124
6,BJ Hodge,33.333333,121.422376,43
7,CH Gayle,41.022472,144.194313,266
8,DA Miller,34.733333,137.709251,78
9,DA Warner,40.14,138.318401,160


In [113]:
# Bubble chart: position = (average, strike rate), bubble size = number of sixes.
# Raw six counts used directly as pixel diameters make the largest bubbles swamp
# the plot, so sizes are scaled by area with the largest bubble capped at
# MAX_BUBBLE_PX (Plotly's recommended sizeref formula).
MAX_BUBBLE_PX = 40
trace = go.Scatter(
    x=x['avg'],
    y=x['strike_rate'],
    mode='markers',
    marker={
        "size": x['no_of_six'],
        "sizemode": "area",
        "sizeref": 2.0 * x['no_of_six'].max() / (MAX_BUBBLE_PX ** 2),
        "sizemin": 4,  # keep batsmen with few sixes visible
    },
    text=x['batsman'],  # batsman name shown on hover
)
data = [trace]
layout = go.Layout(
    title="Bubble Chart",
    xaxis={"title": "Average"},
    yaxis={"title": "Strike Rate"},
)

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename="IPL.html")

'IPL.html'

---

#### 8. Box plot

In [115]:
# Total runs scored in each match (summed over all its deliveries)
match_agg = delivery.groupby('match_id')['total_runs'].sum().reset_index()

# Attach the season of each match and keep only the columns needed for the box plots
season_wise = match_agg.merge(match, left_on='match_id', right_on='id')
season_wise = season_wise[['match_id', 'total_runs', 'season']]

In [123]:
# Horizontal box plot of per-match total runs across every season
trace = go.Box(
    x=season_wise['total_runs'],
    name='All Seasons',
    marker=dict(color='#00a65a'),
)
data = [trace]
layout = go.Layout(
    title='Total Score Analysis',
    xaxis=dict(title="Total Score"),
)

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename="IPL.html")

'IPL.html'

In [131]:
# Per-match totals for two seasons, one box per season
trace1 = go.Box(
    x=season_wise.loc[season_wise['season'] == 2008, 'total_runs'],
    name='2008',
    marker=dict(color='#00a65a'),
)
trace2 = go.Box(
    x=season_wise.loc[season_wise['season'] == 2017, 'total_runs'],
    name='2017',
    marker=dict(color='red'),
)
data = [trace1, trace2]
layout = go.Layout(
    title='Total Score Analysis',
    xaxis=dict(title="Total Score"),
)

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename="IPL.html")

'IPL.html'

---

#### 9. Distplots

In [132]:
import plotly.figure_factory as ff

In [136]:
# Two distributions on one figure: batting average and strike rate
# (the strike rate is stored in avg['batsman_runs']).
group_labels = ['Average', 'Strike Rate']
hist_data = [avg['avg'], avg['batsman_runs']]

# bin_size gives one bin width per group, in the same order as hist_data
fig = ff.create_distplot(hist_data, group_labels, bin_size=[5, 10])
pyo.plot(fig)

'temp-plot.html'

---

#### 10. Histogram

In [152]:
# Batsmen with more than 150 delivery rows
x = (
    delivery.groupby('batsman')['batsman_runs']
    .count()
    .loc[lambda faced: faced > 150]
    .index
    .tolist()
)

new = delivery.loc[delivery['batsman'].isin(x)]

# Strike rate per batsman: runs per 100 delivery rows
runs = new.groupby('batsman')['batsman_runs'].sum()
balls = new.groupby('batsman')['batsman_runs'].count()

sr = (
    (runs / balls * 100)
    .reset_index()
    .rename(columns={'batsman_runs': 'strike_rate'})
)
sr

Unnamed: 0,batsman,strike_rate
0,A Ashish Reddy,142.857143
1,A Mishra,89.005236
2,A Symonds,124.711908
3,AA Jhunjhunwala,99.541284
4,AB Agarkar,111.875000
...,...,...
157,Y Nagar,105.166052
158,Y Venugopal Rao,113.872832
159,YK Pathan,140.751445
160,YV Takawale,104.918033


In [157]:
# Distribution of strike rates in bins of width 2.
# Only the bin width is pinned; the bin range is left to Plotly so that every
# strike rate is counted. A fixed 50-120 range would leave out the batsmen
# striking above 120, which the table contains (e.g. ~142).
trace = go.Histogram(x=sr['strike_rate'], xbins={'size': 2})
data = [trace]
layout = go.Layout(
    title="Strike Rate Analysis",
    xaxis={"title": "Strike Rates"},
)

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

---

#### 11. Heatmap

In [180]:
# Count of sixes per (batting team, over); the count is stored in 'batsman_runs'
six = (
    delivery.loc[delivery['batsman_runs'] == 6]
    .groupby(['batting_team', 'over'])['batsman_runs']
    .count()
    .reset_index()
)

# Count of deliveries with zero batsman runs per (batting team, over)
dot = (
    delivery.loc[delivery['batsman_runs'] == 0]
    .groupby(['batting_team', 'over'])['batsman_runs']
    .count()
    .reset_index()
)

In [165]:
# Heatmap of sixes: team on x, over on y, colour = number of sixes
trace = go.Heatmap(
    x=six['batting_team'],
    y=six['over'],
    z=six['batsman_runs'],
)
data = [trace]
layout = go.Layout(title='Six Heatmap')
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)

'temp-plot.html'

In [181]:
# plotly.tools.make_subplots and Figure.append_trace are deprecated;
# plotly.subplots.make_subplots and add_trace(row=, col=) are their replacements.
from plotly.subplots import make_subplots

# Colour = count per (team, over): sixes on the left, zero-run deliveries on the right
trace1 = go.Heatmap(x=six['batting_team'], y=six['over'], z=six['batsman_runs'].values.tolist())
trace2 = go.Heatmap(x=dot['batting_team'], y=dot['over'], z=dot['batsman_runs'].values.tolist())

# Two side-by-side panels sharing the y axis (over number)
fig = make_subplots(rows=1, cols=2, subplot_titles=["6's", "0's"], shared_yaxes=True)
fig.add_trace(trace1, row=1, col=1)
fig.add_trace(trace2, row=1, col=2)

pyo.plot(fig)

'temp-plot.html'