In [1]:
import pandas as pd
import numpy as np

In [2]:
# Ball-by-ball BBL export. low_memory=False lets pandas infer each column's
# dtype from the whole file instead of chunk by chunk.
BALL_BY_BALL_CSV = "S:\\Datasets\\BBL_cricket\\ballbyball.csv"
bbl = pd.read_csv(BALL_BY_BALL_CSV, low_memory=False)

In [3]:
# Normalise every column header to lower case, then show (rows, columns).
lowercase_headers = bbl.columns.str.lower()
bbl.columns = lowercase_headers
bbl.shape

(44382, 26)

In [4]:
# Columns kept for the GROUP BY walkthrough, in display order.
required_cols = [
    'striker',
    'bowler',
    'extras_noballs',
    'runs_batsman',
    'runs_extras',
    'runs_total',
    'winning_team',
    'mom',
    'match_id',
    'match_date',
]

In [5]:
# Keep all rows but only the columns listed in required_cols.
bbl = bbl.loc[:, required_cols]

In [6]:
# Show (rows, columns) of the trimmed frame.
bbl.shape

(44382, 10)

# Group BY

### 1) COUNT Distinct Match ids

```
SELECT COUNT(DISTINCT match_id)
FROM bbl

```
o/p: 190

In [7]:
# pandas equivalent of COUNT(DISTINCT match_id): number of unique match ids.
bbl['match_id'].nunique()

190

### 2) Extract distinct match ids, showing only the first 3
```
SELECT match_id
FROM bbl
GROUP BY match_id
LIMIT 3

```
o/p: 
1023581
1023583
1023585


In [8]:
# Distinct match ids in order of first appearance; display only the first three.
distinct_match_ids = bbl['match_id'].unique()
distinct_match_ids[:3]

array([1023581, 1023583, 1023585], dtype=int64)

### 3) Matches Won by Team
```
SELECT winning_team, count(distinct match_id)
FROM bbl
GROUP BY winning_team

```
o/p: <br> 
Adelaide Strikers      21 <br> 
Brisbane Heat          19 <br> 
Hobart Hurricanes      19 <br> 
Melbourne Renegades    21 <br> 
Melbourne Stars        28 <br> 
Perth Scorchers        36 <br> 
Sydney Sixers          29 <br> 
Sydney Thunder         12

In [9]:
# One row per winning team with the number of distinct matches it won.
# NOTE(review): groupby skips rows whose key is NaN by default, whereas SQL
# GROUP BY would emit a NULL group. The counts shown sum to 185 of the 190
# matches, so presumably 5 matches have no winner -- pass dropna=False to
# groupby if those should be listed too.
(
    bbl
    .groupby('winning_team')['match_id']
    .nunique()
)

winning_team
Adelaide Strikers      21
Brisbane Heat          19
Hobart Hurricanes      19
Melbourne Renegades    21
Melbourne Stars        28
Perth Scorchers        36
Sydney Sixers          29
Sydney Thunder         12
Name: match_id, dtype: int64

### 4) TOP 4 Batters with Highest no. of matches played
```
SELECT Striker, count(distinct match_id) as played_matches
FROM bbl
GROUP BY Striker
ORDER BY 2 Desc
LIMIT 4

```

In [10]:
# Distinct matches in which each player appears as striker, largest first, top 4 only.
matches_per_striker = bbl.groupby('striker')['match_id'].nunique()
matches_per_striker.sort_values(ascending=False).head(4)

striker
LJ Wright       48
M Klinger       47
CL White        46
MC Henriques    45
Name: match_id, dtype: int64

### 4b) Average runs per match for the TOP 4 batters by matches played
```
SELECT Striker, 
       COUNT(DISTINCT match_id) as played_matches,
       SUM(runs_total) as total_runs,
       1.0 * SUM(runs_total) / COUNT(DISTINCT match_id) as Average_runs_per_match
FROM bbl
GROUP BY Striker
ORDER BY 2 Desc
LIMIT 4

```

In [11]:
# Per-striker summary: distinct matches, summed runs_total, and their ratio.
df1 = (
    bbl
    .groupby('striker')
    .agg(
        played_matches=('match_id', 'nunique'),
        total_runs=('runs_total', 'sum'),
    )
    .assign(
        average_runs_per_match=lambda per_striker: (
            per_striker['total_runs'] / per_striker['played_matches']
        )
    )
)

# ORDER BY played_matches DESC LIMIT 4
df1.sort_values(by='played_matches', ascending=False).head(4)

Unnamed: 0_level_0,played_matches,total_runs,average_runs_per_match
striker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LJ Wright,48,1391,28.979167
M Klinger,47,1447,30.787234
CL White,46,1106,24.043478
MC Henriques,45,1189,26.422222


### 5) Batsman statistics shown when they enter the field
```
SELECT 
    striker,
    sum(runs_total) as total_runs,
    count(distinct match_id) as  matches_played,
    round(1.0 *sum(runs_total) /
    count(distinct match_id),2)as  Average_Runs,
    count(case when runs_batsman >=4 then 'boundary' else null end) as boundaries,
    count(case when runs_batsman =4 then 'boundary' else null end) as fours,
    count(case when runs_batsman =6 then 'boundary' else null end) as sixes
FROM bbl
GROUP BY Striker
ORDER BY sixes DESC, striker ASC
LIMIT 10

```

In [12]:
# Flag each delivery with 0/1 indicator columns; summing them per striker
# reproduces the SQL's COUNT(CASE WHEN ... END) expressions.
ball_flags = bbl.assign(
    fours=np.where(bbl['runs_batsman'] == 4, 1, 0),
    sixes=np.where(bbl['runs_batsman'] == 6, 1, 0),
    # `> 3` is the SQL's `>= 4` for integer run values.
    boundaries=np.where(bbl['runs_batsman'] > 3, 1, 0),
)

# Named aggregation with string aliases. Passing the builtin `sum` callable to
# agg() raises a FutureWarning in pandas >= 2.1; the string form is the
# supported spelling and also names the output columns directly, so no
# follow-up rename is needed.
result = ball_flags.groupby('striker').agg(
    total_runs=('runs_total', 'sum'),
    played_matches=('match_id', 'nunique'),
    fours=('fours', 'sum'),
    sixes=('sixes', 'sum'),
    boundaries=('boundaries', 'sum'),
)

result['average_runs_per_match'] = result['total_runs'] / result['played_matches']

# ORDER BY sixes DESC, striker ASC LIMIT 10 -- `striker` is the index level,
# which sort_values accepts by name alongside regular columns.
result.sort_values(by=['sixes', 'striker'], ascending=[False, True]).head(10)

Unnamed: 0_level_0,total_runs,played_matches,fours,sixes,boundaries,average_runs_per_match
striker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
CA Lynn,1397,41,89,92,181,34.073171
AJ Finch,1654,39,148,56,204,42.410256
CH Gayle,653,20,34,49,83,32.65
BJ Hodge,1305,37,107,46,153,35.27027
NJ Maddinson,1092,44,99,43,142,24.818182
SE Marsh,1363,31,115,43,158,43.967742
DT Christian,872,41,50,40,90,21.268293
LJ Wright,1391,48,142,40,182,28.979167
CL White,1106,46,75,38,113,24.043478
BR Dunk,1114,38,95,36,131,29.315789


 ##### Author: &lt; Shafi Mohiddin Shaik&gt; <hr> LinkedIn : [Click to get redirected to Shafi's Linkedin Page]( https://www.linkedin.com/in/shafi-shaik-0279651b0/)