In [1]:
# importing libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the match-level table and the ball-by-ball table from the working directory.
matches, deli = (pd.read_csv(path) for path in ("matches.csv", "deliveries.csv"))

# Question 1:

In [None]:
# every fixture in which both team1 and team2 are one of MI / KKR
mi_kkr = matches[
    matches[["team1", "team2"]]
    .isin(["Mumbai Indians", "Kolkata Knight Riders"])
    .all(axis=1)
]

In [None]:
# head-to-head win tally across all MI vs KKR fixtures
mi_kkr["winner"].value_counts()

In [None]:
# head-to-head win tally restricted to fixtures played in Kolkata
mi_kkr.loc[mi_kkr["city"] == "Kolkata", "winner"].value_counts()

MI has had an overwhelming upper hand over KKR, even at Eden Gardens.

In [None]:
# every match featuring MI, whether listed as team1 or as team2
mi = matches[matches[["team1", "team2"]].eq("Mumbai Indians").any(axis=1)]

In [None]:
# fraction of MI's matches in which MI is recorded as the winner
len(mi.query("winner == 'Mumbai Indians'")) / len(mi)

In [None]:
# every match featuring KKR, whether listed as team1 or as team2
kkr = matches[matches[["team1", "team2"]].eq("Kolkata Knight Riders").any(axis=1)]

In [None]:
# fraction of KKR's matches in which KKR is recorded as the winner
len(kkr.query("winner == 'Kolkata Knight Riders'")) / len(kkr)

MI has a higher win ratio in IPL history compared to KKR.

In [None]:
# keep only the matches whose city is Kolkata
venue = matches.loc[matches["city"] == "Kolkata"]

venue.head()

In [None]:
# wins per team among the Kolkata matches
venue["winner"].value_counts()

In [None]:
# number of Kolkata matches in which MI appears as team1 or team2
len(venue[venue[["team1", "team2"]].eq("Mumbai Indians").any(axis=1)])

In [None]:
# number of Kolkata matches in which KKR appears as team1 or team2
len(venue[venue[["team1", "team2"]].eq("Kolkata Knight Riders").any(axis=1)])

In [None]:
# Win ratios at the Kolkata venue, derived from `venue` rather than typed in
# by hand, so the figures stay correct if the dataset changes or the notebook
# is re-run on refreshed data.

# win ratio of KKR at Eden Gardens Stadium
kkr_at_venue = venue.query("team1 == 'Kolkata Knight Riders' or team2 == 'Kolkata Knight Riders'")
print(len(kkr_at_venue[kkr_at_venue.winner == "Kolkata Knight Riders"]) / len(kkr_at_venue))

# win ratio of MI at Eden Gardens Stadium
mi_at_venue = venue.query("team1 == 'Mumbai Indians' or team2 == 'Mumbai Indians'")
print(len(mi_at_venue[mi_at_venue.winner == "Mumbai Indians"]) / len(mi_at_venue))

## Explanation:

Considering IPL's past records, MI has a better chance than KKR of winning today's match. Like Abu Dhabi, the venue of Kolkata offers a bowling pitch, so the performance of the teams at Eden Gardens has also been studied. It should also be noted that the average innings score at the Abu Dhabi stadium this season has been around 150-160, yet MI is the only team that has managed to score above 190, and it has done so three times.

# Question 2:

In [3]:
# ball-by-ball rows in which MI is the batting side
mi_bat = deli[deli["batting_team"] == "Mumbai Indians"]

In [None]:
# MI batting rows whose over number is one of 6, 7, ..., 15
mi_6_15 = mi_bat[mi_bat["over"].isin(range(6, 16))]

In [None]:
# MI wickets lost in overs 6-15, one entry per match (indexed by match_id).
# `count()` tallies the non-null `player_dismissed` values in each group, so a
# match in which MI lost no wicket in this phase is kept with a count of 0.
# Pre-filtering on notnull() dropped those matches entirely, which removed the
# zero bucket from the distribution and pushed the mean/median upwards.
wickets_ipl = mi_6_15.groupby("match_id")["player_dismissed"].count()

In [None]:
# bar chart of how often each wickets-per-match value occurs.
# The series is passed as `x=`: newer seaborn releases accept only `data`
# positionally, so a bare positional series is no longer interpreted as x.
sns.countplot(x=wickets_ipl)
plt.title("Wickets lost per match by MI in IPL history in 6-15 overs")
plt.xlabel("No. of wickets")
plt.ylabel("Frequency")
plt.show()

In [None]:
# number of matches observed for each wickets-lost value (overs 6-15, MI batting)
wickets_ipl.value_counts()

In [None]:
# mean on the first line, median on the second
print(wickets_ipl.mean(), wickets_ipl.median(), sep="\n")
In [None]:
# per-match MI wickets in overs 6-15 when KKR is bowling, tallied by wicket count
(
    mi_6_15[mi_6_15["bowling_team"] == "Kolkata Knight Riders"]
    .groupby("match_id")["player_dismissed"]
    .count()
    .value_counts()
)

In [None]:
# overs 6-15 rows in which the striker is one of the listed MI batsmen
batsmen = mi_6_15[mi_6_15["batsman"].isin([
    "RG Sharma", "Q de Kock", "SA Yadav", "KA Pollard",
    "Ishan Kishan", "KH Pandya", "HH Pandya",
])]

In [None]:
# per-match dismissals (overs 6-15) on balls faced by the listed batsmen, tallied by count
(
    batsmen.dropna(subset=["player_dismissed"])
    .groupby("match_id")["player_dismissed"]
    .count()
    .value_counts()
)

In [None]:
# expose the match key under the name "match_id" so it can be merged on later
venue = venue.rename({"id": "match_id"}, axis="columns")

In [None]:
# MI wickets in overs 6-15 for the matches present in `venue`, tallied by wicket count
pd.merge(wickets_ipl.reset_index(), venue, on="match_id")["player_dismissed"].value_counts()

## Explanation:

MI is expected to lose 0-2 wickets in overs 6-15 of today's match. This estimate is based solely on the team's past performances in the IPL.

# Question 3:

In [None]:
# ball-by-ball rows in which KKR is the batting side
kkr_deli = deli.query("batting_team == 'Kolkata Knight Riders'")

len(kkr_deli)

In [None]:
# number of KKR deliveries per match on which the batsman scored exactly 4
runs_4 = kkr_deli.query("batsman_runs == 4").groupby("match_id")["batsman_runs"].count()

In [None]:
# distribution of KKR's 4s per match over the fixed bin edges below
plt.hist(runs_4, bins=[0, 4, 8, 12, 25])
plt.gca().set(
    title="No. of 4s per match by KKR in IPL history",
    xlabel="No. of 4s",
    ylabel="Frequency",
)
plt.show()

In [None]:
# number of 4s per match hit by KKR when MI is the bowling side
runs_4_mi = (
    kkr_deli[(kkr_deli["bowling_team"] == "Mumbai Indians") & (kkr_deli["batsman_runs"] == 4)]
    .groupby("match_id")["batsman_runs"]
    .count()
)

In [None]:
# distribution of KKR's 4s per match against MI over the fixed bin edges below
plt.hist(runs_4_mi, bins=[0, 4, 8, 12, 25])
plt.gca().set(
    title="No. of 4s per match by KKR against MI",
    xlabel="No. of 4s",
    ylabel="Frequency",
)
plt.show()

## Explanation:

KKR has managed to hit more than 13 4s in the majority of the IPL matches it has played, even against MI. Hence, KKR can be expected to hit more than 13 4s today.

# Question 4:

In [None]:
# ball-by-ball rows where both the bowling and the batting side are one of MI / KKR
mi_kkr_deli = deli[
    deli[["bowling_team", "batting_team"]]
    .isin(["Mumbai Indians", "Kolkata Knight Riders"])
    .all(axis=1)
]
print(len(mi_kkr_deli))

In [None]:
# batsman runs accumulated in each MI vs KKR match (both innings combined).
# The column is selected *before* aggregating: summing the whole frame also
# tries to aggregate the string columns, which newer pandas no longer drops
# silently, and it does needless work on columns that are never used.
# NOTE(review): `batsman_runs` excludes extras; switch to `total_runs` if the
# match total including extras is what is wanted here.
total_runs_match = mi_kkr_deli.groupby("match_id")["batsman_runs"].sum()

total_runs_match.mean()

In [None]:
# distribution of per-match runs; the first bin starts at the smallest observed total
plt.hist(total_runs_match, bins=[min(total_runs_match), 320, 340, 360, 370])
plt.gca().set(
    title="Total runs per MI vs KKR match in IPL history",
    xlabel="No. of runs",
    ylabel="Frequency",
)
plt.show()

In [None]:
# batsman runs scored by KKR in each match, followed by the median across matches.
# The column is selected before summing so that the string columns of the
# deliveries table are never aggregated (newer pandas no longer drops them).
total_runs_kkr = kkr_deli.groupby("match_id")["batsman_runs"].sum()

total_runs_kkr.median()

In [None]:
# ball-by-ball rows in which MI is the batting side
mi_deli = deli.query("batting_team == 'Mumbai Indians'")

len(mi_deli)

In [None]:
# batsman runs scored by MI in each match, followed by the median across matches.
# The column is selected before summing so that the string columns of the
# deliveries table are never aggregated (newer pandas no longer drops them).
total_runs_mi = mi_deli.groupby("match_id")["batsman_runs"].sum()

total_runs_mi.median()

In [None]:
# batsman runs per match, restricted to the matches listed in `venue`.
# Only `batsman_runs` is summed (aggregating the whole frame would also try to
# sum the string columns); reset_index() turns match_id into an ordinary
# column so the merge key is explicit on both sides.
# Relies on `venue` exposing the match key as "match_id".
runs_at_kol = (
    deli.groupby("match_id")["batsman_runs"]
    .sum()
    .reset_index()
    .merge(venue, on="match_id")["batsman_runs"]
)

In [None]:
# distribution of per-match runs at Kolkata.
# The first bin edge comes from `runs_at_kol` itself; it previously reused the
# minimum of the MI-vs-KKR series, so Kolkata matches with a lower total than
# that were silently left out of the plot.
plt.hist(runs_at_kol, bins=[runs_at_kol.min(), 320, 340, 360, 370])
plt.title("Total runs per match at the venue of Kolkata")
plt.xlabel("No. of runs")
plt.ylabel("Frequency")
plt.show()

## Explanation:

Considering the fact that Abu Dhabi offers a bowling pitch, the total no. of runs in the match should not be expected to cross 320.

# Question 5:

In [None]:
# balls on which RG Sharma is the striker, and the subset that ended in a dismissal
rohit = deli.query("batsman == 'RG Sharma'")
rohit_out = rohit.dropna(subset=["player_dismissed"])

In [None]:
# frequency of each dismissal kind across all recorded dismissals.
# Passed as `x=` because newer seaborn releases accept only `data` positionally.
sns.countplot(x=rohit_out.dismissal_kind)
plt.title("Dismissals of Rohit Sharma")
plt.xticks(rotation=90)
plt.show()

In [None]:
# frequency of each dismissal kind on balls bowled by KKR.
# Passed as `x=` because newer seaborn releases accept only `data` positionally.
sns.countplot(
    x=rohit.query(
        "bowling_team == 'Kolkata Knight Riders' and player_dismissed.notnull()"
    ).dismissal_kind
)
plt.title("Dismissals of Rohit Sharma against KKR")
plt.xticks(rotation=90)
plt.show()

In [None]:
# frequency of each dismissal kind over the last 20 rows of `rohit_out`
# (i.e. 20 dismissals, in the order they appear in the deliveries table).
# The title now states what is plotted; it previously said "last 50 matches".
# Passed as `x=` because newer seaborn releases accept only `data` positionally.
sns.countplot(x=rohit_out.dismissal_kind.tail(20))
plt.title("Rohit Sharma's 20 most recent dismissals")
plt.xticks(rotation=90)
plt.show()

## Explanation:

Rohit Sharma has been caught out the majority of the time. Even among his 20 most recent IPL dismissals, 15 were catches. Hence, Rohit Sharma can be expected to be caught out today as well.

In [6]:
# per-match, per-batsman sums of the numeric delivery columns for MI's batting.
# numeric_only=True keeps the result limited to numeric columns; without it,
# newer pandas also tries to "sum" the string columns instead of dropping them.
df = mi_bat.groupby(["match_id", "batsman"], as_index=False).sum(numeric_only=True)

In [20]:
# total batsman runs per batsman for MI innings bowled by KKR, ascending.
# Only `batsman_runs` is aggregated; summing the whole frame would also try to
# aggregate the string columns of the deliveries table.
(
    deli.query("batting_team=='Mumbai Indians' and bowling_team=='Kolkata Knight Riders'")
    .groupby("batsman")["batsman_runs"]
    .sum()
    .sort_values()
)

batsman
MK Pandey            0
R McLaren            0
DJ Thornely          1
RD Chahar            1
Z Khan               2
BB Sran              3
MEK Hussey           3
RP Singh             3
AJ Finch             5
TL Suman             6
STR Binny            8
SL Malinga           8
MG Johnson          10
AM Nayar            13
AM Rahane           14
GR Napier           15
CM Gautam           15
MJ McClenaghan      20
BCJ Cutting         25
LMP Simmons         29
R Sathish           32
Q de Kock           32
AP Tare             34
S Dhawan            45
RV Uthappa          46
N Rana              50
AC Blizzard         51
KD Karthik          62
Ishan Kishan        63
DJ Bravo            72
JEC Franklin        74
Harbhajan Singh     74
CJ Anderson         75
E Lewis             77
HH Gibbs            79
JC Buttler          84
PA Patel            89
JP Duminy          104
KH Pandya          108
DR Smith           109
ST Jayasuriya      124
SS Tiwary          132
AS Yadav           171
KA 

In [12]:
# display the per-match, per-batsman sums built from `mi_bat` in the `df = ...` cell
df

Unnamed: 0,match_id,batsman,inning,over,ball,is_super_over,wide_runs,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs
0,2,AT Rayudu,12,113,38,0,0,0,1,0,0,10,1,11
1,2,HH Pandya,17,316,62,0,2,1,0,0,0,35,3,38
2,2,JC Buttler,19,75,75,0,0,0,0,0,0,38,0,38
3,2,KA Pollard,17,277,62,0,0,0,0,0,0,27,0,27
4,2,KH Pandya,6,77,20,0,2,0,0,0,0,3,2,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1407,11415,KH Pandya,7,87,24,0,0,0,0,0,0,7,0,7
1408,11415,MJ McClenaghan,2,38,11,0,0,0,0,0,0,0,0,0
1409,11415,Q de Kock,18,54,64,0,1,0,0,0,0,30,1,31
1410,11415,RD Chahar,2,38,7,0,0,0,0,0,0,0,0,0
