In [None]:
# import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# load the two CSV tables from the working directory:
# `matches` from matches.csv and `deli` from deliveries.csv
matches, deli = (pd.read_csv(csv_name) for csv_name in ("matches.csv", "deliveries.csv"))

# Question 1:

In [None]:
# head-to-head fixtures: rows where team1 and team2 are both one of CSK / RR
csk_rr = matches.loc[
    lambda m: m["team1"].isin(["Chennai Super Kings", "Rajasthan Royals"])
    & m["team2"].isin(["Chennai Super Kings", "Rajasthan Royals"])
]

In [None]:
# number of head-to-head fixtures recorded for each value of `winner`
csk_rr["winner"].value_counts()

In [None]:
# every fixture with CSK on either side, followed by how many such fixtures exist
csk = matches.loc[
    lambda m: m["team1"].eq("Chennai Super Kings") | m["team2"].eq("Chennai Super Kings")
]
len(csk)

In [None]:
# tally of the `winner` column across all CSK fixtures
csk["winner"].value_counts()

In [None]:
# CSK win ratio: fraction of the rows in `csk` whose winner is CSK.
# Computed from the data rather than from hand-typed counts, so the figure
# cannot drift from the table if the dataset is refreshed.
# Rows with any other (or a missing) winner count as non-wins and stay in the denominator.
csk["winner"].eq("Chennai Super Kings").mean()

In [None]:
# every fixture with RR on either side, followed by how many such fixtures exist
rr = matches.loc[
    lambda m: m["team1"].eq("Rajasthan Royals") | m["team2"].eq("Rajasthan Royals")
]
len(rr)

In [None]:
# tally of the `winner` column across all RR fixtures
rr["winner"].value_counts()

In [None]:
# RR win ratio: fraction of the rows in `rr` whose winner is RR.
# Computed from the data rather than from hand-typed counts, so the figure
# cannot drift from the table if the dataset is refreshed.
# Rows with any other (or a missing) winner count as non-wins and stay in the denominator.
rr["winner"].eq("Rajasthan Royals").mean()

# Explanation:

Solely based on historical data, it seems that CSK will win today's match.

# Question 2:

In [None]:
# deliveries on which RR is the batting team
rr_bat = deli.loc[deli["batting_team"].eq("Rajasthan Royals")]

In [None]:
# keep only RR batting deliveries whose `over` value is one of 1..6
rr_1_6 = rr_bat.loc[rr_bat["over"].isin(list(range(1, 7)))]

In [None]:
# runs scored by RR in overs 1-6, one total per match (Series indexed by match_id).
# Select `total_runs` before aggregating so only that column is summed, instead of
# summing every column of the frame and discarding all but one.
runs = rr_1_6.groupby("match_id")["total_runs"].sum()

In [None]:
# histogram of the per-match totals; bin edges at 0 / 29 / 39 / 49 and the largest total
fig, ax = plt.subplots()
ax.hist(runs, bins=[0, 29, 39, 49, max(runs)])
ax.set_title("Runs scored per match by RR in IPL history in 1-6 overs")
ax.set_xlabel("No. of runs")
ax.set_ylabel("Frequency")
plt.show()

In [None]:
# mean, then median, of the per-match totals (one value per line)
print(runs.mean(), runs.median(), sep="\n")

In [None]:
# runs scored by RR in overs 1-6 when CSK is the bowling team, one total per match.
# `total_runs` is selected before aggregating so only that column is summed.
runs_csk = (
    rr_1_6.query("bowling_team == 'Chennai Super Kings'")
    .groupby("match_id")["total_runs"]
    .sum()
)

In [None]:
# histogram of RR's per-match totals against CSK.
# NOTE(review): the last bin edge is max(runs), i.e. taken from `runs`, not `runs_csk`.
# This relies on `runs` still holding the all-opponents totals at this point
# (the name is reassigned further down the notebook) - confirm this is intended.
plt.hist(runs_csk, bins=[0, 29, 39, 49, max(runs)])
plt.gca().set(
    title="Runs scored per match by RR against CSK in IPL history in 1-6 overs",
    xlabel="No. of runs",
    ylabel="Frequency",
)
plt.show()

# Explanation:

RR hasn't been able to perform as well as usual against CSK. Hence, it will score 40-49 runs in the first 6 overs of the match.

# Question 3:

In [None]:
# deliveries whose `bowler` is "DL Chahar" (Deepak Chahar)
chahar = deli.loc[deli["bowler"].eq("DL Chahar")]

In [None]:
# total_runs summed over Chahar's deliveries, one row per match
# (DataFrame with columns match_id, total_runs).
# The column is selected before aggregating so only `total_runs` is summed.
# NOTE(review): `total_runs` is used as-is; whether it includes runs that are not
# charged to the bowler depends on the dataset - confirm.
runs = chahar.groupby("match_id", as_index=False)["total_runs"].sum()

In [None]:
# overs bowled per match: number of distinct `over` values among Chahar's
# deliveries in each match (DataFrame with columns match_id, overs).
# `nunique` states this directly and does not depend on how `value_counts`
# names its result, which differs between pandas versions.
# NOTE(review): every over he bowled in counts as one whole over here, whether
# or not it was completed - confirm that is acceptable for the economy figure.
overs = (
    chahar.groupby("match_id")["over"]
    .nunique()
    .rename("overs")
    .reset_index()
)

In [None]:
# join the per-match runs and per-match overs on match_id
economy = pd.merge(runs, overs, on="match_id")

In [None]:
# per-match economy = total_runs divided by overs
economy["economy"] = economy["total_runs"].div(economy["overs"])

In [None]:
# mean, then median, of the per-match economy (one value per line)
print(economy["economy"].mean(), economy["economy"].median(), sep="\n")

In [None]:
# histogram of the per-match economy; bin edges at 0 / 6 / 7.3 / 8.2 and the largest value
plt.hist(economy["economy"], bins=[0, 6, 7.3, 8.2, max(economy["economy"])])
plt.gca().set(
    title="Economy of Deepak Chahar in IPL history",
    xlabel="Economy",
    ylabel="Frequency",
)
plt.show()

In [None]:
# rename the `id` column of `matches` to `match_id` so it can be merged
# with frames keyed on match_id
matches = matches.rename({"id": "match_id"}, axis="columns")

In [None]:
# economy values for the matches in which RR is team1 or team2
(
    economy
    .merge(matches, on="match_id")
    .loc[
        lambda m: m["team1"].eq("Rajasthan Royals") | m["team2"].eq("Rajasthan Royals"),
        "economy",
    ]
)

# Explanation:

Based on Deepak Chahar's previous performances and his form in the current season, his economy will lie in the range 7.31-8.2 in today's match.

# Question 4:

In [None]:
# deliveries whose `bowler` is "J Archer" (Jofra Archer)
archer = deli.loc[deli["bowler"].eq("J Archer")]

In [None]:
# Archer's deliveries in the death overs: `over` is one of 16..20
archer_death = archer.loc[archer["over"].isin(list(range(16, 21)))]

In [None]:
# per match, the number of Archer's death-over deliveries with a non-null `player_dismissed`
# NOTE(review): this counts every recorded dismissal on his deliveries; if the data
# also records dismissals not credited to the bowler in this column, they are included - confirm.
wickets = archer_death.groupby("match_id")["player_dismissed"].count()

In [None]:
# bar chart of how many matches fall on each per-match wicket count.
# The Series is passed as `x=`: the only positional argument of countplot is `data`,
# so a bare positional Series is not treated as the values to count.
sns.countplot(x=wickets)
plt.title("Wickets taken by Jofra Archer in IPL history in 16-20 overs")
plt.xlabel("No. of wickets")
plt.ylabel("Frequency")
plt.show()

In [None]:
# same per-match dismissal count, restricted to deliveries where CSK is the batting team
(
    archer_death
    .loc[archer_death["batting_team"].eq("Chennai Super Kings")]
    .groupby("match_id")["player_dismissed"]
    .count()
)

# Explanation:

Previously, Jofra Archer did not take wickets in the death overs. This scenario has changed in the current season. The performance of the CSK middle order has also been unsatisfactory this season. Hence, owing to these two factors, it seems that Jofra Archer will take wickets today.

# Question 5:

In [None]:
# deliveries whose `batsman` is "SPD Smith" (Steve Smith)
smith = deli.loc[deli["batsman"].eq("SPD Smith")]

In [None]:
# batsman_runs summed over Smith's deliveries, one row per match
# (DataFrame with columns match_id, batsman_runs).
# The column is selected before aggregating so only `batsman_runs` is summed.
runs = smith.groupby("match_id", as_index=False)["batsman_runs"].sum()

In [None]:
# per match, the number of Smith's delivery rows with a non-null `ball`
# NOTE(review): every such row is counted as a ball faced; if the data contains
# deliveries that should not count towards balls faced, they are included - confirm.
balls = smith.groupby("match_id")["ball"].count().reset_index()

In [None]:
# join per-match runs and per-match balls on match_id
smith_sr = pd.merge(runs, balls, on="match_id")

In [None]:
# strike rate = runs per ball, scaled by 100
smith_sr["Strike Rate"] = smith_sr["batsman_runs"].div(smith_sr["ball"]).mul(100)

In [None]:
# descriptive statistics of the per-match strike rate
print(smith_sr.loc[:, "Strike Rate"].describe())

In [None]:
# histogram of the per-match strike rate; inner bin edges at 120 / 133 / 143,
# outer edges at the smallest and largest observed value
fig, ax = plt.subplots()
ax.hist(
    smith_sr["Strike Rate"],
    bins=[min(smith_sr["Strike Rate"]), 120, 133, 143, max(smith_sr["Strike Rate"])],
)
ax.set_title("Strike Rate of Steve Smith in IPL history")
ax.set_xlabel("Strike Rate")
ax.set_ylabel("Frequency")
plt.show()

In [None]:
# strike-rate values for the matches in which CSK is team1 or team2
sr_csk = (
    smith_sr
    .merge(matches, on="match_id")
    .loc[
        lambda m: m["team1"].eq("Chennai Super Kings") | m["team2"].eq("Chennai Super Kings"),
        "Strike Rate",
    ]
)

In [None]:
# histogram of Smith's strike rate against CSK, with inner bin edges at 120 / 133 / 143.
# The outer edges are clamped so the edge list is always increasing: if the smallest
# value is above 120 (or the largest below 143), putting the raw min/max at the ends
# would give non-monotonic bins and plt.hist would raise. Folding the fixed edges into
# min()/max() also keeps the cell from failing when `sr_csk` is empty.
# When the data spans beyond 120 and 143, the edges match [min, 120, 133, 143, max].
lower_edge = min([120, *sr_csk])
upper_edge = max([143, *sr_csk])
sr_bins = sorted({lower_edge, 120, 133, 143, upper_edge})
plt.hist(sr_csk, bins=sr_bins);
plt.title("Strike Rate of Steve Smith against CSK in IPL history")
plt.xlabel("Strike Rate")
plt.ylabel("Frequency")
plt.show()

# Explanation:

Steve Smith usually has a strike rate of less than 120. However, in the previous match, he scored 57 runs with a strike rate of 158. Taking into account his current form, a strike rate of 121-133 could be a safe estimate for him. 