In [None]:
# import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [None]:
# load the match-level table and the ball-by-ball deliveries table
matches, deli = (pd.read_csv(csv_name) for csv_name in ("matches.csv", "deliveries.csv"))

# Question 1:

In [None]:
# all matches between SRH and KXIP
srh_kxip = matches.query("team1.isin(['Deccan Chargers', 'Sunrisers Hyderabad', 'Kings XI Punjab']) and team2.isin(['Deccan Chargers', 'Sunrisers Hyderabad', 'Kings XI Punjab'])")

In [None]:
# victories of the two teams against each other 
srh_kxip.winner.value_counts()

In [None]:
# matches of SRH
srh = matches.query("team1.isin(['Deccan Chargers', 'Sunrisers Hyderabad']) or team2.isin(['Deccan Chargers', 'Sunrisers Hyderabad'])")

len(srh)

In [None]:
# count of winners in SRH matches
srh.winner.value_counts()

In [None]:
# SRH win ratio
(58+29)/183

In [None]:
# matches of KXIP
kxip = matches.query("team1 == 'Kings XI Punjab' or team2 == 'Kings XI Punjab'")

In [None]:
# KXIP win ratio
len(kxip[kxip.winner == "Kings XI Punjab"])/len(kxip)

# Question 2:

In [None]:
# all deliveries faced by David Warner
warner = deli[deli.batsman == 'DA Warner']

In [None]:
# ratio of matches in which Warner got out
len(warner[warner.player_dismissed == 'DA Warner'])/len(warner.match_id.unique())

In [None]:
# all deliveries faced by David Warner against KXIP
warner_kxip = warner[warner.bowling_team == 'Kings XI Punjab']

In [None]:
# ratio of matches against KXIP in which Warner got out
len(warner_kxip[warner_kxip.player_dismissed == 'DA Warner'])/len(warner_kxip.match_id.unique())

In [None]:
# runs scored by Warner per match
runs = pd.DataFrame(warner.groupby("match_id", as_index=False).sum()[["match_id", "batsman_runs"]])

In [None]:
# balls faced, including extras, by Warner per match
ball = pd.DataFrame(warner.groupby("match_id", as_index=False).count()[["match_id", "ball"]])

In [None]:
# extras faced by Warner per match
extra = pd.DataFrame(warner[warner.extra_runs != 0].groupby("match_id", as_index=False).count()[["match_id", "over"]])

In [None]:
# merging the two dataframes
balls = ball.set_index('match_id').join(extra.set_index('match_id')).fillna(0)

# calculating the number of balls faced after removing extras
balls['final'] = balls["ball"] - balls["over"]

In [None]:
# resetting index
balls = balls.reset_index()

In [None]:
# dataframe to calculate and store strike rate
sr = runs.merge(balls, on="match_id")

In [None]:
# displaying the dataframe
sr

In [None]:
# calculating the strike rate
sr["strike_rate"] = sr["batsman_runs"]/sr["final"]*100

sr.head()

In [None]:
# histogram 
plt.hist(sr["strike_rate"], bins=[min(sr["strike_rate"]), 125, max(sr["strike_rate"])])
plt.title("Strike Rate of David Warner")
plt.xlabel("Strike Rate")
plt.ylabel("Frequency")
plt.show()

In [None]:
plt.hist(sr.iloc[-12:][:]["strike_rate"], bins=[min(sr.iloc[-12:][:]["strike_rate"]), 125, max(sr.iloc[-12:][:]["strike_rate"])]);
plt.title("Strike Rate of David Warner in 2019")
plt.xlabel("Strike Rate")
plt.ylabel("Frequency")
plt.show()

# Question 3:

In [None]:
# deliveries faced by KL Rahul
rahul = deli[deli.batsman == "KL Rahul"]

In [None]:
# runs scored by KL Rahul
rahul_runs = rahul.groupby("match_id", as_index=False).sum()[["match_id", "batsman_runs"]]

In [None]:
# deliveries faced by KXIP
kxip = deli.query("batting_team.isin(['Kings XI Punjab'])")

In [None]:
# runs scored by KXIP
kxip_runs = kxip.groupby("match_id", as_index=False).sum()[["match_id", "total_runs"]]

In [None]:
# merging the two dataframes
runs = rahul_runs.merge(kxip_runs, on="match_id", how="inner")

In [None]:
# displaying the dataframe
runs.head()

In [None]:
# contribution of KL Rahul to the total score of KXIP
runs["contrib"] = runs["batsman_runs"]/runs["total_runs"] * 100

In [None]:
# histogram
plt.hist(runs["contrib"], bins=[0, 8, 15, 23, max(runs["contrib"])]);
plt.title("Contribution of KL Rahul to the total score of KXIP")
plt.xlabel("%")
plt.ylabel("Frequency")
plt.show()

In [None]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  
    print(runs)

In [None]:
plt.hist(runs.iloc[19:]["contrib"], bins=[0, 8, 15, 23, max(runs.iloc[19:]["contrib"])]);
plt.title("Contribution of KL Rahul to the total score of KXIP in 2019")
plt.xlabel("%")
plt.ylabel("Frequency")
plt.show()

In [None]:
# bowlers that KL Rahul has been dismissed by
rahul[rahul.player_dismissed == "KL Rahul"].bowler.value_counts()

# Question 4:

In [None]:
# function to calculate balls taken to score a fifty
def get_balls_for_50(df, target=50):
    """Count the non-extra deliveries each match needed to reach ``target`` runs.

    Parameters
    ----------
    df : pandas.DataFrame
        Ball-by-ball rows, in the order they were bowled, with ``match_id``,
        ``total_runs`` and ``extra_runs`` columns. All rows sharing a
        ``match_id`` are accumulated together, so filter to the team/innings
        of interest before calling.
    target : int, default 50
        Cumulative ``total_runs`` that has to be reached.

    Returns
    -------
    list of int
        One entry per match that reached ``target``, in order of first
        appearance of the match in ``df``: the number of deliveries up to and
        including the one that reached the target, not counting deliveries
        with ``extra_runs != 0``. Matches that never reach the target are
        left out; an empty ``df`` gives an empty list.
    """
    num_balls = []

    # sort=False keeps matches in the order they first appear in df
    for _, match in df.groupby("match_id", sort=False):

        # positions (within this match) where the running total has reached the target
        running_total = match["total_runs"].cumsum().to_numpy()
        reached = np.flatnonzero(running_total >= target)

        # the target was never reached in this match: nothing to record
        if reached.size == 0:
            continue

        # deliveries up to and including the one that reached the target;
        # positional slicing, so the result does not depend on df's index
        upto_target = match.iloc[: reached[0] + 1]

        # drop deliveries on which any extra run was recorded
        # NOTE(review): this presumably also drops byes / leg-byes, not only
        # wides and no-balls -- confirm that is intended
        num_balls.append(int((upto_target["extra_runs"] == 0).sum()))

    return num_balls

In [None]:
# all deliveries faced by SRH
srh = deli.query("batting_team.isin(['Deccan Chargers', 'Sunrisers Hyderabad'])")

In [None]:
# balls taken to score a 50 considering all the matches of SRH
num = get_balls_for_50(srh)

In [None]:
# displaying the results
num

In [None]:
# histogram
plt.hist(num, bins=[0, 30, 37, 43, max(num)])
plt.title("Balls taken to score 50 runs")
plt.xlabel("No. of runs")
plt.ylabel("Frequency")
plt.show()

In [None]:
# balls taken to score a 50 considering all the matches of SRH against KXIP
num_srh = get_balls_for_50(srh.query("bowling_team.isin(['Kings XI Punjab'])"))

# histogram
plt.hist(num_srh, bins=[0, 30, 37, 43, max(num)])
plt.title("Balls taken to score 50 runs")
plt.xlabel("No. of runs")
plt.ylabel("Frequency")
plt.show()

In [None]:
# balls taken to score a 50 considering all the matches of 2019
num_srh_2019 = get_balls_for_50(srh.query("match_id > 11000 & inning==1"))

# histogram
plt.hist(num_srh_2019, bins=[0, 30, 37, 43, max(num)])
plt.title("Balls taken to score 50 runs")
plt.xlabel("No. of runs")
plt.ylabel("Frequency")
plt.show()

In [None]:
# balls taken to score a 50 considering all the matches of 2019
num_srh_2019 = get_balls_for_50(srh.query("match_id > 11000 & inning==2"))

# histogram
plt.hist(num_srh_2019, bins=[0, 30, 37, 43, max(num)])
plt.title("Balls taken to score 50 runs")
plt.xlabel("No. of runs")
plt.ylabel("Frequency")
plt.show()

# Question 5:

In [None]:
# all deliveries faced by Chris Gayle
gayle = deli[deli.batsman == "CH Gayle"]

In [None]:
# empty list to store runs
runs = []

# iterating over all the matches played by him
for m_id in gayle.match_id.unique():
    
    # data on each match
    match = gayle[gayle.match_id == m_id]
    
    # condition to check if he has played more than 10 balls
    if len(match) >=10:
        data = match.iloc[:10]               # getting the 1st 10 balls   
    else:
        data = match.copy()            # getting entire dataframe in case he did not play 10 balls
    
    r = data.batsman_runs.sum()
    
    # add runs to the list
    runs.append(r)

In [None]:
len(runs)

In [None]:
# histogram
plt.hist(runs, bins=[0, 10, 18, 26, max(runs)])
plt.title("Runs scored in the 1st 10 balls")
plt.xlabel("Runs")
plt.ylabel("Frequency")
plt.show()

In [None]:
plt.hist(runs[-13:], bins=[0, 10, 15, 20, max(runs)]);
plt.title("Runs scored in the 1st 10 balls in 2019")
plt.xlabel("Runs")
plt.ylabel("Frequency")
plt.show()