In [None]:
# importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# load the match-level and the ball-by-ball datasets from the working directory
matches, deli = (pd.read_csv(csv_name) for csv_name in ("matches.csv", "deliveries.csv"))

# Question 1:

In [None]:
# fixtures in which both sides come from this list (SRH, Deccan Chargers, RR)
srh_rr_teams = ['Sunrisers Hyderabad', 'Deccan Chargers', 'Rajasthan Royals']
srh_rr = matches[matches.team1.isin(srh_rr_teams) & matches.team2.isin(srh_rr_teams)]

In [None]:
# how often each side won the head-to-head fixtures
srh_rr["winner"].value_counts()

In [None]:
# every match where either side is listed as Sunrisers Hyderabad or Deccan Chargers
srh_names = ['Sunrisers Hyderabad', 'Deccan Chargers']
srh = matches[matches.team1.isin(srh_names) | matches.team2.isin(srh_names)]

# number of such matches
srh.shape[0]

In [None]:
# wins per team across the SRH matches
srh["winner"].value_counts()

In [None]:
# SRH win ratio: share of the SRH / Deccan Chargers matches won under either name.
# Computed from `srh` instead of hand-copied counts so it stays correct when the
# data changes; matches without a winner stay in the denominator.
float(srh.winner.isin(['Sunrisers Hyderabad', 'Deccan Chargers']).mean())

In [None]:
# every match where Rajasthan Royals is listed as team1 or team2
rr_involved = matches[["team1", "team2"]].eq('Rajasthan Royals').any(axis=1)
rr = matches[rr_involved]

# number of such matches
rr.shape[0]

In [None]:
# wins per team across the RR matches
rr["winner"].value_counts()

In [None]:
# RR win ratio: share of the RR matches won by Rajasthan Royals.
# Computed from `rr` instead of hand-copied counts; matches without a winner
# stay in the denominator.
float((rr.winner == 'Rajasthan Royals').mean())

# Question 2:

In [None]:
# every delivery whose bowler is Rashid Khan
rashid = deli.loc[deli["bowler"] == "Rashid Khan"]

In [None]:
# per-match record of how many balls Rashid needed for his 1st wicket (filled below)
wicket_balls = list()

In [None]:
# start from an empty list so that re-running this cell does not append duplicates
wicket_balls = []

# dismissal kinds that are not credited to the bowler
# NOTE(review): spellings assumed to match the dismissal_kind column — confirm
not_bowler_wickets = ["run out", "retired hurt", "obstructing the field"]

# iterating over all the matches, in the order in which they appear in the data
for m_id, spell in rashid.groupby("match_id", sort=False):
    # True for every delivery of the spell on which Rashid himself took a wicket
    took_wicket = (
        spell.player_dismissed.notnull()
        & ~spell.dismissal_kind.isin(not_bowler_wickets)
    ).to_numpy()

    if took_wicket.any():
        # 0-based position, within his deliveries of the match, of the 1st wicket ball
        wicket_balls.append(int(took_wicket.argmax()))
    else:
        # in case of matches where he did not get any wicket, add the placeholder value 100
        # this value is impossible since a bowler bowls only 4 overs at max and will be removed later
        wicket_balls.append(100)

In [None]:
# drop the placeholder entries (100) that mark wicketless matches
wicket_balls1 = list(filter(lambda balls_taken: balls_taken < 100, wicket_balls))

In [None]:
# histogram of the balls needed for the 1st wicket, drawn on the current axes
ax = plt.gca()
ax.hist(wicket_balls1, bins=[0, 9, 17, 25])
ax.set_title("Balls taken to get the 1st wicket in IPL")
ax.set_xlabel("No. of balls")
ax.set_ylabel("Frequency")
plt.show()

In [None]:
# getting the records for IPL 2019
# NOTE(review): the slice assumes Rashid's last 15 matches in the data are the
# 2019 season — confirm against the season column of the matches data
# 100 is the placeholder stored for wicketless matches, so filter on the same
# threshold that is used for the all-seasons list
wicket_balls2 = [b for b in wicket_balls[-15:] if b < 100]

In [None]:
# histogram of the 2019 subset, drawn on the current axes
ax = plt.gca()
ax.hist(wicket_balls2, bins=[0, 9, 17, 25])
ax.set_title("Balls taken to get the 1st wicket in IPL 2019")
ax.set_xlabel("No. of balls")
ax.set_ylabel("Frequency")
plt.show()

# Question 3:

In [None]:
# all deliveries faced by David Warner
warner = deli[deli.batsman == "DA Warner"]
# deliveries on which David Warner himself was dismissed; matching on
# player_dismissed (rather than "any wicket while he was on strike") leaves out
# his partners' run outs and keeps run outs he suffered at the non-striker's end
warner_out = deli[deli.player_dismissed == "DA Warner"]

In [None]:
# bar plot of the ways in which David Warner has been dismissed
# the column is passed as `x=` because recent seaborn versions no longer accept
# the plotted variable as a positional argument
sns.countplot(x=warner_out.dismissal_kind)
plt.title("Dismissals of David Warner")
plt.xticks(rotation=90)
plt.show()

In [None]:
# bar plot of David Warner's dismissals while Rajasthan Royals were bowling
# rows are selected on player_dismissed so only his own dismissals are counted
warner_out_rr = deli[(deli.player_dismissed == "DA Warner") & (deli.bowling_team == 'Rajasthan Royals')]
# `x=` keyword: recent seaborn versions do not accept the variable positionally
sns.countplot(x=warner_out_rr.dismissal_kind)
plt.title("Dismissals of David Warner against RR")
plt.xticks(rotation=90)
plt.show()

In [None]:
# bar plot of David Warner's dismissals in his last 20 matches
# the last 20 matches are the last 20 distinct match ids in which he faced a ball
# (slicing the dismissal rows directly would give the last 20 dismissals instead)
# NOTE(review): assumes the deliveries are stored in chronological order — confirm
last_20_matches = warner.match_id.unique()[-20:]
# `x=` keyword: recent seaborn versions do not accept the variable positionally
sns.countplot(x=warner_out.dismissal_kind[warner_out.match_id.isin(last_20_matches)])
plt.title("Dismissals of David Warner in the last 20 matches")
plt.xticks(rotation=90)
plt.show()

# Question 4:

In [None]:
# RR batsmen mapped to the 0-based batting positions accepted for each of them
batsmen = {
    "BA Stokes": [0, 1],
    "RV Uthappa": [0, 1],
    "SV Samson": [2],
    "SPD Smith": [3],
    "JC Buttler": [4],
    "R Tewatia": [5],
}

In [None]:
# dictionary to store the runs scored in each IPL match by each batsman
b_runs = {}

# iterating over the batsmen
for b in batsmen:

    # runs scored in each match: pick the runs column before aggregating so that
    # only that column is summed (not every column of the frame, strings included)
    runs = deli[deli.batsman == b].groupby("match_id")["batsman_runs"].sum().tolist()
    # adding to the dictionary
    b_runs[b] = runs

In [None]:
# displaying the dictionary (batsman name -> list of runs scored per match)
b_runs

In [None]:
# median and mean of the per-match scores, printed batsman by batsman
for name, scores in b_runs.items():
    print(name)                  # batsman
    print(np.median(scores))     # median score
    print(np.mean(scores))       # mean score

In [None]:
# function to get the runs made by the batsmen at their current batting position
def get_runs(inn, deliveries=None, lineup=None):
    """Collect, per batsman, the runs of every match in which he batted at a listed position.

    The batting order of an innings is rebuilt from the order in which names
    first appear as striker / non-striker over all of its deliveries, so an
    innings without any wicket is handled and no row outside the innings is read.

    Parameters
    ----------
    inn : int
        Innings to analyse (1 or 2).
    deliveries : pandas.DataFrame, optional
        Ball-by-ball data with the columns ``match_id``, ``inning``, ``batsman``,
        ``non_striker`` and ``batsman_runs``, in playing order. Defaults to the
        notebook-level ``deli`` frame.
    lineup : dict, optional
        Batsman name -> list of accepted 0-based batting positions. Defaults to
        the notebook-level ``batsmen`` dictionary.

    Returns
    -------
    dict
        Batsman name -> list with one entry per qualifying match: the runs he
        scored in innings ``inn`` of that match. The list is empty when he
        never batted at a listed position in that innings.
    """
    # fall back to the notebook-level data when nothing is passed explicitly
    if deliveries is None:
        deliveries = deli
    if lineup is None:
        lineup = batsmen

    # only the requested innings is relevant for both the order and the runs
    innings = deliveries[deliveries.inning == inn]

    # dictionary to store the runs (local name, distinct from the notebook's b_runs)
    position_runs = {}

    # iterating over the batsmen
    for batsman in lineup:
        runs = []                  # runs of this batsman, one entry per qualifying match
        print(batsman)             # name of the batsman
        # deliveries faced by him in the given innings
        faced = innings[innings.batsman == batsman]

        # iterating over all the matches in which he faced a ball in that innings
        for m_id in faced.match_id.unique():
            match_innings = innings[innings.match_id == m_id]

            # striker and non-striker of every ball, read row by row; dict.fromkeys
            # keeps the first occurrence of each name, i.e. the batting order
            at_crease = match_innings[["batsman", "non_striker"]].to_numpy().ravel().tolist()
            order = list(dict.fromkeys(at_crease))

            # if the batsman has batted at the given position, add his runs
            # (summed over the requested innings only)
            if order.index(batsman) in lineup[batsman]:
                runs.append(faced.loc[faced.match_id == m_id, "batsman_runs"].sum())

        # add the data to the dictionary
        position_runs[batsman] = runs

    # return the dictionary
    return position_runs

In [None]:
# runs scored at the listed batting positions when batting in the 1st innings
b_runs1 = get_runs(inn=1)

In [None]:
# display the result of get_runs(1): batsman name -> list of runs, 1st innings
b_runs1

In [None]:
# median and mean of the 1st-innings scores, printed batsman by batsman
for name, scores in b_runs1.items():
    print(name)
    print(np.median(scores))
    print(np.mean(scores))

In [None]:
# runs scored at the listed batting positions when batting in the 2nd innings
b_runs2 = get_runs(inn=2)

In [None]:
# display the result of get_runs(2): batsman name -> list of runs, 2nd innings
b_runs2

In [None]:
# median and mean of the 2nd-innings scores, printed batsman by batsman
for name, scores in b_runs2.items():
    print(name)
    print(np.median(scores))
    print(np.mean(scores))

# Question 5:

In [None]:
# number of deliveries in the data whose noball_runs value is not 0
int((deli.noball_runs != 0).sum())

In [None]:
# number of distinct match ids in the deliveries data
deli.match_id.nunique(dropna=False)

In [None]:
# average no. of no balls per match
# computed from the data (no-ball deliveries / distinct matches) instead of
# typed-in counts, so the figure cannot drift from the two cells above
len(deli[deli.noball_runs != 0]) / deli.match_id.nunique()

In [None]:
# no-ball deliveries whose bowling team is SRH, Deccan Chargers or RR
noball_deliveries = deli[deli.noball_runs != 0]
noballs_srh_rr = noball_deliveries[
    noball_deliveries.bowling_team.isin(['Sunrisers Hyderabad', 'Deccan Chargers', 'Rajasthan Royals'])
]

In [None]:
# how many no balls the selected teams have bowled
noballs_srh_rr.shape[0]

In [None]:
# no. of distinct matches in which SRH / Deccan Chargers / RR bowled at least one
# no ball (the frame only holds no-ball deliveries, so matches without one are absent)
noballs_srh_rr.match_id.nunique(dropna=False)

In [None]:
# average no. of no balls per match by SRH and RR, computed from the data
# numerator: no balls bowled by the three franchises
# denominator: every match in which one of them bowled, including matches without
# a no ball (a match between two of them is counted once)
srh_rr_bowling = deli[deli.bowling_team.isin(['Sunrisers Hyderabad', 'Deccan Chargers', 'Rajasthan Royals'])]
float((srh_rr_bowling.noball_runs != 0).sum() / srh_rr_bowling.match_id.nunique())

In [None]:
# how many matches saw 1, 2, 3, ... no balls from the selected teams
# (only matches with at least one such no ball appear in the frame)
noballs_per_match = noballs_srh_rr.groupby("match_id")["inning"].count()
noballs_per_match.value_counts()