In [None]:
# import library
library(dplyr)

In [None]:
# load the two CSV files from the working directory
matches <- read.csv(file = "matches.csv")
deli <- read.csv(file = "deliveries.csv")

# Question 1:

In [None]:
# names of the sides in today's match (Delhi is listed under two names)
teams <- c("Delhi Daredevils", "Delhi Capitals", "Mumbai Indians")

In [None]:
# keep the matches in which both team1 and team2 are one of `teams`
dc_mi <- matches[matches$team1 %in% teams & matches$team2 %in% teams, ]

head(dc_mi)

In [None]:
dc_mi[["winner"]]

In [None]:
# number of head-to-head matches won by each side
dc_mi %>% count(winner)

In [None]:
# matches of DC: either side is listed as 'Delhi Capitals' or 'Delhi Daredevils'
dc <- subset(
    matches,
    team1 %in% c('Delhi Capitals', 'Delhi Daredevils') |
        team2 %in% c('Delhi Capitals', 'Delhi Daredevils')
)

In [None]:
# total no. of matches played by DC
nrow(dc)

In [None]:
# winners in those matches
count(dc, winner)

In [None]:
# win ratio of DC: share of the rows of `dc` whose winner is one of the two
# Delhi names. Computed from the data so it cannot drift from the counts above.
mean(dc$winner %in% c('Delhi Capitals', 'Delhi Daredevils'))

In [None]:
# matches of MI: either side is listed as 'Mumbai Indians'
mi <- subset(matches, team1 == 'Mumbai Indians' | team2 == 'Mumbai Indians')

In [None]:
# total no. of matches played by MI
nrow(mi)

In [None]:
# winners in those matches
count(mi, winner)

In [None]:
# win ratio of MI: share of the rows of `mi` whose winner is 'Mumbai Indians'.
# Computed from the data so it cannot drift from the counts above.
mean(mi$winner %in% 'Mumbai Indians')

# Question 2:

In [None]:
# dismissals per match in overs 1-6 (the powerplay), across all matches
# NOTE(review): a match with no dismissal in overs 1-6 has no row here, so it
# is missing from the distribution rather than counted as 0 - confirm intended
total_wickets <- subset(deli, (player_dismissed != '') & (over %in% 1:6)) %>%
    group_by(match_id) %>%
    summarise(wickets = n())

head(total_wickets)

In [None]:
# number of matches per wicket bucket (0-2, 3-4, 5, 6 or more).
# The buckets have unequal widths, which makes hist(freq = TRUE) warn that the
# plotted areas are wrong, so the binned counts are drawn as a bar chart.
barplot(
    table(cut(total_wickets$wickets,
              breaks = c(0, 2, 4, 5, Inf),
              labels = c('0-2', '3-4', '5', '6+'))),
    xlab = 'powerplay wickets in a match',
    ylab = 'number of matches'
)

In [None]:
# all deliveries in which both the bowling and the batting side are in `teams`
dc_mi_deli <- subset(deli, bowling_team %in% teams & batting_team %in% teams)

head(dc_mi_deli)

In [None]:
# dismissals per DC vs MI match in overs 1-6 (the powerplay)
wickets_dc_mi <- subset(dc_mi_deli, (player_dismissed != '') & (over %in% 1:6)) %>%
    group_by(match_id) %>%
    summarise(wickets = n())

# number of matches per wicket bucket (0-2, 3-4, 5, 6 or more).
# An open-ended last bucket is used instead of max(wickets): on this smaller
# subset the maximum may be 5 or less, which would give unsorted or duplicated
# histogram breaks, and unequal-width buckets make hist(freq = TRUE) warn.
barplot(
    table(cut(wickets_dc_mi$wickets,
              breaks = c(0, 2, 4, 5, Inf),
              labels = c('0-2', '3-4', '5', '6+'))),
    xlab = 'powerplay wickets in a match',
    ylab = 'number of matches'
)

In [None]:
# deliveries in which Mumbai Indians were the batting side
mi_bat <- deli[deli$batting_team %in% 'Mumbai Indians', ]

# MI dismissals in overs 1-6, counted per match
# (matches without such a dismissal produce no row)
wickets_mi <- mi_bat %>%
    subset(player_dismissed != '' & over %in% 1:6) %>%
    group_by(match_id) %>%
    summarise(wickets = n())

# min / quartiles / mean / max of the per-match counts
summary(wickets_mi$wickets)

In [None]:
# deliveries in which Delhi (under either franchise name) were the batting side
dc_bat <- deli[deli$batting_team %in% c('Delhi Capitals', 'Delhi Daredevils'), ]

# DC dismissals in overs 1-6, counted per match
# (matches without such a dismissal produce no row)
wickets_dc <- dc_bat %>%
    subset(player_dismissed != '' & over %in% 1:6) %>%
    group_by(match_id) %>%
    summarise(wickets = n())

# min / quartiles / mean / max of the per-match counts
summary(wickets_dc$wickets)

# Question 3:

In [None]:
# bowlers of both sides in today's match, as spelled in the `bowler` column
bowlers <- c(
    'K Rabada', 'AR Patel', 'R Ashwin', 'HV Patel', 'MP Stoinis',
    'KA Pollard', 'HH Pandya', 'KH Pandya', 'NM Coulter-Nile',
    'RD Chahar', 'TA Boult', 'JJ Bumrah'
)

In [None]:
# Summarise a bowler's per-match economy, boundaries conceded and sixes conceded.
#
# bname: bowler name; rows of `df` whose `bowler` value contains this text
#        (matched literally, not as a regular expression) are used
# df:    ball-by-ball data frame with the columns match_id, over, bowler and
#        batsman_runs
#
# Returns the summary() table of three per-match columns: economy, num_b
# (number of 4s and 6s conceded) and runs_6 (number of 6s conceded). Only
# matches in which the bowler bowled in more than one distinct over are
# included.
# NOTE(review): economy is computed from batsman_runs only; runs conceded as
# extras are not included - confirm this is intended.
get_bowling_info <- function(bname, df){

    # deliveries bowled by the requested bowler
    data <- df[grepl(bname, df$bowler, fixed = TRUE), ]

    # runs off the bat and number of distinct overs bowled, per match
    econ <- data %>%
        group_by(match_id) %>%
        summarise(runs = sum(batsman_runs), overs = length(unique(over)))
    # drop the matches in which the bowler bowled only one over
    econ <- subset(econ, overs > 1)
    econ$economy <- econ$runs / econ$overs

    # number of boundaries (4s and 6s) and of 6s conceded, per match
    details <- subset(data, batsman_runs %in% c(4, 6)) %>%
        group_by(match_id) %>%
        summarise(num_b = n(), runs_6 = sum(batsman_runs == 6))

    # Left join on the matches kept in `econ`. A full outer join would bring
    # back every one-over match in which a boundary was hit, with its economy
    # filled as 0, and distort the summary.
    final <- merge(econ, details, by = "match_id", all.x = TRUE)
    # matches without any boundary have no row in `details`: count them as 0
    final[is.na(final)] <- 0

    # select by name so the result does not depend on column positions
    summary(final[, c("economy", "num_b", "runs_6")])
}

## Performance throughout IPL

In [None]:
# for every bowler: print the name, then the summary over all deliveries
for (b_name in bowlers) {
    print(b_name)
    print(get_bowling_info(bname = b_name, df = deli))
}

## Performance in IPL 2019

In [None]:
# same report, restricted to deliveries with match_id above 11000
for (b_name in bowlers) {
    print(b_name)
    print(get_bowling_info(bname = b_name, df = subset(deli, match_id > 11000)))
}

# Question 4:

In [None]:
# batsmen of Delhi in today's match
dc_batsmen <- c(
    'S Dhawan', 'AM Rahane', 'SS Iyer', 'RR Pant',
    'S Hetmyer', 'AR Patel', 'MP Stoinis'
)
# batsmen of Mumbai in today's match
mi_batsmen <- c(
    'Q de Kock', 'RG Sharma', 'Ishan Kishan', 'SA Yadav',
    'SS Tiwary', 'KA Pollard', 'HH Pandya', 'KH Pandya'
)

In [None]:
# Percentage of a batsman's matches with more than 30 runs and with fewer
# than 30 runs.
#
# bname: batsman name, used as a grepl() pattern against the `batsman` column
# df:    ball-by-ball data frame with the columns match_id, batsman and
#        batsman_runs
#
# Returns c(pct_above_30, pct_below_30); both are relative to the number of
# matches in which the batsman has at least one row in `df`.
# NOTE(review): a score of exactly 30 falls in neither bucket, so the two
# values need not add up to 100; both are NaN when the batsman has no rows.
get_matches <- function(bname, df){

    # rows of the requested batsman
    faced <- df[grepl(bname, df$batsman), ]

    # one row per match with the runs he scored in it
    per_match <- faced %>%
        group_by(match_id) %>%
        summarise(runs = sum(batsman_runs))

    # one row per match, so the row count is the number of matches played
    n_matches <- nrow(per_match)

    pct_above <- sum(per_match$runs > 30, na.rm = TRUE) / n_matches * 100
    pct_below <- sum(per_match$runs < 30, na.rm = TRUE) / n_matches * 100

    c(pct_above, pct_below)
}

## Performance of DC batsmen in all the seasons of IPL

In [None]:
# for every DC batsman: print the name, then the two percentages
# (matches above 30 runs, matches below 30 runs) over all deliveries
for (b_name in dc_batsmen) {
    print(b_name)
    print(get_matches(bname = b_name, df = deli))
}

## Performance of DC batsmen in IPL 2019

In [None]:
# same report, restricted to deliveries with match_id above 11000
for (b_name in dc_batsmen) {
    print(b_name)
    print(get_matches(bname = b_name, df = subset(deli, match_id > 11000)))
}

## Performance of DC batsmen against Mumbai Indians

In [None]:
# same report, restricted to deliveries bowled by Mumbai Indians
for (b_name in dc_batsmen) {
    print(b_name)
    print(get_matches(bname = b_name, df = subset(deli, bowling_team == 'Mumbai Indians')))
}

## Performance of MI batsmen in all the seasons of IPL

In [None]:
# for every MI batsman: print the name, then the two percentages
# (matches above 30 runs, matches below 30 runs) over all deliveries
for (b_name in mi_batsmen) {
    print(b_name)
    print(get_matches(bname = b_name, df = deli))
}

## Performance of MI batsmen in IPL 2019

In [None]:
# same report, restricted to deliveries with match_id above 11000
for (b_name in mi_batsmen) {
    print(b_name)
    print(get_matches(bname = b_name, df = subset(deli, match_id > 11000)))
}

## Performance of MI batsmen against DC

In [None]:
# same report, restricted to deliveries bowled by Delhi (either franchise name)
for (b_name in mi_batsmen) {
    print(b_name)
    print(get_matches(bname = b_name, df = subset(deli, bowling_team %in% c('Delhi Capitals', 'Delhi Daredevils'))))
}

# Question 5:

In [None]:
# Summarise a bowler's per-match economy within a given set of overs.
#
# df:          ball-by-ball data frame with the columns match_id, over, bowler
#              and batsman_runs
# bowler_name: bowler to report on; rows whose `bowler` value contains this
#              text (matched literally) are used. Defaults to 'JJ Bumrah'.
# over_range:  over numbers to keep. Defaults to 16:20 (the death overs).
#
# Returns summary() of the per-match economy, i.e. the sum of batsman_runs
# divided by the number of distinct overs bowled within `over_range`.
# NOTE(review): runs conceded as extras are not included - confirm intended.
get_economy <- function(df, bowler_name = 'JJ Bumrah', over_range = 16:20){
    # deliveries bowled by the bowler in the requested overs
    data <- df[grepl(bowler_name, df$bowler, fixed = TRUE) & df$over %in% over_range, ]

    # runs off the bat and number of distinct overs bowled, per match
    econ <- data %>%
        group_by(match_id) %>%
        summarise(runs = sum(batsman_runs), overs = length(unique(over)))

    # statistical measures of the per-match economy
    summary(econ$runs / econ$overs)
}

In [None]:
# death-over economy over every delivery in the dataset
get_economy(df = deli)

In [None]:
# death-over economy for deliveries with match_id above 11000 (IPL 2019)
get_economy(df = subset(deli, match_id > 11000))

In [None]:
# death-over economy when Delhi (Capitals or Daredevils) were batting
get_economy(df = subset(deli, batting_team %in% c('Delhi Capitals', 'Delhi Daredevils')))