In [None]:
# import library
library(dplyr)

In [None]:
# load the match-level table and the ball-by-ball table from the working directory
matches <- read.csv(file = "matches.csv")
deli <- read.csv(file = "deliveries.csv")

# Question 1:

In [None]:
# team names used to select today's fixture
teams <- c(
  "Sunrisers Hyderabad",
  "Deccan Chargers",
  "Mumbai Indians"
)

In [None]:
# fixtures in which both sides appear in `teams`
srh_mi <- matches[
  matches$team1 %in% teams & matches$team2 %in% teams, ,
  drop = FALSE
]

# preview the first rows
head(srh_mi, n = 6L)

In [None]:
# winner recorded for each of the selected fixtures
srh_mi[["winner"]]

In [None]:
# number of wins per team in the head-to-head fixtures
srh_mi %>% count(winner)

In [None]:
# matches of SRH: rows where either side is "Sunrisers Hyderabad" or "Deccan Chargers"
srh <- matches[
  matches$team1 %in% c("Sunrisers Hyderabad", "Deccan Chargers") |
    matches$team2 %in% c("Sunrisers Hyderabad", "Deccan Chargers"), ,
  drop = FALSE
]

In [None]:
# number of rows in `srh`, i.e. matches played by SRH
dim(srh)[1L]

In [None]:
# tally of winners across the SRH matches
srh %>% count(winner)

In [None]:
# win ratio of SRH: share of the rows in `srh` won under either team name.
# Computed from the data so the figure cannot go stale when matches.csv changes.
sum(srh$winner %in% c("Sunrisers Hyderabad", "Deccan Chargers")) / nrow(srh)

In [None]:
# matches of MI: rows where either side is "Mumbai Indians"
mi <- matches[
  which(matches$team1 == "Mumbai Indians" | matches$team2 == "Mumbai Indians"), ,
  drop = FALSE
]

In [None]:
# number of rows in `mi`, i.e. matches played by MI
dim(mi)[1L]

In [None]:
# tally of winners across the MI matches
mi %>% count(winner)

In [None]:
# win ratio of MI: share of the rows in `mi` won by "Mumbai Indians".
# Computed from the data so the figure cannot go stale when matches.csv changes.
sum(mi$winner %in% "Mumbai Indians") / nrow(mi)

# Question 2:

In [None]:
# batsmen expected to play in today's match
batsmen <- c(
  "DA Warner", "WP Saha", "J Bairstow", "MK Pandey", "KS Williamson",
  "JO Holder", "Q de Kock", "Ishan Kishan", "SA Yadav", "SS Tiwary",
  "KA Pollard", "HH Pandya", "KH Pandya"
)

In [None]:
# Stamina score of a batsman.
#
# For every match in which the batsman scored more than 15 runs, computes
#   ((runs in 1s/2s/3s) / (runs in 4s/6s) +
#    (balls on strike) / (balls on strike + balls as non-striker)) / runs
# and returns the median of that value over those matches.
#
# bname: player name, matched as a literal substring of the `batsman` and
#        `non_striker` columns of the global `deli` data frame.
# Returns a single number; NA when no match qualifies. A qualifying match
# without any 4 or 6 yields a non-finite score (division by zero).
get_stamina_score <- function(bname) {
  # deliveries with the player on strike / at the non-striker's end;
  # fixed = TRUE so the name is never interpreted as a regular expression
  on_strike <- deli[grepl(bname, deli$batsman, fixed = TRUE), , drop = FALSE]
  off_strike <- deli[grepl(bname, deli$non_striker, fixed = TRUE), , drop = FALSE]

  # per-match batting figures, gathered in a single pass
  batting <- on_strike %>%
    group_by(match_id) %>%
    summarise(
      runs = sum(batsman_runs),
      runs123 = sum(batsman_runs[batsman_runs %in% c(1, 2, 3)]),
      runs_b = sum(batsman_runs[batsman_runs %in% c(4, 6)]),
      balls = n()
    )

  # per-match deliveries spent at the non-striker's end
  waiting <- off_strike %>%
    group_by(match_id) %>%
    summarise(balls_ns = n())

  # a left join is enough: a match without a ball on strike has no runs
  # and could never pass the runs filter below
  result <- merge(batting, waiting, by = "match_id", all.x = TRUE)
  result$balls_ns[is.na(result$balls_ns)] <- 0

  # total deliveries spent on the crease
  result$balls_on_crease <- result$balls + result$balls_ns

  # keep only the matches in which the batsman scored more than 15 runs
  result <- result[which(result$runs > 15), , drop = FALSE]

  stamina <- (result$runs123 / result$runs_b +
    result$balls / result$balls_on_crease) / result$runs

  median(stamina)
}

In [None]:
# stamina score of every batsman, in the order of `batsmen`.
# vapply builds the whole vector at once (no cbind-growth inside a loop) and
# checks that each call returns exactly one number.
stamina_score <- vapply(
  batsmen,
  function(b_name) {
    # printing the batsman name as a progress trace
    print(b_name)
    get_stamina_score(b_name)
  },
  numeric(1),
  USE.NAMES = FALSE
)

In [None]:
# one row per batsman with the raw stamina score
final <- data.frame(batsman = batsmen, ss = c(stamina_score))

# preview the first rows
head(final, n = 6L)

In [None]:
# min-max normalised stamina score.
# The range is taken over finite scores only, so a batsman with an NA or
# infinite score no longer turns the whole column into NA/NaN.
final$ss_norm <- local({
  ss_range <- range(final$ss, finite = TRUE)
  (final$ss - ss_range[1]) / (ss_range[2] - ss_range[1])
})

# displaying the dataframe
final

# Question 3:

In [None]:
# Smash rate of a batsman: per match, the percentage of deliveries on strike
# that were hit for 4 or 6.
#
# bname: player name, matched as a literal substring of the `batsman` column
#        of the global `deli` data frame.
# Returns summary() of the per-match smash rates.
get_smash_rate <- function(bname) {
  # fixed = TRUE so the name is never interpreted as a regular expression
  faced <- deli[grepl(bname, deli$batsman, fixed = TRUE), , drop = FALSE]

  # boundaries and deliveries per match in one pass; a match without a
  # boundary gets num_b = 0 directly, so no merge / NA fill is needed
  per_match <- faced %>%
    group_by(match_id) %>%
    summarise(
      num_b = sum(batsman_runs %in% c(4, 6)),
      balls = n()
    )

  smash_rate <- per_match$num_b / per_match$balls * 100

  summary(smash_rate)
}

In [None]:
# print the smash-rate summary of every batsman, one after the other
for (b_name in batsmen) {
  print(b_name) # label for the summary that follows
  print(get_smash_rate(b_name))
}

# Question 4:

In [None]:
# bowlers expected to play in today's match
bowlers <- c(
  "JO Holder", "Rashid Khan", "S Nadeem", "Sandeep Sharma", "T Natarajan",
  "KA Pollard", "HH Pandya", "KH Pandya", "NM Coulter-Nile", "RD Chahar",
  "TA Boult", "JJ Bumrah"
)

In [None]:
# Boundary-leaker value of a bowler: per match, deliveries bowled divided by
# (boundaries conceded + 1).
#
# bname: player name, matched as a literal substring of the `bowler` column
#        of the global `deli` data frame.
# Returns summary() of the per-match values.
get_boundary_leaker <- function(bname) {
  # fixed = TRUE so the name is never interpreted as a regular expression
  bowled <- deli[grepl(bname, deli$bowler, fixed = TRUE), , drop = FALSE]

  # boundaries conceded and deliveries bowled per match in one pass; a match
  # without a boundary gets num_b = 0 directly, so no merge / NA fill is needed
  per_match <- bowled %>%
    group_by(match_id) %>%
    summarise(
      num_b = sum(batsman_runs %in% c(4, 6)),
      balls = n()
    )

  # the + 1 keeps the ratio defined for matches without any boundary
  boundary_leaker <- per_match$balls / (per_match$num_b + 1)

  summary(boundary_leaker)
}

In [None]:
# print the boundary-leaker summary of every bowler, one after the other
for (b_name in bowlers) {
  print(b_name) # label for the summary that follows
  print(get_boundary_leaker(b_name))
}

# Question 5:

In [None]:
# dismissals per match: keep the deliveries that record a dismissed player,
# then count them by match
total_wickets <- deli %>%
  filter(player_dismissed != "") %>%
  group_by(match_id) %>%
  summarise(wickets = n())

head(total_wickets, n = 6L)

In [None]:
# histogram of wickets per match in bins of width 5.
# The upper edge is the maximum rounded up to a multiple of 5, so the breaks
# are always strictly increasing and equally wide whatever the maximum is
# (a fixed c(0, 5, 10, 15, max) is only well-formed when max is exactly 20).
hist(
  total_wickets$wickets,
  freq = TRUE,
  breaks = seq(0, 5 * ceiling(max(total_wickets$wickets) / 5), by = 5)
)

In [None]:
# deliveries in which both the bowling and the batting side appear in `teams`
srh_mi_deli <- deli[
  deli$bowling_team %in% teams & deli$batting_team %in% teams, ,
  drop = FALSE
]

head(srh_mi_deli, n = 6L)

In [None]:
# total wickets taken in SRH vs MI matches
wickets_srh_mi <- srh_mi_deli %>%
  filter(player_dismissed != "") %>%
  group_by(match_id) %>%
  summarise(wickets = n())

# histogram in bins of width 5.
# The upper edge is the maximum rounded up to a multiple of 5, so the breaks
# are always strictly increasing and equally wide whatever the maximum is
# (a fixed c(0, 5, 10, 15, max) is only well-formed when max is exactly 20).
hist(
  wickets_srh_mi$wickets,
  freq = TRUE,
  breaks = seq(0, 5 * ceiling(max(wickets_srh_mi$wickets) / 5), by = 5)
)

In [None]:
# all deliveries faced by MI
mi_bat <- subset(deli, batting_team == "Mumbai Indians")

# total wickets lost by MI in each match.
# Dismissals are counted inside each match group instead of filtering first,
# so a match in which MI lost no wicket is kept with a count of 0.
wickets_mi <- mi_bat %>%
  group_by(match_id) %>%
  summarise(wickets = sum(!is.na(player_dismissed) & player_dismissed != ""))

# statistical measures
summary(wickets_mi$wickets)

In [None]:
# all deliveries faced by SRH (batting side named "Sunrisers Hyderabad" or "Deccan Chargers")
srh_bat <- subset(deli, batting_team %in% c("Sunrisers Hyderabad", "Deccan Chargers"))

# total wickets lost by SRH in each match.
# Dismissals are counted inside each match group instead of filtering first,
# so a match in which SRH lost no wicket is kept with a count of 0.
wickets_srh <- srh_bat %>%
  group_by(match_id) %>%
  summarise(wickets = sum(!is.na(player_dismissed) & player_dismissed != ""))

# statistical measures
summary(wickets_srh$wickets)