In [None]:
# import library
library(dplyr)

In [None]:
# load the two CSV datasets used throughout the analysis
# (paths are relative to the current working directory)
matches <- read.csv(file = "matches.csv")
deli <- read.csv(file = "deliveries.csv")

# Question 1:

In [None]:
# names of the sides in today's match as they can appear in `matches`
teams <- c(
  "Delhi Daredevils",
  "Delhi Capitals",
  "Royal Challengers Bangalore"
)

In [None]:
# head-to-head fixtures: keep a match only when both of its sides are
# listed in `teams`
dc_rcb <- matches[
  matches$team1 %in% teams & matches$team2 %in% teams,
]

head(dc_rcb, n = 6L)

In [None]:
# winner recorded for each head-to-head match
dc_rcb[["winner"]]

In [None]:
# number of head-to-head wins per winner
dc_rcb %>% count(winner)

In [None]:
# every match in which DC, under either of its two names, is one of the sides
dc <- matches[
  matches$team1 %in% c("Delhi Capitals", "Delhi Daredevils") |
    matches$team2 %in% c("Delhi Capitals", "Delhi Daredevils"),
]

In [None]:
# how many matches DC has played in total
dim(dc)[1]

In [None]:
# tally of winners across DC's matches
dc %>% count(winner)

In [None]:
# win ratio of DC: share of DC's matches won under either of its two names.
# Computed from `dc` rather than typed in from the printed counts, so the
# figure cannot go stale when matches.csv changes.
sum(dc$winner %in% c("Delhi Capitals", "Delhi Daredevils")) / nrow(dc)

In [None]:
# every match in which RCB is one of the two sides
rcb <- matches[
  which(
    matches$team1 == "Royal Challengers Bangalore" |
      matches$team2 == "Royal Challengers Bangalore"
  ),
]

In [None]:
# how many matches RCB has played in total
dim(rcb)[1]

In [None]:
# tally of winners across RCB's matches
rcb %>% count(winner)

In [None]:
# win ratio of RCB: share of RCB's matches that RCB won.
# Computed from `rcb` rather than typed in from the printed counts, so the
# figure cannot go stale when matches.csv changes.
sum(rcb$winner %in% "Royal Challengers Bangalore") / nrow(rcb)

# Question 2:

In [None]:
# batsmen expected to feature in today's match, one name per entry
batsmen <- c(
  "S Dhawan",
  "AM Rahane",
  "SS Iyer",
  "RR Pant",
  "S Hetmyer",
  "AR Patel",
  "MP Stoinis",
  "V Kohli",
  "AB de Villiers",
  "Gurkeerat Singh",
  "CH Morris",
  "Washington Sundar"
)

In [None]:
# Summarise a batsman's per-match strike rate, fours and total boundaries.
#
# Args:
#   bname: batsman name, compared for exact equality with the `batsman`
#          column of the global `deli` data frame.
#
# Returns:
#   The `summary()` table of three per-match columns: `strike_rate`
#   (runs / balls * 100), `runs_4` (number of fours) and `boundaries`
#   (fours + sixes).
get_batting_info <- function(bname) {
  # exact comparison: a regex/substring search would also pull in deliveries
  # of any other player whose name merely contains `bname`
  data <- deli[which(deli$batsman == bname), , drop = FALSE]

  # One row per match, all counts taken in a single pass. Counting with
  # sum(<condition>) yields 0 for a match without a four or without a six,
  # so such matches stay in the result; inner-joining separate per-match
  # tables of fours and sixes would silently drop them and inflate every
  # statistic.
  final <- data %>%
    group_by(match_id) %>%
    summarise(
      runs = sum(batsman_runs),
      # a delivery counts as a ball faced only when it carries no extras
      # NOTE(review): if `extra_runs` also covers byes/leg-byes, legal
      # deliveries are under-counted here -- confirm against the data schema
      balls = sum(extra_runs == 0),
      runs_4 = sum(batsman_runs == 4),
      runs_6 = sum(batsman_runs == 6)
    )
  final <- as.data.frame(final)

  # a strike rate is undefined for a match without a single counted ball
  final <- final[final$balls > 0, , drop = FALSE]

  final$strike_rate <- (final$runs / final$balls) * 100
  final$boundaries <- final$runs_4 + final$runs_6

  summary(final[, c("strike_rate", "runs_4", "boundaries")])
}

In [None]:
# print each batsman's name followed by his batting summary table
for (b_name in batsmen) {
  print(b_name)
  b_name %>%
    get_batting_info() %>%
    print()
}

# Question 3:

In [None]:
# rows of `deli` whose bowler name matches "K Rabada"
rabada <- deli[grepl("K Rabada", deli$bowler), ]

head(rabada, n = 6L)

In [None]:
# Count, for every match in `data`, the deliveries up to and including the
# first dismissal.
#
# Args:
#   data: data frame of deliveries with columns `match_id` and
#         `player_dismissed`; rows of one match are expected to be in the
#         order they were bowled.
#
# Returns:
#   Numeric vector with one element per distinct `match_id`, in order of
#   first appearance: the 1-based position, within that match's rows, of the
#   first row with a non-empty `player_dismissed`, or 0 when the match has no
#   such row.
#
# NOTE(review): every row counts as a ball and every non-empty
# `player_dismissed` counts as a wicket -- confirm whether extras and
# dismissals not credited to the bowler should be excluded.
get_balls_for_wicket <- function(data) {
  vapply(
    unique(data$match_id),
    function(m_id) {
      # dismissal markers of this match, in delivery order
      dismissed <- data$player_dismissed[which(data$match_id == m_id)]
      # position of the first dismissal; NA when there is none
      first_wicket <- which(dismissed != "")[1]
      if (is.na(first_wicket)) 0 else as.numeric(first_wicket)
    },
    numeric(1),
    USE.NAMES = FALSE
  )
}

In [None]:
# balls Rabada needed for his first wicket, one value per IPL match
# (0 = no wicket in that match)
wickets <- get_balls_for_wicket(data = rabada)

wickets

In [None]:
# frequency of each balls-to-first-wicket value; the 0 entry counts the
# wicketless matches (2 out of 18 when this analysis was written)
table(wickets = wickets)

In [None]:
# histogram of balls needed for the first wicket, wicketless matches excluded.
# Bins are one over (6 balls) wide and run up to the first multiple of 6 at or
# above the largest value, so the breaks are always strictly increasing and
# equally spaced; frequency bars are only meaningful for equal-width bins.
hist(
  wickets[wickets != 0],
  freq = TRUE,
  breaks = seq(0, 6 * max(1, ceiling(max(wickets) / 6)), by = 6)
)

In [None]:
# Rabada's deliveries with RCB as the batting side
rabada_rcb <- rabada[
  which(rabada$batting_team == "Royal Challengers Bangalore"),
]

In [None]:
# balls Rabada needed for his first wicket in each match against RCB
# (0 = no wicket in that match)
wickets_rcb <- get_balls_for_wicket(data = rabada_rcb)

wickets_rcb

# Question 4:

In [None]:
# bowlers expected to feature in today's match, one name per entry
bowlers <- c(
  "K Rabada",
  "AR Patel",
  "R Ashwin",
  "HV Patel",
  "MP Stoinis",
  "Washington Sundar",
  "CH Morris",
  "N Saini",
  "Mohammed Siraj",
  "YS Chahal"
)

In [None]:
# Summarise a bowler's per-match economy, dot balls and fours conceded.
#
# Args:
#   bname: bowler name, compared for exact equality with the `bowler` column
#          of the global `deli` data frame.
#
# Returns:
#   The `summary()` table of three per-match columns: `economy`
#   (runs / overs), `dots` (deliveries with `total_runs == 0`) and `runs_4`
#   (deliveries with `batsman_runs == 4`).
get_bowling_info <- function(bname) {
  # exact comparison: a regex/substring search would also pull in deliveries
  # of any other player whose name merely contains `bname`
  data <- deli[which(deli$bowler == bname), , drop = FALSE]

  # One row per match, all counts taken in a single pass; sum(<condition>)
  # gives 0 for a match without a dot ball or a four, so no outer joins or
  # NA back-filling are needed.
  final <- data %>%
    group_by(match_id) %>%
    summarise(
      # NOTE(review): only `batsman_runs` are summed, so runs from extras are
      # not charged to the bowler -- confirm that this is intended
      runs = sum(batsman_runs),
      # NOTE(review): each distinct `over` value counts as one full over, even
      # if the bowler did not complete it
      overs = length(unique(over)),
      dots = sum(total_runs == 0),
      runs_4 = sum(batsman_runs == 4)
    )
  final <- as.data.frame(final)

  final$economy <- final$runs / final$overs

  # select the reported columns by name rather than by position
  summary(final[, c("economy", "dots", "runs_4")])
}

In [None]:
# print each bowler's name followed by his bowling summary table
for (b_name in bowlers) {
  print(b_name)
  b_name %>%
    get_bowling_info() %>%
    print()
}