In [None]:
# import library
library(dplyr)

In [None]:
# Load the two CSV datasets used by the rest of the analysis.
matches <- read.csv(file = "matches.csv")
deli <- read.csv(file = "deliveries.csv")

# Question 1:

In [None]:
# The two sides meeting in today's match.
teams <- c("Kings XI Punjab", "Rajasthan Royals")

In [None]:
# Head-to-head fixtures: rows of `matches` whose team1 and team2 are both
# members of `teams`.
kxip_rr <- matches[matches$team1 %in% teams & matches$team2 %in% teams, ,
                   drop = FALSE]

head(kxip_rr, n = 6L)

In [None]:
# Winner of each KXIP vs RR fixture, in row order.
kxip_rr[["winner"]]

In [None]:
# Number of head-to-head fixtures recorded for each value of `winner`.
kxip_rr %>% count(winner)

In [None]:
# Every fixture in which KXIP appears as team1 or team2.
# which() skips rows where the comparison is NA, as subset() does.
kxip <- matches[which(matches$team1 == "Kings XI Punjab" |
                        matches$team2 == "Kings XI Punjab"), , drop = FALSE]

In [None]:
# How many fixtures KXIP has played (row count of `kxip`).
dim(kxip)[1L]

In [None]:
# Tally of the `winner` column across KXIP's fixtures.
kxip %>% count(winner)

In [None]:
# win ratio of KXIP: fixtures won by KXIP divided by all fixtures KXIP played.
# Computed from `kxip` so the figure follows the data instead of relying on
# counts typed in by hand (previously 82/176).
sum(kxip$winner == "Kings XI Punjab", na.rm = TRUE) / nrow(kxip)

In [None]:
# Every fixture in which RR appears as team1 or team2.
# which() skips rows where the comparison is NA, as subset() does.
rr <- matches[which(matches$team1 == "Rajasthan Royals" |
                      matches$team2 == "Rajasthan Royals"), , drop = FALSE]

In [None]:
# How many fixtures RR has played (row count of `rr`).
dim(rr)[1L]

In [None]:
# Tally of the `winner` column across RR's fixtures.
rr %>% count(winner)

In [None]:
# win ratio of RR: fixtures won by RR divided by all fixtures RR played.
# Computed from `rr` so the figure follows the data instead of relying on
# counts typed in by hand (previously 75/147).
sum(rr$winner == "Rajasthan Royals", na.rm = TRUE) / nrow(rr)

# Question 2:

In [None]:
# Names of the batsmen expected in today's match, spelled as they are looked
# up in the deliveries data.
batsman <- c(
  "KL Rahul", "Mandeep Singh", "CH Gayle", "N Pooran", "GJ Maxwell", "DJ Hooda",
  "RV Uthappa", "BA Stokes", "SV Samson", "JC Buttler", "SPD Smith", "R Parag"
)

In [None]:
# Summarise how much of a batsman's scoring comes from boundaries.
#
# For every match in the global `deli` table in which the batsman has at least
# 10 delivery rows and scored at least one run, computes
#   (runs from 4s and 6s) / (total batsman runs)
# and returns summary() of those per-match ratios.
#
# Args:
#   b_name: batsman name, matched as a literal substring of `deli$batsman`.
#
# Returns:
#   The summary() of the per-match boundary-run ratios.
get_boundary_ratio <- function(b_name) {
  # Literal (non-regex) match, so characters such as "." in a name are not
  # interpreted as wildcards.
  faced <- deli[grepl(b_name, deli$batsman, fixed = TRUE), , drop = FALSE]

  # Per-match delivery count and runs scored.
  balls <- tapply(faced$batsman_runs, faced$match_id, length)
  total_runs <- tapply(faced$batsman_runs, faced$match_id, sum)

  # Non-boundary deliveries contribute 0 here, so a match without any 4 or 6
  # stays in the sample with a ratio of 0. (Merging a separate boundaries-only
  # table silently dropped those matches and inflated the ratio.)
  is_boundary <- faced$batsman_runs %in% c(4, 6)
  boundary_runs <- tapply(faced$batsman_runs * is_boundary,
                          faced$match_id, sum)

  # Keep innings of at least 10 balls; skip scoreless innings, where the ratio
  # would be 0/0.
  keep <- which(balls >= 10 & total_runs > 0)

  summary(as.numeric(boundary_runs[keep] / total_runs[keep]))
}

In [None]:
# Report, for each listed batsman, the name followed by the summary of the
# per-match boundary ratios.
for (b_name in batsman) {
  print(b_name)
  boundary_stats <- get_boundary_ratio(b_name)
  print(boundary_stats)
}

# Question 4:

In [None]:
# all dot balls per match in IPL (a dot ball here is a delivery row with
# total_runs == 0)
dots <- subset(deli, total_runs == 0) %>%
  group_by(match_id) %>%
  summarise(dot_balls = n())

# Number of matches per dot-ball bucket: [0,60], (60,90], (90,120], (120,Inf].
# The buckets have unequal widths, so the counts are drawn as a bar chart;
# hist(freq = TRUE) with unequal breaks makes R warn that the bar areas are
# wrong. The open-ended last bucket also avoids building the breaks from
# max(), which yields mis-ordered or duplicated breaks when the maximum is at
# or below 120.
barplot(
  table(cut(dots$dot_balls, breaks = c(0, 60, 90, 120, Inf),
            include.lowest = TRUE)),
  xlab = "dot balls per match",
  ylab = "number of matches"
)

In [None]:
# all deliveries in KXIP vs RR matches (bowling and batting side both taken
# from `teams`)
deli_kxip_rr <- subset(deli, bowling_team %in% teams & batting_team %in% teams)

# all dot balls (total_runs == 0) per KXIP vs RR match
dots_kxip_rr <- subset(deli_kxip_rr, total_runs == 0) %>%
  group_by(match_id) %>%
  summarise(dot_balls = n())

# Number of matches per dot-ball bucket: [0,60], (60,90], (90,120], (120,Inf].
# Drawn as a bar chart because the buckets have unequal widths (hist with
# freq = TRUE warns that the areas are wrong in that case). The open-ended
# last bucket does not depend on max(), which may well be below 120 here and
# would then produce mis-ordered breaks.
barplot(
  table(cut(dots_kxip_rr$dot_balls, breaks = c(0, 60, 90, 120, Inf),
            include.lowest = TRUE)),
  xlab = "dot balls per match",
  ylab = "number of matches"
)

In [None]:
# Deliveries on which KXIP was the batting side.
# which() skips rows where the comparison is NA, as subset() does.
deli_kxip_bat <- deli[which(deli$batting_team == "Kings XI Punjab"), ,
                      drop = FALSE]

# Per-match count of those deliveries that produced no runs at all.
dots_kxip <- deli_kxip_bat %>%
  subset(total_runs == 0) %>%
  group_by(match_id) %>%
  summarise(dot_balls = n())

# Five-number summary and mean of the per-match dot-ball counts.
summary(dots_kxip$dot_balls)

In [None]:
# Deliveries on which RR was the batting side.
# which() skips rows where the comparison is NA, as subset() does.
deli_rr_bat <- deli[which(deli$batting_team == "Rajasthan Royals"), ,
                    drop = FALSE]

# Per-match count of those deliveries that produced no runs at all.
dots_rr <- deli_rr_bat %>%
  subset(total_runs == 0) %>%
  group_by(match_id) %>%
  summarise(dot_balls = n())

# Five-number summary and mean of the per-match dot-ball counts.
summary(dots_rr$dot_balls)

# Question 5:

In [None]:
# Deliveries whose `bowler` value contains "Shami".
# grepl() returns FALSE (never NA) for missing values, so plain logical
# indexing keeps exactly the rows subset() would keep, row names included.
shami <- deli[grepl("Shami", deli$bowler), , drop = FALSE]

head(shami, n = 6L)

In [None]:
# Balls a bowler needed before the first dismissal in each match.
#
# Args:
#   data: data frame of one bowler's deliveries with columns `match_id` and
#         `player_dismissed` ("" or NA when nobody was out). Rows are assumed
#         to be in bowling order within each match -- TODO confirm.
#
# Returns:
#   A numeric vector with one entry per distinct match_id, in order of first
#   appearance: the 1-based position, among the bowler's deliveries in that
#   match, of the first delivery with a dismissal, or 0 when there was none.
#
# NOTE(review): any non-empty `player_dismissed` counts, so dismissals not
# credited to the bowler (e.g. run outs) and extra deliveries are presumably
# included -- verify against the dataset's columns.
get_balls_for_wicket <- function(data) {
  match_ids <- unique(data$match_id)

  # Positions are taken directly within each match's rows, so the result does
  # not depend on the data frame's row names being integer-like.
  vapply(match_ids, function(m_id) {
    # which() skips rows whose match_id is NA, as subset() did
    dismissed <- data$player_dismissed[which(data$match_id == m_id)]
    wicket_balls <- which(!is.na(dismissed) & dismissed != "")

    # 0 marks a match without a wicket
    if (length(wicket_balls) == 0L) 0 else as.numeric(wicket_balls[1L])
  }, FUN.VALUE = numeric(1), USE.NAMES = FALSE)
}

In [None]:
# First-wicket ball counts for every match in `shami` (0 = no wicket).
wickets <- get_balls_for_wicket(data = shami)

wickets

In [None]:
# Frequency of each first-wicket ball count; the 0 entry is the number of
# wicketless matches (14 out of 49 matches when this was written).
table(wickets, dnn = "wickets")

In [None]:
# Number of matches per first-wicket bucket, wicketless matches (0) excluded:
# [0,6], (6,12], (12,18], (18,Inf].
# Drawn as a bar chart of bucket counts. The open-ended last bucket avoids
# building the breaks from max(wickets), which gives unequal bin widths (hist
# with freq = TRUE then warns that the areas are wrong), duplicated breaks
# when the maximum is exactly 18, and an error when `wickets` is empty.
barplot(
  table(cut(wickets[wickets != 0], breaks = c(0, 6, 12, 18, Inf),
            include.lowest = TRUE)),
  xlab = "balls bowled before the first wicket",
  ylab = "number of matches"
)

In [None]:
# Shami's deliveries with RR as the batting side.
# which() skips rows where the comparison is NA, as subset() does.
shami_rr <- shami[which(shami$batting_team == "Rajasthan Royals"), ,
                  drop = FALSE]

In [None]:
# First-wicket ball counts for the matches in `shami_rr` (0 = no wicket).
wickets_rr <- get_balls_for_wicket(data = shami_rr)

wickets_rr