In [None]:
# import library
library(dplyr)

In [None]:
# load the two CSV tables used throughout the notebook from the working directory
matches <- read.csv(file = "matches.csv")
deli <- read.csv(file = "deliveries.csv")

# Question 1:

In [None]:
# franchise names for today's fixture; 'Deccan Chargers' is listed alongside
# 'Sunrisers Hyderabad' because the notebook counts both names as SRH
teams <- c("Sunrisers Hyderabad", "Deccan Chargers", "Royal Challengers Bangalore")

In [None]:
# keep the matches in which both team1 and team2 are one of the listed names
srh_rcb <- matches[matches$team1 %in% teams & matches$team2 %in% teams, ]

head(srh_rcb, n = 6)

In [None]:
# winner of each of those matches, in row order
srh_rcb[["winner"]]

In [None]:
# number of head-to-head matches per winner
srh_rcb %>% count(winner)

In [None]:
# matches of SRH (matches played under the name 'Deccan Chargers' are counted as SRH too)
srh = subset(matches, (team1 %in% c('Sunrisers Hyderabad', 'Deccan Chargers')| team2 %in% c('Sunrisers Hyderabad', 'Deccan Chargers')))

In [None]:
# total no. of matches played by SRH
nrow(srh)

In [None]:
# winners in those matches
count(srh, winner)

In [None]:
# win ratio of SRH: wins under either franchise name divided by all matches played.
# Computed from `srh` instead of hand-copied counts so it stays correct when
# matches.csv changes.
sum(srh$winner %in% c('Sunrisers Hyderabad', 'Deccan Chargers')) / nrow(srh)

In [None]:
# matches of RCB (either side of the fixture)
rcb = subset(matches, (team1 == 'Royal Challengers Bangalore'| team2 == 'Royal Challengers Bangalore'))

In [None]:
# total no. of matches played by RCB
nrow(rcb)

In [None]:
# winners in those matches
count(rcb, winner)

In [None]:
# win ratio of RCB: matches won divided by all matches played.
# Computed from `rcb` instead of hand-copied counts so it stays correct when
# matches.csv changes; %in% treats a missing winner as "not a win".
sum(rcb$winner %in% 'Royal Challengers Bangalore') / nrow(rcb)

# Question 2:

In [None]:
# every delivery whose bowler name contains 'Sandeep Sharma'
sandeep <- deli[grepl('Sandeep Sharma', deli$bowler), ]

In [None]:
# runs conceded per match: wides + no-balls + runs off the bat
runs <- summarise(group_by(sandeep, match_id), runs = sum(wide_runs, noball_runs, batsman_runs))

In [None]:
# overs per match, counted as distinct over numbers (a part-bowled over counts as one)
overs <- summarise(group_by(sandeep, match_id), overs = length(unique(over)))

In [None]:
# join the two per-match tables on their shared match_id column
df <- merge(x = runs, y = overs)

# economy = runs conceded per over
df[["economy"]] <- df[["runs"]] / df[["overs"]]

In [None]:
# histogram of the per-match economy, split at 7.5
hist(df$economy, breaks = c(0, 7.5, max(df$economy)), freq = TRUE)

In [None]:
# boundaries (4s and 6s) given by him per match.
# Counting inside summarise() keeps the matches in which he conceded no
# boundary (count 0); filtering the rows first removed those matches entirely.
boundaries = sandeep %>%  group_by(match_id) %>% summarise(boundaries=sum(batsman_runs %in% c(4, 6)))

# histogram split at 4 boundaries; unique() avoids a duplicated break when the
# maximum itself is 4
hist(boundaries$boundaries, freq=TRUE, breaks=unique(sort(c(0, 4, max(boundaries$boundaries)))))

In [None]:
# ids of the matches in which he bowled while RCB were batting
rcb_match_ids <- unique(sandeep$match_id[which(sandeep$batting_team == 'Royal Challengers Bangalore')])

In [None]:
# per-match runs, overs and economy, restricted to those matches
df[df$match_id %in% rcb_match_ids, ]

In [None]:
# per-match boundary counts, restricted to those matches
boundaries[boundaries$match_id %in% rcb_match_ids, ]

# Question 3:

In [None]:
# batsmen expected in today's match; each entry is later passed to grepl()
# as a pattern against the `batsman` column, so it must appear in the data
# exactly as spelled here
batsmen <- c(
    "V Kohli", "AB de Villiers", "Gurkeerat Singh", "CH Morris", "Washington Sundar",
    "DA Warner", "WP Saha", "J Bairstow", "MK Pandey", "KS Williamson", "JO Holder"
)

In [None]:
# function to get batting details
#
# Arguments:
#   bname  pattern handed to grepl() and matched against the `batsman` column,
#          so every name containing it is included.
#   df     ball-by-ball data frame with the columns match_id, batsman,
#          batsman_runs and wide_runs.
#
# Returns the summary() table of three per-match columns: strike_rate,
# runs_4 (number of 4s) and boundaries (4s plus 6s). Every match in which the
# batsman was on strike for at least one delivery contributes a row; matches
# without a 4 or without a 6 are counted with 0 rather than dropped.
get_batting_info = function(bname, df){

    # filtering out the deliveries for each batsman
    data = subset(df, with(df, grepl(bname, batsman)))

    # one row per match, built in a single pass. Counting with sum() keeps the
    # matches that have no 4 or no 6; joining separately filtered per-shot
    # tables with merge() kept only matches containing both.
    final = data %>%
        group_by(match_id) %>%
        summarise(
            # runs scored off the bat
            runs = sum(batsman_runs),
            # balls faced: a wide is not a ball faced, every other delivery is
            balls = sum(wide_runs == 0),
            # 4s hit
            runs_4 = sum(batsman_runs == 4),
            # 6s hit
            runs_6 = sum(batsman_runs == 6)
        )
    final = as.data.frame(final)

    # calculating the strike rate (runs per 100 balls faced)
    final$strike_rate = (final$runs/final$balls)*100
    # a match in which only wides were received has no defined strike rate
    final$strike_rate[final$balls == 0] = NA

    # finding out the total number of boundaries
    final$boundaries = final$runs_4 + final$runs_6
    # returning the result
    return (summary(final[, c('strike_rate', 'runs_4', 'boundaries')]))
}

## Performance of the batsmen in IPL

In [None]:
# report every listed batsman against the full deliveries table
for (b_name in batsmen) {
    # label first, then the summary table returned for that batsman
    print(b_name)
    print(get_batting_info(bname = b_name, df = deli))
}

## Performance of the batsmen in IPL 2019

In [None]:
# deliveries from matches with match_id > 11000; the section heading treats
# these as the IPL 2019 season -- TODO confirm the cut-off against matches.csv.
# Filtered once here instead of once per batsman inside the loop.
deli_2019 = subset(deli, match_id > 11000)

# for loop to iterate over the batsman names
for (b_name in batsmen){
    # print batsman name
    print(b_name)
    # printing the statistical measures 
    print(get_batting_info(b_name, deli_2019))
}

# Question 4:

In [None]:
# function to get ratio for the batsman
#
# For every match the ratio is
#     (runs hit in 4s and 6s - runs scored in 1s, 2s and 3s) / total runs
# so values near 1 mean the runs came mostly from boundaries and values near
# -1 mean they came mostly from running.
#
# Arguments:
#   bname  pattern handed to grepl() and matched against the `batsman` column,
#          so every name containing it is included.
#   df     ball-by-ball data frame with the columns match_id, batsman,
#          batsman_runs and wide_runs.
#
# Returns summary() of the per-match ratio (the full six-number summary, not
# only the median), restricted to matches in which more than 10 balls were
# faced. A match with 0 runs gives 0/0 = NaN, which summary() reports under NA's.
get_ratio = function(bname, df){
    # deliveries faced  by the batsman
    data = subset(df, with(df, grepl(bname, batsman)))

    # one row per match, built in a single pass; a match with no runs in a
    # category gets 0 for it, so no joins or NA filling are needed
    result = data %>%
        group_by(match_id) %>%
        summarise(
            # runs scored per match
            runs = sum(batsman_runs),
            # runs scored in 1s, 2s and 3s
            runs123 = sum(batsman_runs[batsman_runs %in% c(1, 2, 3)]),
            # runs scored in boundaries
            runs_b = sum(batsman_runs[batsman_runs %in% c(4, 6)]),
            # balls faced: a wide is not a ball faced, every other delivery is
            balls = sum(wide_runs == 0)
        )
    result = as.data.frame(result)

    # keeping only the matches in which the batsman faced more than 10 balls
    # NOTE(review): an earlier comment described this filter as "scored less
    # than 15 runs"; the condition has always been on balls -- confirm intent
    result = subset(result, balls>10)

    # calculating the ratio
    result$ratio = (result$runs_b - result$runs123)/result$runs
    # returning the statistical summary of the ratio
    return (summary(result$ratio))
}

## Performance of the batsmen in IPL

In [None]:
# report every listed batsman against the full deliveries table
for (b_name in batsmen) {
    # label first, then the ratio summary returned for that batsman
    print(b_name)
    print(get_ratio(bname = b_name, df = deli))
}

## Performance of the batsmen in IPL 2019

In [None]:
# deliveries from matches with match_id > 11000; the section heading treats
# these as the IPL 2019 season -- TODO confirm the cut-off against matches.csv.
# Filtered once here instead of once per batsman inside the loop.
deli_2019 = subset(deli, match_id > 11000)

# for loop to iterate over the batsman names
for (b_name in batsmen){
    # print batsman name
    print(b_name)
    # printing the statistical measures 
    print(get_ratio(b_name, deli_2019))
}

# Question 5:

In [None]:
# total wickets taken in each IPL match in the powerplay overs (overs 1 to 6).
# Dismissals are counted inside summarise() so that a match without any
# powerplay wicket is kept with 0; filtering on player_dismissed first removed
# such matches from the table.
total_wickets = subset(deli, over %in% seq(1, 6, 1)) %>% group_by(match_id) %>% summarise(wickets = sum(!is.na(player_dismissed) & player_dismissed != ''))

head(total_wickets)

In [None]:
# histogram; unique() avoids a duplicated break when the maximum equals one of
# the fixed breaks
hist(total_wickets$wickets, freq=TRUE, breaks=unique(sort(c(0, 2, 3, 5, 6, max(total_wickets$wickets)))))

In [None]:
# all deliveries in SRH vs RCB matches (both sides must be one of `teams`)
srh_rcb_deli = subset(deli, (bowling_team %in% teams & batting_team %in% teams))

head(srh_rcb_deli)

In [None]:
# total wickets taken in SRH vs RCB matches in the powerplay overs (overs 1 to 6).
# Dismissals are counted inside summarise() so that a match without any
# powerplay wicket is kept with 0 instead of disappearing from the table.
wickets_srh_rcb = subset(srh_rcb_deli, over %in% seq(1, 6, 1)) %>% group_by(match_id) %>% summarise(wickets = sum(!is.na(player_dismissed) & player_dismissed != ''))

# histogram; unique() avoids a duplicated break when the maximum equals one of
# the fixed breaks
hist(wickets_srh_rcb$wickets, freq=TRUE, breaks=unique(sort(c(0, 2, 3, 5, 6, max(wickets_srh_rcb$wickets)))))

In [None]:
# all deliveries faced by RCB
rcb_bat = subset(deli, batting_team == 'Royal Challengers Bangalore')

# total wickets lost by RCB in each match in the powerplay overs (overs 1 to 6).
# Dismissals are counted inside summarise() so that a match in which RCB lost
# no powerplay wicket contributes a 0; dropping those matches pushed every
# statistic below upwards.
wickets_rcb = subset(rcb_bat, over %in% seq(1, 6, 1)) %>% group_by(match_id) %>% summarise(wickets = sum(!is.na(player_dismissed) & player_dismissed != ''))

#statistical measures
summary(wickets_rcb$wickets)

In [None]:
# all deliveries faced by SRH (batting under either franchise name)
srh_bat = subset(deli, batting_team %in% c('Deccan Chargers', 'Sunrisers Hyderabad'))

# total wickets lost by SRH in each match in the powerplay overs (overs 1 to 6).
# Dismissals are counted inside summarise() so that a match in which SRH lost
# no powerplay wicket contributes a 0; dropping those matches pushed every
# statistic below upwards.
wickets_srh = subset(srh_bat, over %in% seq(1, 6, 1)) %>% group_by(match_id) %>% summarise(wickets = sum(!is.na(player_dismissed) & player_dismissed != ''))

#statistical measures
summary(wickets_srh$wickets)