/
mini.R
156 lines (145 loc) · 7.32 KB
/
mini.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
###|
###| "Mini" section (OLD VERSION)
###| Create a dataframe where each punter appears once
###|
#' Summarize data for players
#' @description This function is essentially a convenient wrapper for \code{dplyr::summarise}
#' which includes all of the relevant columns from a \code{puntr}-style data frame.
#' Punts are grouped by \code{punter_player_name}, so each punter appears once in the result.
#' NOTE: \code{puntr::create_mini}, \code{puntr::create_miniY}, and \code{puntr::create_miniG} are being phased out
#' in favor of \code{puntr::by_punters}, \code{puntr::by_punter_seasons}, and \code{puntr::by_punter_games}
#' @param punts The play-by-play punting data to be summarized
#' @param threshold The career punt cutoff, defaults to 64. A punter is kept only when
#' they have strictly more than \code{threshold} punts.
#' @param ... Any additional arguments will be passed through to \code{dplyr::summarise}.
#' They are evaluated after the built-in summaries, so a name that is both an input column
#' and a built-in summary (e.g. \code{RERUN}, \code{SHARP}, \code{pEPA}) refers to the summarised value.
#' @return A tibble \code{mini} where each row is a punter and each column is a stat
#' @examples
#' \dontrun{
#' create_mini(punts)
#' }
#' @export
create_mini <- function(punts, ..., threshold = 64) {
  punts %>%
    dplyr::group_by(punter_player_name) %>%
    dplyr::filter(dplyr::n() > threshold) %>%
    dplyr::summarize(
      NumPunts = dplyr::n(),
      Gross = mean(GrossYards),
      Net = mean(NetYards),
      RERUN = mean(RERUN),
      SHARP = mean(SHARP),
      SHARP_OF = mean(SHARP_OF, na.rm = TRUE),
      SHARP_PD = mean(SHARP_PD, na.rm = TRUE),
      SHARPnet = mean(SHARPnet),
      SHARPnet_OF = mean(SHARPnet_OF, na.rm = TRUE),
      SHARPnet_PD = mean(SHARPnet_PD, na.rm = TRUE),
      SHARP_RERUN = mean(SHARP_RERUN),
      SHARP_RERUN_OF = mean(SHARP_RERUN_OF, na.rm = TRUE),
      SHARP_RERUN_PD = mean(SHARP_RERUN_PD, na.rm = TRUE),
      Punt_eaepaae_avg = mean(pEPA),
      # The total must be computed BEFORE `pEPA` is overwritten below:
      # summarise() exposes newly created columns to later expressions, so
      # sum(pEPA) placed after `pEPA = mean(pEPA)` would just return the mean.
      Punt_eaepaae_tot = sum(pEPA),
      pEPA = mean(pEPA),
      returnpct = mean(returned),
      first_year = min(season),
      last_year = max(season),
      team_logo_espn = getmode_local(team_logo_espn),
      team_color = getmode_local(team_color),
      team_color2 = getmode_local(team_color2),
      ...
    ) %>%
    # Restore the historical column order (avg, pEPA, tot).
    dplyr::relocate(Punt_eaepaae_tot, .after = pEPA)
}
#' Summarize data for player-seasons
#' @description This function is essentially a convenient wrapper for \code{dplyr::summarise}
#' which includes all of the relevant columns from a \code{puntr}-style data frame.
#' This function differs from \code{puntr::create_mini}
#' in that it groups by both \code{punter_player_name} and \code{season} (and adds a convenient \code{seasonid} column to uniquely identify each row).
#' NOTE: \code{puntr::create_mini}, \code{puntr::create_miniY}, and \code{puntr::create_miniG} are being phased out
#' in favor of \code{puntr::by_punters}, \code{puntr::by_punter_seasons}, and \code{puntr::by_punter_games}
#' @param punts The play-by-play punting data to be summarized
#' @param threshold The per-season punt cutoff, defaults to 32. A punter-season is kept only
#' when it has strictly more than \code{threshold} punts.
#' @param ... Any additional arguments will be passed through to \code{dplyr::summarise}.
#' They are evaluated after the built-in summaries, so a name that is both an input column
#' and a built-in summary (e.g. \code{RERUN}, \code{SHARP}, \code{pEPA}) refers to the summarised value.
#' @return A tibble \code{miniY} where each row is a punter-season and each column is a stat
#' @examples
#' \dontrun{
#' create_miniY(punts)
#' }
#' @export
create_miniY <- function(punts, ..., threshold = 32) {
  season_stats <- punts %>%
    dplyr::group_by(punter_player_name, season) %>%
    dplyr::filter(dplyr::n() > threshold) %>%
    dplyr::summarize(
      NumPunts = dplyr::n(),
      Gross = mean(GrossYards),
      Net = mean(NetYards),
      RERUN = mean(RERUN),
      SHARP = mean(SHARP),
      SHARP_OF = mean(SHARP_OF, na.rm = TRUE),
      SHARP_PD = mean(SHARP_PD, na.rm = TRUE),
      SHARPnet = mean(SHARPnet),
      SHARPnet_OF = mean(SHARPnet_OF, na.rm = TRUE),
      SHARPnet_PD = mean(SHARPnet_PD, na.rm = TRUE),
      SHARP_RERUN = mean(SHARP_RERUN),
      SHARP_RERUN_OF = mean(SHARP_RERUN_OF, na.rm = TRUE),
      SHARP_RERUN_PD = mean(SHARP_RERUN_PD, na.rm = TRUE),
      Punt_eaepaae_avg = mean(pEPA),
      # The total must be computed BEFORE `pEPA` is overwritten below:
      # summarise() exposes newly created columns to later expressions, so
      # sum(pEPA) placed after `pEPA = mean(pEPA)` would just return the mean.
      Punt_eaepaae_tot = sum(pEPA),
      pEPA = mean(pEPA),
      returnpct = mean(returned),
      team_logo_espn = getmode_local(team_logo_espn),
      team_color = getmode_local(team_color),
      team_color2 = getmode_local(team_color2),
      ...
    ) %>%
    # Restore the historical column order (avg, pEPA, tot).
    dplyr::relocate(Punt_eaepaae_tot, .after = pEPA)

  # Label each row as "<punter name> <season>".
  season_stats %>%
    dplyr::mutate(
      seasonid = purrr::map2_chr(punter_player_name, season, glue::glue, .sep = " ")
    )
}
#' Summarize data for player-games
#' @description This function is essentially a convenient wrapper for \code{dplyr::summarise}
#' which includes all of the relevant columns from a \code{puntr}-style data frame.
#' This function differs from \code{puntr::create_mini} and \code{puntr::create_miniY}
#' in that it groups by \code{punter_player_name}, \code{season}, and \code{week}
#' (and adds a convenient \code{weekid} column to uniquely identify each row).
#' NOTE: \code{puntr::create_mini}, \code{puntr::create_miniY}, and \code{puntr::create_miniG} are being phased out
#' in favor of \code{puntr::by_punters}, \code{puntr::by_punter_seasons}, and \code{puntr::by_punter_games}
#' @param punts The play-by-play punting data to be summarized
#' @param threshold The minimum number of punts for a week to be included, defaults to 1.
#' A punter-week is kept when it has at least \code{threshold} punts.
#' @param ... Any additional arguments will be passed through to \code{dplyr::summarise}.
#' They are evaluated after the built-in summaries, so a name that is both an input column
#' and a built-in summary (e.g. \code{RERUN}, \code{SHARP}, \code{pEPA}) refers to the summarised value.
#' @return A tibble \code{miniG} where each row is a punter-week and each column is a stat
#' @examples
#' \dontrun{
#' create_miniG(punts)
#' }
#' @export
create_miniG <- function(punts, ..., threshold = 1) {
  game_stats <- punts %>%
    dplyr::group_by(punter_player_name, season, week) %>%
    dplyr::filter(dplyr::n() >= threshold) %>%
    dplyr::summarize(
      NumPunts = dplyr::n(),
      Gross = mean(GrossYards),
      Net = mean(NetYards),
      RERUN = mean(RERUN),
      SHARP = mean(SHARP),
      SHARP_OF = mean(SHARP_OF, na.rm = TRUE),
      SHARP_PD = mean(SHARP_PD, na.rm = TRUE),
      SHARPnet = mean(SHARPnet),
      SHARPnet_OF = mean(SHARPnet_OF, na.rm = TRUE),
      SHARPnet_PD = mean(SHARPnet_PD, na.rm = TRUE),
      SHARP_RERUN = mean(SHARP_RERUN),
      SHARP_RERUN_OF = mean(SHARP_RERUN_OF, na.rm = TRUE),
      SHARP_RERUN_PD = mean(SHARP_RERUN_PD, na.rm = TRUE),
      Punt_eaepaae_avg = mean(pEPA),
      # The total must be computed BEFORE `pEPA` is overwritten below:
      # summarise() exposes newly created columns to later expressions, so
      # sum(pEPA) placed after `pEPA = mean(pEPA)` would just return the mean.
      Punt_eaepaae_tot = sum(pEPA),
      pEPA = mean(pEPA),
      returnpct = mean(returned),
      team_logo_espn = getmode_local(team_logo_espn),
      team_color = getmode_local(team_color),
      team_color2 = getmode_local(team_color2),
      ...
    ) %>%
    # Restore the historical column order (avg, pEPA, tot).
    dplyr::relocate(Punt_eaepaae_tot, .after = pEPA)

  # Label each row as "<punter name> <season> w<week>".
  game_stats %>%
    dplyr::mutate(
      weekid = purrr::pmap_chr(
        list(punter_player_name, ' ', season, ' w', week),
        glue::glue
      )
    )
}
# Return the most frequent value (the mode) of the vector `v`.
# When several values share the highest count, the one that appears first
# in `v` is returned, because `unique()` keeps first-appearance order and
# `which.max()` picks the first maximum.
getmode_local <- function(v) {
  distinct_values <- unique(v)
  # Map every element to the index of its distinct value, then count
  # how many times each index occurs.
  occurrences <- tabulate(match(v, distinct_values))
  distinct_values[which.max(occurrences)]
}