-
Notifications
You must be signed in to change notification settings - Fork 55
/
yt_search.R
254 lines (224 loc) · 10.3 KB
/
yt_search.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
#' Search YouTube
#'
#' Search for videos, channels and playlists. (By default, the function
#' searches for videos.)
#'
#' @param term Character. Search term; required; no default
#' For using Boolean operators, see the API documentation.
#' Here's some of the relevant information:
#' "Your request can also use the Boolean NOT (-) and OR (|) operators to
#' exclude videos or to
#' find videos that are associated with one of several search terms. For
#' example, to search
#' for videos matching either "boating" or "sailing", set the q parameter
#' value to boating|sailing.
#' Similarly, to search for videos matching either "boating" or "sailing"
#' but not "fishing",
#' set the q parameter value to boating|sailing -fishing"
#' @param max_results Maximum number of items that should be returned.
#' Integer. Optional. Can be between 0 and 50. Default is 50.
#' Search results are constrained to a maximum of 500 videos if type is
#' video and we have a value of \code{channel_id}.
#' @param channel_id Character. Only return search results from this
#' channel; Optional.
#' @param channel_type Character. Optional. Takes one of two values:
#' \code{'any', 'show'}. Default is \code{'any'}
#' @param event_type Character. Optional. Takes one of three values:
#' \code{'completed', 'live', 'upcoming'}
#' @param location Character. Optional. Latitude and Longitude within
#' parentheses, e.g. "(37.42307,-122.08427)"
#' @param location_radius Character. Optional. e.g. "1500m", "5km",
#' "10000ft", "0.75mi"
#' @param published_after Character. Optional. RFC 3339 format.
#' For instance, "1970-01-01T00:00:00Z"
#' @param published_before Character. Optional. RFC 3339 format.
#' For instance, "1970-01-01T00:00:00Z"
#' @param relevance_language Character. Optional. Default is "en". The
#' relevance_language argument instructs the API to return search results that
#' are most relevant to the specified language. The parameter value is
#' typically an ISO 639-1 two-letter language code. However, you should use the
#' values zh-Hans for simplified Chinese and zh-Hant for traditional Chinese.
#' Please note that results in other languages will still be returned if they
#' are highly relevant to the search query term.
#' @param type Character. Optional. Takes one of three values:
#' \code{'video', 'channel', 'playlist'}. Default is \code{'video'}.
#' @param video_caption Character. Optional. Takes one of three values:
#' \code{'any'} (return all videos; Default), \code{'closedCaption', 'none'}.
#' Type must be set to video.
#' @param video_type Character. Optional. Takes one of three values:
#' \code{'any'} (return all videos; Default), \code{'episode'}
#' (return episode of shows), 'movie' (return movies)
#' @param video_syndicated Character. Optional. Takes one of two values:
#' \code{'any'} (return all videos; Default), \code{'true'}
#' (return only syndicated videos)
#' @param region_code Character. Optional. Has to be an ISO 3166-1 alpha-2 code
#' (see \url{https://www.iso.org/obp/ui/#search}).
#' @param video_definition Character. Optional.
#' Takes one of three values: \code{'any'} (return all videos; Default),
#' \code{'high', 'standard'}
#' @param video_license Character. Optional.
#' Takes one of three values: \code{'any'} (return all videos; Default),
#' \code{'creativeCommon'} (return videos with Creative Commons
#' license), \code{'youtube'} (return videos with standard YouTube license).
#' @param simplify Boolean. Return a data.frame if \code{TRUE}.
#' Default is \code{TRUE}.
#' If \code{FALSE}, it returns a list that carries additional information.
#' Only takes effect when \code{get_all} is \code{FALSE}.
#' @param page_token specific page in the result set that should be
#' returned, optional
#' @param get_all get all results, iterating through all the results
#' pages. Default is \code{TRUE}.
#' Result is a \code{data.frame}. Optional.
#' @param \dots Additional arguments passed to \code{\link{tuber_GET}}.
#'
#' @return data.frame with 16 elements: \code{video_id, publishedAt,
#' channelId, title, description,
#' thumbnails.default.url, thumbnails.default.width, thumbnails.default.height,
#' thumbnails.medium.url,
#' thumbnails.medium.width, thumbnails.medium.height, thumbnails.high.url,
#' thumbnails.high.width,
#' thumbnails.high.height, channelTitle, liveBroadcastContent}
#'
#' @export
#'
#' @references \url{https://developers.google.com/youtube/v3/docs/search/list}
#'
#' @examples
#'
#' \dontrun{
#'
#' # Set API token via yt_oauth() first
#'
#' yt_search(term = "Barack Obama")
#' yt_search(term = "Barack Obama", published_after = "2016-10-01T00:00:00Z")
#' yt_search(term = "Barack Obama", published_before = "2016-09-01T00:00:00Z")
#' yt_search(term = "Barack Obama", published_before = "2016-03-01T00:00:00Z",
#' published_after = "2016-02-01T00:00:00Z")
#' yt_search(term = "Barack Obama", published_before = "2016-02-10T00:00:00Z",
#' published_after = "2016-01-01T00:00:00Z")
#' }
yt_search <- function(term = NULL, max_results = 50, channel_id = NULL,
                      channel_type = NULL, type = "video", event_type = NULL,
                      location = NULL, location_radius = NULL,
                      published_after = NULL, published_before = NULL,
                      video_definition = "any", video_caption = "any",
                      video_license = "any", video_syndicated = "any",
                      region_code = NULL, relevance_language = "en",
                      video_type = "any", simplify = TRUE, get_all = TRUE,
                      page_token = NULL, ...) {

  # Overview: validate the arguments, build the query for the "search"
  # endpoint, fetch the first page through tuber_GET(), then
  #   * get_all = TRUE   -> follow nextPageToken until none is returned and
  #                         return every item as one data.frame;
  #   * simplify = TRUE  -> return the single fetched page as a data.frame;
  #   * otherwise        -> return the parsed response as received.

  # ---- Argument validation -------------------------------------------------
  if (!is.character(term)) stop("Must specify a search term.\n")

  # Scalar condition, so use the short-circuiting `||`.
  if (max_results < 0 || max_results > 50) {
    stop("max_results only takes a value between 0 and 50.")
  }

  # The video_* filters are only validated and sent for video searches.
  is_video <- isTRUE(type == "video")

  # TRUE when `value` is a single string drawn from `choices`.
  is_choice <- function(value, choices) {
    length(value) == 1 && value %in% choices
  }

  if (is_video) {
    if (!is_choice(video_license, c("any", "creativeCommon", "youtube"))) {
      stop("video_license can only take values: any, creativeCommon, or youtube.")
    }
    if (!is_choice(video_syndicated, c("any", "true"))) {
      stop("video_syndicated can only take values: any or true.")
    }
    if (!is_choice(video_type, c("any", "episode", "movie"))) {
      stop("video_type can only take values: any, episode, or movie.")
    }
    if (!is_choice(video_caption, c("any", "closedCaption", "none"))) {
      stop("video_caption can only take values: any, closedCaption, or none.")
    }
    if (!is_choice(video_definition, c("any", "high", "standard"))) {
      stop("video_definition can only take values: any, high, or standard.")
    }
  }

  # TRUE when `stamp` is a single "YYYY-MM-DDTHH:MM:SSZ" timestamp.
  # Parsed in UTC: the trailing "Z" denotes UTC, and parsing in the session's
  # local time zone can yield NA for wall-clock times skipped by daylight
  # saving, which would reject a valid timestamp.
  is_rfc3339 <- function(stamp) {
    parsed <- as.POSIXct(stamp, format = "%Y-%m-%dT%H:%M:%SZ", tz = "UTC")
    length(parsed) == 1 && !is.na(parsed)
  }

  if (is.character(published_after) && !is_rfc3339(published_after)) {
    stop("The date is not properly formatted in RFC 3339 Format.")
  }
  if (is.character(published_before) && !is_rfc3339(published_before)) {
    stop("The date is not properly formatted in RFC 3339 Format.")
  }

  if (!is_video) {
    video_caption <- video_license <- video_definition <-
      video_type <- video_syndicated <- NULL
  }

  if (!is.null(location) && is.null(location_radius)) {
    stop("Location radius must be specified with location")
  }

  # ---- Build the query -----------------------------------------------------
  querylist <- list(part = "snippet",
                    q = term,
                    maxResults = max_results,
                    channelId = channel_id,
                    type = type,
                    channelType = channel_type,
                    eventType = event_type,
                    location = location,
                    locationRadius = location_radius,
                    publishedAfter = published_after,
                    publishedBefore = published_before,
                    videoDefinition = video_definition,
                    videoCaption = video_caption,
                    videoType = video_type,
                    videoSyndicated = video_syndicated,
                    videoLicense = video_license,
                    regionCode = region_code,
                    relevanceLanguage = relevance_language,
                    pageToken = page_token)

  # Sending NULLs to Google seems to short its wiring, so drop unset entries.
  querylist <- querylist[!vapply(querylist, is.null, logical(1))]

  # Turn a list of result items into a data.frame with one row per item.
  # For video searches the video id (stored under `id`, not `snippet`) is
  # prepended as `video_id`. An empty item list gives an empty data.frame.
  flatten_items <- function(items) {
    if (length(items) == 0) {
      return(data.frame())
    }
    rows <- lapply(items, function(item) {
      if (is_video) {
        c(video_id = item$id$videoId, unlist(item$snippet))
      } else {
        unlist(item$snippet)
      }
    })
    ldply(rows, rbind)
  }

  # ---- Fetch ---------------------------------------------------------------
  res <- tuber_GET("search", querylist, ...)

  if (identical(get_all, TRUE)) {
    # Collect the items of every page first and convert once at the end,
    # instead of rbind()-ing a growing data.frame page by page.
    pages <- list(res$items)
    next_token <- res$nextPageToken

    while (is.character(next_token)) {
      # Same query, next page; `...` is forwarded so that extra request
      # arguments apply to every page, not only the first.
      querylist$pageToken <- next_token
      page <- tuber_GET("search", querylist, ...)
      pages[[length(pages) + 1L]] <- page$items
      next_token <- page$nextPageToken
    }

    return(flatten_items(unlist(pages, recursive = FALSE)))
  }

  if (identical(simplify, TRUE)) {
    return(flatten_items(res$items))
  }

  res
}