
Commit

Allow link headers returned from paged requests to be used in new requests (#52)

* allow link headers returned from paged requests to be easily passed back in to new requests

* use Compat
jrevels committed May 4, 2016
1 parent e44efe3 commit 4e3dab1
Showing 4 changed files with 71 additions and 47 deletions.
47 changes: 34 additions & 13 deletions README.md
````diff
@@ -187,33 +187,54 @@ Paginated methods return both the response values, and some pagination metadata.
 For example, let's request a couple pages of GitHub.jl's PRs, and configure our result pagination to see how it works:
 
 ```julia
-julia> myparams = Dict("state" => "all", "per_page" => 3, "page" => 2); # show all PRs (both open and closed), and give me 3 items per page starting at page 2
+# show all PRs (both open and closed), and give me 3 items per page starting at page 2
+julia> myparams = Dict("state" => "all", "per_page" => 3, "page" => 2);
 
 julia> prs, page_data = pull_requests("JuliaWeb/GitHub.jl"; params = myparams, page_limit = 2);
 
 julia> prs # 3 items per page * 2 page limit == 6 items, as expected
 6-element Array{GitHub.PullRequest,1}:
+ GitHub.PullRequest(44)
+ GitHub.PullRequest(43)
+ GitHub.PullRequest(42)
+ GitHub.PullRequest(41)
  GitHub.PullRequest(39)
  GitHub.PullRequest(38)
- GitHub.PullRequest(37)
- GitHub.PullRequest(34)
- GitHub.PullRequest(32)
- GitHub.PullRequest(30)
 
 julia> page_data
-Dict{UTF8String,Int64} with 3 entries:
-  "last" => 5
-  "left" => 2
-  "next" => 4
+Dict{UTF8String,UTF8String} with 4 entries:
+  "prev"  => "https://api.github.com/repositories/16635105/pulls?page=2&per_page=3&state=all"
+  "next"  => "https://api.github.com/repositories/16635105/pulls?page=4&per_page=3&state=all"
+  "first" => "https://api.github.com/repositories/16635105/pulls?page=1&per_page=3&state=all"
+  "last"  => "https://api.github.com/repositories/16635105/pulls?page=7&per_page=3&state=all"
 ```
 
 In the above, `prs` contains the results from pages 2 and 3. We know this because we specified page 2 as our starting page (`"page" => 2`), and limited the response to 2 pages max (`page_limit = 2`). In addition, we know that exactly 2 pages were actually retrieved, since there are 6 items and we said each page should only contain 3 items (`"per_page" => 3`).
 
-The values provided by `page_data` are calculated by assuming the same `per_page` value given in the original request. Here's a description of each key in `page_data`:
-
-- `page_data["last"]`: The last page of results available to be queried. In our example, the final page we could query for is page 5.
-- `page_data["left"]`: The number of pages left between the final page delivered in our result and `page_data["last"]`. Our final page was page 3, and the last page is page 5, so we have 2 pages of results left to retrieve.
-- `page_data["next"]`: The index of the next page after the final page delivered in our result. In the example, our final page was page 3, so the next page will be 4.
+The values provided by `page_data` are the same values included in the [Link header](https://developer.github.com/v3/#link-header) of the last requested page. You can continue paginating by starting a new paginated request at one of these links using the `start_page` keyword argument:
+
+```julia
+# Continue paging, starting with `page_data["next"]`.
+# Note that the `params` kwarg can't be used here because
+# the link passed to `start_page` has its own parameters.
+julia> prs2, page_data2 = pull_requests("JuliaWeb/GitHub.jl"; page_limit = 2, start_page = page_data["next"]);
+
+julia> prs2
+6-element Array{GitHub.PullRequest,1}:
+ GitHub.PullRequest(37)
+ GitHub.PullRequest(34)
+ GitHub.PullRequest(32)
+ GitHub.PullRequest(30)
+ GitHub.PullRequest(24)
+ GitHub.PullRequest(22)
+
+julia> page_data2
+Dict{UTF8String,UTF8String} with 4 entries:
+  "prev"  => "https://api.github.com/repositories/16635105/pulls?page=4&per_page=3&state=all"
+  "next"  => "https://api.github.com/repositories/16635105/pulls?page=6&per_page=3&state=all"
+  "first" => "https://api.github.com/repositories/16635105/pulls?page=1&per_page=3&state=all"
+  "last"  => "https://api.github.com/repositories/16635105/pulls?page=7&per_page=3&state=all"
 ```
 
 ## Handling Webhook Events
````
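The README example stops after a fixed `page_limit`, but the same two return values are enough to drain every remaining page: request a single page at a time and feed each `page_data["next"]` back in through `start_page`. A minimal sketch, assuming the `pull_requests` behavior documented above (the loop itself is illustrative and not part of the package):

```julia
# Fetch GitHub.jl's PRs one page per request, following each "next" link.
# `page_limit = 1` makes every call return exactly one page of results.
prs, page_data = pull_requests("JuliaWeb/GitHub.jl"; page_limit = 1,
                               params = Dict("state" => "all", "per_page" => 3))
all_prs = copy(prs)
while haskey(page_data, "next")
    # `params` is omitted here; the "next" link carries its own parameters.
    prs, page_data = pull_requests("JuliaWeb/GitHub.jl"; page_limit = 1,
                                   start_page = page_data["next"])
    append!(all_prs, prs)
end
```

Each iteration is one HTTP request, so for a large repository it is worth raising `per_page` (GitHub caps it at 100) to reduce the number of round trips.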
1 change: 1 addition & 0 deletions REQUIRE
```diff
@@ -1,5 +1,6 @@
 julia 0.4
 
+Compat
 JSON
 HttpServer
 MbedTLS
```
2 changes: 2 additions & 0 deletions src/GitHub.jl
```diff
@@ -1,5 +1,7 @@
 module GitHub
 
+using Compat
+
 ##########
 # import #
 ##########
```
68 changes: 34 additions & 34 deletions src/utils/requests.jl
```diff
@@ -47,55 +47,55 @@ rate_limit(; options...) = gh_get_json("/rate_limit"; options...)
 ##############
 # Pagination #
 ##############
 
-ispaginated(r) = haskey(r.headers, "Link")
-
-isnextlink(str) = contains(str, "rel=\"next\"")
-islastlink(str) = contains(str, "rel=\"last\"")
-
-has_next_page(r) = isnextlink(r.headers["Link"])
-has_last_page(r) = islastlink(r.headers["Link"])
-
-split_links(r) = split(r.headers["Link"], ',')
-get_link(pred, links) = match(r"<.*?>", links[findfirst(pred, links)]).match[2:end-1]
-
-get_next_page(r) = get_link(isnextlink, split_links(r))
-get_last_page(r) = get_link(islastlink, split_links(r))
-
-function request_next_page(r, headers)
-    nextlink = get_link(isnextlink, split_links(r))
-    return Requests.get(nextlink, headers = headers)
-end
-
-function github_paged_request(request_method, endpoint; page_limit = Inf,
-                              auth = AnonymousAuth(), handle_error = true,
-                              headers = Dict(), params = Dict())
-    r = github_request(request_method, endpoint;
-                       auth = auth, handle_error = handle_error,
-                       headers = headers, params = params)
+has_page_links(r) = haskey(r.headers, "Link")
+get_page_links(r) = split(r.headers["Link"], ',')
+
+function find_page_link(links, rel)
+    relstr = "rel=\"$(rel)\""
+    for i in 1:length(links)
+        if contains(links[i], relstr)
+            return i
+        end
+    end
+    return 0
+end
+
+extract_page_url(link) = match(r"<.*?>", link).match[2:end-1]
+
+function github_paged_get(endpoint; page_limit = Inf, start_page = "", handle_error = true,
+                          headers = Dict(), params = Dict(), options...)
+    if isempty(start_page)
+        r = gh_get(endpoint; handle_error = handle_error, headers = headers, params = params, options...)
+    else
+        @assert isempty(params) "`start_page` kwarg is incompatible with `params` kwarg"
+        r = Requests.get(start_page, headers = headers)
+    end
     results = HttpCommon.Response[r]
-    init_page = get(params, "page", 1)
-    if ispaginated(r)
+    page_data = Dict{GitHubString, GitHubString}()
+    if has_page_links(r)
         page_count = 1
-        while has_next_page(r) && page_count < page_limit
-            next_page = get_next_page(r)
-            r = request_next_page(r, headers)
+        while page_count < page_limit
+            links = get_page_links(r)
+            next_index = find_page_link(links, "next")
+            next_index == 0 && break
+            r = Requests.get(extract_page_url(links[next_index]), headers = headers)
             handle_error && handle_response_error(r)
             push!(results, r)
             page_count += 1
         end
-        if has_last_page(r)
-            page_data["last"] = get_last_page(r)
-        end
-        if has_next_page(r)
-            page_data["next"] = get_next_page(r)
+        links = get_page_links(r)
+        for page in ("next", "last", "first", "prev")
+            page_index = find_page_link(links, page)
+            if page_index != 0
+                page_data[page] = extract_page_url(links[page_index])
+            end
         end
     end
     return results, page_data
 end
 
 function gh_get_paged_json(endpoint = ""; options...)
-    results, page_data = github_paged_request(Requests.get, endpoint; options...)
+    results, page_data = github_paged_get(endpoint; options...)
     return mapreduce(Requests.json, vcat, results), page_data
 end
```
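For a sense of what the new helpers consume, here is a self-contained sketch (Julia 0.4-era syntax, matching the package at this commit) that runs `find_page_link` and `extract_page_url`, with definitions copied from the diff above, against a hand-written `Link` header value; the sample header string is illustrative:

```julia
# A sample Link header value of the kind GitHub attaches to paged responses.
header = string("<https://api.github.com/repositories/16635105/pulls?page=4&per_page=3&state=all>; rel=\"next\", ",
                "<https://api.github.com/repositories/16635105/pulls?page=7&per_page=3&state=all>; rel=\"last\"")
links = split(header, ',')

# Return the index of the first link annotated with the given rel, or 0 if absent.
function find_page_link(links, rel)
    relstr = "rel=\"$(rel)\""
    for i in 1:length(links)
        if contains(links[i], relstr)
            return i
        end
    end
    return 0
end

# Strip the angle brackets to get the bare URL.
extract_page_url(link) = match(r"<.*?>", link).match[2:end-1]

next_index = find_page_link(links, "next")
next_index == 0 || println(extract_page_url(links[next_index]))
# prints: https://api.github.com/repositories/16635105/pulls?page=4&per_page=3&state=all
```

Resolving `rel` tags at request time, rather than precomputing page numbers from `per_page`, is what lets `start_page` accept any of the returned links verbatim.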
