# Pagination

In [1]:
# Loading the packages
import requests
import json

In [2]:
# We will use the GitHub Jobs API to retrieve job listings
# Documentation can be found at: https://jobs.github.com/api

In [3]:
# Define the base URL: the "positions" endpoint of the GitHub Jobs API, which returns JSON.
# Every request in this notebook is sent to this URL; only the query parameters change.
base_site = "https://jobs.github.com/positions.json"

In [4]:
# Submit a GET request, filtering the listings by description keywords and location
r = requests.get(
    base_site,
    params={"description": "data science", "location": "los angeles"},
)
# Display the HTTP status code of the response
r.status_code

200

In [5]:
# Inspect the response: .json() parses the JSON body into Python objects
# (here a list with one dictionary per job posting)
r.json()

[{'id': '11cbce13-e6cd-4c79-b904-d292b569b22f',
  'type': 'Full Time',
  'url': 'https://jobs.github.com/positions/11cbce13-e6cd-4c79-b904-d292b569b22f',
  'created_at': 'Wed Jul 15 12:17:30 UTC 2020',
  'company': 'OpenPlay',
  'company_url': 'http:',
  'location': 'Santa Monica',
  'title': 'Software Enginee',
  'description': '<h2>Company Description</h2>\n<p><strong>Hello, we’re OpenPlay!</strong></p>\n<p>We’re looking for a full-stack engineer to work with us on our web-based music distribution application. You’ll work with us to design, develop, and deploy new application features for one of the world’s largest record companies. This is a full-time position that can be local (Los Angeles) or fully remote.</p>\n<h2>What’s an OpenPlay?</h2>\n<p>We’re a small software team making distribution and workflow products for some of the world’s largest music companies. We’re big on testing, constant incremental improvement, craftsmanship, and pragmatism, and so far have been able to use th

In [6]:
# How many jobs have been found?
# Each element of the parsed list is one job, so the list length is the job count
len(r.json())

4

### The page parameter

In [7]:
# Search across all jobs by sending the request without any filter parameters
r = requests.get(base_site)
# .ok is True when the status code is below 400 (i.e. not a client/server error)
r.ok

True

In [8]:
# Inspect the jobs returned by the unfiltered request
r.json()

[{'id': '47ab6c9d-7ea0-4ae0-94d2-27e0e02ead41',
  'type': 'Full Time',
  'url': 'https://jobs.github.com/positions/47ab6c9d-7ea0-4ae0-94d2-27e0e02ead41',
  'created_at': 'Thu Aug 13 14:53:11 UTC 2020',
  'company': 'Verition Fund Management',
  'company_url': 'http://www.veritionfund.com',
  'location': 'NYC/CT/Remote',
  'title': 'Senior Developer',
  'description': '<p>Verition Fund Management is looking to hire a Senior Developer to join the Firm’s low latency trading platform team. The role is full time, and can be remote for the right candidate.</p>\n<p>Required:\n-Advanced hands on C++ 11/14/17 experience developing multithreaded applications\n-Experience debugging with gdb\n-Experience writing in Python (pandas, numpy) as well as Bash and GTest\n-Experienced on linux systems\n-Familiar with git, TCP/IP, UDP, ZMQ (or other messaging libraries)</p>\n<p>The following would be a plus:\n-Work experience in financial industry, specifically on low latency trading systems\n-Experience i

In [9]:
# Count the jobs returned by the unfiltered request
len(r.json())

50

In [10]:
# According to the documentation, the results are split into pages
# These were the results from the first page only

In [11]:
# The next page is fetched with another GET request that carries the 'page' parameter
r = requests.get(base_site, params={"page": 2})
# Display the HTTP status code of the response
r.status_code

200

In [12]:
# Inspect the jobs returned for page 2
r.json()

[{'id': '5494086a-cf57-46f1-86bb-206bd7f9284b',
  'type': 'Full Time',
  'url': 'https://jobs.github.com/positions/5494086a-cf57-46f1-86bb-206bd7f9284b',
  'created_at': 'Mon Aug 03 08:37:13 UTC 2020',
  'company': 'InnoGames GmbH',
  'company_url': 'https://www.innogames.com',
  'location': 'Hamburg',
  'title': 'Java Software Developer - Core Team',
  'description': '<p>As <strong>Java Software Developer</strong>, you work closely with our game teams to shape the foundation of our next big hits and contribute as a productive member of an agile development team in all phases of the development lifecycle.</p>\n<p><strong>Your mission:</strong></p>\n<ul>\n<li>Develop features shared across our game backends</li>\n<li>Improve performance-critical components, increase the robustness and scalability of our software</li>\n<li>Improve our libraries and backend services with your knowledge of software architecture and high-quality code</li>\n<li>Maintain and extend our development tools and e

In [13]:
# Count the jobs returned for page 2
len(r.json())

50

In [14]:
# Request a page number that lies beyond the last page of results
r = requests.get(base_site, params={"page": 10})
# Display the HTTP status code of the response
r.status_code

200

In [15]:
# The request still succeeded (status 200), but the parsed body is an empty list.
# An empty list is therefore the signal that there are no more pages.
r.json()

[]

### Extracting results from multiple pages

In [16]:
# Let's obtain the results of the first 5 pages
# `results` accumulates the job postings collected from every page
results = []

In [17]:
# Start from an empty list so that re-running this cell does not append duplicates
results = []

# Pages are numbered from 1; collect the first 5
for page in range(1, 6):
    r = requests.get(base_site, params={"page": page})
    # Fail loudly on an HTTP error instead of trying to parse an error body
    r.raise_for_status()

    # Parse the body once and reuse it (every r.json() call re-parses the response)
    page_results = r.json()

    if not page_results:   # An empty page means we have reached the end of the results
        break

    # Add this page's jobs to our list of results
    results.extend(page_results)


In [18]:
# Number of jobs collected across all the pages that were requested
len(results)

240