# Pull Data from API using Python

### 1. Connecting to a URL on the web (an API)

In [11]:
# import the necessary libraries
from urllib.parse import quote

import pandas as pd
import requests

In [17]:
# connect to the API; the timeout (in seconds) keeps the call from waiting forever
# if the server never answers
response_API = requests.get('https://gmail.googleapis.com/$discovery/rest?version=v1', timeout=10)

In [18]:
# check the status of the connection to the API: this shows the HTTP status code
# of the response (see the list of common codes below)
response_API.status_code

200

- **200**: a healthy connection with the API on web
- **204**: successfully made a connection to the API but did not return any data
- **401**: Authentication failed
- **403**: Access is forbidden by the API service
- **404**: API service is not found
- **500**: Internal Server Error occurred

In [None]:
# advanced operation: add endpoint and api_key to the request function

endpoint = "https://api.nasa.gov/mars-photos/api/v1/rovers/curiosity/photos"

# 'DEMO_KEY' is the shared demo key; replace it with a personal key for regular use
api_key = 'DEMO_KEY'

# the date filter of this endpoint is named 'earth_date' (format YYYY-MM-DD)
query_params = {'api_key': api_key, 'earth_date': '2020-07-01'}

# requests URL-encodes query_params and appends them to the endpoint;
# the timeout (in seconds) keeps the call from waiting forever
response = requests.get(endpoint, params=query_params, timeout=10)

### 2. Getting the data from API

After making a healthy connection with the API, the next task is to pull the data from the API.

In [19]:
# keep the raw response body as a string; the parsing cells further down read `data`
data = response_API.text

### 3.1 Parse the data into JSON format

JSON (JavaScript Object Notation) is the language of APIs. JSON is the primary format in which data is passed back and forth to APIs.

Python has a standard `json` package, which can convert lists and dictionaries to JSON, and convert JSON strings to lists and dictionaries.
- **json.dumps()**: converts a Python object to a JSON string
- **json.loads()**: converts a JSON string to a Python object

Herein, we use .json() to convert the response to a Python dictionary. 

In [20]:
import json

# method 1: using two steps - take the raw body text, then parse it with json.loads()
data = response_API.text
parsed_from_text = json.loads(data)

# method 2: just one step - let the response object parse its own body
parsed_by_requests = response_API.json()

# the last expression of the cell is what gets displayed
parsed_by_requests

{'ownerName': 'Google',
 'name': 'gmail',
 'parameters': {'prettyPrint': {'location': 'query',
   'default': 'true',
   'type': 'boolean',
   'description': 'Returns response with indentations and line breaks.'},
  'quotaUser': {'description': 'Available to use for quota purposes for server-side applications. Can be any arbitrary string assigned to a user, but should not exceed 40 characters.',
   'location': 'query',
   'type': 'string'},
  '$.xgafv': {'description': 'V1 error format.',
   'type': 'string',
   'location': 'query',
   'enumDescriptions': ['v1 error format', 'v2 error format'],
   'enum': ['1', '2']},
  'key': {'description': 'API key. Your API key identifies your project and provides you with API access, quota, and reports. Required unless you provide an OAuth 2.0 token.',
   'location': 'query',
   'type': 'string'},
  'access_token': {'description': 'OAuth access token.',
   'location': 'query',
   'type': 'string'},
  'upload_protocol': {'location': 'query',
   'typ

In [26]:
# example about extracting university data

def api_extract(country, n=-1):
    """Fetch the universities of one country and return them as a DataFrame.

    Parameters
    ----------
    country : str
        Country name. It is lower-cased and percent-encoded (spaces become
        %20) before being placed in the query string.
    n : int, default -1
        Number of records to return; -1 returns all the data.

    Returns
    -------
    pandas.DataFrame or str
        One row per record, with the keys of the first record as columns
        (a key missing from a later record shows up as NaN). The string
        'No Data Found' is returned when the status code is not 200, the
        body is not valid JSON, or the payload is not a non-empty list of
        dictionaries.
    """
    no_data = 'No Data Found'

    # safe='' also encodes '/', '&', '=' so the name cannot alter the query string
    url = f"http://universities.hipolabs.com/search?country={quote(country.lower(), safe='')}"
    # the timeout (in seconds) keeps the call from waiting forever
    req = requests.get(url, timeout=10)

    # check the status first: an error response need not carry a JSON body
    if req.status_code != 200:
        return no_data

    try:
        js = req.json()
    except ValueError:
        # the JSON decoding errors raised by .json() are ValueError subclasses
        return no_data

    # the frame is built from a non-empty list of dictionaries; anything else is "no data"
    if not isinstance(js, list) or not js:
        return no_data
    if not all(isinstance(record, dict) for record in js):
        return no_data

    # build the frame in one go, keeping the first record's key order as columns
    df = pd.DataFrame(js, columns=list(js[0].keys()))

    return df if n == -1 else df.head(n)
            

### 3.2 Parse the data into text format (extracting text from HTML)

In [None]:
# another example: pull the paragraph text out of the element with class 'post_content'
from bs4 import BeautifulSoup

# NOTE: `data` needs to hold HTML markup for this lookup to find anything
soup = BeautifulSoup(data, 'lxml')

# find() returns None when no element carries the class, so guard before
# calling find_all() on the result
post_content = soup.find(class_='post_content')
text = [p.text for p in post_content.find_all('p')] if post_content is not None else []