In [2]:
import pandas as pd
import numpy as np
from datetime import datetime
import itertools

# JSON API
import requests
import json

# data visualization
import matplotlib
import seaborn as sns
import statsmodels.api as sm

%matplotlib inline

# ignore warnings
import warnings
warnings.filterwarnings("ignore")

#### 1. Using the code from the lesson as a guide, create a data frame named "items" that has all of the data for items.

In [3]:
base_url = 'https://python.zach.lol'
print(requests.get(base_url).text)

{"api":"/api/v1","help":"/documentation"}



This API provides some documentation, so we can take a look at it.

In [4]:
response = requests.get(base_url + '/documentation')
print(response.json()['payload'])


The API accepts GET requests for all endpoints, where endpoints are prefixed
with

    /api/{version}

Where version is "v1"

Valid endpoints:

- /stores[/{store_id}]
- /items[/{item_id}]
- /sales[/{sale_id}]

All endpoints accept a `page` parameter that can be used to navigate through
the results.



#### 0. Getting the data from the web API:

Based on this, let's take a look at the items. We'll make our request, and explore the shape of the response that we get back.

In [5]:
response = requests.get('https://python.zach.lol/api/v1/items')
data = response.json()
data.keys()

dict_keys(['payload', 'status'])

In [6]:
data['payload'].keys()

dict_keys(['items', 'max_page', 'next_page', 'page', 'previous_page'])

In [7]:
print('max_page: %s' % data['payload']['max_page'])
print('next_page: %s' % data['payload']['next_page'])

max_page: 3
next_page: /api/v1/items?page=2


#### 1.  Extract the data for "items."

There are three pages of data here.  The code should continue fetching data from the next page until all of the data is extracted. 

In [37]:
def get_items():
    """Download every page of the ``/api/v1/items`` endpoint into one DataFrame.

    Starts at the first page and keeps following the ``next_page`` link that
    each response carries in its payload. A second request is only made when
    the first page actually advertises a next page, so an endpoint with a
    single page is handled as well.

    Returns
    -------
    pandas.DataFrame
        One row per item, pages in the order they were fetched, with a fresh
        0..n-1 index.

    Raises
    ------
    requests.HTTPError
        If any page request comes back with a 4xx/5xx status.
    """
    base_url = 'https://python.zach.lol'
    endpoint = '/api/v1/items'
    pages = []
    while True:
        response = requests.get(base_url + endpoint)
        # Fail loudly on an HTTP error instead of tripping over a missing
        # 'payload' key further down.
        response.raise_for_status()
        payload = response.json()['payload']
        pages.append(pd.DataFrame(payload['items']))
        endpoint = payload['next_page']
        # Stop on the last page: either there is no link left to follow or
        # the page counter has reached max_page.
        if not endpoint or payload['page'] >= payload['max_page']:
            break
    # Concatenate once at the end; concatenating inside the loop copies all
    # previously fetched rows again for every page.
    return pd.concat(pages, ignore_index=True)

In [12]:
def items_2():
    """Fetch the complete items table through ``get_items`` and return it.

    The result is returned rather than only bound to a local name, so the
    caller can keep it, e.g. ``items = items_2()``.

    Returns
    -------
    Whatever ``get_items()`` returns (a DataFrame of all items).
    """
    return get_items()

In [15]:
def items_3():
    """Write the notebook-level ``items`` frame to ``items.csv``.

    Reads the global name ``items`` at call time, so that name must already
    be bound when this function is called.
    """
    target = 'items.csv'
    items.to_csv(target)

##### Problem 1 complete.

#### 2. Do the same thing, but for stores.

In [24]:
def get_stores():
    """Download every page of the ``/api/v1/stores`` endpoint into one DataFrame.

    The endpoint is paginated like the other ones in this notebook, so the
    ``next_page`` link in each payload is followed until the last page
    instead of assuming that everything fits on page 1.

    Returns
    -------
    pandas.DataFrame
        One row per store, pages in the order they were fetched, with a
        fresh 0..n-1 index.

    Raises
    ------
    requests.HTTPError
        If any page request comes back with a 4xx/5xx status.
    """
    base_url = 'https://python.zach.lol'
    endpoint = '/api/v1/stores'
    pages = []
    while True:
        response = requests.get(base_url + endpoint)
        # Surface HTTP errors directly rather than as a KeyError on 'payload'.
        response.raise_for_status()
        payload = response.json()['payload']
        pages.append(pd.DataFrame(payload['stores']))
        endpoint = payload['next_page']
        # Last page reached: nothing left to follow, or page == max_page.
        if not endpoint or payload['page'] >= payload['max_page']:
            break
    return pd.concat(pages, ignore_index=True)

In [25]:
stores = get_stores()

In [34]:
stores.shape

(10, 5)

In [39]:
stores.columns

Index(['store_address', 'store_city', 'store_id', 'store_state',
       'store_zipcode'],
      dtype='object')

In [26]:
stores.tail()

Unnamed: 0,store_address,store_city,store_id,store_state,store_zipcode
5,1015 S WW White Rd,San Antonio,6,TX,78220
6,12018 Perrin Beitel Rd,San Antonio,7,TX,78217
7,15000 San Pedro Ave,San Antonio,8,TX,78232
8,735 SW Military Dr,San Antonio,9,TX,78221
9,8503 NW Military Hwy,San Antonio,10,TX,78231


In [35]:
stores.to_csv('stores.csv')

##### Problem 2 complete.

#### 3.  Extract the data for sales.

There are a lot of pages of data here, so your code will need to be a little more complex. Your code should continue fetching data from the next page until all of the data is extracted. 

In [16]:
def get_sales():
    """Download every page of the ``/api/v1/sales`` endpoint into one DataFrame.

    Starts at the first page and follows the ``next_page`` link in each
    payload until the last page. Each page is collected in a list and the
    frames are concatenated once at the end, which matters here because this
    endpoint spans many pages.

    Returns
    -------
    pandas.DataFrame
        One row per sale, pages in the order they were fetched, with a fresh
        0..n-1 index.

    Raises
    ------
    requests.HTTPError
        If any page request comes back with a 4xx/5xx status.
    """
    base_url = 'https://python.zach.lol'
    endpoint = '/api/v1/sales'
    pages = []
    while True:
        response = requests.get(base_url + endpoint)
        # Fail loudly on an HTTP error instead of tripping over a missing
        # 'payload' key further down.
        response.raise_for_status()
        payload = response.json()['payload']
        pages.append(pd.DataFrame(payload['sales']))
        endpoint = payload['next_page']
        # Stop on the last page: either there is no link left to follow or
        # the page counter has reached max_page.
        if not endpoint or payload['page'] >= payload['max_page']:
            break
    # A single concat avoids re-copying all earlier rows for every new page.
    return pd.concat(pages, ignore_index=True)

In [None]:
sales = get_sales()

In [38]:
sales.columns

Index(['item', 'sale_amount', 'sale_date', 'sale_id', 'store'], dtype='object')

In [19]:
sales.tail()

Unnamed: 0,item,sale_amount,sale_date,sale_id,store
912995,50,63.0,"Wed, 27 Dec 2017 00:00:00 GMT",912996,10
912996,50,59.0,"Thu, 28 Dec 2017 00:00:00 GMT",912997,10
912997,50,74.0,"Fri, 29 Dec 2017 00:00:00 GMT",912998,10
912998,50,62.0,"Sat, 30 Dec 2017 00:00:00 GMT",912999,10
912999,50,82.0,"Sun, 31 Dec 2017 00:00:00 GMT",913000,10


In [20]:
sales.to_csv('sales.csv')

##### Problem 3 complete.

In [None]:
# NOTE(review): `cust_churn` is not defined anywhere in the visible part of
# this notebook, so on a fresh kernel this cell raises NameError. It looks
# like a leftover from a different analysis -- confirm, then define it or
# remove the cell.
cust_churn.to_csv('customer_churn.csv')

In [None]:
# NOTE(review): get_sales() keeps its own `data` local, so the notebook-level
# `data` read here is whatever an earlier cell left behind. On a top-to-bottom
# run that is the items response, whose payload has no 'sales' key -- this
# lookup would then raise KeyError. Confirm which response is intended.
len(data['payload']['sales'])

In [None]:
response = requests.get('http://example.com')

In [None]:
response.text

We've seen 404 codes before: page not found.

In [None]:
requests.get('http://example.com/lkajdfjadkljfalkdjf')

* A 4xx status code (e.g. 400, 404) indicates a client (user) error, while a 5xx status code (e.g. 500) indicates a server error.

In [None]:
response = requests.get("https://swapi.co/api/people/1/")

In [None]:
data = response.json()

In [None]:
data['name']

In [None]:
# Each entry in data['starships'] is used as a URL: request it, decode the
# JSON body and print its 'name' field.
# Note: once the loop has run, `response` refers to the last request made
# here (not the original lookup), and the cells below read from it.
for url in data['starships']:
    response = requests.get(url)
    starship = response.json()
    print(starship['name'])

In [None]:
response

In [None]:
data = response.json()

In [None]:
data.keys()

In [None]:
data['crew']

Grabbing data from page 1

In [None]:
# Request page 1 of the items endpoint again so this walkthrough starts from
# a known state; the cells above rebind `response` and `data` to replies
# from other services, which have no 'payload' key.
response = requests.get(base_url + '/api/v1/items')
data = response.json()
df = pd.DataFrame(data['payload']['items'])
df.head()

Grabbing data from page 2

In [None]:
response = requests.get(base_url + data['payload']['next_page'])
data = response.json()

In [None]:
# drop=True discards the old per-page row labels; a bare reset_index() would
# keep them as an extra 'index' column in the combined frame.
df = pd.concat([df, pd.DataFrame(data['payload']['items'])]).reset_index(drop=True)

In [None]:
df.tail()

In [None]:
print('max_page: %s' % data['payload']['max_page'])
print('next_page: %s' % data['payload']['next_page'])

Grabbing data from page 3

In [None]:
response = requests.get(base_url + data['payload']['next_page'])
data = response.json()

In [None]:
# drop=True discards the old row labels instead of storing them as another
# leftover index column in the combined frame.
df = pd.concat([df, pd.DataFrame(data['payload']['items'])]).reset_index(drop=True)
df.tail()

In [None]:
print('max_page: %s' % data['payload']['max_page'])
print('next_page: %s' % data['payload']['next_page'])

In [None]:
df.shape

##### All rows and columns captured properly.

##### Now rename the df as "items."

In [None]:
items = df

In [None]:
items.head()

##### Problem 1 complete.

#### 2. Do the same thing, but for stores.

In [28]:
response = requests.get('https://python.zach.lol/api/v1/stores')
data = response.json()
data.keys()

dict_keys(['payload', 'status'])

In [29]:
data['payload'].keys()

dict_keys(['max_page', 'next_page', 'page', 'previous_page', 'stores'])

In [30]:
print('max_page: %s' % data['payload']['max_page'])
print('next_page: %s' % data['payload']['next_page'])

max_page: 1
next_page: None


Grabbing data from page 1

In [33]:
df = pd.DataFrame(data['payload']['stores'])
df.tail()

Unnamed: 0,store_address,store_city,store_id,store_state,store_zipcode
5,1015 S WW White Rd,San Antonio,6,TX,78220
6,12018 Perrin Beitel Rd,San Antonio,7,TX,78217
7,15000 San Pedro Ave,San Antonio,8,TX,78232
8,735 SW Military Dr,San Antonio,9,TX,78221
9,8503 NW Military Hwy,San Antonio,10,TX,78231


In [32]:
df.shape

(10, 5)

##### All rows and columns captured properly.

##### Now rename the df as "stores."

In [None]:
stores = df
stores.head()

##### Problem 2 complete.