In [1]:
import requests
import pandas as pd

In [2]:
# Fetch the stores endpoint with a single request and build a DataFrame from
# the records found under payload -> stores in the JSON body.
response = requests.get('https://api.data.codeup.com/api/v1/stores')
# Stop on a 4xx/5xx reply here, instead of failing further down with a JSON
# decoding error or a KeyError on 'payload'.
response.raise_for_status()
data = response.json()
stores = pd.DataFrame(data['payload']['stores'])

In [3]:
# Show the whole stores frame; the output below has 10 rows.
stores

Unnamed: 0,store_address,store_city,store_id,store_state,store_zipcode
0,12125 Alamo Ranch Pkwy,San Antonio,1,TX,78253
1,9255 FM 471 West,San Antonio,2,TX,78251
2,2118 Fredericksburg Rdj,San Antonio,3,TX,78201
3,516 S Flores St,San Antonio,4,TX,78204
4,1520 Austin Hwy,San Antonio,5,TX,78218
5,1015 S WW White Rd,San Antonio,6,TX,78220
6,12018 Perrin Beitel Rd,San Antonio,7,TX,78217
7,15000 San Pedro Ave,San Antonio,8,TX,78232
8,735 SW Military Dr,San Antonio,9,TX,78221
9,8503 NW Military Hwy,San Antonio,10,TX,78231


2 Approaches:

- loop until we don't have a next page
- make 1 request and figure out the max page, then loop over all the pages

In [4]:
# Approach 1: follow the API's own 'next_page' pointer until it runs out.
domain = 'https://api.data.codeup.com'
endpoint = '/api/v1/items'
items = []

# Each response carries the path of the following page in
# payload['next_page']; the loop ends when that value is None.
while endpoint is not None:
    url = domain + endpoint
    response = requests.get(url)
    # Fail on an HTTP error rather than on a missing 'payload' key later.
    response.raise_for_status()
    data = response.json()
    payload = data['payload']
    # '\r' + end='' rewrites the same output line for every page.
    print(f'\rGetting page {payload["page"]} of {payload["max_page"]}: {url}', end='')
    items.extend(payload['items'])
    endpoint = payload['next_page']

Getting page 3 of 3: https://api.data.codeup.com/api/v1/items?page=3

In [5]:
# Turn the list of item records collected by the pagination loop into a
# DataFrame. Note: this rebinds `items` from a list to a DataFrame.
items = pd.DataFrame(items)

In [6]:
# Approach 2: ask the first page how many pages exist, then request each
# remaining page by number.
base_url = 'https://api.data.codeup.com/api/v1/sales?page='
sales = []

# One Session is shared by every request so the connection to the host can be
# reused instead of being re-established for each page.
with requests.Session() as session:
    # make the first request
    url = base_url + str(1)
    response = session.get(url)
    # Fail on an HTTP error rather than on a missing 'payload' key later.
    response.raise_for_status()
    data = response.json()
    max_page = data['payload']['max_page']
    sales.extend(data['payload']['sales'])

    # We already made the request to the first page, so we'll start at 2.
    # We add 1 to max_page because the range() function is exclusive of the endpoint
    page_range = range(2, max_page + 1)

    for page in page_range:
        url = base_url + str(page)
        # '\r' + end='' rewrites the same output line for every page.
        print(f'\rFetching page {page}/{max_page} {url}', end='')
        response = session.get(url)
        response.raise_for_status()
        data = response.json()
        sales.extend(data['payload']['sales'])

Fetching page 183/183 https://api.data.codeup.com/api/v1/sales?page=183

In [8]:
# Turn the list of sale records collected from all pages into a DataFrame.
# Note: this rebinds `sales` from a list to a DataFrame.
sales = pd.DataFrame(sales)

In [None]:
# Write each frame to a CSV file in the working directory, leaving out the
# row index so it does not come back as an extra column on reload.
for frame, path in (
    (stores, 'stores.csv'),
    (items, 'items.csv'),
    (sales, 'sales.csv'),
):
    frame.to_csv(path, index=False)

In [9]:
# Preview the first rows of stores; store_id is the key used by the join further down.
stores.head()

Unnamed: 0,store_address,store_city,store_id,store_state,store_zipcode
0,12125 Alamo Ranch Pkwy,San Antonio,1,TX,78253
1,9255 FM 471 West,San Antonio,2,TX,78251
2,2118 Fredericksburg Rdj,San Antonio,3,TX,78201
3,516 S Flores St,San Antonio,4,TX,78204
4,1520 Austin Hwy,San Antonio,5,TX,78218


In [18]:
# Preview the first rows of sales; the 'item' and 'store' columns hold the
# ids that link each sale to the items and stores frames.
sales.head()

Unnamed: 0,item,sale_amount,sale_date,sale_id,store
0,1,13.0,"Tue, 01 Jan 2013 00:00:00 GMT",1,1
1,1,11.0,"Wed, 02 Jan 2013 00:00:00 GMT",2,1
2,1,14.0,"Thu, 03 Jan 2013 00:00:00 GMT",3,1
3,1,13.0,"Fri, 04 Jan 2013 00:00:00 GMT",4,1
4,1,10.0,"Sat, 05 Jan 2013 00:00:00 GMT",5,1


In [13]:
# Sanity check before joining: the number of distinct item_id values should
# equal the number of rows in items.
n_rows = len(items)
n_distinct_ids = items['item_id'].nunique()
n_distinct_ids == n_rows

True

In [19]:
# Give the sales key columns the same names as the id columns in items and
# stores, so the frames can be joined with on=<shared column name>.
key_column_names = {'item': 'item_id', 'store': 'store_id'}
sales = sales.rename(columns=key_column_names)

In [23]:
# Attach item attributes, then store attributes, to every sale.
# how='left' keeps every row of sales. validate='many_to_one' makes pandas
# raise a MergeError if a key is duplicated in the lookup frame (items or
# stores), which would otherwise silently duplicate sales rows.
df = pd.merge(sales, items, how='left', on='item_id', validate='many_to_one')
df = pd.merge(df, stores, how='left', on='store_id', validate='many_to_one')

How do we combine these datasets? Why would we combine these datasets?

## Further Reading

- [Example of speeding up the data extraction with multithreading](https://codeupclassroom.github.io/ada-ds-methodologies/store_api.html)