In [1]:
import pandas as pd
import numpy as np
import acquire

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

plt.rc('figure', figsize=(11, 9))
plt.rc('font', size=13)

import os
import requests

import warnings
warnings.filterwarnings("ignore")

Create the response 

In [2]:
# base_url = "https://python.zach.lol"
# response = requests.get(base_url)
# response.json()

In [3]:
# response = acquire.get_response("https://python.zach.lol")

In [4]:
# response = requests.get('https://python.zach.lol/api/v1/items')

# data = response.json()
# data

In [5]:
# data.keys()

In [6]:
base_url = 'https://python.zach.lol'
response = requests.get(base_url + '/documentation')
data = response.json()
print(data['payload'])


The API accepts GET requests for all endpoints, where endpoints are prefixed
with

    /api/{version}

Where version is "v1"

Valid endpoints:

- /stores[/{store_id}]
- /items[/{item_id}]
- /sales[/{sale_id}]

All endpoints accept a `page` parameter that can be used to navigate through
the results.



In [7]:
# This is page 1 of items out of 3 pages

api_url = base_url + '/api/v1/'
response = requests.get(api_url + 'items')

In [8]:
# Use .json() method on our response, and we have a dictionary object

data = response.json()
print(type(data))
data

<class 'dict'>


{'payload': {'items': [{'item_brand': 'Riceland',
    'item_id': 1,
    'item_name': 'Riceland American Jazmine Rice',
    'item_price': 0.84,
    'item_upc12': '35200264013',
    'item_upc14': '35200264013'},
   {'item_brand': 'Caress',
    'item_id': 2,
    'item_name': 'Caress Velvet Bliss Ultra Silkening Beauty Bar - 6 Ct',
    'item_price': 6.44,
    'item_upc12': '11111065925',
    'item_upc14': '11111065925'},
   {'item_brand': 'Earths Best',
    'item_id': 3,
    'item_name': 'Earths Best Organic Fruit Yogurt Smoothie Mixed Berry',
    'item_price': 2.43,
    'item_upc12': '23923330139',
    'item_upc14': '23923330139'},
   {'item_brand': 'Boars Head',
    'item_id': 4,
    'item_name': 'Boars Head Sliced White American Cheese - 120 Ct',
    'item_price': 3.14,
    'item_upc12': '208528800007',
    'item_upc14': '208528800007'},
   {'item_brand': 'Back To Nature',
    'item_id': 5,
    'item_name': 'Back To Nature Gluten Free White Cheddar Rice Thin Crackers',
    'item_price':

In [9]:
# let's look at the keys in payload
# payload is also a key for a dictionary of items
# items is the key for a list of dictionaries (items)

data['payload'].keys()

dict_keys(['items', 'max_page', 'next_page', 'page', 'previous_page'])

In [10]:
# We have a list of 20 items from page one, and now we want to add items from the next page

items = data['payload']['items']

In [11]:
# Look at first two item dictionaries in items list

data['payload']['items'][:2]

[{'item_brand': 'Riceland',
  'item_id': 1,
  'item_name': 'Riceland American Jazmine Rice',
  'item_price': 0.84,
  'item_upc12': '35200264013',
  'item_upc14': '35200264013'},
 {'item_brand': 'Caress',
  'item_id': 2,
  'item_name': 'Caress Velvet Bliss Ultra Silkening Beauty Bar - 6 Ct',
  'item_price': 6.44,
  'item_upc12': '11111065925',
  'item_upc14': '11111065925'}]

In [12]:
# Our next page will be page 2 of items out of 3
data['payload']['next_page']

'/api/v1/items?page=2'

In [13]:
# Grab the next page

response = requests.get(base_url + data['payload']['next_page'])
data = response.json()

In [14]:
# Add items from the second page to our list

items.extend(data['payload']['items'])
len(items)

40

In [15]:
# Our next page is page 3 of items out of 3

data['payload']['next_page']

'/api/v1/items?page=3'

In [16]:
# Grab the next page in the same way and add items to our list
# There are only 10 items on this last page (len(data['payload']['items']))

response = requests.get(base_url + data['payload']['next_page'])
data = response.json()

items.extend(data['payload']['items'])
len(items)

50

In [17]:
# On the last page the API reports no next page; use an identity check for None
data['payload']['next_page'] is None

True

In [18]:
# Use our items, our list of dictionaries, to create a DataFrame

items_df = pd.DataFrame(items)
print(items_df.shape)
items_df.head(2)

(50, 6)


Unnamed: 0,item_brand,item_id,item_name,item_price,item_upc12,item_upc14
0,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013
1,Caress,2,Caress Velvet Bliss Ultra Silkening Beauty Bar...,6.44,11111065925,11111065925


In [19]:
api_url = base_url + '/api/v1/'
response = requests.get(api_url + 'sales')
data = response.json()
data['payload']['max_page']

183

In [20]:
api_url = base_url + '/api/v1/'
response = requests.get(api_url + 'stores')
data = response.json()
data['payload']['max_page']

1

In [21]:
def get_df(name):
    """
    Download every page of one API endpoint and return it as a DataFrame.

    Parameters
    ----------
    name : str
        The endpoint to fetch: 'items', 'stores', or 'sales'. The same
        string is used as the key of the record list inside each page's
        payload and as the stem of the csv file that is written.

    Returns
    -------
    pandas.DataFrame
        One row per record, collected across all pages.

    Raises
    ------
    requests.HTTPError
        If any page request comes back with an error status code.

    Side effects
    ------------
    Writes the DataFrame to '<name>.csv' in the current working directory
    for future use.
    """
    base_url = 'https://python.zach.lol'

    # Start at the first page; each payload tells us where the next page is.
    next_page = '/api/v1/' + name
    records = []

    # The API sets next_page to None once the last page has been served.
    while next_page is not None:
        # A timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(base_url + next_page, timeout=30)
        # Fail loudly on 4xx/5xx instead of tripping over the body later.
        response.raise_for_status()

        payload = response.json()['payload']
        records.extend(payload[name])
        next_page = payload['next_page']

    # Build the DataFrame once from the full list of records
    df = pd.DataFrame(records)

    # Write DataFrame to csv file for future use
    df.to_csv(name + '.csv')
    return df

In [22]:
items_df = get_df('items')
print(items_df.shape)
items_df.head()

(50, 6)


Unnamed: 0,item_brand,item_id,item_name,item_price,item_upc12,item_upc14
0,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013
1,Caress,2,Caress Velvet Bliss Ultra Silkening Beauty Bar...,6.44,11111065925,11111065925
2,Earths Best,3,Earths Best Organic Fruit Yogurt Smoothie Mixe...,2.43,23923330139,23923330139
3,Boars Head,4,Boars Head Sliced White American Cheese - 120 Ct,3.14,208528800007,208528800007
4,Back To Nature,5,Back To Nature Gluten Free White Cheddar Rice ...,2.61,759283100036,759283100036


In [None]:
sales_df = get_df('sales')
print(sales_df.shape)
sales_df.head()

In [None]:
stores_df = get_df('stores')
print(stores_df.shape)
stores_df.head()

In [None]:
df = pd.merge(sales_df, stores_df, left_on='store', right_on='store_id').drop(columns={'store'})
df.head(2)

In [None]:
df = pd.merge(df, items_df, left_on='item', right_on='item_id').drop(columns={'item'})
df.head(2)

In [None]:
# Create a function that checks for a csv, and if one doesn't exist it creates one
# The function should also create one large df using all three df

def _read_or_fetch(name):
    """Return the '<name>.csv' cache as a DataFrame, or build it with get_df."""
    csv_path = name + '.csv'
    if os.path.isfile(csv_path):
        return pd.read_csv(csv_path, index_col=0)
    return get_df(name)


def get_store_data():
    """
    Return sales, stores, and items merged into one DataFrame indexed by
    sale_date.

    If 'big_df.csv' already exists it is read and returned directly, so
    the three source tables are not loaded at all. Otherwise each source
    table is read from its csv (or acquired with get_df when the csv is
    missing), the tables are merged, sale_date is converted to datetime
    and set as the sorted index, and the result is written to
    'big_df.csv' for future use.

    Returns
    -------
    pandas.DataFrame
        The merged data with a sorted datetime index named 'sale_date'.
        Both the freshly built frame and the cached frame have this
        index and the same columns.
    """
    # The merged cache makes the individual tables unnecessary.
    if os.path.isfile('big_df.csv'):
        return pd.read_csv('big_df.csv', parse_dates=True, index_col='sale_date')

    # check for csv files or create them
    items_df = _read_or_fetch('items')
    stores_df = _read_or_fetch('stores')
    sales_df = _read_or_fetch('sales')

    # merge all of the DataFrames into one; the left-hand key columns
    # duplicate store_id / item_id after the merge, so drop them
    df = pd.merge(sales_df, stores_df, left_on='store', right_on='store_id').drop(columns=['store'])
    df = pd.merge(df, items_df, left_on='item', right_on='item_id').drop(columns=['item'])

    # convert sale_date to datetime and make it the sorted index, so this
    # frame matches what is read back from big_df.csv on later calls
    df['sale_date'] = pd.to_datetime(df.sale_date)
    df = df.set_index('sale_date').sort_index()

    # write merged DateTime df with all data to directory for future use
    df.to_csv('big_df.csv')
    return df

In [None]:
df = get_store_data()
df.head(2)


In [None]:
url = 'https://raw.githubusercontent.com/jenfly/opsd/master/opsd_germany_daily.csv'
df = pd.read_csv(url, parse_dates=True, index_col='Date')
df.head()

In [None]:
# Function that checks for a csv, and if it doesn't exist it reads url and creates one
# Function returns the df with a DateTime Index by using parse_dates=True

def german_energy_csv():
    """
    Load the opsd_germany daily energy data as a DataFrame whose index is
    the parsed, sorted 'Date' column.

    A local copy named 'german_energy.csv' is used when it exists;
    otherwise the data is read from the opsd_germany url and saved to
    that file before being returned.
    """
    local_copy = 'german_energy.csv'

    # Fast path: a local copy was saved by an earlier call
    if os.path.isfile(local_copy):
        cached = pd.read_csv(local_copy, parse_dates=True, index_col='Date')
        return cached.sort_index()

    # No local copy yet: read from the url, then save for next time
    url = 'https://raw.githubusercontent.com/jenfly/opsd/master/opsd_germany_daily.csv'
    fetched = pd.read_csv(url, parse_dates=True, index_col="Date")
    fetched = fetched.sort_index()
    fetched.to_csv(local_copy)
    return fetched

In [None]:
gdf = german_energy_csv()
gdf.head(2)