In [2]:
import pandas as pd
import numpy as np 
import matplotlib as plt 
import seaborn as sns 
import acquire 
import requests

### making HTTP Requests

In [2]:
#example 
response = requests.get('http://aphorisms.glitch.me/')
response

<Response [200]>

### Example JSON API

In [3]:
url = 'https://swapi.dev/api/people/5'
response = requests.get(url)
print(response.text)

{"name":"Leia Organa","height":"150","mass":"49","hair_color":"brown","skin_color":"light","eye_color":"brown","birth_year":"19BBY","gender":"female","homeworld":"https://swapi.dev/api/planets/2/","films":["https://swapi.dev/api/films/1/","https://swapi.dev/api/films/2/","https://swapi.dev/api/films/3/","https://swapi.dev/api/films/6/"],"species":[],"vehicles":["https://swapi.dev/api/vehicles/30/"],"starships":[],"created":"2014-12-10T15:20:09.791000Z","edited":"2014-12-20T21:17:50.315000Z","url":"https://swapi.dev/api/people/5/"}


In [4]:
{"name":"Leia Organa","height":"150","mass":"49","hair_color":"brown","skin_color":"light","eye_color":"brown","birth_year":"19BBY","gender":"female","homeworld":"https://swapi.dev/api/planets/2/","films":["https://swapi.dev/api/films/1/","https://swapi.dev/api/films/2/","https://swapi.dev/api/films/3/","https://swapi.dev/api/films/6/"],"species":[],"vehicles":["https://swapi.dev/api/vehicles/30/"],"starships":[],"created":"2014-12-10T15:20:09.791000Z","edited":"2014-12-20T21:17:50.315000Z","url":"https://swapi.dev/api/people/5/"}


{'name': 'Leia Organa',
 'height': '150',
 'mass': '49',
 'hair_color': 'brown',
 'skin_color': 'light',
 'eye_color': 'brown',
 'birth_year': '19BBY',
 'gender': 'female',
 'homeworld': 'https://swapi.dev/api/planets/2/',
 'films': ['https://swapi.dev/api/films/1/',
  'https://swapi.dev/api/films/2/',
  'https://swapi.dev/api/films/3/',
  'https://swapi.dev/api/films/6/'],
 'species': [],
 'vehicles': ['https://swapi.dev/api/vehicles/30/'],
 'starships': [],
 'created': '2014-12-10T15:20:09.791000Z',
 'edited': '2014-12-20T21:17:50.315000Z',
 'url': 'https://swapi.dev/api/people/5/'}

In [5]:
#use .json to get a data structure to work with
data = response.json()
print(type(data))
data

<class 'dict'>


{'name': 'Leia Organa',
 'height': '150',
 'mass': '49',
 'hair_color': 'brown',
 'skin_color': 'light',
 'eye_color': 'brown',
 'birth_year': '19BBY',
 'gender': 'female',
 'homeworld': 'https://swapi.dev/api/planets/2/',
 'films': ['https://swapi.dev/api/films/1/',
  'https://swapi.dev/api/films/2/',
  'https://swapi.dev/api/films/3/',
  'https://swapi.dev/api/films/6/'],
 'species': [],
 'vehicles': ['https://swapi.dev/api/vehicles/30/'],
 'starships': [],
 'created': '2014-12-10T15:20:09.791000Z',
 'edited': '2014-12-20T21:17:50.315000Z',
 'url': 'https://swapi.dev/api/people/5/'}

### Examine different API

In [6]:
base_url = 'https://python.zgulde.net'
print(requests.get(base_url).text)

{"api":"/api/v1","help":"/documentation"}



In [7]:
{"api":"/api/v1","help":"/documentation"}

{'api': '/api/v1', 'help': '/documentation'}

In [8]:
response = requests.get(base_url + '/documentation')
print(response.json()['payload'])


The API accepts GET requests for all endpoints, where endpoints are prefixed
with

    /api/{version}

Where version is "v1"

Valid endpoints:

- /stores[/{store_id}]
- /items[/{item_id}]
- /sales[/{sale_id}]

All endpoints accept a `page` parameter that can be used to navigate through
the results.



In [9]:
response = requests.get('https://python.zgulde.net/api/v1/items')

data = response.json()
data.keys()

dict_keys(['payload', 'status'])

In [10]:
data['payload'].keys()

dict_keys(['items', 'max_page', 'next_page', 'page', 'previous_page'])

In [11]:
current_page = data['payload']['page']
max_page = data['payload']['max_page']
next_page = data['payload']['next_page']

print(f'current_page: {current_page}')
print(f'max_page: {max_page}')
print(f'next_page: {next_page}')



current_page: 1
max_page: 3
next_page: /api/v1/items?page=2


In [12]:
data['payload']['items'][:2]

[{'item_brand': 'Riceland',
  'item_id': 1,
  'item_name': 'Riceland American Jazmine Rice',
  'item_price': 0.84,
  'item_upc12': '35200264013',
  'item_upc14': '35200264013'},
 {'item_brand': 'Caress',
  'item_id': 2,
  'item_name': 'Caress Velvet Bliss Ultra Silkening Beauty Bar - 6 Ct',
  'item_price': 6.44,
  'item_upc12': '11111065925',
  'item_upc14': '11111065925'}]

In [13]:
#turn this request into a pandas dataframe 
df = pd.DataFrame(data['payload']['items'])
df.head()

Unnamed: 0,item_brand,item_id,item_name,item_price,item_upc12,item_upc14
0,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013
1,Caress,2,Caress Velvet Bliss Ultra Silkening Beauty Bar...,6.44,11111065925,11111065925
2,Earths Best,3,Earths Best Organic Fruit Yogurt Smoothie Mixe...,2.43,23923330139,23923330139
3,Boars Head,4,Boars Head Sliced White American Cheese - 120 Ct,3.14,208528800007,208528800007
4,Back To Nature,5,Back To Nature Gluten Free White Cheddar Rice ...,2.61,759283100036,759283100036


In [14]:
#now that we've gotten the data from the first page, we can extract the data from the next page and add it to our DF
response = requests.get(base_url + data['payload']['next_page'])
data = response.json()

current_page = data['payload']['page']
max_page = data['payload']['max_page']
next_page = data['payload']['next_page']

print(f'current_page: {current_page}')
print(f'max_page: {max_page}')
print(f'next_page: {next_page}')

df = pd.concat([df, pd.DataFrame(data['payload']['items'])]).reset_index()

current_page: 2
max_page: 3
next_page: /api/v1/items?page=3


In [15]:
#repeat the process again 
response = requests.get(base_url + data['payload']['next_page'])
data = response.json()

current_page = data['payload']['page']
max_page = data['payload']['max_page']
next_page = data['payload']['next_page']

print(f'current_page: {current_page}')
print(f'max_page: {max_page}')
print(f'next_page: {next_page}')

df = pd.concat([df, pd.DataFrame(data['payload']['items'])]).reset_index()

current_page: 3
max_page: 3
next_page: None


In [16]:
#When the next_page is 'none', we'll stop making requests 
df.shape

(50, 8)

### Exercises

In [17]:
# Examine just the base URL 
base_url = requests.get('https://api.data.codeup.com')

In [18]:
response = base_url

In [19]:
print(response.text)

{"api":"/api/v1","help":"/documentation"}



In [20]:
#The API provides some documentation; here is the code to examine it 
url = 'https://api.data.codeup.com/documentation'
response = requests.get(url)
print(response.json()['payload'])


The API accepts GET requests for all endpoints, where endpoints are prefixed
with

    /api/{version}

Where version is "v1"

Valid endpoints:

- /stores[/{store_id}]
- /items[/{item_id}]
- /sales[/{sale_id}]

All endpoints accept a `page` parameter that can be used to navigate through
the results.



### ITEMS 

In [20]:
# Examine items 
response = requests.get('https://api.data.codeup.com/api/v1/items')
data = response.json()
# look at the dictionary keys 
data.keys()

dict_keys(['payload', 'status'])

In [21]:
#within payload, let's look at the keys/content
data['payload'].keys()

dict_keys(['items', 'max_page', 'next_page', 'page', 'previous_page'])

In [22]:
#use built-in properties to get to subsequent pages and look at the shape of the pages 
current_page = data['payload']['page']
max_page = data['payload']['max_page']
next_page = data['payload']['next_page']

print(f'current_page: {current_page}')
print(f'max_page: {max_page}')
print(f'next_page: {next_page}')

current_page: 1
max_page: 3
next_page: /api/v1/items?page=2


In [23]:
base_url = 'https://api.data.codeup.com/api/v1/items?page='
items = []

#first request 
url = base_url + str(1)
#making a request and storing the response to the request as a string 
response = requests.get(url)
#storing the response in json form 
data = response.json()
max_page = data['payload']['max_page']
items.extend(data['payload']['items'])
page_range = range(2, max_page + 1)
#request to get the remaining pages 
for page in page_range: 
    url = base_url + str(page)
    print(f'/rFetching page {page}/{max_page} {url}', end='')
    response = requests.get(url)
    data = response.json()
    items.extend(data['payload']['items'])
    

/rFetching page 2/3 https://api.data.codeup.com/api/v1/items?page=2/rFetching page 3/3 https://api.data.codeup.com/api/v1/items?page=3

In [24]:
items = pd.DataFrame(items)
items.head()

Unnamed: 0,item_brand,item_id,item_name,item_price,item_upc12,item_upc14
0,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013
1,Caress,2,Caress Velvet Bliss Ultra Silkening Beauty Bar...,6.44,11111065925,11111065925
2,Earths Best,3,Earths Best Organic Fruit Yogurt Smoothie Mixe...,2.43,23923330139,23923330139
3,Boars Head,4,Boars Head Sliced White American Cheese - 120 Ct,3.14,208528800007,208528800007
4,Back To Nature,5,Back To Nature Gluten Free White Cheddar Rice ...,2.61,759283100036,759283100036


In [25]:
items.to_csv('items.csv', index = False)

In [26]:
items.shape

(50, 6)

### Stores 

In [33]:
# Examine items 
response = requests.get('https://python.zgulde.net/api/v1/stores')
data = response.json()
#look at the dictionary keys 
data.keys()

dict_keys(['payload', 'status'])

In [34]:
#within payload, let's look at the keys/content
data['payload'].keys()

dict_keys(['max_page', 'next_page', 'page', 'previous_page', 'stores'])

In [35]:
#use built-in properties to get to subsequent pages and look at the shape of the pages 
current_page = data['payload']['page']
max_page = data['payload']['max_page']
next_page = data['payload']['next_page']

print(f'current_page: {current_page}')
print(f'max_page: {max_page}')
print(f'next_page: {next_page}')

current_page: 1
max_page: 1
next_page: None


In [36]:
# turn the data into a pandas dataframe 
df = pd.DataFrame(data['payload']['stores'])
df.head()

Unnamed: 0,store_address,store_city,store_id,store_state,store_zipcode
0,12125 Alamo Ranch Pkwy,San Antonio,1,TX,78253
1,9255 FM 471 West,San Antonio,2,TX,78251
2,2118 Fredericksburg Rdj,San Antonio,3,TX,78201
3,516 S Flores St,San Antonio,4,TX,78204
4,1520 Austin Hwy,San Antonio,5,TX,78218


In [37]:
#examine the shape
df.shape

(10, 5)

In [39]:
stores.to_csv('stores.csv', index = False)

### Extract the data for sales 

In [40]:
# Examine items 
response = requests.get('https://python.zgulde.net/api/v1/sales')
data = response.json()
#look at the dictionary keys 
data.keys()

dict_keys(['payload', 'status'])

In [41]:
#within payload, let's look at the keys/content
data['payload'].keys()

dict_keys(['max_page', 'next_page', 'page', 'previous_page', 'sales'])

In [42]:
#use built-in properties to get to subsequent pages and look at the shape of the pages 
current_page = data['payload']['page']
max_page = data['payload']['max_page']
next_page = data['payload']['next_page']

print(f'current_page: {current_page}')
print(f'max_page: {max_page}')
print(f'next_page: {next_page}')

current_page: 1
max_page: 183
next_page: /api/v1/sales?page=2


In [43]:
#define the base_url 
base_url = 'https://api.data.codeup.com/api/v1/sales?page='
#provide an open list to extend to 
sales = []
#make the first request 
url = base_url + str(1)
#use requests to get url and store as variable 
response = requests.get(url)
#store variable into json file 
data = response.json()
#define max page 
max_page = data['payload']['max_page']
#add payload sales as first entry to sales list defined earlier 
sales.extend(data['payload']['sales'])

#first page request is complete, now to add the rest of the dataframe 
#Add 1 to max_page because the range() function is exclusive of the endpoint 

page_range = range(2, max_page + 1)

for page in page_range: 
    '''
    #define the base url 
    domain = 'https://api.data.codeup.com'
    # path or where we are accessing the data inside the url
    endpoint = '/api/v1/sales'
    # create an empty list to place the data within 
    sales = []
    
    while True: 
    #combine base and endpoint to allow for looping 
    url = domain + endpoint 
    #making a request and storing the response to the request as a string 
    response = requests.get(url)
    #storing the response in json form 
    data = response.json()
    #prints the page number for which the data is being acquired out of the total number of pages, as well as the URL
    # the \r replaces the previous print statement with the new one based on the number of characters in the new statement.
    print(f'\rGetting page {data['payload']['page']}) of {data['payload']['max_page']}: {url}', end='')
    # .extend adds elements from a list to another list 
    items.extend(data['payload']['sales'])
    # reassigning the endpoint variable to have the path to the next page. 
    endpoint = data['payload']['next_page']
    if endpint is None: 
    break
    ''' 
    url = base_url + str(page)
    print(f'/rFetching page {page}/{max_page} {url}', end='')
    response = requests.get(url)
    data = response.json()
    sales.extend(data['payload']['sales'])
    

/rFetching page 2/183 https://api.data.codeup.com/api/v1/sales?page=2/rFetching page 3/183 https://api.data.codeup.com/api/v1/sales?page=3/rFetching page 4/183 https://api.data.codeup.com/api/v1/sales?page=4/rFetching page 5/183 https://api.data.codeup.com/api/v1/sales?page=5/rFetching page 6/183 https://api.data.codeup.com/api/v1/sales?page=6/rFetching page 7/183 https://api.data.codeup.com/api/v1/sales?page=7/rFetching page 8/183 https://api.data.codeup.com/api/v1/sales?page=8/rFetching page 9/183 https://api.data.codeup.com/api/v1/sales?page=9/rFetching page 10/183 https://api.data.codeup.com/api/v1/sales?page=10/rFetching page 11/183 https://api.data.codeup.com/api/v1/sales?page=11/rFetching page 12/183 https://api.data.codeup.com/api/v1/sales?page=12/rFetching page 13/183 https://api.data.codeup.com/api/v1/sales?page=13/rFetching page 14/183 https://api.data.codeup.com/api/v1/sales?page=14/rFetching page 15/183 https://api.data.codeup.com/api/v1/sales?page=15/rFetching page 16/183

/rFetching page 118/183 https://api.data.codeup.com/api/v1/sales?page=118/rFetching page 119/183 https://api.data.codeup.com/api/v1/sales?page=119/rFetching page 120/183 https://api.data.codeup.com/api/v1/sales?page=120/rFetching page 121/183 https://api.data.codeup.com/api/v1/sales?page=121/rFetching page 122/183 https://api.data.codeup.com/api/v1/sales?page=122/rFetching page 123/183 https://api.data.codeup.com/api/v1/sales?page=123/rFetching page 124/183 https://api.data.codeup.com/api/v1/sales?page=124/rFetching page 125/183 https://api.data.codeup.com/api/v1/sales?page=125/rFetching page 126/183 https://api.data.codeup.com/api/v1/sales?page=126/rFetching page 127/183 https://api.data.codeup.com/api/v1/sales?page=127/rFetching page 128/183 https://api.data.codeup.com/api/v1/sales?page=128/rFetching page 129/183 https://api.data.codeup.com/api/v1/sales?page=129/rFetching page 130/183 https://api.data.codeup.com/api/v1/sales?page=130/rFetching page 131/183 https://api.data.codeup.com

In [44]:
data.keys()

dict_keys(['payload', 'status'])

In [45]:
#create the pd.dataframe 
sales = pd.DataFrame(sales)
sales.head

<bound method NDFrame.head of         item  sale_amount                      sale_date  sale_id  store
0          1         13.0  Tue, 01 Jan 2013 00:00:00 GMT        1      1
1          1         11.0  Wed, 02 Jan 2013 00:00:00 GMT        2      1
2          1         14.0  Thu, 03 Jan 2013 00:00:00 GMT        3      1
3          1         13.0  Fri, 04 Jan 2013 00:00:00 GMT        4      1
4          1         10.0  Sat, 05 Jan 2013 00:00:00 GMT        5      1
...      ...          ...                            ...      ...    ...
912995    50         63.0  Wed, 27 Dec 2017 00:00:00 GMT   912996     10
912996    50         59.0  Thu, 28 Dec 2017 00:00:00 GMT   912997     10
912997    50         74.0  Fri, 29 Dec 2017 00:00:00 GMT   912998     10
912998    50         62.0  Sat, 30 Dec 2017 00:00:00 GMT   912999     10
912999    50         82.0  Sun, 31 Dec 2017 00:00:00 GMT   913000     10

[913000 rows x 5 columns]>

In [46]:
#export sales to CSV file 
sales.to_csv('sales', index = False)

### Question four complete in above code 

### Question five: Combine the data from your three separate dataframes into one large dataframe

In [47]:
#inspect the head of each dataframe 
items.head()

Unnamed: 0,item_brand,item_id,item_name,item_price,item_upc12,item_upc14
0,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013
1,Caress,2,Caress Velvet Bliss Ultra Silkening Beauty Bar...,6.44,11111065925,11111065925
2,Earths Best,3,Earths Best Organic Fruit Yogurt Smoothie Mixe...,2.43,23923330139,23923330139
3,Boars Head,4,Boars Head Sliced White American Cheese - 120 Ct,3.14,208528800007,208528800007
4,Back To Nature,5,Back To Nature Gluten Free White Cheddar Rice ...,2.61,759283100036,759283100036


In [48]:
#inspect the head of each dataframe 
stores.head()

Unnamed: 0,store_address,store_city,store_id,store_state,store_zipcode
0,12125 Alamo Ranch Pkwy,San Antonio,1,TX,78253
1,9255 FM 471 West,San Antonio,2,TX,78251
2,2118 Fredericksburg Rdj,San Antonio,3,TX,78201
3,516 S Flores St,San Antonio,4,TX,78204
4,1520 Austin Hwy,San Antonio,5,TX,78218


In [49]:
#inspect the head of each dataframe 
sales.head()

Unnamed: 0,item,sale_amount,sale_date,sale_id,store
0,1,13.0,"Tue, 01 Jan 2013 00:00:00 GMT",1,1
1,1,11.0,"Wed, 02 Jan 2013 00:00:00 GMT",2,1
2,1,14.0,"Thu, 03 Jan 2013 00:00:00 GMT",3,1
3,1,13.0,"Fri, 04 Jan 2013 00:00:00 GMT",4,1
4,1,10.0,"Sat, 05 Jan 2013 00:00:00 GMT",5,1


In [50]:
#rename columns to have common index to be able to merge data
sales = sales.rename(columns= {'item': 'item_id', 'store': 'store_id'} )

In [51]:
#merge dataframes using pd.merge on common features item_id and store_id
df = pd.merge(sales, items, how='left', on='item_id')
df = pd.merge(df, stores, how= 'left', on='store_id')

In [52]:
df.shape

(913000, 14)

### Acquire the Open power systems data for Germany

In [27]:
#read in csv file using pd.read_csv
germany_electric = pd.read_csv('https://raw.githubusercontent.com/jenfly/opsd/master/opsd_germany_daily.csv')
germany_electric.head()

Unnamed: 0,Date,Consumption,Wind,Solar,Wind+Solar
0,2006-01-01,1069.184,,,
1,2006-01-02,1380.521,,,
2,2006-01-03,1442.533,,,
3,2006-01-04,1457.217,,,
4,2006-01-05,1477.131,,,


In [28]:
#review information 
germany_electric.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4383 entries, 0 to 4382
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Date         4383 non-null   object 
 1   Consumption  4383 non-null   float64
 2   Wind         2920 non-null   float64
 3   Solar        2188 non-null   float64
 4   Wind+Solar   2187 non-null   float64
dtypes: float64(4), object(1)
memory usage: 171.3+ KB


In [29]:
#review shape 
germany_electric.shape

(4383, 5)

In [30]:
#are there any null values 
germany_electric.isna().sum()

Date              0
Consumption       0
Wind           1463
Solar          2195
Wind+Solar     2196
dtype: int64

In [35]:
#2 columns have 50% null values a
#1 column has 33% null values 
#in many instances with null values this high we would drop the column, but in this case we would look key data for analysis 
# because the null values are so high if you impute them, you are probably going to introduce bias 
#KNN imputer is not an option because the dataset doesn't have enough features to base its prediction of the missing null row 
#for this reason drop pall nulls by row 
germany_electric = germany_electric.dropna()

In [36]:
#export to csv
germany_electric.to_csv('germany electric.csv', index = False)

### Acquire.py imports 

In [58]:
SD = acquire.get_store_item_demand_data()

Reading from CSV file. . .
Reading from csv file. . .


NameError: name 'get_stores_data' is not defined

In [None]:
GE = acquire.german_electric 