# Web scraping: headers, the networks tab and parsing an API URL
## Helpful links and resources
- [urllib](https://docs.python.org/3/library/urllib.parse.html#) is a Python library that will pick apart URLs
- [Sessions object - request library](https://docs.python-requests.org/en/master/user/advanced/#session-objects)

In [92]:
#import libraries

import requests
import pandas as pd
from urllib.parse import urlparse, parse_qs
import json

## The networks tab
### Static data files
[Covid cases in the US - New York Times](https://www.nytimes.com/interactive/2021/us/covid-cases.html)

In [2]:
# right click -> Inspect -> Networks to see files
# Look for XHR
# Look for keywords in the file names
# data.json
# .json file extensions
# The “Request URL” is where the data lives

In [3]:
# get static data file
covid_cases_r = requests.get('https://static01.nyt.com/newsgraphics/2021/coronavirus-tracking/data/pages/usa/data.json')

In [4]:
covid_cases = covid_cases_r.json()

In [5]:
# covid_cases
# --> it's a dictionary
covid_cases.keys()

dict_keys(['updated', 'updated_datetime', 'location', 'counties', 'states', 'clusters', 'page_notes', 'headline_override'])

In [6]:
covid_cases['states'][0]
# check link to see if it works

{'country': 'United States',
 'display_name': 'Alabama',
 'nyt_abbr': 'Ala.',
 'geoid': 'USA-01',
 'href': 'https://www.nytimes.com/interactive/2021/us/alabama-covid-cases.html',
 'latest': {'total': {'cases': 554605, 'deaths': 11402},
  'average': {'cases': 472.42857142857144,
   'deaths': 6.285714285714286,
   'hospitalized': 315.8333333333333,
   'tests': 2862.714285714286},
  'vaccination': {'date': '2021-07-09',
   'people_vaccinated_pct_of_pop': 40.49576,
   'people_vaccinated_pct_of_pop_display': '40%',
   'people_fully_vaccinated_pct_of_pop': 33.16871,
   'people_fully_vaccinated_pct_of_pop_display': '33%',
   'people_vaccinated_12plus_pct_of_pop': 47.4,
   'people_vaccinated_12plus_pct_of_pop_display': '47%',
   'people_vaccinated_18plus_pct_of_pop': 50.7,
   'people_vaccinated_18plus_pct_of_pop_display': '51%',
   'people_vaccinated_65plus_pct_of_pop': 79.3,
   'people_vaccinated_65plus_pct_of_pop_display': '79%',
   'people_fully_vaccinated_12plus_pct_of_pop': 38.8,
   'peop

In [7]:
# put list with states in a dataframe
df_states = pd.DataFrame(covid_cases['states'])

In [8]:
df_states.head(10)

Unnamed: 0,country,display_name,nyt_abbr,geoid,href,latest,long_name,percent_change_14day,population,hospital_area_population,region,region_type,slug,state,subregion,date,population_adjustment
0,United States,Alabama,Ala.,USA-01,https://www.nytimes.com/interactive/2021/us/al...,"{'total': {'cases': 554605, 'deaths': 11402}, ...",Alabama,"{'raw': {'cases': 158.96632732967896, 'deaths'...",4903185,4903185,Alabama,state,us/alabama-covid-cases,"{'us_state_fips': '01', 'name': 'Alabama', 'sh...",,2021-07-10,
1,United States,Alaska,Alaska,USA-02,https://www.nytimes.com/interactive/2021/us/al...,"{'total': {'cases': 71118, 'deaths': 362}, 'av...",Alaska,"{'raw': {'cases': 72, 'deaths': 100, 'hospital...",731545,731545,Alaska,state,us/alaska-covid-cases,"{'us_state_fips': '02', 'name': 'Alaska', 'sho...",,2021-07-10,
2,United States,Arizona,Ariz.,USA-04,https://www.nytimes.com/interactive/2021/us/ar...,"{'total': {'cases': 900636, 'deaths': 18029}, ...",Arizona,"{'raw': {'cases': 22.232116355001487, 'deaths'...",7278717,7278717,Arizona,state,us/arizona-covid-cases,"{'us_state_fips': '04', 'name': 'Arizona', 'sh...",,2021-07-10,
3,United States,Arkansas,Ark.,USA-05,https://www.nytimes.com/interactive/2021/us/ar...,"{'total': {'cases': 355460, 'deaths': 5948}, '...",Arkansas,"{'raw': {'cases': 173.90572390572393, 'deaths'...",3017804,3017804,Arkansas,state,us/arkansas-covid-cases,"{'us_state_fips': '05', 'name': 'Arkansas', 's...",,2021-07-10,
4,United States,California,Calif.,USA-06,https://www.nytimes.com/interactive/2021/us/ca...,"{'total': {'cases': 3833655, 'deaths': 63482},...",California,"{'raw': {'cases': 80.15514809590974, 'deaths':...",39512223,39512223,California,state,us/california-covid-cases,"{'us_state_fips': '06', 'name': 'California', ...",,2021-07-10,
5,United States,Colorado,Colo.,USA-08,https://www.nytimes.com/interactive/2021/us/co...,"{'total': {'cases': 565497, 'deaths': 6994}, '...",Colorado,"{'raw': {'cases': 8.378746594005438, 'deaths':...",5758736,5758736,Colorado,state,us/colorado-covid-cases,"{'us_state_fips': '08', 'name': 'Colorado', 's...",,2021-07-10,
6,United States,Connecticut,Conn.,USA-09,https://www.nytimes.com/interactive/2021/us/co...,"{'total': {'cases': 349969, 'deaths': 8279}, '...",Connecticut,"{'raw': {'cases': 8.351648351648343, 'deaths':...",3565287,3565287,Connecticut,state,us/connecticut-covid-cases,"{'us_state_fips': '09', 'name': 'Connecticut',...",,2021-07-10,
7,United States,Delaware,Del.,USA-10,https://www.nytimes.com/interactive/2021/us/de...,"{'total': {'cases': 110019, 'deaths': 1695}, '...",Delaware,"{'raw': {'cases': 34.108527131782964, 'deaths'...",973764,973764,Delaware,state,us/delaware-covid-cases,"{'us_state_fips': '10', 'name': 'Delaware', 's...",,2021-07-10,
8,United States,"Washington, D.C.",D.C.,USA-11,,"{'total': {'cases': 49455, 'deaths': 1143}, 'a...","Washington, D.C.","{'raw': {'cases': 24.193548387096776, 'deaths'...",705749,705749,District of Columbia,state,us/washington-district-of-columbia-covid-cases,"{'us_state_fips': '11', 'name': 'District of C...",,2021-07-10,
9,United States,Florida,Fla.,USA-12,https://www.nytimes.com/interactive/2021/us/fl...,"{'total': {'cases': 2361360, 'deaths': 38157},...",Florida,"{'raw': {'cases': 114.9438812454743, 'deaths':...",21477737,21477737,Florida,state,us/florida-covid-cases,"{'us_state_fips': '12', 'name': 'Florida', 'sh...",,2021-07-10,


# Washington Post Data

In [9]:
df = pd.read_csv('https://www.washingtonpost.com/business/interactive/2021/dejoy-usps-delays-by-zip-code-map/usps.csv')

In [10]:
df

Unnamed: 0,zip3,avg_days_cur,avg_days_new,avg_days_diff
0,5,2.665517,3.103653,-0.438135
1,10,2.420418,2.778212,-0.357794
2,11,2.411635,2.745268,-0.333633
3,12,2.439943,2.809153,-0.369210
4,13,2.422443,2.835077,-0.412634
...,...,...,...,...
899,990,2.737138,3.482650,-0.745512
900,991,2.730945,3.445692,-0.714747
901,992,2.724143,3.484935,-0.760792
902,993,2.790638,3.527815,-0.737178


### "Secret" APIs
Shopping websites are good candidates for secret APIs, such as [Target](www.target.com)

Goal: Identify price and ratings of specific items on a shopping site

How: with two APIs

- First endpoint: list of IDs with the name of the product (search: plp_search_v1) 
- Second endpoint: list of categories with an ID of categories – each row has the ID for the products under that category

--> Take the IDs from the first endpoint and plug them into the second endpoint


#### Target's Search API

In [11]:
# 1. search API – to find the strucutre of the API request (meta info)
# 2. from there tweek API to get the information that we want 

In [12]:
# Open the networks tab
# right click -> Inspect
# -> Networks
# -> XHR tab
# Look for clues in the name
# v1, v2 - common in API
# ? - signals parameters, seperates the endpoint fromt the parameters
# Work backwards: go to the preview tab to find the content you’re looking for first

In [13]:
# search for an item with the networks tab open to ID which APIs you can use
# e.g. paper plates
# parse the URL so it's easier to read

In [14]:
parsed_url = urlparse('https://redsky.target.com/redsky_aggregations/v1/web/plp_search_v1?key=ff457966e64d5e877fdbad070f276d18ecec4a01&channel=WEB&count=24&default_purchasability_filter=true&include_sponsored=true&keyword=paper+plates&offset=0&page=%2Fs%2Fpaper+plates&platform=desktop&pricing_store_id=2850&scheduled_delivery_store_id=2850&store_ids=2850%2C1849%2C3284%2C3229%2C3249&useragent=Mozilla%2F5.0+%28Macintosh%3B+Intel+Mac+OS+X+10_15_7%29+AppleWebKit%2F537.36+%28KHTML%2C+like+Gecko%29+Chrome%2F91.0.4472.114+Safari%2F537.36&visitor_id=017A71BED83F0201BCBD154FC5FC4C74')

In [15]:
# check the parsed URL
parsed_url

ParseResult(scheme='https', netloc='redsky.target.com', path='/redsky_aggregations/v1/web/plp_search_v1', params='', query='key=ff457966e64d5e877fdbad070f276d18ecec4a01&channel=WEB&count=24&default_purchasability_filter=true&include_sponsored=true&keyword=paper+plates&offset=0&page=%2Fs%2Fpaper+plates&platform=desktop&pricing_store_id=2850&scheduled_delivery_store_id=2850&store_ids=2850%2C1849%2C3284%2C3229%2C3249&useragent=Mozilla%2F5.0+%28Macintosh%3B+Intel+Mac+OS+X+10_15_7%29+AppleWebKit%2F537.36+%28KHTML%2C+like+Gecko%29+Chrome%2F91.0.4472.114+Safari%2F537.36&visitor_id=017A71BED83F0201BCBD154FC5FC4C74', fragment='')

In [16]:
# -- > first three form the endpoint: scheme, netloc, path

In [17]:
# breakt down the url 
# see what url looks like to manipulate it 
# format the endpoint and parameters

In [18]:
# 1. endpoint
# check the first parameters
 
parsed_url[0]

'https'

In [19]:
parsed_url[1]

'redsky.target.com'

In [20]:
target_search_endpoint = parsed_url[0] + "://" + parsed_url[1] + parsed_url[2]

In [21]:
target_search_endpoint

'https://redsky.target.com/redsky_aggregations/v1/web/plp_search_v1'

In [22]:
# other way: copy & paste everything from the search url until the "?"

In [23]:
# where are the parameters?

parsed_url[4]

'key=ff457966e64d5e877fdbad070f276d18ecec4a01&channel=WEB&count=24&default_purchasability_filter=true&include_sponsored=true&keyword=paper+plates&offset=0&page=%2Fs%2Fpaper+plates&platform=desktop&pricing_store_id=2850&scheduled_delivery_store_id=2850&store_ids=2850%2C1849%2C3284%2C3229%2C3249&useragent=Mozilla%2F5.0+%28Macintosh%3B+Intel+Mac+OS+X+10_15_7%29+AppleWebKit%2F537.36+%28KHTML%2C+like+Gecko%29+Chrome%2F91.0.4472.114+Safari%2F537.36&visitor_id=017A71BED83F0201BCBD154FC5FC4C74'

In [24]:
# key value parameters are sperated by "&"

In [25]:
parsed_url[4].split('&')

# = list of values

['key=ff457966e64d5e877fdbad070f276d18ecec4a01',
 'channel=WEB',
 'count=24',
 'default_purchasability_filter=true',
 'include_sponsored=true',
 'keyword=paper+plates',
 'offset=0',
 'page=%2Fs%2Fpaper+plates',
 'platform=desktop',
 'pricing_store_id=2850',
 'scheduled_delivery_store_id=2850',
 'store_ids=2850%2C1849%2C3284%2C3229%2C3249',
 'useragent=Mozilla%2F5.0+%28Macintosh%3B+Intel+Mac+OS+X+10_15_7%29+AppleWebKit%2F537.36+%28KHTML%2C+like+Gecko%29+Chrome%2F91.0.4472.114+Safari%2F537.36',
 'visitor_id=017A71BED83F0201BCBD154FC5FC4C74']

In [26]:
# create an empty dictionary
# loop through list
# seperate key values paris by "="

params = {}

for parameter in parsed_url[4].split('&'):
    print(parameter.split('='))   

['key', 'ff457966e64d5e877fdbad070f276d18ecec4a01']
['channel', 'WEB']
['count', '24']
['default_purchasability_filter', 'true']
['include_sponsored', 'true']
['keyword', 'paper+plates']
['offset', '0']
['page', '%2Fs%2Fpaper+plates']
['platform', 'desktop']
['pricing_store_id', '2850']
['scheduled_delivery_store_id', '2850']
['store_ids', '2850%2C1849%2C3284%2C3229%2C3249']
['useragent', 'Mozilla%2F5.0+%28Macintosh%3B+Intel+Mac+OS+X+10_15_7%29+AppleWebKit%2F537.36+%28KHTML%2C+like+Gecko%29+Chrome%2F91.0.4472.114+Safari%2F537.36']
['visitor_id', '017A71BED83F0201BCBD154FC5FC4C74']


In [27]:
# key ist only the thing on the left side of '=' --> [0]

params = {}
for parameter in parsed_url[4].split('&'):
    key_value = parameter.split('=')
    params[key_value[0]] = key_value[1]

In [93]:
# alternative way to break down a query:

params = parse_qs(parsed_url[4])

In [94]:
params

{'key': ['ff457966e64d5e877fdbad070f276d18ecec4a01'],
 'channel': ['WEB'],
 'count': ['24'],
 'default_purchasability_filter': ['true'],
 'include_sponsored': ['true'],
 'keyword': ['paper plates'],
 'offset': ['0'],
 'page': ['/s/paper plates'],
 'platform': ['desktop'],
 'pricing_store_id': ['2850'],
 'scheduled_delivery_store_id': ['2850'],
 'store_ids': ['2850,1849,3284,3229,3249'],
 'useragent': ['Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'],
 'visitor_id': ['017A71BED83F0201BCBD154FC5FC4C74']}

In [50]:
# try to request something
# look at dev tab to see if get or post request
# combine the endpoint we found with the params we found
# using requests.get( , params= )

In [51]:
response = requests.get(target_search_endpoint, params=params)

In [52]:
response

<Response [200]>

In [54]:
# response.text

In [56]:
response.json()

{'data': {'search': {'search_suggestions': ['plastic utensils',
    'valentines paper plates',
    'colored paper plates',
    'paper ice cream bowls',
    'light pink plastic plates',
    'white plastic plates',
    'birthday paper plates',
    'bridal shower paper plates',
    'baby shower paper plates',
    'rainbow party plates',
    'red plastic plates',
    'light pink paper plates',
    'brown paper plates',
    'paper plates napkins',
    'gold plastic plates',
    'pastel party plates',
    'pastel plastic plates',
    'paper plates for wedding',
    'wedding plastic plates',
    'elegant paper dinner napkins'],
   'search_recommendations': {'related_categories': [], 'related_queries': []},
   'search_response': {'facet_list': [{'name': 'd_categorytaxonomy',
      'type': 'url',
      'display_name': 'Category',
      'expand': True,
      'details': [{'display_name': 'Household Essentials',
        'url': 'keyword=paper%2Bplates&sort_by=relevance&count=24&offset=0&category=5x

In [58]:
response.json().keys()

dict_keys(['data'])

In [62]:
response.json()['data'].keys()

dict_keys(['search'])

In [65]:
response.json()['data']['search'].keys()

dict_keys(['search_suggestions', 'search_recommendations', 'search_response', 'products'])

In [68]:
response.json()['data']['search']['products'][0]

{'__typename': 'ProductSummary',
 'tcin': '75666853',
 'original_tcin': '75666853',
 'item': {'relationship_type': 'Stand Alone',
  'relationship_type_code': 'SA',
  'merchandise_classification': {'class_id': 5, 'department_id': 253},
  'eligibility_rules': {'add_on': {'is_active': True},
   'scheduled_delivery': {'is_active': True}},
  'enrichment': {'buy_url': 'https://www.target.com/p/line-plaid-paper-plate-8-5-34-90ct-up-38-up-8482/-/A-75666853',
   'images': {'primary_image_url': 'https://target.scene7.com/is/image/Target/GUEST_39b91919-bb96-44a4-a419-2257cfd40fc5',
    'alternate_image_urls': ['https://target.scene7.com/is/image/Target/GUEST_8ffaba71-1687-4107-9e3d-c49036c358ed']}},
  'dpci': '253-05-0356',
  'cart_add_on_threshold': 35.0,
  'product_description': {'title': 'Line Plaid Paper Plate 8.5&#34; - 90ct - up &#38; up&#8482;',
   'bullet_descriptions': ['<B>Features:</B> Round (shape)',
    '<B>Dimensions (Overall):</B> 8.55 Inches (L), 8.55 Inches (W)',
    '<B>Package 

In [69]:
# change something in the parameters (like keyword)
params['keyword'] = 'paper+cups'

In [70]:
response = requests.get(target_search_endpoint, params=params)

In [71]:
response.json()['data']['search']['products'][0]

{'__typename': 'ProductSummary',
 'tcin': '12970172',
 'original_tcin': '12970172',
 'item': {'relationship_type': 'Stand Alone',
  'relationship_type_code': 'SA',
  'merchandise_classification': {'class_id': 5, 'department_id': 253},
  'eligibility_rules': {'add_on': {'is_active': True},
   'scheduled_delivery': {'is_active': True}},
  'enrichment': {'buy_url': 'https://www.target.com/p/dixie-everyday-assorted-designs-cold-cups-54ct-9oz/-/A-12970172',
   'images': {'primary_image_url': 'https://target.scene7.com/is/image/Target/GUEST_9b4d6235-6e2e-4c7c-a541-6babba8eec63',
    'alternate_image_urls': ['https://target.scene7.com/is/image/Target/GUEST_d2d66bf2-7f60-444c-a286-16f29b008b5f',
     'https://target.scene7.com/is/image/Target/GUEST_4f0f4c6a-4310-4db5-b9ff-f5ef64671679',
     'https://target.scene7.com/is/image/Target/GUEST_5e6b372c-c099-4e8b-93b0-8611f6580497']},
   'videos': [{'is_list_page_eligible': False,
     'video_files': [{'mime_type': 'video/mp4',
       'video_url': 

#### Target's aggregation API

In [76]:
# Look for an API that only pulls item information
# wesbite > categorie > subcategory 
# search for similar API to the one before
# look sor tcin number we saw in the API above
# --> list of tcins in the "query string parameters" in the network tab on the side

In [77]:
# parse the URL so it's easier to read
target_list = urlparse('https://redsky.target.com/redsky_aggregations/v1/web/plp_fulfillment_v1?key=ff457966e64d5e877fdbad070f276d18ecec4a01&tcins=81107269%2C81068829%2C14135567%2C81068792%2C82079503%2C81829962%2C81068790%2C81506339%2C80935950%2C81107259%2C81068797%2C11069188%2C81506334%2C81107271%2C81068773%2C81180792%2C81107267%2C81068789%2C81068796%2C81506336%2C81107268%2C81068821%2C81564691%2C81953908%2C81068815%2C81068825%2C81068787%2C81564688&store_id=2850&zip=11201&state=NY&latitude=40.690&longitude=-74.000&scheduled_delivery_store_id=2850')

In [78]:
# check the parsed URL
target_list

ParseResult(scheme='https', netloc='redsky.target.com', path='/redsky_aggregations/v1/web/plp_fulfillment_v1', params='', query='key=ff457966e64d5e877fdbad070f276d18ecec4a01&tcins=81107269%2C81068829%2C14135567%2C81068792%2C82079503%2C81829962%2C81068790%2C81506339%2C80935950%2C81107259%2C81068797%2C11069188%2C81506334%2C81107271%2C81068773%2C81180792%2C81107267%2C81068789%2C81068796%2C81506336%2C81107268%2C81068821%2C81564691%2C81953908%2C81068815%2C81068825%2C81068787%2C81564688&store_id=2850&zip=11201&state=NY&latitude=40.690&longitude=-74.000&scheduled_delivery_store_id=2850', fragment='')

In [82]:
# Way 1:
# create a function that pulls out the endpoint and key value pairs (like above)
# def to define a function
# use more general variable names to make the variables replacable

# def parse_target_url(url):
#    params = {}
#    for key_value in url[4].split('&'):
#        key_value_list = key_value.split('=')
#        params[key_value_list[0]] = key_value_list[1]     
#    return params

In [83]:
# target_aggregate_params = parse_target_url(parsed_url)

In [84]:
# target_aggregate_params

In [None]:
# target_aggregate_endpoint = parsed_url[0] # '://' + parsed_url[1] + parsed_url[2]

In [85]:
# Way 2:

target_list_endpoint = target_list[0] + '://' + target_list[1] + target_list[2]

target_list_params = {}

for parameter in target_list[4].split('&'):
    key_value = parameter.split('=')
    target_list_params[key_value[0]] = key_value[1]

In [86]:
# change something in the parameters (like tcins)
# e.g. one tcin from the list we got from the other API
target_list_params['tcins'] = '81107269'

In [87]:
# get request with endpoint and params
target_list_r = requests.get(target_list_endpoint, params=target_list_params)

In [89]:
# drill down the json file
target_list_r.json()['data']['product_summaries']

[{'__typename': 'ProductSummary',
  'tcin': '81107269',
  'fulfillment': {'product_id': '81107269',
   'is_out_of_stock_in_all_store_locations': False,
   'shipping_options': {'availability_status': 'IN_STOCK',
    'loyalty_availability_status': 'IN_STOCK',
    'available_to_promise_quantity': 248.0,
    'minimum_order_quantity': 1.0,
    'services': [{'shipping_method_id': 'STANDARD',
      'min_delivery_date': '2021-07-16',
      'max_delivery_date': '2021-07-16',
      'is_two_day_shipping': False,
      'is_base_shipping_method': True,
      'service_level_description': 'Standard Shipping',
      'shipping_method_short_description': 'Standard',
      'cutoff': '2021-07-12T17:00:00Z'}]},
   'store_options': [{'location_name': 'Brooklyn Fulton St',
     'location_address': '445 Albee Square West,BROOKLYN,NY,11201-3016',
     'location_id': '2850',
     'search_response_store_type': 'PRIMARY',
     'order_pickup': {'availability_status': 'UNAVAILABLE',
      'reason_code': 'IN_ELIGIBL

In [90]:
# drill down some more
target_list_r.json()['data']['product_summaries'][0]

{'__typename': 'ProductSummary',
 'tcin': '81107269',
 'fulfillment': {'product_id': '81107269',
  'is_out_of_stock_in_all_store_locations': False,
  'shipping_options': {'availability_status': 'IN_STOCK',
   'loyalty_availability_status': 'IN_STOCK',
   'available_to_promise_quantity': 248.0,
   'minimum_order_quantity': 1.0,
   'services': [{'shipping_method_id': 'STANDARD',
     'min_delivery_date': '2021-07-16',
     'max_delivery_date': '2021-07-16',
     'is_two_day_shipping': False,
     'is_base_shipping_method': True,
     'service_level_description': 'Standard Shipping',
     'shipping_method_short_description': 'Standard',
     'cutoff': '2021-07-12T17:00:00Z'}]},
  'store_options': [{'location_name': 'Brooklyn Fulton St',
    'location_address': '445 Albee Square West,BROOKLYN,NY,11201-3016',
    'location_id': '2850',
    'search_response_store_type': 'PRIMARY',
    'order_pickup': {'availability_status': 'UNAVAILABLE',
     'reason_code': 'IN_ELIGIBLE'},
    'in_store_onl

In [98]:
# now that we know how to get one
# loop through all the IDs in the ID list from the first API

target_id_list = response.json()['data']['search']['products']

In [103]:
# test: print out all the tcin

for target in target_id_list:
    print(target['tcin'])

12970172
75557224
82242697
15723662
75557230
75557221
75668350
52013769
12969897
13493362
80959323
75668349
80798429
14736274
75668376
17133601
16485137
77332725
82439551
80798426
16485140
17133628
76341872
81644374
52917582


In [105]:
# replace the tcin each time it loops through the list 
# change of tcin is saved through 'taret_list_params'
# save the result in an empty list
 
target_goal = []    
    
for target in target_id_list:
    target_list_params['tcin'] = target['tcin']
    target_list_r = requests.get(target_list_endpoint, params=target_list_params)
    target_goal.append(target_list_r.json())

In [106]:
# see how many results we have

len(target_goal)

25

In [107]:
target_goal

[{'data': {'product_summaries': [{'__typename': 'ProductSummary',
     'tcin': '81107269',
     'fulfillment': {'product_id': '81107269',
      'is_out_of_stock_in_all_store_locations': False,
      'shipping_options': {'availability_status': 'IN_STOCK',
       'loyalty_availability_status': 'IN_STOCK',
       'available_to_promise_quantity': 234.0,
       'minimum_order_quantity': 1.0,
       'services': [{'shipping_method_id': 'STANDARD',
         'min_delivery_date': '2021-07-16',
         'max_delivery_date': '2021-07-16',
         'is_two_day_shipping': False,
         'is_base_shipping_method': True,
         'service_level_description': 'Standard Shipping',
         'shipping_method_short_description': 'Standard',
         'cutoff': '2021-07-12T17:00:00Z'}]},
      'store_options': [{'location_name': 'Brooklyn Fulton St',
        'location_address': '445 Albee Square West,BROOKLYN,NY,11201-3016',
        'location_id': '2850',
        'search_response_store_type': 'PRIMARY',
   

In [108]:
target_goal_2=[]

for item in target_goal:
    target_goal_2.append(item['data'])

In [109]:
target_goal_2

[{'product_summaries': [{'__typename': 'ProductSummary',
    'tcin': '81107269',
    'fulfillment': {'product_id': '81107269',
     'is_out_of_stock_in_all_store_locations': False,
     'shipping_options': {'availability_status': 'IN_STOCK',
      'loyalty_availability_status': 'IN_STOCK',
      'available_to_promise_quantity': 234.0,
      'minimum_order_quantity': 1.0,
      'services': [{'shipping_method_id': 'STANDARD',
        'min_delivery_date': '2021-07-16',
        'max_delivery_date': '2021-07-16',
        'is_two_day_shipping': False,
        'is_base_shipping_method': True,
        'service_level_description': 'Standard Shipping',
        'shipping_method_short_description': 'Standard',
        'cutoff': '2021-07-12T17:00:00Z'}]},
     'store_options': [{'location_name': 'Brooklyn Fulton St',
       'location_address': '445 Albee Square West,BROOKLYN,NY,11201-3016',
       'location_id': '2850',
       'search_response_store_type': 'PRIMARY',
       'order_pickup': {'availa

In [111]:
target_goal_2[0]

{'product_summaries': [{'__typename': 'ProductSummary',
   'tcin': '81107269',
   'fulfillment': {'product_id': '81107269',
    'is_out_of_stock_in_all_store_locations': False,
    'shipping_options': {'availability_status': 'IN_STOCK',
     'loyalty_availability_status': 'IN_STOCK',
     'available_to_promise_quantity': 234.0,
     'minimum_order_quantity': 1.0,
     'services': [{'shipping_method_id': 'STANDARD',
       'min_delivery_date': '2021-07-16',
       'max_delivery_date': '2021-07-16',
       'is_two_day_shipping': False,
       'is_base_shipping_method': True,
       'service_level_description': 'Standard Shipping',
       'shipping_method_short_description': 'Standard',
       'cutoff': '2021-07-12T17:00:00Z'}]},
    'store_options': [{'location_name': 'Brooklyn Fulton St',
      'location_address': '445 Albee Square West,BROOKLYN,NY,11201-3016',
      'location_id': '2850',
      'search_response_store_type': 'PRIMARY',
      'order_pickup': {'availability_status': 'UNAV

## Using sessions to login
### Accessing password-protected pages
[Sessions object - request library](https://docs.python-requests.org/en/master/user/advanced/#session-objects)

In [112]:
# open up a session so that your login credentials are saved

session = requests.Session()

In [41]:
with open('../config/config.json') as json_file:
    config = json.load(json_file)

In [42]:
# check the website for the login parameters

payload = {
    'username':'katiemarriner',
    'password': config['atom_password'],
}

In [43]:
# post the payload to the site to login with the correct log in endpoint

s = session.post("https://atom.finance/session/signin", data=payload)

In [44]:
# check credentials to see if successful

s.text

In [45]:
# look at an example page to get you started with a query

payload = {
    "variables":{"symbol":"SPY"},
    "query": "query getETFProfile($symbol: String!) {\n  etfProfile(symbol: $symbol) {\n    id\n    issuer\n    description\n    }\n}\n"
}

In [46]:
# Navigate to the next page and scrape the data
# json.payload --> without + s.text --> error message: "Variables are invalid JSON"
# --> search for solution

s = session.post('https://atom.finance/graphql', json=payload)

In [48]:
# check to see what is returned

s.text