# Getting data from nike.com
---

In [1]:
# Modules import
import pandas as pd
import requests as req
import json
import re

# Product page
# `uri` is the men's-shoes product wall; its HTML is downloaded below to read
# the embedded INITIAL_REDUX_STATE.
uri = 'https://www.nike.com/w/mens-shoes-nik1zy7ok'
# Upcoming-launches page (not used by the code in this cell).
url = 'https://www.nike.com/gb/launch?s=upcoming'
# Root that get_lazy_products() prepends to each relative pagination stub.
base_url = 'https://api.nike.com'
# Single requests session shared by every request made in this cell.
session = req.Session()


# A function for retrieving lazily loaded products
def get_lazy_products(stub, products):
    """Collect the products from a paginated endpoint, following every page.

    Each page is fetched from ``base_url + stub`` with the module-level
    ``session``. The page's ``'objects'`` list is appended to ``products`` and
    the walk continues with the stub found under ``['pages']['next']`` until
    that value is falsy.

    Pages are followed in a loop rather than by recursion, so the number of
    pages is not limited by the interpreter's recursion depth.

    Args:
        stub: Path (plus query string) of the first page, relative to
            ``base_url``.
        products: List that receives the products. It is extended in place.

    Returns:
        The same ``products`` list that was passed in.

    Raises:
        KeyError: If a response lacks the ``'pages'``/``'next'`` or
            ``'objects'`` keys.
    """
    while True:
        response = session.get(base_url + stub).json()
        # Read the next-page stub before extending, so a malformed response
        # fails before `products` is modified for that page.
        stub = response['pages']['next']
        products += response['objects']
        if not stub:
            return products


# find INITIAL_REDUX_STATE
# The page assigns a JSON object to window.INITIAL_REDUX_STATE; capture the
# object literal up to the first "};" and decode it.
# NOTE(review): the non-greedy match stops at the first "};" it meets, so a
# "};" inside a JSON string value would truncate the capture - confirm this
# cannot occur in the page data.
html_data = session.get(uri).text
redux_match = re.search(
    r'window\.INITIAL_REDUX_STATE=(\{.*?\});', html_data)
if redux_match is None:
    # Fail with a clear message instead of "'NoneType' has no attribute 'group'".
    raise ValueError('window.INITIAL_REDUX_STATE not found in ' + uri)
redux = json.loads(redux_match.group(1))

# find the initial products and the api entry point for the loading of additional products
wall = redux['Wall']
# Rewrite the "next page" stub to anchor=0 so the walk begins at the first
# product (presumably the stub otherwise points past the products already
# rendered in the page - verify against the API).
initial_products = re.sub(r'anchor=[0-9]+', 'anchor=0',
                          wall['pageData']['next'])

# find all the products
products = get_lazy_products(initial_products, [])

# Optional: filter by id to get a list with unique products
# The first occurrence of each id is kept, in the original order.
cloudProductIds = set()
unique_products = []
for product in products:
    # Only the key lookup can raise KeyError, so only it is guarded.
    try:
        product_id = product['id']
    except KeyError:
        # Record without an 'id': show it and leave it out of the result.
        print(product)
        continue
    if product_id not in cloudProductIds:
        cloudProductIds.add(product_id)
        unique_products.append(product)

## Trying out another method

In [2]:
# Importing modules
import lxml
import re
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from requests import get

In [8]:
# Upcoming-launches page that the BeautifulSoup cells below download and parse
url = "https://www.nike.com/gb/launch?s=upcoming"

In [9]:
# Download the HTML page; `get` is requests.get, imported above
webpage = get(url)

In [11]:
# Parse the downloaded response body into a BeautifulSoup tree with the lxml parser
soup = BeautifulSoup(webpage.content, 'lxml')

In [20]:
# Locate the container element that wraps the upcoming-launch product feed
content = soup.find(class_="ncss-container feed-container-inner")

In [22]:
# Collect the <figure> elements in the feed that have this class string;
# in the output shown below, each one holds a launch date, a link, an image and a shoe name
shoes = content.find_all("figure", class_ = "d-md-h ncss-col-sm-12 va-sm-t pb0-sm prl0-sm")

In [21]:
content

<div class="ncss-container feed-container-inner" data-qa="product-wall"><section class="upcoming-section bg-white ncss-row prl2-md prl5-lg pb4-md pb6-lg" data-qa="upcoming-section"><figure class="pb2-sm va-sm-t ncss-col-sm-12 ncss-col-md-6 ncss-col-lg-4 pb4-md prl0-sm prl2-md ncss-col-sm-6 ncss-col-lg-3 pb4-md prl2-md pl0-md pr1-md d-sm-h d-md-ib"><div class="product-card ncss-row mr0-sm ml0-sm" data-qa="product-card-0"><div class="ncss-col-sm-12 full"><a aria-label="Air Force 1 Mid Jewel 'NYC Cool Grey' Release Date" class="card-link d-sm-b" data-qa="product-card-link" href="/gb/launch/t/air-force-1-mid-jewel-nyc-cool-grey"><div class="launch-time ta-sm-l d-sm-h d-md-b z10 mod-bg-grey pt6-sm pl6-sm"><div class="launch-caption ta-sm-c"><p class="headline-4" data-qa="test-startDate">Oct</p><p class="headline-1" data-qa="test-day">20</p></div></div><div style="position:absolute;top:0;right:0;bottom:0;left:0"></div></a><figcaption class="ncss-row"><div class="ncss-col-sm-12 full"><div cla

In [24]:
shoes

[<figure class="d-md-h ncss-col-sm-12 va-sm-t pb0-sm prl0-sm"><div class="bg-lightestgrey upcoming ncss-col-sm-12"><h2 class="headline-5 mr4-sm ml4-sm mt4-sm mb5-sm">20 Oct</h2></div><div class="upcoming upcoming-card ncss-row mr0-sm ml0-sm"><div class="ncss-col-sm-3 full"><a class="card-link d-sm-b" href="/gb/launch/t/air-force-1-mid-jewel-nyc-cool-grey"><img alt="image" class="image-component" src="https://secure-images.nike.com/is/image/DotCom/DH5622_001_A_PREM?$SNKRS_COVER_WD$&amp;align=0,1" style="opacity:0" title="image"/></a></div><a class="ncss-col-sm-8 launch-details u-full-height va-sm-t full" href="/gb/launch/t/air-force-1-mid-jewel-nyc-cool-grey"><div class="d-sm-t u-full-height ml3-sm"><div class="d-sm-tc va-sm-m"><h3 class="headline-5 mb1-sm fs16-sm"> <!-- -->Air Force 1 Mid Jewel</h3><h6 class="text-color-secondary mb-1-sm body-3"><div class="available-date-component">Available 10/20 at 7:00 AM</div></h6></div></div></a></div></figure>,
 <figure class="d-md-h ncss-col-sm

In [25]:
# Creating a class to extract data from the SNKRS webpage
class SNKRS:
    """Scraper for a Nike SNKRS launch page.

    The page is downloaded and parsed once, when the object is created; the
    parsed tree is stored on ``self.soup`` and read by the extraction methods.
    """

    def __init__(self, url):
        """Download and parse the page at ``url``.

        Args:
            url: Address of the launch page to scrape.
        """
        self.soup = self.soup_gen(url)
        # Site root; presumably meant as the prefix for the site-relative
        # hrefs found in the feed (e.g. "/gb/launch/t/...") - TODO confirm.
        self.url_start = "https://www.nike.com"
        # Not used by the methods visible here.
        self.url_end = ""

    def soup_gen(self, url):
        """Fetch ``url`` and return its content parsed with the lxml parser.

        Args:
            url: Address of the page to download.

        Returns:
            A BeautifulSoup tree built from the response body.
        """
        snkrs_webpage = get(url)
        # Parse the response fetched above, not the notebook-global `webpage`,
        # so the result really corresponds to `url`.
        return BeautifulSoup(snkrs_webpage.content, 'lxml')

    def feed_content(self):
        """Return the <figure> elements of the launch feed.

        Looks up the feed container in this instance's parsed page and returns
        the figures inside it that have the mobile-card class string.

        Returns:
            The result of ``find_all`` on the feed container.

        Raises:
            AttributeError: If the feed container is not present in the page
                (``find`` returned ``None``).
        """
        # Use the tree parsed for this instance rather than a global `soup`.
        feed = self.soup.find(class_="ncss-container feed-container-inner")
        return feed.find_all(
            "figure", class_="d-md-h ncss-col-sm-12 va-sm-t pb0-sm prl0-sm")