In [39]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:81.0) Gecko/20100101 Firefox/81.0',
    'Accept': 'image/webp,*/*',
    'Accept-Language': 'fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3',
    'Connection': 'keep-alive',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache',
}

def individual_shoe_links():
    nike_home_links=['https://www.nike.com/in/w/retro-running-8kemk','https://www.nike.com/in/w/womens-shoes-5e1x6zy7ok']
    shoe_links=set()
    for link in nike_home_links:
        response = requests.get(link, headers=headers)
        soup = BeautifulSoup(response.text, 'html.parser')
        product_cards = soup.find_all('div',attrs={'class':'product-card__body'})
        for card in product_cards:
            shoe_links.add(card.find('a')['href'])
    return shoe_links


def get_individual_shoe_details(link):
    def get_shoe_name(soup):
        return soup.find('h1',attrs={"id":'pdp_product_title'}).text.strip()

    def get_category(soup):
        return "Women" if "women" in soup.find('h2').text.strip().lower() else "Men"

    def get_style_or_product_code(soup):
        return soup.select_one('.description-preview__style-color').text.strip()

    def get_reviews(data):
        return data['props']['pageProps']['initialState']['reviews']['total']

    def get_sizes(data):
        sizes = set()
        for _, details in data['props']['pageProps']['initialState']['Threads']['products'].items():
            for sku in details['skus']:
                sizes.add(sku['nikeSize'])
            break
        return sizes

    def get_colors(soup):
        colorway_divs = soup.find_all('div', class_='colorway-container')
        alt_texts = []
        for div in colorway_divs:
            img_tag = div.find('img')
            if img_tag and 'alt' in img_tag.attrs:
                alt_text = img_tag['alt']
                alt_texts.append(alt_text)
        return alt_texts

    def get_star_count(data): 
        return data['props']['pageProps']['initialState']['reviews']['averageRating']

    def get_price(data):
        price = ""
        for _, details in data['props']['pageProps']['initialState']['Threads']['products'].items():
            price = details['fullPrice']
            break
        return price

    def get_dimensions(data):
        fitness = 0
        comfort = 0
        recommend = 0
        for dim in data['props']['pageProps']['initialState']['reviews']["dimensions"]:
            if dim['label'] == "How did this product fit?":
                fitness = dim["average"]
            elif dim['label'] == "How comfortable was this product?":
                comfort = dim["average"]
            elif dim['label'] == "Would you recommend this product?":
                recommend = dim["average"]
        return fitness, comfort, recommend
    link_detail = {"link": link, "brand": "nike"}
    response = requests.get(link, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')
    data = json.loads(soup.find('script', id='__NEXT_DATA__').text)
    link_detail["shoe_name"] =  get_shoe_name(soup)
    link_detail["category"] = get_category(soup)
    link_detail["style_or_product_code"] = get_style_or_product_code(soup)
    link_detail["reviews"] = get_reviews(data)
    available_sizes = get_sizes(data)
    link_detail["number_of_sizes"] = len(available_sizes)
    link_detail["size"] = ",".join(available_sizes)
    colors_available = get_colors(soup)
    link_detail["number_of_colors"] = len(colors_available)
    link_detail["color_1"], link_detail["color_2"], link_detail["color_3"], link_detail["color_4"], link_detail["color_5"] = (colors_available + [None] * (5 - len(colors_available)))[:5]
    link_detail["stars"] = get_star_count(data)
    link_detail["price"] = get_price(data)
    link_detail["fitness"], link_detail["comfort"], link_detail["recommend"] = get_dimensions(data)
    return link_detail

def get_nike_details():
    shoe_links = individual_shoe_links()
    details = []
    for shoe_link in shoe_links:
        details.append(get_individual_shoe_details(shoe_link))
    return details


nike_details = get_nike_details()
print(nike_details)

[{'link': 'https://www.nike.com/in/t/cortez-23-shoes-1VrCHW/FB6877-001', 'brand': 'nike', 'shoe_name': 'Nike Cortez 23 Premium', 'category': 'Women', 'style_or_product_code': 'Style: FB6877-001', 'reviews': 0, 'number_of_sizes': 11, 'size': '6,5,9,9.5,7,6.5,7.5,8,10,5.5,8.5', 'number_of_colors': 0, 'color_1': None, 'color_2': None, 'color_3': None, 'color_4': None, 'color_5': None, 'stars': 0, 'price': 9695, 'fitness': None, 'comfort': None, 'recommend': None}, {'link': 'https://www.nike.com/in/t/p-6000-shoes-6l9ww7/FQ8243-025', 'brand': 'nike', 'shoe_name': 'Nike P-6000 Premium', 'category': 'Men', 'style_or_product_code': 'Style: FQ8243-025', 'reviews': 9, 'number_of_sizes': 13, 'size': '12,9,9.5,12.5,7,7.5,8,10,10.5,11.5,13,11,8.5', 'number_of_colors': 0, 'color_1': None, 'color_2': None, 'color_3': None, 'color_4': None, 'color_5': None, 'stars': 5, 'price': 10795, 'fitness': None, 'comfort': None, 'recommend': None}, {'link': 'https://www.nike.com/in/t/air-jordan-1-low-se-shoes-4K

In [40]:
df1_columns = ["shoe_name", "category", "number_of_colors", "price"]
df2_columns = ["number_of_sizes", "color_1","color_2","color_3","color_4","color_5","style_or_product_code"]
df3_columns = ["reviews", "size", "comfort", "fitness", "recommend", "stars"]
nike_df1 = pd.DataFrame({key: d.get(key) for key in df1_columns} for d in nike_details)
nike_df2 = pd.DataFrame({key: d.get(key) for key in df2_columns} for d in nike_details)
nike_df3 = pd.DataFrame({key: d.get(key) for key in df3_columns} for d in nike_details)

print(nike_df1)
print("\n")

print(nike_df2)
print("\n")

print(nike_df3)
print("\n")

                            shoe_name category  number_of_colors  price
0              Nike Cortez 23 Premium    Women                 0   9695
1                 Nike P-6000 Premium      Men                 0  10795
2                 Air Jordan 1 Low SE    Women                 2  10295
3                     Nike Cortez TXT    Women                 3   8495
4             Nike Air Force 1 '07 NN    Women                 0   9695
5               Nike Dunk Low LX NBHD    Women                 2  10795
6                           Nike Calm    Women                 3   5695
7                     Nike Air Humara      Men                 5  14995
8                 Nike P-6000 Premium      Men                 0  10795
9              Nike Court Vision Alta    Women                 5   5995
10                        Nike Cortez    Women                 5   7495
11                 Nike Zoom Vomero 5      Men                 2  14995
12               Nike Air Force 1 '07    Women                 0