In [35]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:81.0) Gecko/20100101 Firefox/81.0',
    'Accept': 'image/webp,*/*',
    'Accept-Language': 'fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3',
    'Connection': 'keep-alive',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache',
}

def get_individual_shoe_links():
    home_links = ['https://us.puma.com/us/en/women/shoes?offset=24', 'https://us.puma.com/us/en/men/mens-shoes-and-sneakers?offset=24']
    links=set()
    for link in home_links:
        response = requests.get(link, headers=headers)
        soup = BeautifulSoup(response.text, 'html.parser')
        for i in soup.find_all('li',attrs={'data-test-id':"product-list-item"}):
            link = "https://us.puma.com"+i.find('a')['href']
            link = link.split("?")[0]
            links.add(link)
    return links


def get_individual_shoe_details(link):
    def get_shoe_name(soup): #
        return soup.select_one('#pdp-product-title').text.strip()

    def get_category(soup): # 
        return "Women" if "women" in soup.select_one('#pdp-product-title').text.strip().lower() else "Men"

    def get_style_or_product_code(soup): #
        product_description_div = soup.find('div', {'data-test-id': 'pdp-product-description'})
        info_list = product_description_div.find('ul', {'data-uds-child': 'text'}).find_all('li')
        style = info_list[0].text.split(':')[1].strip()
        return style

    def get_reviews(soup): #
        product_reviews_div = soup.find('div', {'id': 'product-reviews'})
        reviews_heading = product_reviews_div.find('h2')
        review_count = reviews_heading.text.split('(')[-1].split(')')[0].strip()
        return review_count

    def get_sizes(soup): #
        size_picker_div = soup.find('div', {'id': 'size-picker'})
        if not size_picker_div:
            return []
        size_spans = size_picker_div.find_all('label', {'data-size': True})
        return [span.find(class_='text-sm').text for span in size_spans]

    def get_colors(soup): # 
        style_picker_div = soup.find('div', {'id': 'style-picker'})
        img_tags = style_picker_div.find_all('img')
        alt_values = [img.get('alt') for img in img_tags]
        return alt_values

    def get_star_count(soup): #  
        product_reviews_div = soup.find('div', {'id': 'product-reviews'})
        star_count_span = product_reviews_div.find('span', {'class': 'sr-only'})
        star_count = star_count_span.text.split(':')[-1].strip().split('/')[0].strip()
        return star_count

    def get_price(soup): #
        return soup.select_one('.override\:md\:text-2xl').text.strip()

    def get_dimensions(soup): # 
        fitness = 0
        comfort = 0
        recommend = 0  
        return fitness, comfort, recommend
    print(f"trying {link}")
    link_detail = {"link": link, "brand": "puma"}
    response = requests.get(link, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')
    link_detail["shoe_name"] =  get_shoe_name(soup)
    link_detail["category"] = get_category(soup)
    link_detail["style_or_product_code"] = get_style_or_product_code(soup)
    link_detail["reviews"] = get_reviews(soup)
    available_sizes = get_sizes(soup)
    link_detail["number_of_sizes"] = len(available_sizes)
    link_detail["size"] = ",".join(available_sizes)
    colors_available = get_colors(soup)
    link_detail["number_of_colors"] = len(colors_available)
    link_detail["color_1"], link_detail["color_2"], link_detail["color_3"], link_detail["color_4"], link_detail["color_5"] = (colors_available + [None] * (5 - len(colors_available)))[:5]
    link_detail["stars"] = get_star_count(soup)
    link_detail["price"] = get_price(soup)
    link_detail["fitness"], link_detail["comfort"], link_detail["recommend"] = get_dimensions(soup)
    return link_detail
def get_puma_details():
    individual_shoe_links = get_individual_shoe_links()
    details = []
    for shoe_link in individual_shoe_links:
        details.append(get_individual_shoe_details(shoe_link))
    return details

puma_details = get_puma_details()
print(puma_details)

trying https://us.puma.com/us/en/pd/fenty-x-puma-creeper-phatty-womens-sneakers/399332
trying https://us.puma.com/us/en/pd/rs-xk-sneakers/392787
trying https://us.puma.com/us/en/pd/gv-special%2B-mens-sneakers/366613
trying https://us.puma.com/us/en/pd/suede-gum-mens-sneakers/381174
trying https://us.puma.com/us/en/pd/puma-x-lamelo-ball-mb-01-lo-mens-basketball-shoes/376941
trying https://us.puma.com/us/en/pd/rs-x-faded-daytona-sneakers/394981
trying https://us.puma.com/us/en/pd/future-ultimate-neymar-jr-fg-ag-mens-soccer-cleats/107610
trying https://us.puma.com/us/en/pd/mayze-classic-womens-sneakers/384209
trying https://us.puma.com/us/en/pd/suede-classic-xxi-sneakers/374915
trying https://us.puma.com/us/en/pd/cali-dream-west-coast-leather-womens-sneakers/392730
trying https://us.puma.com/us/en/pd/cali-womens-sneakers/369155
trying https://us.puma.com/us/en/pd/puma-x-lamelo-ball-mb-03-lafranc%C3%A9-mens-basketball-shoes/379233
trying https://us.puma.com/us/en/pd/rs-x-soft-womens-sneake

In [36]:
df1_columns = ["shoe_name", "category", "number_of_colors", "price"]
df2_columns = ["number_of_sizes", "color_1","color_2","color_3","color_4","color_5","style_or_product_code"]
df3_columns = ["reviews", "size", "comfort", "fitness", "recommend", "stars"]
puma_df1 = pd.DataFrame({key: d.get(key) for key in df1_columns} for d in puma_details)
puma_df2 = pd.DataFrame({key: d.get(key) for key in df2_columns} for d in puma_details)
puma_df3 = pd.DataFrame({key: d.get(key) for key in df3_columns} for d in puma_details)

print(puma_df1)
print("\n")

print(puma_df2)
print("\n")

print(puma_df3)
print("\n")

                                            shoe_name category  \
0        FENTY x PUMA Creeper Phatty Women's Sneakers    Women   
1                                      RS-XK Sneakers      Men   
2                          GV Special+ Men's Sneakers      Men   
3                            Suede Gum Men's Sneakers      Men   
4   PUMA x LAMELO BALL MB.01 Lo Men's Basketball S...      Men   
5                         RS-X Faded Daytona Sneakers      Men   
6   FUTURE ULTIMATE Neymar Jr FG/AG Men's Soccer C...      Men   
7                      Mayze Classic Women's Sneakers    Women   
8                          Suede Classic XXI Sneakers      Men   
9      Cali Dream West Coast Leather Women's Sneakers    Women   
10                              Cali Women's Sneakers    Women   
11  PUMA x LAMELO BALL MB.03 LaFrancé Men's Basket...      Men   
12                         RS-X Soft Women's Sneakers    Women   
13             FENTY x PUMA AVANTI L Women's Sneakers    Women   
14   PUMA 

In [None]:
all_df = pd.DataFrame(puma_details)
all_df.to_csv("puma_all.csv")

puma_df1.to_csv("puma_1.csv")
puma_df2.to_csv("puma_2.csv")
puma_df3.to_csv("puma_3.csv")
