In [2]:
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
import requests
from selenium.webdriver.chrome.options import Options


# Root of the shop; every category path and product href is appended to it.
BASE_URL = "https://www.leonardoshoes.com"

# Collection pages to scrape, as paths relative to BASE_URL.
# Each entry must end with a comma: two adjacent string literals without one
# are silently concatenated by Python into a single (invalid) path.
# Every path is listed once so that no listing page is scraped twice.
CATEGORIES = [
    "/collections/donna-calzature-mocassini",
    "/collections/donna-calzature-decolte-tacchi",
    "/collections/donna-calzature-sandali",
    "/collections/donna-calzature-sandali?page=2",
    "/collections/donna-calzature-mules",
    "/collections/donna-calzature-ballerine",
    "/collections/donna-calzature-stivali-tronchetti",
    "/collections/donna-calzature-sneakers",
    "/collections/donna-sandali-tacco-alto",
    "/collections/donna-sandali-tacco-basso",
    "/collections/slingback",
    "/collections/mocassini-da-barca",
    "/collections/polacchine-donna-in-pelle-artigianali",
]

################################################# N O T E S #####################################################
#                    Requests library doesn't work on this website, use webdriver instead                       #
#             Pages with more than 100 products need to be scraped with suffix ?page={number_of_page}           #
#                  There are product names containing "," which can interfere with csv export                   #
#################################################################################################################

In [3]:
# Scrape every category listing and every product page it links to,
# producing df_leonardo with one row per (category, shoe).

# Column order of the scraped dataframe (and of the csv written from it)
LEONARDO_COLUMNS = ["nome", "categoria", "prezzo", "tacco", "taglie", "colore", "link"]
# String stored when a product page does not provide an optional detail
MISSING = "NaN"


def _second_field(text):
    """Return the second ": "-separated field of `text`, or MISSING when there is none."""
    fields = text.split(": ")
    return fields[1] if len(fields) > 1 else MISSING


# One dict per shoe. The dataframe is built once at the end instead of being
# grown with pd.concat on every iteration (quadratic, and every row got index 0).
shoe_records = []

options = Options()
options.add_argument("--headless")
# Keyword argument on purpose: `options` is not the first positional parameter
# of webdriver.Chrome in every Selenium release.
driver = webdriver.Chrome(options=options)
try:
    # Cycle through category pages; listings are loaded through the headless browser
    for category in CATEGORIES:
        driver.get(BASE_URL + category)
        bs = BeautifulSoup(driver.page_source, 'html.parser')
        title_divs = bs.find_all("div", class_ = "h4 spf-product-card__title")

        # Absolute link of every product shown on the listing page
        l_shoe_links = [BASE_URL + title_div.a["href"] for title_div in title_divs]

        # Cycle through every shoe's page (fetched with requests)
        for shoe_link in l_shoe_links:
            print(shoe_link)
            # Timeout so that one stalled connection cannot hang the whole run
            response = requests.get(shoe_link, timeout=30)
            bs_shoe = BeautifulSoup(response.content, 'html.parser')

            # Some product names contain a comma, which gets in the way of the csv export
            product_name = bs_shoe.find("h1").text.replace(",", "")

            # The price span carries an extra "highlight" class on some pages;
            # the amount is the text that follows the word "vendita".
            price_tag = (bs_shoe.find("span", class_ = "price price--large")
                         or bs_shoe.find("span", class_ = "price price--highlight price--large"))
            price = price_tag.text.split("vendita")[1]

            # All size labels joined with " " to obtain the sizes as one string
            size_labels = bs_shoe.find_all("label", class_ = "block-swatch__item")
            sizes = " ".join(label.text for label in size_labels)

            # The second tab holds the detail paragraphs ("label: value").
            # Pages without that tab, or with fewer paragraphs, yield MISSING.
            tabs = bs_shoe.find_all("div", class_ = "product-tabs__tab-item-content rte")
            details = tabs[1].find_all("p") if len(tabs) > 1 else []
            heel = _second_field(details[6].text.replace(" cm", "")) if len(details) > 6 else MISSING
            color = _second_field(details[0].text) if details else MISSING

            shoe_records.append({
                "nome": product_name,
                "categoria": category,
                "prezzo": price,
                "tacco": heel,
                "taglie": sizes,
                "colore": color,
                "link": shoe_link})
finally:
    # Always close the browser, even if a page fails to load or parse
    driver.quit()

df_leonardo = pd.DataFrame(shoe_records, columns = LEONARDO_COLUMNS)

https://www.leonardoshoes.com/collections/donna-calzature-mocassini/products/mocassino-da-donna-in-pelle-scamosciata-con-morsetto-colore-viola
https://www.leonardoshoes.com/collections/donna-calzature-mocassini/products/mocassino-da-donna-in-pelle-scamosciata-con-morsetto-colore-turchese-1
https://www.leonardoshoes.com/collections/donna-calzature-mocassini/products/mocassino-da-donna-in-pelle-scamosciata-con-morsetto-colore-fucsia
https://www.leonardoshoes.com/collections/donna-calzature-mocassini/products/mocassino-da-donna-in-pelle-scamosciata-con-morsetto-colore-blu
https://www.leonardoshoes.com/collections/donna-calzature-mocassini/products/mocassino-da-donna-in-pelle-scamosciata-con-morsetto-colore-bu
https://www.leonardoshoes.com/collections/donna-calzature-mocassini/products/mocassino-da-donna-in-pelle-scamosciata-con-morsetto-colore-beige
https://www.leonardoshoes.com/collections/donna-calzature-mocassini/products/mocassino-da-donna-in-pelle-scamosciata-con-morsetto-colore-marr

In [4]:
# Export the scraped rows. index=False keeps the dataframe's row index out of
# the file; otherwise it is written as an extra unnamed first column.
df_leonardo.to_csv("leonardo.csv", index=False)

#### Before using the file, clean it in Excel with Power Query and export it to .csv again;
#### if that export uses ";" as separator, replace every ";" with ",".

In [6]:
# Reload the cleaned csv (a different file from the raw "leonardo.csv" export)
# and display it as the cell's output.
CLEANED_CSV = "leonardo_pulito.csv"
df_leonardo = pd.read_csv(CLEANED_CSV)
df_leonardo

Unnamed: 0,nome,categoria,prezzo,tacco,taglie,colore,link
0,Ballerina casual da donna in pelle liscia colo...,Ballerine,€139.00,1,35 36 37 38 39 40 41,Bianco,https://www.leonardoshoes.com/collections/donn...
1,Ballerine da donna color nero in pelle scamosc...,Ballerine,€99.00,1,35 36 37 38 39 40 41,Nero,https://www.leonardoshoes.com/collections/donn...
2,Ballerine da donna color celeste in pelle scam...,Ballerine,€99.00,1,35 36 37 38 39 40 41,Celeste,https://www.leonardoshoes.com/collections/donn...
3,Ballerine da donna color lilla in pelle scamos...,Ballerine,€99.00,1,35 36 37 38 39 40 41,Lilla,https://www.leonardoshoes.com/collections/donn...
4,Ballerine da donna color verde in pelle scamos...,Ballerine,€99.00,1,35 36 37 38 39 40 41,Verde,https://www.leonardoshoes.com/collections/donn...
...,...,...,...,...,...,...,...
568,Sandalo chiuso tacco medio da donna a punta in...,Sandali,€135.00,3,35 36 37 38 39 40 41,Verde,https://www.leonardoshoes.com/collections/donn...
569,Sandalo chiuso tacco medio da donna a punta in...,Sandali,€135.00,3,35 36 37 38 39 40 41,Nero,https://www.leonardoshoes.com/collections/donn...
570,Sandalo aperto da donna con tacco basso in pel...,Sandali,€135.00,4,35 36 37 38 39 40 41,Beige,https://www.leonardoshoes.com/collections/donn...
571,Sandali Arcadia da donna stile romano antico i...,Sandali,€99.00,1,35 36 37 38 39 40 41,Nero,https://www.leonardoshoes.com/collections/donn...
