# BOT AMAZON web scraping

## BEST SELLERS

In [2]:
import requests
from lxml import html
import pandas as pd
import time
import re
from datetime import datetime

def get_category_links(url):
    """Collect the category links from the navigation tree of a ranking page.

    Args:
        url: Address of the Amazon ranking page to read.

    Returns:
        A list of ``{'category': <label>, 'link': <absolute URL>}`` dicts.
        An empty list is returned when the request fails or the server does
        not answer with HTTP 200.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    try:
        # Without a timeout a stalled connection would block the run forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to retrieve the page. Error: {exc}")
        return []
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return []

    tree = html.fromstring(response.content)
    categories = tree.xpath('//div[contains(@class, "_p13n-zg-nav-tree-all_style_zg-browse-group__88fbz")]//a')
    # Anchors without an href are skipped: concatenating None would raise TypeError.
    return [
        {'category': cat.text_content().strip(), 'link': 'https://www.amazon.nl' + cat.get('href')}
        for cat in categories
        if cat.get('href')
    ]

def _first_match(item, query):
    """Return the first result of the XPath *query* evaluated on *item*, or None."""
    matches = item.xpath(query)
    return matches[0] if matches else None


def extract_product_details(items, category):
    """Turn ranking-page product cards into flat record dicts.

    Each extracted field is also printed as a progress log.

    Args:
        items: Iterable of elements exposing ``xpath(query)``; one per product card.
        category: Category label stored unchanged in every record.

    Returns:
        A list with one dict per item, holding the keys ``category``, ``rank``,
        ``asin``, ``name``, ``title``, ``rating``, ``reviews``, ``symbol``,
        ``value``, ``image``, ``link`` and ``date``. Fields that cannot be
        found hold a placeholder string.
    """
    products = []
    # Today's date, stamped on every record
    today_date = datetime.today().strftime('%Y-%m-%d')

    for index, item in enumerate(items, start=1):
        # ASIN: first data-asin attribute found on the card or its descendants
        product_id = _first_match(item, './/@data-asin')
        if product_id is None:
            product_id = "No ID"
        print(f"Product ID: {product_id}")

        # Rank badge; fall back to the 1-based position in the list
        position = _first_match(item, './/span[@class="zg-bdg-text"]/text()')
        position = str(index) if position is None else position.strip()
        print(f"Posição: {position}")

        # Product image
        image_link = _first_match(item, './/img[contains(@class, "a-dynamic-image")]/@src')
        if image_link is None:
            image_link = "No image link"
        print(f"Image link: {image_link}")

        # Title and product link
        title = _first_match(item, './/a/span/div/text()')
        title = "No title" if title is None else title.strip()
        print(f"Title: {title}")

        link = _first_match(item, './/a[contains(@class, "a-link-normal")]/@href')
        product_link = "No product link" if link is None else "https://www.amazon.nl" + link
        print(f"Product link: {product_link}")

        # Name: for an href shaped like "/<slug>/dp/<asin>/..." the slug is the
        # second piece after splitting on "/". Guard against hrefs that have
        # no such piece instead of raising IndexError.
        name_href = _first_match(item, './/a[@class="a-link-normal aok-block"]/@href')
        pieces = name_href.split('/') if name_href is not None else []
        name = pieces[1] if len(pieces) > 1 and pieces[1] else "No name"
        print(f"Name: {name}")

        # Rating and review count
        rating = _first_match(item, './/span[contains(@class, "a-icon-alt")]/text()')
        rating = "No rating" if rating is None else rating.strip()
        print(f"Rating: {rating}")

        reviews = _first_match(item, './/span[@class="a-size-small"]/text()')
        reviews = "No reviews" if reviews is None else reviews.strip()
        print(f"Reviews: {reviews}")

        # Price, split into currency symbol and numeric text
        price = _first_match(item, './/span[contains(@class, "p13n-sc-price")]/text()')
        if price is None:
            currency_symbol = "Not Available"
            value = "Not Available"
        else:
            price = price.strip()
            # Every character that is not a digit or separator belongs to the
            # symbol. The scraped text keeps whitespace (e.g. a non-breaking
            # space) between symbol and amount, so strip it from the symbol.
            currency_symbol = ''.join(re.findall(r'[^\d.,]', price)).strip()
            value = ''.join(re.findall(r'[\d.,]+', price))
        print(f"Currency Symbol: {currency_symbol}, Value: {value}")

        products.append({
            "category": category,
            "rank": position,
            "asin": product_id,
            "name": name,
            "title": title,
            "rating": rating,
            "reviews": reviews,
            "symbol": currency_symbol,
            "value": value,
            "image": image_link,
            "link": product_link,
            "date": today_date,
        })

    return products

def get_amazon_bestsellers(url, category, retries=5, backoff_factor=0.3):
    """Download one category ranking page and extract its products.

    HTTP 503 answers are retried with exponential backoff. Any other non-200
    status, a network error, or running out of attempts yields an empty list.

    Args:
        url: Address of the category ranking page.
        category: Category label passed on to ``extract_product_details``.
        retries: Maximum number of download attempts.
        backoff_factor: Base delay in seconds; attempt ``i`` waits
            ``backoff_factor * 2 ** i`` before the next try.

    Returns:
        The list of product dicts built by ``extract_product_details``, or
        ``[]`` on failure.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    # Stays None when retries <= 0, so the check below never hits an unbound name.
    response = None
    for attempt in range(retries):
        try:
            # Without a timeout a stalled connection would block the run forever.
            response = requests.get(url, headers=headers, timeout=30)
        except requests.RequestException as exc:
            print(f"Failed to retrieve the page. Error: {exc}")
            return []
        if response.status_code == 200:
            break
        if response.status_code != 503:
            print(f"Failed to retrieve the page. Status code: {response.status_code}")
            return []
        print(f"Failed to retrieve the page. Status code: 503. Retrying {attempt+1}/{retries}...")
        if attempt < retries - 1:
            # Exponential backoff; no point sleeping after the final attempt.
            time.sleep(backoff_factor * (2 ** attempt))

    if response is None or response.status_code != 200:
        print(f"Failed to retrieve the page after {retries} retries.")
        return []

    tree = html.fromstring(response.content)
    items = tree.xpath('//div[contains(@id, "p13n-asin-index")]')
    print(f"Found {len(items)} items in category {category}.")

    return extract_product_details(items, category)

def save_to_excel(products, directory_path):
    """Write the product records to a dated Excel workbook inside *directory_path*.

    Args:
        products: List of record dicts; each dict becomes one row.
        directory_path: Target directory. It is created when missing so that
            a finished scrape is not lost to a missing folder.
    """
    import os  # local import: the cell's import list does not include os

    if directory_path:
        os.makedirs(directory_path, exist_ok=True)
    today_date = datetime.today().strftime('%Y-%m-%d')
    filename = f"{directory_path}/bot_amazon_best_sellers_{today_date}.xlsx"
    pd.DataFrame(products).to_excel(filename, index=False)
    print(f"Products saved to {filename}")

if __name__ == "__main__":
    # Entry point: scrape every category listed on the best-sellers root page
    # and write all collected rows to a single Excel file.
    base_url = "https://www.amazon.nl/gp/bestsellers/"
    category_links = get_category_links(base_url)

    all_products = []
    for category_link in category_links:
        category = category_link['category']
        link = category_link['link']
        print(f"Processing category: {category}")
        products = get_amazon_bestsellers(link, category)
        all_products.extend(products)
        time.sleep(10)  # Longer pause (10 s) between category requests

    # Directory where the Excel file will be saved
    directory_path = "C:/Users/ThiagoBizacha/Desktop/Projeto_Automacao_Coleta_Dados/data/output/bot_amazon"
    save_to_excel(all_products, directory_path)
    print("Finalizado!")


Processing category: Amazon Renewed
Found 30 items in category Amazon Renewed.
Product ID: B0B2XSLDL7
Posição: #1
Image link: https://images-eu.ssl-images-amazon.com/images/I/81UDF62AqHS._AC_UL300_SR300,200_.jpg
Title: Apple Magic Keyboard (voor 12,9‑inch iPad Pro - 5e generatie) - amerikansk engelsk - zwart (Refurbished)
Product link: https://www.amazon.nl/Apple-Magic-Keyboard-voor-9%E2%80%91inch/dp/B0B2XSLDL7/ref=zg_bs_g_amazon-renewed_d_sccl_1/258-8968264-0154149?psc=1
Name: Apple-Magic-Keyboard-voor-9%E2%80%91inch
Rating: 4,4 van 5 sterren
Reviews: 10
Currency Symbol: € , Value: 220,89
Product ID: B0CRZ8B5S5
Posição: #2
Image link: https://images-eu.ssl-images-amazon.com/images/I/71cRLJBWovL._AC_UL300_SR300,200_.jpg
Title: Dell Desktop RGB Gaming PC, Intel Quad Core I7 tot 3,9 GHz, GeForce GTX 750 Ti 4 GB GDDR5, 16 GB RAM, 512 GB SSD, WiFi 600 M, Bluetooth 5.0, W10P64 (Refurbished)
Product link: https://www.amazon.nl/Dell-Desktop-GeForce-Bluetooth-Refurbished/dp/B0CRZ8B5S5/ref=zg_b

## NEW RELEASES

In [4]:
import requests
from lxml import html
import pandas as pd
import time
import re
from datetime import datetime

def get_category_links(url):
    """Collect the category links from the navigation tree of a ranking page.

    Args:
        url: Address of the Amazon ranking page to read.

    Returns:
        A list of ``{'category': <label>, 'link': <absolute URL>}`` dicts.
        An empty list is returned when the request fails or the server does
        not answer with HTTP 200.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    try:
        # Without a timeout a stalled connection would block the run forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to retrieve the page. Error: {exc}")
        return []
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return []

    tree = html.fromstring(response.content)
    categories = tree.xpath('//div[contains(@class, "_p13n-zg-nav-tree-all_style_zg-browse-group__88fbz")]//a')
    # Anchors without an href are skipped: concatenating None would raise TypeError.
    return [
        {'category': cat.text_content().strip(), 'link': 'https://www.amazon.nl' + cat.get('href')}
        for cat in categories
        if cat.get('href')
    ]

def _first_match(item, query):
    """Return the first result of the XPath *query* evaluated on *item*, or None."""
    matches = item.xpath(query)
    return matches[0] if matches else None


def extract_product_details(items, category):
    """Turn ranking-page product cards into flat record dicts.

    Each extracted field is also printed as a progress log.

    Args:
        items: Iterable of elements exposing ``xpath(query)``; one per product card.
        category: Category label stored unchanged in every record.

    Returns:
        A list with one dict per item, holding the keys ``category``, ``rank``,
        ``asin``, ``name``, ``title``, ``rating``, ``reviews``, ``symbol``,
        ``value``, ``image``, ``link`` and ``date``. Fields that cannot be
        found hold a placeholder string.
    """
    products = []
    # Today's date, stamped on every record
    today_date = datetime.today().strftime('%Y-%m-%d')

    for index, item in enumerate(items, start=1):
        # ASIN: first data-asin attribute found on the card or its descendants
        product_id = _first_match(item, './/@data-asin')
        if product_id is None:
            product_id = "No ID"
        print(f"Product ID: {product_id}")

        # Rank badge; fall back to the 1-based position in the list
        position = _first_match(item, './/span[@class="zg-bdg-text"]/text()')
        position = str(index) if position is None else position.strip()
        print(f"Posição: {position}")

        # Product image
        image_link = _first_match(item, './/img[contains(@class, "a-dynamic-image")]/@src')
        if image_link is None:
            image_link = "No image link"
        print(f"Image link: {image_link}")

        # Title and product link
        title = _first_match(item, './/a/span/div/text()')
        title = "No title" if title is None else title.strip()
        print(f"Title: {title}")

        link = _first_match(item, './/a[contains(@class, "a-link-normal")]/@href')
        product_link = "No product link" if link is None else "https://www.amazon.nl" + link
        print(f"Product link: {product_link}")

        # Name: for an href shaped like "/<slug>/dp/<asin>/..." the slug is the
        # second piece after splitting on "/". Guard against hrefs that have
        # no such piece instead of raising IndexError.
        name_href = _first_match(item, './/a[@class="a-link-normal aok-block"]/@href')
        pieces = name_href.split('/') if name_href is not None else []
        name = pieces[1] if len(pieces) > 1 and pieces[1] else "No name"
        print(f"Name: {name}")

        # Rating and review count
        rating = _first_match(item, './/span[contains(@class, "a-icon-alt")]/text()')
        rating = "No rating" if rating is None else rating.strip()
        print(f"Rating: {rating}")

        reviews = _first_match(item, './/span[@class="a-size-small"]/text()')
        reviews = "No reviews" if reviews is None else reviews.strip()
        print(f"Reviews: {reviews}")

        # Price, split into currency symbol and numeric text
        price = _first_match(item, './/span[contains(@class, "p13n-sc-price")]/text()')
        if price is None:
            currency_symbol = "Not Available"
            value = "Not Available"
        else:
            price = price.strip()
            # Every character that is not a digit or separator belongs to the
            # symbol. The scraped text keeps whitespace (e.g. a non-breaking
            # space) between symbol and amount, so strip it from the symbol.
            currency_symbol = ''.join(re.findall(r'[^\d.,]', price)).strip()
            value = ''.join(re.findall(r'[\d.,]+', price))
        print(f"Currency Symbol: {currency_symbol}, Value: {value}")

        products.append({
            "category": category,
            "rank": position,
            "asin": product_id,
            "name": name,
            "title": title,
            "rating": rating,
            "reviews": reviews,
            "symbol": currency_symbol,
            "value": value,
            "image": image_link,
            "link": product_link,
            "date": today_date,
        })

    return products

def get_amazon_bestsellers(url, category, retries=5, backoff_factor=0.3):
    """Download one category ranking page and extract its products.

    HTTP 503 answers are retried with exponential backoff. Any other non-200
    status, a network error, or running out of attempts yields an empty list.

    Args:
        url: Address of the category ranking page.
        category: Category label passed on to ``extract_product_details``.
        retries: Maximum number of download attempts.
        backoff_factor: Base delay in seconds; attempt ``i`` waits
            ``backoff_factor * 2 ** i`` before the next try.

    Returns:
        The list of product dicts built by ``extract_product_details``, or
        ``[]`` on failure.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    # Stays None when retries <= 0, so the check below never hits an unbound name.
    response = None
    for attempt in range(retries):
        try:
            # Without a timeout a stalled connection would block the run forever.
            response = requests.get(url, headers=headers, timeout=30)
        except requests.RequestException as exc:
            print(f"Failed to retrieve the page. Error: {exc}")
            return []
        if response.status_code == 200:
            break
        if response.status_code != 503:
            print(f"Failed to retrieve the page. Status code: {response.status_code}")
            return []
        print(f"Failed to retrieve the page. Status code: 503. Retrying {attempt+1}/{retries}...")
        if attempt < retries - 1:
            # Exponential backoff; no point sleeping after the final attempt.
            time.sleep(backoff_factor * (2 ** attempt))

    if response is None or response.status_code != 200:
        print(f"Failed to retrieve the page after {retries} retries.")
        return []

    tree = html.fromstring(response.content)
    items = tree.xpath('//div[contains(@id, "p13n-asin-index")]')
    print(f"Found {len(items)} items in category {category}.")

    return extract_product_details(items, category)

def save_to_excel(products, directory_path):
    """Write the product records to a dated Excel workbook inside *directory_path*.

    Args:
        products: List of record dicts; each dict becomes one row.
        directory_path: Target directory. It is created when missing so that
            a finished scrape is not lost to a missing folder.
    """
    import os  # local import: the cell's import list does not include os

    if directory_path:
        os.makedirs(directory_path, exist_ok=True)
    today_date = datetime.today().strftime('%Y-%m-%d')
    filename = f"{directory_path}/bot_amazon_new_releases_{today_date}.xlsx"
    pd.DataFrame(products).to_excel(filename, index=False)
    print(f"Products saved to {filename}")

if __name__ == "__main__":
    # Entry point: scrape every category listed on the new-releases root page
    # and write all collected rows to a single Excel file.
    base_url = "https://www.amazon.nl/gp/new-releases/"
    category_links = get_category_links(base_url)

    all_products = []
    for category_link in category_links:
        category = category_link['category']
        link = category_link['link']
        print(f"Processing category: {category}")
        products = get_amazon_bestsellers(link, category)
        all_products.extend(products)
        time.sleep(10)  # Longer pause (10 s) between category requests

    # Directory where the Excel file will be saved
    directory_path = "C:/Users/ThiagoBizacha/Desktop/Projeto_Automacao_Coleta_Dados/data/output/bot_amazon"
    save_to_excel(all_products, directory_path)
    print("Finalizado!")


Processing category: Amazon Renewed
Found 18 items in category Amazon Renewed.
Product ID: B0D7MWP97P
Posição: #1
Image link: https://images-eu.ssl-images-amazon.com/images/I/410c51tHtvL._AC_UL300_SR300,200_.jpg
Title: soundcore by Anker Sleep Earbuds (Refurbished)
Product link: https://www.amazon.nl/soundcore-Anker-Sleep-Earbuds-Refurbished/dp/B0D7MWP97P/ref=zg_bsnr_g_amazon-renewed_d_sccl_1/258-5790110-6321151?psc=1
Name: soundcore-Anker-Sleep-Earbuds-Refurbished
Rating: 3,0 van 5 sterren
Reviews: 2
Currency Symbol: € , Value: 119,99
Product ID: B0CYNP2GNN
Posição: #2
Image link: https://images-eu.ssl-images-amazon.com/images/I/51OM0f8R61L._AC_UL300_SR300,200_.jpg
Title: soundcore P40i door Anker, draadloze oordopjes met omgevingsafhankelijke ruisonderdrukking, zware bassen, 60 u speeltijd, 2-in-1 doosje en telefoonstandaard, IPX5, draadloos opladen, Bluetooth 5.3 (Refurbished)
Product link: https://www.amazon.nl/soundcore-omgevingsafhankelijke-ruisonderdrukking-telefoonstandaard-Ref

## MOVERS AND SHAKERS

In [5]:
import requests
from lxml import html
import pandas as pd
import time
import re
from datetime import datetime

def get_category_links(url):
    """Collect the category links from the navigation tree of a ranking page.

    Args:
        url: Address of the Amazon ranking page to read.

    Returns:
        A list of ``{'category': <label>, 'link': <absolute URL>}`` dicts.
        An empty list is returned when the request fails or the server does
        not answer with HTTP 200.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    try:
        # Without a timeout a stalled connection would block the run forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to retrieve the page. Error: {exc}")
        return []
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return []

    tree = html.fromstring(response.content)
    categories = tree.xpath('//div[contains(@class, "_p13n-zg-nav-tree-all_style_zg-browse-group__88fbz")]//a')
    # Anchors without an href are skipped: concatenating None would raise TypeError.
    return [
        {'category': cat.text_content().strip(), 'link': 'https://www.amazon.nl' + cat.get('href')}
        for cat in categories
        if cat.get('href')
    ]

def _first_match(item, query):
    """Return the first result of the XPath *query* evaluated on *item*, or None."""
    matches = item.xpath(query)
    return matches[0] if matches else None


def extract_product_details(items, category):
    """Turn ranking-page product cards into flat record dicts.

    Each extracted field is also printed as a progress log.

    Args:
        items: Iterable of elements exposing ``xpath(query)``; one per product card.
        category: Category label stored unchanged in every record.

    Returns:
        A list with one dict per item, holding the keys ``category``, ``rank``,
        ``asin``, ``name``, ``title``, ``rating``, ``reviews``, ``symbol``,
        ``value``, ``image``, ``link`` and ``date``. Fields that cannot be
        found hold a placeholder string.
    """
    products = []
    # Today's date, stamped on every record
    today_date = datetime.today().strftime('%Y-%m-%d')

    for index, item in enumerate(items, start=1):
        # ASIN: first data-asin attribute found on the card or its descendants
        product_id = _first_match(item, './/@data-asin')
        if product_id is None:
            product_id = "No ID"
        print(f"Product ID: {product_id}")

        # Rank badge; fall back to the 1-based position in the list
        position = _first_match(item, './/span[@class="zg-bdg-text"]/text()')
        position = str(index) if position is None else position.strip()
        print(f"Posição: {position}")

        # Product image
        image_link = _first_match(item, './/img[contains(@class, "a-dynamic-image")]/@src')
        if image_link is None:
            image_link = "No image link"
        print(f"Image link: {image_link}")

        # Title and product link
        title = _first_match(item, './/a/span/div/text()')
        title = "No title" if title is None else title.strip()
        print(f"Title: {title}")

        link = _first_match(item, './/a[contains(@class, "a-link-normal")]/@href')
        product_link = "No product link" if link is None else "https://www.amazon.nl" + link
        print(f"Product link: {product_link}")

        # Name: for an href shaped like "/<slug>/dp/<asin>/..." the slug is the
        # second piece after splitting on "/". Guard against hrefs that have
        # no such piece instead of raising IndexError.
        name_href = _first_match(item, './/a[@class="a-link-normal aok-block"]/@href')
        pieces = name_href.split('/') if name_href is not None else []
        name = pieces[1] if len(pieces) > 1 and pieces[1] else "No name"
        print(f"Name: {name}")

        # Rating and review count
        rating = _first_match(item, './/span[contains(@class, "a-icon-alt")]/text()')
        rating = "No rating" if rating is None else rating.strip()
        print(f"Rating: {rating}")

        reviews = _first_match(item, './/span[@class="a-size-small"]/text()')
        reviews = "No reviews" if reviews is None else reviews.strip()
        print(f"Reviews: {reviews}")

        # Price, split into currency symbol and numeric text
        price = _first_match(item, './/span[contains(@class, "p13n-sc-price")]/text()')
        if price is None:
            currency_symbol = "Not Available"
            value = "Not Available"
        else:
            price = price.strip()
            # Every character that is not a digit or separator belongs to the
            # symbol. The scraped text keeps whitespace (e.g. a non-breaking
            # space) between symbol and amount, so strip it from the symbol.
            currency_symbol = ''.join(re.findall(r'[^\d.,]', price)).strip()
            value = ''.join(re.findall(r'[\d.,]+', price))
        print(f"Currency Symbol: {currency_symbol}, Value: {value}")

        products.append({
            "category": category,
            "rank": position,
            "asin": product_id,
            "name": name,
            "title": title,
            "rating": rating,
            "reviews": reviews,
            "symbol": currency_symbol,
            "value": value,
            "image": image_link,
            "link": product_link,
            "date": today_date,
        })

    return products

def get_amazon_bestsellers(url, category, retries=5, backoff_factor=0.3):
    """Download one category ranking page and extract its products.

    HTTP 503 answers are retried with exponential backoff. Any other non-200
    status, a network error, or running out of attempts yields an empty list.

    Args:
        url: Address of the category ranking page.
        category: Category label passed on to ``extract_product_details``.
        retries: Maximum number of download attempts.
        backoff_factor: Base delay in seconds; attempt ``i`` waits
            ``backoff_factor * 2 ** i`` before the next try.

    Returns:
        The list of product dicts built by ``extract_product_details``, or
        ``[]`` on failure.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    # Stays None when retries <= 0, so the check below never hits an unbound name.
    response = None
    for attempt in range(retries):
        try:
            # Without a timeout a stalled connection would block the run forever.
            response = requests.get(url, headers=headers, timeout=30)
        except requests.RequestException as exc:
            print(f"Failed to retrieve the page. Error: {exc}")
            return []
        if response.status_code == 200:
            break
        if response.status_code != 503:
            print(f"Failed to retrieve the page. Status code: {response.status_code}")
            return []
        print(f"Failed to retrieve the page. Status code: 503. Retrying {attempt+1}/{retries}...")
        if attempt < retries - 1:
            # Exponential backoff; no point sleeping after the final attempt.
            time.sleep(backoff_factor * (2 ** attempt))

    if response is None or response.status_code != 200:
        print(f"Failed to retrieve the page after {retries} retries.")
        return []

    tree = html.fromstring(response.content)
    items = tree.xpath('//div[contains(@id, "p13n-asin-index")]')
    print(f"Found {len(items)} items in category {category}.")

    return extract_product_details(items, category)

def save_to_excel(products, directory_path):
    """Write the product records to a dated Excel workbook inside *directory_path*.

    Args:
        products: List of record dicts; each dict becomes one row.
        directory_path: Target directory. It is created when missing so that
            a finished scrape is not lost to a missing folder.
    """
    import os  # local import: the cell's import list does not include os

    if directory_path:
        os.makedirs(directory_path, exist_ok=True)
    today_date = datetime.today().strftime('%Y-%m-%d')
    filename = f"{directory_path}/bot_amazon_movers_and_shakers_{today_date}.xlsx"
    pd.DataFrame(products).to_excel(filename, index=False)
    print(f"Products saved to {filename}")

if __name__ == "__main__":
    # Entry point: scrape every category listed on the movers-and-shakers root
    # page and write all collected rows to a single Excel file.
    base_url = "https://www.amazon.nl/gp/movers-and-shakers/"
    category_links = get_category_links(base_url)

    all_products = []
    for category_link in category_links:
        category = category_link['category']
        link = category_link['link']
        print(f"Processing category: {category}")
        products = get_amazon_bestsellers(link, category)
        all_products.extend(products)
        time.sleep(10)  # Longer pause (10 s) between category requests

    # Directory where the Excel file will be saved
    directory_path = "C:/Users/ThiagoBizacha/Desktop/Projeto_Automacao_Coleta_Dados/data/output/bot_amazon"
    save_to_excel(all_products, directory_path)
    print("Finalizado!")


Processing category: Amazon Renewed
Found 30 items in category Amazon Renewed.
Product ID: B0B63GDZ1T
Posição: #1
Image link: https://images-eu.ssl-images-amazon.com/images/I/81Ll9FCl63L._AC_UL300_SR300,200_.jpg
Title: OPPO Reno6 Pro Smartphone 5G ontgrendeld, mobiele telefoon, 5G, 12 GB RAM, 256 GB geheugen, Sony IMX766 50 MP, portretmodus, video, snel opladen, hoge batterijduur, blauw (Refurbished)
Product link: https://www.amazon.nl/OPPO-ontgrendeld-portretmodus-batterijduur-Refurbished/dp/B0B63GDZ1T/ref=zg_bsms_g_amazon-renewed_d_sccl_1/257-2238487-1572854?psc=1
Name: OPPO-ontgrendeld-portretmodus-batterijduur-Refurbished
Rating: 5,0 van 5 sterren
Reviews: 2
Currency Symbol: € , Value: 244,00
Product ID: B0CXF6P88Q
Posição: #2
Image link: https://images-eu.ssl-images-amazon.com/images/I/61OXXVWSuAL._AC_UL300_SR300,200_.jpg
Title: Dell Desktop RGB Gaming PC, Intel Quad Core I5 tot 3,6 GHz, Radeon RX 550 4 GB GDDR5, 16 GB RAM, 512 GB SSD, WiFi 600 M, Bluetooth 5.0, W10P65 (vernieuwd)

## BACKUP

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import re

def get_amazon_bestsellers(url, retries=5, backoff_factor=0.3):
    """Download an Amazon ranking page and extract the carousel products.

    HTTP 503 answers are retried with exponential backoff. Any other non-200
    status, a network error, or running out of attempts yields an empty list.
    Each extracted field is also printed as a progress log.

    Args:
        url: Address of the page to scrape.
        retries: Maximum number of download attempts.
        backoff_factor: Base delay in seconds; attempt ``i`` waits
            ``backoff_factor * 2 ** i`` before the next try.

    Returns:
        A list of dicts with the keys ``title``, ``currency_symbol``,
        ``price_value``, ``rating``, ``image_link`` and ``product_link``.
        Fields that cannot be found hold a placeholder string.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    # Stays None when retries <= 0, so the check below never hits an unbound name.
    response = None
    for attempt in range(retries):
        try:
            # Without a timeout a stalled connection would block the run forever.
            response = requests.get(url, headers=headers, timeout=30)
        except requests.RequestException as exc:
            print(f"Failed to retrieve the page. Error: {exc}")
            return []
        if response.status_code == 200:
            break
        if response.status_code != 503:
            print(f"Failed to retrieve the page. Status code: {response.status_code}")
            return []
        print(f"Failed to retrieve the page. Status code: 503. Retrying {attempt+1}/{retries}...")
        if attempt < retries - 1:
            # Exponential backoff; no point sleeping after the final attempt.
            time.sleep(backoff_factor * (2 ** attempt))

    if response is None or response.status_code != 200:
        print(f"Failed to retrieve the page after {retries} retries.")
        return []

    soup = BeautifulSoup(response.content, "html.parser")
    products = []

    # One <li class="a-carousel-card"> per product; adjust the selector if the
    # page structure changes.
    items = soup.select("li.a-carousel-card")
    print(f"Found {len(items)} items.")

    for item in items:
        title_tag = item.select_one("div.p13n-sc-truncate-desktop-type2")
        price_tag = item.select_one("span._cDEzb_p13n-sc-price_3mJ9Z")
        rating_tag = item.select_one("span.a-icon-alt")
        image_tag = item.select_one("img.a-dynamic-image")
        link_tag = item.select_one("a.a-link-normal")

        title = title_tag.get_text(strip=True) if title_tag else "No title"
        print(f"Title: {title}")

        currency_symbol = "Not Available"
        value = "Not Available"
        if price_tag:
            price = price_tag.get_text(strip=True)
            # Split currency marker and amount. re.search avoids the IndexError
            # that indexing an empty findall() result would raise.
            symbol_match = re.search(r'[^\d.,]+', price)
            value_match = re.search(r'[\d.,]+', price)
            if symbol_match and symbol_match.group().strip():
                # Drop the whitespace left between symbol and amount.
                currency_symbol = symbol_match.group().strip()
            if value_match:
                value = value_match.group()
        print(f"Currency Symbol: {currency_symbol}, Value: {value}")

        rating = rating_tag.get_text(strip=True) if rating_tag else "No rating"
        print(f"Rating: {rating}")

        image_link = image_tag.get('src') if image_tag else "No image link"
        print(f"Image link: {image_link}")

        # An anchor without href would make the concatenation raise TypeError.
        href = link_tag.get('href') if link_tag else None
        product_link = "https://www.amazon.nl" + href if href else "No product link"
        print(f"Product link: {product_link}")

        products.append({
            "title": title,
            "currency_symbol": currency_symbol,
            "price_value": value,
            "rating": rating,
            "image_link": image_link,
            "product_link": product_link,
        })

    return products

def save_to_excel(products, filename):
    """Write the product records to the Excel workbook *filename* and report it."""
    frame = pd.DataFrame(products)
    frame.to_excel(filename, index=False)
    print(f"Products saved to {filename}")

if __name__ == "__main__":
    # Entry point: scrape the new-releases landing page and save the rows to
    # an Excel file in the current working directory.
    url = "https://www.amazon.nl/gp/new-releases"
    products = get_amazon_bestsellers(url)
    save_to_excel(products, "amazon_new_releases.xlsx")


Found 36 items.
Title: BSITSSS Watersensorische mat voor huisdier spelen, watersensorische speelmat voor katten en honden, verdikte sensorische watermat comfort, verkoelende kat waterspeelmat, speelgoed, grappige watermat
Currency Symbol: € , Value: 10,20
Rating: 2,1 van 5 sterren
Image link: https://images-eu.ssl-images-amazon.com/images/I/71Yr73bmSpL._AC_UL225_SR225,160_.jpg
Product link: https://www.amazon.nl/BSITSSS-Watersensorische-watersensorische-sensorische-waterspeelmat/dp/B0D7DKB389/ref=zg_bsnr_c_pet-supplies_d_sccl_1/259-0152082-0893144?pd_rd_w=VaZWc&content-id=amzn1.sym.f882a860-19f1-44df-b232-144e06421629&pf_rd_p=f882a860-19f1-44df-b232-144e06421629&pf_rd_r=JS8WCYZ26C2D8XAD6R28&pd_rd_wg=qXOon&pd_rd_r=f3d5b8fe-9092-44a0-91b0-c0b73434e4c1&pd_rd_i=B0D7DKB389&psc=1
Title: Draadloze Kat Water Fontein: Batterij Betrokken Roestvrij staal Pet Fontein, Automatische Draadloze Waterdispenser Binnenshuis, Metalen Kraan Fles voor Drinken, Oplaadbare Hond Water Bowl met 1 Filter
Currenc

### V2

In [8]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import re
from lxml import html

def get_category_links(url):
    """Collect the category links from the navigation tree of a ranking page.

    Args:
        url: Address of the Amazon ranking page to read.

    Returns:
        A list of ``{'category': <label>, 'link': <absolute URL>}`` dicts.
        An empty list is returned when the request fails or the server does
        not answer with HTTP 200.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    try:
        # Without a timeout a stalled connection would block the run forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to retrieve the page. Error: {exc}")
        return []
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.content, "html.parser")
    categories = soup.select('div._p13n-zg-nav-tree-all_style_zg-browse-group__88fbz a')
    # Anchors without an href are skipped: cat['href'] would raise KeyError.
    return [
        {'category': cat.get_text(strip=True), 'link': 'https://www.amazon.nl' + cat['href']}
        for cat in categories
        if cat.get('href')
    ]

def get_amazon_bestsellers(url, category, retries=5, backoff_factor=0.3):
    """Download one category ranking page and extract its product cards.

    HTTP 503 answers are retried with exponential backoff. Any other non-200
    status, a network error, or running out of attempts yields an empty list.
    Each extracted field is also printed as a progress log.

    Args:
        url: Address of the category ranking page.
        category: Category label stored unchanged in every record.
        retries: Maximum number of download attempts.
        backoff_factor: Base delay in seconds; attempt ``i`` waits
            ``backoff_factor * 2 ** i`` before the next try.

    Returns:
        A list of dicts with the keys ``name``, ``title``, ``brand``,
        ``currency_symbol``, ``price_value``, ``rating``, ``reviews``,
        ``platform``, ``image_link``, ``product_link`` and ``category``.
        Fields that cannot be found hold a placeholder string.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    # Stays None when retries <= 0, so the check below never hits an unbound name.
    response = None
    for attempt in range(retries):
        try:
            # Without a timeout a stalled connection would block the run forever.
            response = requests.get(url, headers=headers, timeout=30)
        except requests.RequestException as exc:
            print(f"Failed to retrieve the page. Error: {exc}")
            return []
        if response.status_code == 200:
            break
        if response.status_code != 503:
            print(f"Failed to retrieve the page. Status code: {response.status_code}")
            return []
        print(f"Failed to retrieve the page. Status code: 503. Retrying {attempt+1}/{retries}...")
        if attempt < retries - 1:
            # Exponential backoff; no point sleeping after the final attempt.
            time.sleep(backoff_factor * (2 ** attempt))

    if response is None or response.status_code != 200:
        print(f"Failed to retrieve the page after {retries} retries.")
        return []

    tree = html.fromstring(response.content)
    products = []

    items = tree.xpath('//div[@class="zg-grid-general-faceout"]')
    print(f"Found {len(items)} items in category {category}.")

    # Title candidates, tried in order until one of them matches.
    title_queries = (
        './/div[contains(@class, "p13n-sc-css-line-clamp-1")]/text()',
        './/span[contains(@class, "p13n-sc-css-line-clamp")]/text()',
        './/h2[contains(@class, "p13n-sc-css-line-clamp")]/text()',
        './/h3[contains(@class, "p13n-sc-css-line-clamp")]/text()',
    )

    for item in items:
        name_href = item.xpath('.//a[@class="a-link-normal aok-block"]/@href')
        # brand and platform are read from the same nodes: brand is the second
        # text match, platform the last one.
        text_nodes = item.xpath('.//span[contains(@class, "p13n-sc-text")]/text()')
        price = item.xpath('.//span[contains(@class, "p13n-sc-price") or contains(@class, "p13n-sc-price-3mJ9Z")]/text()')
        rating = item.xpath('.//span[contains(@class, "a-icon-alt")]/text()')
        reviews = item.xpath('.//a[contains(@class, "a-size-small") and contains(@class, "a-link-normal")]/text()')
        image = item.xpath('.//img[contains(@class, "a-dynamic-image") or contains(@class, "s-image")]/@src')
        link = item.xpath('.//a[contains(@class, "a-link-normal") and contains(@class, "aok-block")]/@href')

        # For an href shaped like "/<slug>/dp/<asin>/..." the slug is the second
        # piece after splitting on "/"; guard against hrefs without one.
        pieces = name_href[0].split('/') if name_href else []
        name = pieces[1] if len(pieces) > 1 and pieces[1] else "No name"
        print(f"Name: {name}")

        title = "No title"
        for query in title_queries:
            found = item.xpath(query)
            if found:
                title = found[0].strip()
                break
        print(f"Title: {title}")

        brand = text_nodes[1].strip() if len(text_nodes) > 1 else "No brand"
        print(f"Brand: {brand}")

        currency_symbol = "Not Available"
        value = "Not Available"
        if price:
            price_text = price[0].strip()
            # re.search avoids the IndexError that indexing an empty findall()
            # result would raise for a price without symbol or digits.
            symbol_match = re.search(r'[^\d.,]+', price_text)
            value_match = re.search(r'[\d.,]+', price_text)
            if symbol_match and symbol_match.group().strip():
                # Drop the whitespace left between symbol and amount.
                currency_symbol = symbol_match.group().strip()
            if value_match:
                value = value_match.group()
        print(f"Currency Symbol: {currency_symbol}, Value: {value}")

        rating = rating[0].strip() if rating else "No rating"
        print(f"Rating: {rating}")

        reviews = reviews[0].strip() if reviews else "No reviews"
        print(f"Reviews: {reviews}")

        platform = text_nodes[-1].strip() if text_nodes else "No platform"
        print(f"Platform: {platform}")

        image_link = image[0] if image else "No image link"
        print(f"Image link: {image_link}")

        product_link = "https://www.amazon.nl" + link[0] if link else "No product link"
        print(f"Product link: {product_link}")

        products.append({
            "name": name,
            "title": title,
            "brand": brand,
            "currency_symbol": currency_symbol,
            "price_value": value,
            "rating": rating,
            "reviews": reviews,
            "platform": platform,
            "image_link": image_link,
            "product_link": product_link,
            "category": category,
        })

    return products

def save_to_excel(products, filename):
    """Write the product records to the Excel workbook *filename* and report it."""
    frame = pd.DataFrame(products)
    frame.to_excel(filename, index=False)
    print(f"Products saved to {filename}")

if __name__ == "__main__":
    # Entry point: scrape every category listed on the best-sellers root page
    # and write all collected rows to one Excel file in the working directory.
    base_url = "https://www.amazon.nl/gp/bestsellers/"
    category_links = get_category_links(base_url)

    all_products = []
    for category_link in category_links:
        category = category_link['category']
        link = category_link['link']
        print(f"Processing category: {category}")
        products = get_amazon_bestsellers(link, category)
        all_products.extend(products)
        time.sleep(10)  # Longer pause (10 s) between category requests

    save_to_excel(all_products, "amazon_bestsellers_by_category.xlsx")
    print("Finalizado!")


Processing category: Amazon Renewed
Found 30 items in category Amazon Renewed.
Name: Sony-Mdr-Zx110-Opvouwbare-Instapkoptelefoon-Uitstekend
Title: No title
Brand: No brand
Currency Symbol: € , Value: 14,95
Rating: 4,5 van 5 sterren
Reviews: No reviews
Platform: No platform
Image link: https://images-eu.ssl-images-amazon.com/images/I/61ZbQQiPo4L._AC_UL300_SR300,200_.jpg
Product link: https://www.amazon.nl/Sony-Mdr-Zx110-Opvouwbare-Instapkoptelefoon-Uitstekend/dp/B00NBR7962/ref=zg_bs_g_amazon-renewed_d_sccl_1/261-9612622-4262127?psc=1
Name: Apple-Magic-Keyboard-voor-9%E2%80%91inch
Title: No title
Brand: No brand
Currency Symbol: € , Value: 229,89
Rating: 4,4 van 5 sterren
Reviews: No reviews
Platform: No platform
Image link: https://images-eu.ssl-images-amazon.com/images/I/81UDF62AqHS._AC_UL300_SR300,200_.jpg
Product link: https://www.amazon.nl/Apple-Magic-Keyboard-voor-9%E2%80%91inch/dp/B0B2XSLDL7/ref=zg_bs_g_amazon-renewed_d_sccl_2/261-9612622-4262127?psc=1
Name: Seagate-Enterprise-Cap

KeyboardInterrupt: 

In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import re
from lxml import html

def get_category_links(url):
    """Collect the category links shown in the best-sellers navigation tree.

    Args:
        url: Address of the best-sellers landing page.

    Returns:
        A list of ``{'category': <link text>, 'link': <absolute URL>}`` dicts.
        An empty list is returned when the request fails or the response
        status is not 200.
    """
    # Imported locally so this function does not depend on the cell's imports.
    from urllib.parse import urljoin

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to retrieve the page. Error: {exc}")
        return []
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.content, "html.parser")
    categories = soup.select('div._p13n-zg-nav-tree-all_style_zg-browse-group__88fbz a')

    # Anchors without an href are skipped; urljoin handles both relative and
    # already-absolute hrefs instead of blindly prefixing the host.
    return [
        {
            'category': cat.get_text(strip=True),
            'link': urljoin('https://www.amazon.nl', cat['href']),
        }
        for cat in categories
        if cat.get('href')
    ]

def get_amazon_bestsellers(url, category, retries=5, backoff_factor=0.3, timeout=30):
    """Download one best-sellers category page and build a record per product.

    Args:
        url: Address of the category page to fetch.
        category: Category label; printed in progress output and stored on
            every record.
        retries: Maximum number of request attempts. With 0 no request is
            made and an empty list is returned.
        backoff_factor: Base delay in seconds; the wait before the next
            attempt is ``backoff_factor * 2 ** attempt``.
        timeout: Seconds to wait for the server before an attempt is treated
            as failed.

    Returns:
        A list of dicts (one per product tile found), or an empty list when
        the page could not be retrieved. The brand is printed but is not part
        of the returned records.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    # Initialised up front so the check after the loop is safe when retries == 0.
    response = None
    for attempt in range(retries):
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            # Network errors and timeouts are retried like a 503.
            response = None
            print(f"Request failed: {exc}. Retrying {attempt + 1}/{retries}...")
        else:
            if response.status_code == 200:
                break
            if response.status_code != 503:
                print(f"Failed to retrieve the page. Status code: {response.status_code}")
                return []
            print(f"Failed to retrieve the page. Status code: 503. Retrying {attempt + 1}/{retries}...")
        # No point in waiting once the last attempt has been used.
        if attempt < retries - 1:
            time.sleep(backoff_factor * (2 ** attempt))  # Exponential backoff

    if response is None or response.status_code != 200:
        print(f"Failed to retrieve the page after {retries} retries.")
        return []

    tree = html.fromstring(response.content)
    products = []

    items = tree.xpath('//div[@class="zg-grid-general-faceout"]')
    print(f"Found {len(items)} items in category {category}.")

    # Title selectors are tried in this order; the first one that matches wins.
    title_queries = (
        './/div[contains(@class, "p13n-sc-css-line-clamp-1")]/text()',
        './/span[contains(@class, "p13n-sc-css-line-clamp")]/text()',
        './/h2[contains(@class, "p13n-sc-css-line-clamp")]/text()',
        './/h3[contains(@class, "p13n-sc-css-line-clamp")]/text()',
    )

    for index, item in enumerate(items, start=1):
        title_nodes = []
        for query in title_queries:
            title_nodes = item.xpath(query)
            if title_nodes:
                break
        brand = item.xpath('.//span[contains(@class, "p13n-sc-text")]/text()')
        price = item.xpath('.//span[contains(@class, "p13n-sc-price") or contains(@class, "p13n-sc-price-3mJ9Z")]/text()')
        rating = item.xpath('.//span[contains(@class, "a-icon-alt")]/text()')
        reviews = item.xpath('.//a[contains(@class, "a-size-small") and contains(@class, "a-link-normal")]/text()')
        image = item.xpath('.//img[contains(@class, "a-dynamic-image") or contains(@class, "s-image")]/@src')
        link = item.xpath('.//a[contains(@class, "a-link-normal") and contains(@class, "aok-block")]/@href')
        name_href = item.xpath('.//a[@class="a-link-normal aok-block"]/@href')

        # The name is the first path segment of the product href
        # ("/<name>/dp/..."); hrefs without a slash fall back to the default.
        name = "No name"
        if name_href:
            segments = name_href[0].split('/')
            if len(segments) > 1:
                name = segments[1]
        print(f"Name: {name}")

        title = title_nodes[0].strip() if title_nodes else "No title"
        print(f"Title: {title}")

        # The second matching text node is used as the brand.
        brand = brand[1].strip() if len(brand) > 1 else "No brand"
        print(f"Brand: {brand}")

        currency_symbol = "Not Available"
        value = "Not Available"
        if price:
            price_text = price[0].strip()
            symbol_match = re.search(r'[^\d.,]+', price_text)
            value_match = re.search(r'[\d.,]+', price_text)
            if symbol_match:
                # Drop the (non-breaking) space that separates symbol and amount.
                currency_symbol = symbol_match.group().strip() or "Not Available"
            if value_match:
                value = value_match.group()
        print(f"Currency Symbol: {currency_symbol}, Value: {value}")

        rating = rating[0].strip() if rating else "No rating"
        print(f"Rating: {rating}")

        reviews = reviews[0].strip() if reviews else "No reviews"
        print(f"Reviews: {reviews}")

        image_link = image[0] if image else "No image link"
        print(f"Image link: {image_link}")

        product_link = "https://www.amazon.nl" + link[0] if link else "No product link"
        print(f"Product link: {product_link}")

        products.append({
            "Produto Individual": name,
            "Posição na Lista": index,
            "Imagem do Produto": image_link,
            "Título": title,
            "Link": product_link,
            "Numero de estrelas": rating,
            "Numero de avaliações": reviews,
            "Preço": f"{currency_symbol} {value}",
            "category": category
        })

    return products

def save_to_excel(products, filename):
    """Dump the product records into an Excel file and report where it went.

    Args:
        products: Records used to build the ``pd.DataFrame``.
        filename: Target path given to ``DataFrame.to_excel``.
    """
    table = pd.DataFrame(products)
    # The row index is not written to the sheet.
    table.to_excel(filename, index=False)
    print(f"Products saved to {filename}")

if __name__ == "__main__":
    # Script entry point: list the best-seller categories, scrape each one,
    # and write every collected row to a single Excel file.
    base_url = "https://www.amazon.nl/gp/bestsellers/"
    category_links = get_category_links(base_url)

    all_products = []
    try:
        for category_link in category_links:
            category = category_link['category']
            link = category_link['link']
            print(f"Processing category: {category}")
            products = get_amazon_bestsellers(link, category)
            all_products.extend(products)
            # Fixed pause between category requests (throttling, not a speed-up).
            time.sleep(10)
    finally:
        # Save whatever has been collected so far, even when the run is
        # interrupted (e.g. KeyboardInterrupt) or a category raises midway.
        save_to_excel(all_products, "amazon_bestsellers_by_category.xlsx")

    print("Finalizado!")


Processing category: Amazon Renewed
Found 30 items in category Amazon Renewed.
Name: Apple-10-2-inch-Wi-Fi-Spacezwart-Refurbished
Title: No title
Brand: No brand
Currency Symbol: € , Value: 244,89
Rating: 4,4 van 5 sterren
Reviews: No reviews
Image link: https://images-eu.ssl-images-amazon.com/images/I/71F8udBqz3L._AC_UL300_SR300,200_.jpg
Product link: https://www.amazon.nl/Apple-10-2-inch-Wi-Fi-Spacezwart-Refurbished/dp/B08N89P2QZ/ref=zg_bs_g_amazon-renewed_d_sccl_1/258-7813849-1708323?psc=1
Name: Apple-iPhone-128GB-Sierra-Blue
Title: No title
Brand: No brand
Currency Symbol: € , Value: 595,00
Rating: 3,9 van 5 sterren
Reviews: No reviews
Image link: https://images-eu.ssl-images-amazon.com/images/I/61RAsVPOjxL._AC_UL300_SR300,200_.jpg
Product link: https://www.amazon.nl/Apple-iPhone-128GB-Sierra-Blue/dp/B09ML78C2J/ref=zg_bs_g_amazon-renewed_d_sccl_2/258-7813849-1708323?psc=1
Name: Apple-iPad-Air-64GB-Wi-Fi
Title: No title
Brand: No brand
Currency Symbol: € , Value: 211,89
Rating: 4,3 

KeyboardInterrupt: 

In [19]:
import requests
from lxml import html
import pandas as pd
import time
import re

def get_category_links(url):
    """Collect the category links shown in the best-sellers navigation tree.

    Args:
        url: Address of the best-sellers landing page.

    Returns:
        A list of ``{'category': <link text>, 'link': <absolute URL>}`` dicts.
        An empty list is returned when the request fails or the response
        status is not 200.
    """
    # Imported locally so this function does not depend on the cell's imports.
    from urllib.parse import urljoin

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to retrieve the page. Error: {exc}")
        return []
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return []

    tree = html.fromstring(response.content)
    categories = tree.xpath('//div[contains(@class, "_p13n-zg-nav-tree-all_style_zg-browse-group__88fbz")]//a')

    # cat.get('href') is None for anchors without an href; those are skipped
    # rather than crashing on str + None. urljoin copes with relative and
    # already-absolute hrefs alike.
    return [
        {
            'category': cat.text_content().strip(),
            'link': urljoin('https://www.amazon.nl', cat.get('href')),
        }
        for cat in categories
        if cat.get('href')
    ]

def extract_product_details(items):
    """Turn parsed product elements into flat record dicts.

    Each field is looked up with a relative XPath query on the element; when a
    query returns nothing, a "No ..." / "Not Available" placeholder is stored.
    Every extracted field is also printed as progress output.

    Args:
        items: Iterable of elements exposing an ``xpath(query)`` method that
            returns a list of strings.

    Returns:
        A list with one dict per element, in input order.
    """
    products = []

    for index, item in enumerate(items, start=1):
        # Product identifier, taken from the first data-asin attribute found.
        product_id = item.xpath('.//@data-asin')
        product_id = product_id[0] if product_id else "No ID"
        print(f"Product ID: {product_id}")

        # Rank badge text; falls back to the 1-based position in `items`.
        position = item.xpath('.//span[@class="zg-bdg-text"]/text()')
        position = position[0].strip() if position else str(index)
        print(f"Posição: {position}")

        # Product image.
        image = item.xpath('.//img[contains(@class, "a-dynamic-image")]/@src')
        image_link = image[0] if image else "No image link"
        print(f"Image link: {image_link}")

        # Title and link.
        title = item.xpath('.//a/span/div/text()')
        title = title[0].strip() if title else "No title"
        print(f"Title: {title}")

        link = item.xpath('.//a[contains(@class, "a-link-normal")]/@href')
        product_link = "https://www.amazon.nl" + link[0] if link else "No product link"
        print(f"Product link: {product_link}")

        # Rating and review count.
        rating = item.xpath('.//span[contains(@class, "a-icon-alt")]/text()')
        rating = rating[0].strip() if rating else "No rating"
        print(f"Rating: {rating}")

        reviews = item.xpath('.//span[@class="a-size-small"]/text()')
        reviews = reviews[0].strip() if reviews else "No reviews"
        print(f"Reviews: {reviews}")

        # Price, split into currency symbol and numeric text.
        price = item.xpath('.//span[contains(@class, "p13n-sc-price")]/text()')
        if price:
            price = price[0].strip()
            # Everything that is not a digit, dot or comma is the symbol. The
            # strip removes the (non-breaking) space between symbol and amount
            # so the stored symbol is "€" rather than "€\xa0".
            currency_symbol = ''.join(re.findall(r'[^\d.,]', price)).strip()
            value = ''.join(re.findall(r'[\d.,]+', price))
        else:
            currency_symbol = "Not Available"
            value = "Not Available"
        print(f"Currency Symbol: {currency_symbol}, Value: {value}")

        products.append({
            "Produto Individual": product_id,
            "Posição na Lista": position,
            "Imagem do Produto": image_link,
            "Título": title,
            "Link": product_link,
            "Numero de estrelas": rating,
            "Numero de avaliações": reviews,
            "Símbolo da Moeda": currency_symbol,
            "Valor": value
        })

    return products

def get_amazon_bestsellers(url, category, retries=5, backoff_factor=0.3, timeout=30):
    """Download one best-sellers category page and extract its products.

    Args:
        url: Address of the category page to fetch.
        category: Category label, used in the progress output.
        retries: Maximum number of request attempts. With 0 no request is
            made and an empty list is returned.
        backoff_factor: Base delay in seconds; the wait before the next
            attempt is ``backoff_factor * 2 ** attempt``.
        timeout: Seconds to wait for the server before an attempt is treated
            as failed.

    Returns:
        The list produced by ``extract_product_details`` for the product
        elements on the page, or an empty list when the page could not be
        retrieved.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    # Initialised up front so the check after the loop is safe when retries == 0.
    response = None
    for attempt in range(retries):
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            # Network errors and timeouts are retried like a 503.
            response = None
            print(f"Request failed: {exc}. Retrying {attempt + 1}/{retries}...")
        else:
            if response.status_code == 200:
                break
            if response.status_code != 503:
                print(f"Failed to retrieve the page. Status code: {response.status_code}")
                return []
            print(f"Failed to retrieve the page. Status code: 503. Retrying {attempt + 1}/{retries}...")
        # No point in waiting once the last attempt has been used.
        if attempt < retries - 1:
            time.sleep(backoff_factor * (2 ** attempt))  # Exponential backoff

    if response is None or response.status_code != 200:
        print(f"Failed to retrieve the page after {retries} retries.")
        return []

    tree = html.fromstring(response.content)
    # Each product element is a div whose id contains "p13n-asin-index".
    items = tree.xpath('//div[contains(@id, "p13n-asin-index")]')
    print(f"Found {len(items)} items in category {category}.")

    products = extract_product_details(items)
    return products

def save_to_excel(products, filename):
    """Store the product records in an Excel file at ``filename``.

    Args:
        products: Records converted to a ``pd.DataFrame`` before writing.
        filename: Output path for ``DataFrame.to_excel``.
    """
    frame = pd.DataFrame(data=products)
    # Leave the DataFrame's row index out of the exported sheet.
    frame.to_excel(filename, index=False)
    print(f"Products saved to {filename}")

if __name__ == "__main__":
    # Script entry point: list the best-seller categories, scrape each one,
    # and write every collected row to a single Excel file.
    base_url = "https://www.amazon.nl/gp/bestsellers/"
    category_links = get_category_links(base_url)

    all_products = []
    try:
        for category_link in category_links:
            category = category_link['category']
            link = category_link['link']
            print(f"Processing category: {category}")
            products = get_amazon_bestsellers(link, category)
            all_products.extend(products)
            # Fixed pause between category requests (throttling, not a speed-up).
            time.sleep(10)
    finally:
        # Save whatever has been collected so far, even when the run is
        # interrupted (e.g. KeyboardInterrupt) or a category raises midway.
        save_to_excel(all_products, "amazon_bestsellers_by_category.xlsx")

    print("Finalizado!")


Processing category: Amazon Renewed
Found 30 items in category Amazon Renewed.
Product ID: B08N89P2QZ
Posição: #1
Image link: https://images-eu.ssl-images-amazon.com/images/I/71F8udBqz3L._AC_UL300_SR300,200_.jpg
Title: 2020 Apple iPad (10.2-inch, Wi-Fi, 32GB) Spacezwart (Refurbished)
Product link: https://www.amazon.nl/Apple-10-2-inch-Wi-Fi-Spacezwart-Refurbished/dp/B08N89P2QZ/ref=zg_bs_g_amazon-renewed_d_sccl_1/259-1305506-5100743?psc=1
Rating: 4,4 van 5 sterren
Reviews: 403
Currency Symbol: € , Value: 244,89
Product ID: B09ML78C2J
Posição: #2
Image link: https://images-eu.ssl-images-amazon.com/images/I/61RAsVPOjxL._AC_UL300_SR300,200_.jpg
Title: Apple iPhone 13 Pro, 128GB, Sierra Blue - (Refurbished)
Product link: https://www.amazon.nl/Apple-iPhone-128GB-Sierra-Blue/dp/B09ML78C2J/ref=zg_bs_g_amazon-renewed_d_sccl_2/259-1305506-5100743?psc=1
Rating: 3,9 van 5 sterren
Reviews: 403
Currency Symbol: € , Value: 595,00
Product ID: B07J4CMSVS
Posição: #3
Image link: https://images-eu.ssl-im

In [21]:
import requests
from lxml import html
import pandas as pd
import time
import re
from datetime import datetime

def get_category_links(url):
    """Collect the category links shown in the best-sellers navigation tree.

    Args:
        url: Address of the best-sellers landing page.

    Returns:
        A list of ``{'category': <link text>, 'link': <absolute URL>}`` dicts.
        An empty list is returned when the request fails or the response
        status is not 200.
    """
    # Imported locally so this function does not depend on the cell's imports.
    from urllib.parse import urljoin

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to retrieve the page. Error: {exc}")
        return []
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return []

    tree = html.fromstring(response.content)
    categories = tree.xpath('//div[contains(@class, "_p13n-zg-nav-tree-all_style_zg-browse-group__88fbz")]//a')

    category_links = []
    for cat in categories:
        href = cat.get('href')
        if not href:
            # An anchor without an href cannot be followed; skipping it also
            # avoids the str + None TypeError.
            continue
        category_links.append({
            'category': cat.text_content().strip(),
            # urljoin copes with relative and already-absolute hrefs alike.
            'link': urljoin('https://www.amazon.nl', href),
        })

    return category_links

def extract_product_details(items, category):
    """Turn parsed product elements into flat record dicts.

    Each field is looked up with a relative XPath query on the element; when a
    query returns nothing, a "No ..." / "Not Available" placeholder is stored.
    Every extracted field is also printed as progress output.

    Args:
        items: Iterable of elements exposing an ``xpath(query)`` method that
            returns a list of strings.
        category: Category label stored on every record.

    Returns:
        A list with one dict per element, in input order. Each record also
        carries today's date formatted as ``YYYY-MM-DD``.
    """
    products = []
    # Computed once so all records from this call share the same date.
    today_date = datetime.today().strftime('%Y-%m-%d')

    for index, item in enumerate(items, start=1):
        # Product identifier, taken from the first data-asin attribute found.
        product_id = item.xpath('.//@data-asin')
        product_id = product_id[0] if product_id else "No ID"
        print(f"Product ID: {product_id}")

        # Rank badge text; falls back to the 1-based position in `items`.
        position = item.xpath('.//span[@class="zg-bdg-text"]/text()')
        position = position[0].strip() if position else str(index)
        print(f"Posição: {position}")

        # Product image.
        image = item.xpath('.//img[contains(@class, "a-dynamic-image")]/@src')
        image_link = image[0] if image else "No image link"
        print(f"Image link: {image_link}")

        # Title and link.
        title = item.xpath('.//a/span/div/text()')
        title = title[0].strip() if title else "No title"
        print(f"Title: {title}")

        link = item.xpath('.//a[contains(@class, "a-link-normal")]/@href')
        product_link = "https://www.amazon.nl" + link[0] if link else "No product link"
        print(f"Product link: {product_link}")

        # Name: first path segment of the product href ("/<name>/dp/...").
        # Hrefs without a slash fall back to the default instead of raising
        # IndexError.
        name_href = item.xpath('.//a[@class="a-link-normal aok-block"]/@href')
        name = "No name"
        if name_href:
            segments = name_href[0].split('/')
            if len(segments) > 1:
                name = segments[1]
        print(f"Name: {name}")

        # Rating and review count.
        rating = item.xpath('.//span[contains(@class, "a-icon-alt")]/text()')
        rating = rating[0].strip() if rating else "No rating"
        print(f"Rating: {rating}")

        reviews = item.xpath('.//span[@class="a-size-small"]/text()')
        reviews = reviews[0].strip() if reviews else "No reviews"
        print(f"Reviews: {reviews}")

        # Price, split into currency symbol and numeric text.
        price = item.xpath('.//span[contains(@class, "p13n-sc-price")]/text()')
        if price:
            price = price[0].strip()
            # Everything that is not a digit, dot or comma is the symbol. The
            # strip removes the (non-breaking) space between symbol and amount
            # so the stored symbol is "€" rather than "€\xa0".
            currency_symbol = ''.join(re.findall(r'[^\d.,]', price)).strip()
            value = ''.join(re.findall(r'[\d.,]+', price))
        else:
            currency_symbol = "Not Available"
            value = "Not Available"
        print(f"Currency Symbol: {currency_symbol}, Value: {value}")

        products.append({
            # NOTE(review): "asim" looks like a typo for "asin"; kept because
            # it is the output column name.
            "asim": product_id,
            "rank": position,
            "product_photo": image_link,
            "product_title": title,
            "link": product_link,
            "Name": name,
            "rating": rating,
            "reviews": reviews,
            "symbol": currency_symbol,
            "value": value,
            "category": category,
            "date": today_date
        })

    return products

def get_amazon_bestsellers(url, category, retries=5, backoff_factor=0.3, timeout=30):
    """Download one best-sellers category page and extract its products.

    Args:
        url: Address of the category page to fetch.
        category: Category label; printed in progress output and forwarded to
            ``extract_product_details``.
        retries: Maximum number of request attempts. With 0 no request is
            made and an empty list is returned.
        backoff_factor: Base delay in seconds; the wait before the next
            attempt is ``backoff_factor * 2 ** attempt``.
        timeout: Seconds to wait for the server before an attempt is treated
            as failed.

    Returns:
        The list produced by ``extract_product_details`` for the product
        elements on the page, or an empty list when the page could not be
        retrieved.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    # Initialised up front so the check after the loop is safe when retries == 0.
    response = None
    for attempt in range(retries):
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            # Network errors and timeouts are retried like a 503.
            response = None
            print(f"Request failed: {exc}. Retrying {attempt + 1}/{retries}...")
        else:
            if response.status_code == 200:
                break
            if response.status_code != 503:
                print(f"Failed to retrieve the page. Status code: {response.status_code}")
                return []
            print(f"Failed to retrieve the page. Status code: 503. Retrying {attempt + 1}/{retries}...")
        # No point in waiting once the last attempt has been used.
        if attempt < retries - 1:
            time.sleep(backoff_factor * (2 ** attempt))  # Exponential backoff

    if response is None or response.status_code != 200:
        print(f"Failed to retrieve the page after {retries} retries.")
        return []

    tree = html.fromstring(response.content)
    # Each product element is a div whose id contains "p13n-asin-index".
    items = tree.xpath('//div[contains(@id, "p13n-asin-index")]')
    print(f"Found {len(items)} items in category {category}.")

    products = extract_product_details(items, category)
    return products

def save_to_excel(products, filename):
    """Export the product records to an Excel file.

    Args:
        products: Records from which the ``pd.DataFrame`` is built.
        filename: Path the workbook is written to.
    """
    # Build the table and write it in one go; the row index is omitted.
    pd.DataFrame(products).to_excel(excel_writer=filename, index=False)
    print(f"Products saved to {filename}")

if __name__ == "__main__":
    # Script entry point: list the best-seller categories, scrape each one,
    # and write every collected row to a single Excel file.
    base_url = "https://www.amazon.nl/gp/bestsellers/"
    category_links = get_category_links(base_url)

    all_products = []
    try:
        for category_link in category_links:
            category = category_link['category']
            link = category_link['link']
            print(f"Processing category: {category}")
            products = get_amazon_bestsellers(link, category)
            all_products.extend(products)
            # Fixed pause between category requests (throttling, not a speed-up).
            time.sleep(10)
    finally:
        # Save whatever has been collected so far, even when the run is
        # interrupted (e.g. KeyboardInterrupt) or a category raises midway.
        save_to_excel(all_products, "amazon_bestsellers_by_category.xlsx")

    print("Finalizado!")


Processing category: Amazon Renewed
Found 30 items in category Amazon Renewed.
Product ID: B08N89P2QZ
Posição: #1
Image link: https://images-eu.ssl-images-amazon.com/images/I/71F8udBqz3L._AC_UL300_SR300,200_.jpg
Title: 2020 Apple iPad (10.2-inch, Wi-Fi, 32GB) Spacezwart (Refurbished)
Product link: https://www.amazon.nl/Apple-10-2-inch-Wi-Fi-Spacezwart-Refurbished/dp/B08N89P2QZ/ref=zg_bs_g_amazon-renewed_d_sccl_1/259-2930722-3790644?psc=1
Name: Apple-10-2-inch-Wi-Fi-Spacezwart-Refurbished
Rating: 4,4 van 5 sterren
Reviews: 403
Currency Symbol: € , Value: 244,89
Product ID: B09ML78C2J
Posição: #2
Image link: https://images-eu.ssl-images-amazon.com/images/I/61RAsVPOjxL._AC_UL300_SR300,200_.jpg
Title: Apple iPhone 13 Pro, 128GB, Sierra Blue - (Refurbished)
Product link: https://www.amazon.nl/Apple-iPhone-128GB-Sierra-Blue/dp/B09ML78C2J/ref=zg_bs_g_amazon-renewed_d_sccl_2/259-2930722-3790644?psc=1
Name: Apple-iPhone-128GB-Sierra-Blue
Rating: 3,9 van 5 sterren
Reviews: 403
Currency Symbol: €