In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# Scrape the ASINs listed on an Amazon Spain search-results page.
# Result: `data_asins` (list of unique ASIN strings, in page order), also
# written one per line to asins.txt.

# Configure Selenium to drive Chrome in headless mode
options = webdriver.ChromeOptions()
options.add_argument("--headless")  # do not open a browser window
options.add_argument("--disable-blink-features=AutomationControlled")  # reduce bot detection
options.add_argument("start-maximized")
options.add_argument("disable-infobars")

# Amazon Spain search URL
url = "https://www.amazon.es/s?k=mobil&__mk_es_ES=%C3%85M%C3%85%C5%BD%C3%95%C3%91&crid=18LZTS0F1Q8I7&sprefix=mobil%2Caps%2C125&ref=nb_sb_noss_1"

# Start the driver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
try:
    # The implicit wait applies to element lookups: find_elements below polls
    # for up to 5 seconds until at least one match is present.
    driver.implicitly_wait(5)

    # Load the page
    driver.get(url)

    # Collect every element carrying a data-asin attribute
    productos = driver.find_elements(By.CSS_SELECTOR, 'div[data-asin]')

    # Read the attribute once per element (each call is a WebDriver round-trip)
    raw_asins = [p.get_attribute("data-asin") for p in productos]
finally:
    # Always release the browser, even if loading or scraping failed
    driver.quit()

# Drop empty values and repeated ASINs while preserving page order, so later
# steps do not request the same product twice.
data_asins = list(dict.fromkeys(asin for asin in raw_asins if asin))

# Show the results
print(data_asins)

# Save the results to a file, one ASIN per line
with open("asins.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(data_asins))

In [None]:
import http.client
import json

import os

# Fetch product details for every scraped ASIN from the RapidAPI
# "real-time-amazon-data" service, rotating to the next key when a request
# does not return HTTP 200. Result: `products_data` (list of decoded JSON
# responses).

# Credentials must not live in the notebook: read a comma-separated list of
# RapidAPI keys from the RAPIDAPI_KEYS environment variable.
API_KEYS = [key.strip() for key in os.environ.get("RAPIDAPI_KEYS", "").split(",") if key.strip()]
if not API_KEYS:
    raise RuntimeError(
        "Define la variable de entorno RAPIDAPI_KEYS con una o más API keys separadas por comas."
    )

API_HOST = "real-time-amazon-data.p.rapidapi.com"
# Only the attributes consumed downstream; drop the `fields` parameter from
# the request path to receive every attribute again.
PRODUCT_FIELDS = "product_url,asin,product_price,product_original_price,product_title,product_star_rating,product_num_ratings,product_information,product_details"

current_api_index = 0
# ASIN_LIST = ["B0DL4831YZ", "B0DHZJW5Z4", "B0BRQMC66M", "B0CTHYFRHH", "B0CPLLJ367", "B0D2RQ8R19", "B0CV7R71NY"]
ASIN_LIST = data_asins
products_data = []

for asin in ASIN_LIST:
    success = False
    # Give each key at most one attempt per ASIN
    for _ in range(len(API_KEYS)):
        conn = http.client.HTTPSConnection(API_HOST)
        headers = {
            'x-rapidapi-key': API_KEYS[current_api_index],
            'x-rapidapi-host': API_HOST
        }
        try:
            conn.request("GET", f"/product-details?asin={asin}&country=ES&fields={PRODUCT_FIELDS}", headers=headers)
            res = conn.getresponse()
            status = res.status
            body = res.read()
        finally:
            # Release the socket even if the request itself raised
            conn.close()

        if status == 200:
            products_data.append(json.loads(body.decode("utf-8")))
            success = True
            break

        # Log only the key's position, never the key itself
        print(f"Error con API_KEY {current_api_index + 1} (HTTP {status})")
        current_api_index = (current_api_index + 1) % len(API_KEYS)
        print(f"Usando API_KEY {current_api_index + 1}")
    if not success:
        print(f"No se pudo obtener información del ASIN {asin} con ninguna API Key.")

products_data

In [None]:
import csv
import re

def find_value_by_keywords(data_dict, keywords, pattern=None, default="null"):
    """Return the first value whose key mentions one of the keywords.

    Entries are scanned in the dictionary's iteration order. A key qualifies
    when any keyword occurs in it as a case-insensitive substring. When
    `pattern` is given, the value (converted with ``str``) must also satisfy
    ``re.search(pattern, ...)``; otherwise the scan moves on to the next entry.

    Args:
        data_dict: Mapping of attribute names to values.
        keywords: Iterable of substrings to look for in the keys.
        pattern: Optional regular expression the value has to match.
        default: Value returned when no entry qualifies.

    Returns:
        The original (unconverted) value of the first qualifying entry, or
        `default` if there is none.
    """
    for field_name, field_value in data_dict.items():
        name_hit = False
        for candidate in keywords:
            if candidate.lower() in field_name.lower():
                name_hit = True
                break
        if not name_hit:
            continue
        if pattern is not None and not re.search(pattern, str(field_value)):
            continue
        return field_value
    return default

# Flatten every API response in `products_data` into one row of moviles.csv.
# Attributes that cannot be located are written as the string "null".
with open('moviles.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["URL", "Asin", "Precio", "Precio Inicial", "Título", "Estrellas", "Opiniones", "Marca", "Modelo", "Año del modelo", "Dimensiones",
                     "RAM", "Memoria", "Sistema operativo", "Resolución pantalla", "Tamaño pantalla", "Relación aspecto", "Peso", "Tecnología conectividad", "Bateria"])
    for product in products_data:
        # A response without a usable "data" object cannot be turned into a
        # row; skip it instead of aborting the export with a KeyError.
        data = product.get("data") if isinstance(product, dict) else None
        if not isinstance(data, dict):
            print("Respuesta sin datos de producto, se omite.")
            continue

        # Named product_url so the search `url` defined earlier is not overwritten
        product_url = data.get("product_url", "null")
        asin = data.get("asin", "null")
        product_price = data.get("product_price", "null")
        product_original_price = data.get("product_original_price", "null")
        if not product_original_price:
            # No separate list price reported: fall back to the current price
            product_original_price = product_price
        if isinstance(product_original_price, str):
            # Both prices may be null in the response; only strip the currency
            # sign when there is actually a string to strip it from.
            product_original_price = product_original_price.replace("€","")
        product_title = data.get("product_title", "null")
        product_star_rating = data.get("product_star_rating", "null")
        product_num_ratings = data.get("product_num_ratings", "null")

        # Merge both attribute tables; `or {}` covers keys present with a null
        # value, which would otherwise make the `|` merge raise TypeError.
        product_details = (data.get("product_information") or {}) | (data.get("product_details") or {})
        marca = find_value_by_keywords(product_details, ["fabricante", "marca"])
        modelo = find_value_by_keywords(product_details, ["modelo"])
        ano = find_value_by_keywords(product_details, ["año"], r"\d+")
        dimensiones = find_value_by_keywords(product_details, ["dimensiones", "tamaño", "medidas"], r"\d+")
        ram = find_value_by_keywords(product_details, ["RAM", "memoria RAM"], r"^(1|2|4|8|16|32|64)\s?GB$")
        memoria = find_value_by_keywords(product_details, ["ROM", "memoria", "capacidad"], r"^(1|32|64|128|256|512|1024)\s?(GB|TB)$")
        sistema_operativo = product_details.get("Sistema operativo", "null")
        resolucion_pantalla = find_value_by_keywords(product_details, ["resolucion", "resolución"], r"\d+")
        tamano_pantalla = find_value_by_keywords(product_details, ["pantalla", "pulgadas"], r"\d+")
        relacion_aspecto = find_value_by_keywords(product_details, ["relacion", "aspecto"], r"\d+")
        peso = find_value_by_keywords(product_details, ["peso", "masa"], r"\d+\s?g")
        tecnologia_conectividad = find_value_by_keywords(product_details, ["tecnología", "conectividad"], r"^(4G|5G)$")
        bateria = find_value_by_keywords(product_details, ["batería", "capacidad batería", "mAh"])

        writer.writerow([
            product_url, asin, product_price, product_original_price, product_title, product_star_rating, product_num_ratings,
            marca, modelo, ano, dimensiones, ram, memoria, sistema_operativo, resolucion_pantalla, tamano_pantalla, relacion_aspecto, peso, tecnologia_conectividad, bateria
        ])
