In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from collections import namedtuple
import time
import re
import uuid

In [2]:
# Search-results page whose product links are scraped below.
url = "https://listado.mercadolibre.com.uy/auriculares#D[A:auriculares]"

# Record type for one scraped listing; the field order is the column order
# of the DataFrame/CSV built from these records.
Product = namedtuple(
    'Product',
    'id name description price stock category category_id',
)

In [3]:
# Download the search-results page.
# - timeout: without one, requests waits forever on a stalled connection.
# - raise_for_status: fail loudly on a 4xx/5xx response instead of parsing an
#   error page and silently ending up with zero product links.
r = requests.get(url, timeout=10)
r.raise_for_status()
html_contents = r.text

In [7]:
# Parse the search-results HTML downloaded earlier.
html_soup = BeautifulSoup(html_contents, 'html.parser')

# Collect the title links of the listed products. A CSS selector is used
# instead of find_all(class_='a b c') because the latter only matches when the
# class attribute is exactly that string (same order, no extra classes).
products_links = html_soup.select(
    'a.ui-search-item__group__element.ui-search-link__title-card.ui-search-link'
)

# Accumulators filled by the scraping loop; re-running this cell resets them.
product_list = []        # Product records, one per scraped page
product_uuid_map = {}    # product name  -> generated UUID string
category_uuid_map = {}   # category name -> generated UUID string

In [8]:
# Visit every product link found on the search page, extract the fields of
# interest and append one Product record per page to `product_list`.

REQUEST_TIMEOUT = 10    # seconds to wait for a product page before giving up
NOT_AVAILABLE = "N/A"   # placeholder stored when a field is missing on the page


def _text_or_na(tag):
    """Return the stripped text of ``tag``, or "N/A" when ``tag`` is None."""
    return tag.text.strip() if tag is not None else NOT_AVAILABLE


for link in products_links:
    product_url = link.get('href')
    if not product_url:
        continue

    try:
        # The timeout keeps a single stalled request from hanging the scrape.
        response = requests.get(product_url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # One unreachable page should not discard the records gathered so far.
        print(f"Skipping {product_url}: {exc}")
        time.sleep(1)
        continue

    product_soup = BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8')

    name = _text_or_na(product_soup.find('h1', class_='ui-pdp-title'))
    description = _text_or_na(product_soup.find('p', class_='ui-pdp-description__content'))

    # The price fraction is looked up inside the main price container only.
    main_container = product_soup.find('div', class_='ui-pdp-price__main-container')
    price_tag = (
        main_container.find('span', class_='andes-money-amount__fraction')
        if main_container is not None
        else None
    )
    price = _text_or_na(price_tag)

    # Stock is the first run of digits in the "available quantity" label.
    stock_tag = product_soup.find('span', class_='ui-pdp-buybox__quantity__available')
    stock_match = re.search(r'\d+', stock_tag.text.strip()) if stock_tag is not None else None
    stock = stock_match.group(0) if stock_match else NOT_AVAILABLE

    # The category is the last breadcrumb link. find_all returns an empty list
    # (not None) when there are no breadcrumbs, so emptiness is checked
    # explicitly; indexing [-1] on it would raise IndexError.
    breadcrumbs = product_soup.find_all('a', class_='andes-breadcrumb__link')
    category = breadcrumbs[-1].text.strip() if breadcrumbs else NOT_AVAILABLE

    # Reuse the same UUID for a name/category seen before in this run.
    # NOTE(review): every product whose name is missing shares the "N/A" key
    # and therefore the same id - confirm whether that is intended.
    if name not in product_uuid_map:
        product_uuid_map[name] = str(uuid.uuid4())
    product_id = product_uuid_map[name]

    if category not in category_uuid_map:
        category_uuid_map[category] = str(uuid.uuid4())
    category_id = category_uuid_map[category]

    product_list.append(
        Product(product_id, name, description, price, stock, category, category_id)
    )

    # Pause between requests to avoid hammering the site.
    time.sleep(1)

In [9]:
# Build the result table from the scraped records. The column names are passed
# explicitly so that an empty `product_list` still yields a frame (and a CSV
# header row) with the expected columns instead of a column-less frame.
df = pd.DataFrame(product_list, columns=Product._fields)

# Persist the table; index=False omits the positional row index from the file.
df.to_csv('productos.csv', index=False, encoding='utf-8')

print(df)

                                      id  \
0   4983acf5-3e20-42e8-8574-957b4fb6f1e7   
1   e0403173-9c33-4073-9b9d-b8f95832ec2e   
2   598e19f2-6d1a-4245-858e-ec7683b2e2a7   
3   2a6d6915-4ddd-44aa-8a9a-33aa085d2be1   
4   f923e080-6713-4f49-b47e-57e0fb1794a8   
5   0ce152e9-d3e3-4080-bd02-7bbda935088f   
6   2b649e67-8a64-41fd-a4e5-644c9e0e8a37   
7   1aa24ea4-f7eb-4a97-8bc7-585cae9e0cc6   
8   aa5a4a00-e520-48f2-9985-d60bae5de157   
9   9b9e9243-d674-4a65-9cab-d83faf5c8ae3   
10  b58ce56e-b0d4-4f6e-b2b6-e118cbac687f   
11  cbda4f5d-2367-4ec7-9f28-d9a8ede5eda3   
12  8872e172-d852-4c25-b6c7-f626fe873d41   
13  b9a07d85-e0c7-400e-9328-d8250fa72f6d   
14  d35c856b-742b-46d9-a19a-5f2354a303b0   
15  f4022168-4d44-421f-bfaf-cdd880af9f21   
16  f313787f-4f55-4bd5-967b-400d779eaa91   
17  f5c71351-2be7-4a2c-acda-827d0b818287   
18  f3929f59-b666-462e-9777-ac1cda53b07b   
19  291101e9-4594-4da3-a27f-90f0c444e7c3   
20  204cc307-a4bc-4062-8e17-ac2234732deb   
21  694e358c-165a-4962-ae11-712d