# Preparação do ambiente

## Bibliotecas

In [1]:
import json
import requests
import uuid
from pathlib import Path

In [2]:
import pandas as pd
from tqdm.auto import tqdm

## Constantes e funções auxiliares

In [3]:
# Browser-like User-Agent header value sent with the HTTP requests in this notebook.
USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/126.0.0.0 Safari/537.36'
)

In [13]:
# Smoke test: a single unauthenticated search request; the cell output is the
# Response object, so its status code is visible at a glance.
url = 'https://api.mercadolibre.com/sites/MLB/search?'
headers = {'User-Agent': USER_AGENT}
params = {'q': 'iphone 15 pro max'}
# Without a timeout requests waits forever if the server never answers.
requests.get(url, headers=headers, params=params, timeout=30)

<Response [200]>

In [4]:
def save_search_response_content(content, output_folder):
    """Write one decoded search response to its own JSON file.

    The file is created as ``<output_folder>/search_results/<uuid4>.json``.
    A random UUID is used as the file name, so repeated calls add new files
    instead of overwriting earlier ones.

    Parameters
    ----------
    content : JSON-serialisable object
        Decoded response body to store.
    output_folder : str or pathlib.Path
        Base folder; the ``search_results`` subfolder (and any missing
        parents) is created on demand.

    Returns
    -------
    None
    """
    # Path() accepts strings and Path objects alike, so no type check is needed.
    target_folder = Path(output_folder) / 'search_results'
    # exist_ok avoids the check-then-create race of exists() followed by mkdir().
    target_folder.mkdir(parents=True, exist_ok=True)

    file_to_save = target_folder / f'{uuid.uuid4()}.json'

    # json.dump escapes non-ASCII characters by default; the encoding is still
    # spelled out so the file does not depend on the platform default.
    with open(file_to_save, 'w', encoding='utf-8') as content_file:
        json.dump(content, content_file, indent=2)

In [11]:
def search_item(query, cellphones=False, access_token=None, output_folder=None, timeout=30):
    """Search the Mercado Libre MLB site and collect the paginated results.

    The first page is requested without an offset; further pages are requested
    in steps of 50 up to ``min(paging.total, offset_limit)``, where the offset
    limit is 1000 without an access token and 4000 with one.

    Parameters
    ----------
    query : str
        Search text, sent as the ``q`` query parameter.
    cellphones : bool, default False
        If True, restrict the search to category ``MLB1055``.
    access_token : str, optional
        Sent as a ``Bearer`` Authorization header when given. Instructions for
        getting a token are available at
        https://developers.mercadolivre.com.br/pt_br/autenticacao-e-autorizacao
    output_folder : str or pathlib.Path, optional
        When given, every page is stored through
        ``save_search_response_content`` and the accumulated result list is
        written to ``<output_folder>/items_results/total_results_<uuid4>.json``.
    timeout : float, default 30
        Timeout in seconds passed to every ``requests.get`` call.

    Returns
    -------
    list or None
        The concatenated ``results`` entries of every page that answered with
        status 200, or None if the first page did not answer with status 200.
        Later pages that fail are skipped (best effort).
    """
    page_size = 50  # offset step used between consecutive page requests

    headers = {'User-Agent': USER_AGENT}
    params = {'q': query}

    if cellphones:
        # endpoint for searching only the cellphones category
        url = 'https://api.mercadolibre.com/sites/MLB/search?category=MLB1055'
    else:
        # endpoint for searching all categories
        url = 'https://api.mercadolibre.com/sites/MLB/search?'

    if access_token is None:
        offset_limit = 1000
    else:
        offset_limit = 4000
        headers['Authorization'] = f'Bearer {access_token}'

    def fetch_page(offset=None):
        """Request one page; return its decoded JSON, or None on a non-200 status."""
        page_params = dict(params)
        if offset is not None:
            page_params['offset'] = offset
        response = requests.get(url, headers=headers, params=page_params, timeout=timeout)
        if response.status_code != 200:
            return None
        # response.json() also works when the server declares no charset,
        # where response.content.decode(response.encoding) would get None.
        content = response.json()
        if output_folder is not None:
            save_search_response_content(content, output_folder)
        return content

    # first page of results
    first_page = fetch_page()
    if first_page is None:
        return None

    results = first_page['results']
    max_offset = min(first_page['paging']['total'], offset_limit)

    # iterate over the next result pages; failed pages are skipped
    for actual_offset in range(page_size, max_offset, page_size):
        content = fetch_page(actual_offset)
        if content is not None:
            results.extend(content['results'])

    if output_folder is not None:
        # the accumulated list goes to its own subfolder, next to the raw pages
        items_folder = Path(output_folder) / 'items_results'
        items_folder.mkdir(parents=True, exist_ok=True)
        output_file = items_folder / f'total_results_{uuid.uuid4()}.json'
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2)

    return results

In [6]:
def search_item_details(item_id, output_folder=None, timeout=30):
    """Fetch the details of one item from ``api.mercadolibre.com/items/<item_id>``.

    Parameters
    ----------
    item_id : str
        Item identifier, interpolated into the request URL.
    output_folder : str or pathlib.Path, optional
        When given, the decoded response is also written to
        ``<output_folder>/<item_id>.json`` (the folder is created on demand).
    timeout : float, default 30
        Timeout in seconds passed to ``requests.get``.

    Returns
    -------
    dict or None
        The decoded JSON body, or None if the response status is not 200.
    """
    url = f'https://api.mercadolibre.com/items/{item_id}'
    headers = {'User-Agent': USER_AGENT}
    # the headers were built but never sent before; pass them explicitly
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        return None

    # response.json() also works when the server declares no charset,
    # where response.content.decode(response.encoding) would get None.
    content = response.json()

    if output_folder is not None:
        output_folder = Path(output_folder)
        output_folder.mkdir(parents=True, exist_ok=True)
        output_file = output_folder / f'{item_id}.json'
        with open(output_file, 'w', encoding='utf-8') as f:
            # dump the decoded body; the previous code referenced an undefined name
            json.dump(content, f, indent=2)

    return content

In [7]:
def parse_item_details(item_details):
    """Flatten one item-details dict into a single-level record.

    The record holds a fixed set of top-level keys, a fixed set of entries
    from the ``attributes`` list (stored under the lower-cased attribute id),
    and ``warranty_type`` taken from the ``WARRANTY_TYPE`` sale term. Anything
    not found stays None.

    Parameters
    ----------
    item_details : dict or None
        Item details as returned by ``search_item_details``.

    Returns
    -------
    dict or None
        The flattened record, or None when ``item_details`` is None, its
        status equals ``'under review'``, or it has no ``attributes`` key.
    """
    keys_to_keep = [
        'id',
        'title',
        'seller_id',
        'category_id',
        'official_store_id',
        'price',
        'currency_id',
        'initial_quantity',
        'condition',
        'permalink',
        'warranty',
        'catalog_product_id',
        'date_created',
        'last_updated',
        'status'
    ]

    attributes_to_keep = [
        'ITEM_CONDITION',
        'BRAND',
        'MODEL',
        'DETAILED_MODEL',
        'ANATEL_HOMOLOGATION_NUMBER',
        'CELLPHONES_ANATEL_HOMOLOGATION_NUMBER',
        'GTIN',
        'EMPTY_GTIN_REASON',
    ]

    # search_item_details returns None for failed requests; pass that through
    if item_details is None:
        return None

    # NOTE(review): the API may spell this status 'under_review' — confirm
    if item_details.get('status') == 'under review':
        return None

    if 'attributes' not in item_details:
        return None

    columns_to_keep = [col.lower() for col in keys_to_keep + attributes_to_keep]
    parsed_item = {key: None for key in columns_to_keep}

    # copy the plain top-level fields; absent ones keep their None default
    for key in keys_to_keep:
        if key in item_details:
            parsed_item[key] = item_details[key]

    for item_attribute in item_details['attributes']:
        if item_attribute['id'] in attributes_to_keep:
            attribute_key = item_attribute['id'].lower()
            parsed_item[attribute_key] = item_attribute['value_name']

    parsed_item['warranty_type'] = None
    # if several WARRANTY_TYPE terms are present the last one wins
    for sale_term in item_details.get('sale_terms', []):
        if sale_term['id'] == 'WARRANTY_TYPE':
            parsed_item['warranty_type'] = sale_term['value_name']

    # left-pad both homologation numbers with zeros to 12 characters
    for column in ('anatel_homologation_number', 'cellphones_anatel_homologation_number'):
        if parsed_item[column] is not None:
            parsed_item[column] = parsed_item[column].zfill(12)

    return parsed_item

# Desenvolvimento

In [9]:
# NOTE(review): df_results is not defined in any visible cell — presumably a
# DataFrame built from search_item() results; confirm and add the missing cell.
items = df_results.id.unique()

# Fetch and flatten every item, skipping failed requests and items the parser
# rejects: both helpers return None in those cases, and a None entry would
# break the list-of-dicts DataFrame construction.
parsed_items = []
for item_id in tqdm(items):
    item_details = search_item_details(item_id)
    if item_details is None:
        continue
    parsed_item = parse_item_details(item_details)
    if parsed_item is not None:
        parsed_items.append(parsed_item)

df_items = pd.DataFrame(parsed_items)
df_items

  0%|          | 0/69 [00:00<?, ?it/s]

Unnamed: 0,id,title,seller_id,category_id,official_store_id,price,currency_id,initial_quantity,condition,permalink,...,status,item_condition,brand,model,detailed_model,anatel_homologation_number,cellphones_anatel_homologation_number,gtin,empty_gtin_reason,warranty_type
0,MLB4376702816,Apple iPhone 15 Pro Max (256 Gb) - Titânio Pre...,480263032,MLB1055,2162.0,12599.10,BRL,843,new,https://produto.mercadolivre.com.br/MLB-437670...,...,active,Novo,Apple,iPhone 15 Pro Max,iPhone 15 Pro Max 256GB Titânio preto,127822301993,127822301993,195949048104,,Garantia do vendedor
1,MLB3461275499,Apple iPhone 15 Pro Max (512 Gb) - Titânio Pre...,480263032,MLB1055,2162.0,10998.90,BRL,603,new,https://produto.mercadolivre.com.br/MLB-346127...,...,active,Novo,Apple,iPhone 15 Pro Max,iPhone 15 Pro Max 512GB Titânio preto,127822301993,127822301993,195949048821,,Garantia do vendedor
2,MLB3570897553,Apple iPhone 15 Pro Max (256 Gb) - Titânio Nat...,480263032,MLB1055,2162.0,9430.00,BRL,1438,new,https://produto.mercadolivre.com.br/MLB-357089...,...,active,Novo,Apple,iPhone 15 Pro Max,iPhone 15 Pro Max 256GB Titânio natural,127822301993,127822301993,195949048463,,Garantia do vendedor
3,MLB4376709268,Apple iPhone 15 Pro Max (512 Gb) - Titânio Nat...,480263032,MLB1055,2162.0,10829.18,BRL,286,new,https://produto.mercadolivre.com.br/MLB-437670...,...,active,Novo,Apple,iPhone 15 Pro Max,iPhone 15 Pro Max 512GB Titânio natural,127822301993,127822301993,195949049187,,Garantia do vendedor
4,MLB3570844411,Apple iPhone 15 Pro Max (1 Tb) - Titânio Preto...,480263032,MLB1055,2162.0,12064.70,BRL,267,new,https://produto.mercadolivre.com.br/MLB-357084...,...,active,Novo,Apple,iPhone 15 Pro Max,iPhone 15 Pro Max 1TB Titânio preto,127822301993,127822301993,195949049545,,Garantia do vendedor
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,MLB3807476745,Apple iPhone 15 Pro Max (256 Gb) - Titânio Nat...,787346261,MLB1055,,7000.00,BRL,1,used,https://produto.mercadolivre.com.br/MLB-380747...,...,active,Usado,Apple,iPhone 15 Pro Max,,,127822301993,,,Garantia de fábrica
65,MLB3809667035,iPhone 15 Pro Max (256 Gb) - Titânio Branco,204516370,MLB1055,,6650.00,BRL,1,used,https://produto.mercadolivre.com.br/MLB-380966...,...,active,Usado,Apple,iPhone 15 Pro Max,,,127822301993,,,Garantia de fábrica
66,MLB4983883474,Apple iPhone 15 Pro Max (256 Gb) - Titânio Azul,32541270,MLB1055,,6300.00,BRL,1,used,https://produto.mercadolivre.com.br/MLB-498388...,...,active,Usado,Apple,iPhone 15 Pro Max,,,127822301993,,,Garantia de fábrica
67,MLB3753972993,iPhone 15 Pro Max,45206638,MLB1055,,7799.00,BRL,1,used,https://produto.mercadolivre.com.br/MLB-375397...,...,active,Usado,Apple,iPhone 15 Pro Max,,148484946495,127822301993,,,Garantia de fábrica


warranty_type
Garantia de fábrica     16
Garantia do vendedor    13
Sem garantia             6
Name: count, dtype: int64