In [1]:
!pip install iso8601



In [2]:
import requests
import json
import pandas as pd
import iso8601
import numpy as np
from datetime import date

In [3]:
def create_dateframe(category):
    '''
    Download the search results of one category and return them as a DataFrame.

    The site search endpoint is queried once to read ``paging.primary_results``;
    the results are then requested page by page (50 items per page) and every
    item is converted with ``get_item_data``.

    Parameters
    ----------
    category : str
        Category id appended to the search URL.

    Returns
    -------
    pandas.DataFrame
        The concatenation of the one-row frames returned by ``get_item_data``,
        or an empty DataFrame when no item was returned.
    '''
    page_size = 50
    # No more than 1000 items (20 pages of 50) are requested per category.
    # NOTE(review): presumably this mirrors the paging ceiling of the public API - confirm.
    max_items = 1000

    base_url = "https://api.mercadolibre.com/sites/MLA/search?category=" + category
    api_request = requests.get(base_url + "&limit=" + str(page_size))
    json_dic = json.loads(api_request.content)
    primary_results = json_dic.get('paging').get('primary_results')

    # Full pages first, then one shorter page for the remainder (if any).
    reachable = min(primary_results, max_items)
    full_pages, remainder = divmod(reachable, page_size)
    page_limits = [page_size] * full_pages
    if remainder:
        page_limits.append(remainder)

    # Collect the per-item frames and concatenate once at the end instead of
    # re-copying an ever-growing DataFrame on every item.
    frames = []
    for page_number, page_limit in enumerate(page_limits):
        page_url = (base_url
                    + "&offset=" + str(page_number * page_size)
                    + "&limit=" + str(page_limit))
        page_json = json.loads(requests.get(page_url).content)
        for item in page_json.get('results'):
            frames.append(get_item_data(item))

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)
  

In [4]:
def iterate_over_categories():
    '''
    Crawl every top-level category of the MLA site into a single DataFrame.

    The category list is downloaded from the ``sites/MLA/categories`` endpoint;
    for each category ``create_dateframe`` is called, the resulting rows are
    labelled with a ``category_id`` column, and the category id is printed as a
    progress indicator.

    Returns
    -------
    pandas.DataFrame
        All per-category frames concatenated in the order the categories were
        listed, or an empty DataFrame when the category list is empty.
    '''
    categories_url = "https://api.mercadolibre.com/sites/MLA/categories"
    categories_request = requests.get(categories_url)
    categories_json = json.loads(categories_request.content)

    # Gather the per-category frames and concatenate once at the end; growing
    # the result with concat inside the loop copies all previous rows each time.
    frames = []
    for category in categories_json:
        category_id = category['id']
        category_frame = create_dateframe(category_id)
        category_frame['category_id'] = category_id
        print(category_id)
        frames.append(category_frame)

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)



In [5]:
def years_between_date_and_today(x):
    '''
    Return the time elapsed between the given timestamp and today, in years.

    ``x`` is parsed with ``iso8601.parse_date``; only its calendar date is
    used. The day difference is divided by 365.25 to express it in years.
    '''
    today = date.today()
    reference_day = iso8601.parse_date(x).date()
    elapsed = today - reference_day
    return elapsed.days / 365.25


In [17]:

def get_item_data(json_dic_i):
    '''
    Build a one-row DataFrame with the model features of a single item.

    Item, seller and shipping tags are turned into 0/1 indicator columns and
    only the fields used by the model are kept.

    The seller record is resolved as follows:
    - if the item carries ``seller_id``, the seller is downloaded from the
      ``users`` endpoint;
    - otherwise the embedded ``seller`` dict is used when it has ``tags``;
    - otherwise the seller is downloaded using the embedded ``seller.id``.

    Parameters
    ----------
    json_dic_i : dict
        Item dictionary as returned by the search endpoint. ``shipping`` (and
        ``seller`` when ``seller_id`` is absent) must be present.

    Returns
    -------
    pandas.DataFrame
        A single row indexed by the item ``id``.
    '''
    # Tags encoded as 0/1 indicator columns (item, seller and shipping level).
    tag_list = ['ahora-12','brand_verified','cart_eligible','dragged_bids_and_visits',
                'good_quality_picture','good_quality_thumbnail','immediate_payment',
                'incomplete_technical_specs','loyalty_discount_eligible','shipping_guaranteed',
                'poor_quality_picture','poor_quality_thumbnail','catalog_listing_eligible',
                'extended_warranty_eligible','lightning_deal','under_infractions','supermarket_eligible',
                'dragged_visits','deal_of_the_day','catalog_forewarning','only_html_description','hirable']

    seller_list = ['brand','credits_active_borrower','credits_priority_2','credits_priority_4','credits_profile',
                   'developer','eshop','large_seller','medium_seller','messages_as_buyer','messages_as_seller',
                   'mshops','normal','user_info_verified','car_dealer','medium_seller_advanced','credits_priority_3',
                   'credits_priority_1','credits_open_market','ngo','from_facebook','real_estate_agency']

    ship_list = ['fs_removed_by_tagger','fulfillment','mandatory_free_shipping','self_service_in','self_service_out',
                'fbm_in_process','is_flammable','me2_blocked','me2_available', 'fbm_in_progress', 'fbm_me2_frozen',
                'adoption_required']

    # Resolve the seller record (embedded in the item or fetched from the API).
    if json_dic_i.get('seller_id') is None:
        embedded_seller = json_dic_i.get('seller')
        if embedded_seller.get('tags') is None:
            seller_url = 'https://api.mercadolibre.com/users/' + str(embedded_seller.get('id'))
            json_dic_i_seller = json.loads(requests.get(seller_url).content)
        else:
            json_dic_i_seller = embedded_seller
    else:
        seller_url = 'https://api.mercadolibre.com/users/' + str(json_dic_i.get('seller_id'))
        json_dic_i_seller = json.loads(requests.get(seller_url).content)

    shipping = json_dic_i.get('shipping')
    reputation = json_dic_i_seller.get('seller_reputation')
    transactions = reputation.get('transactions')
    ratings = transactions.get('ratings')

    # A missing tag list is treated as "no tags" instead of raising TypeError.
    item_tags = json_dic_i.get('tags') or []
    seller_tags = json_dic_i_seller.get('tags') or []
    shipping_tags = shipping.get('tags') or []

    dummies_tag_dic = {tag: int(tag in item_tags) for tag in tag_list}
    dummies_seller_dic = {tag: int(tag in seller_tags) for tag in seller_list}
    dummies_ship_dic = {tag: int(tag in shipping_tags) for tag in ship_list}

    # Relative discount of the current price with respect to the original price.
    # The previous code subtracted original_price from itself, so it was always 0.
    original_price = json_dic_i.get('original_price')
    price = json_dic_i.get('price')
    if original_price and price is not None:
        descuento = (original_price - price) / original_price
    else:
        # No original price (or a zero one): no discount can be computed.
        descuento = 0

    condition = json_dic_i.get('condition')
    if condition is None:
        condition = 'not_specified'

    shipping_logistic_type = shipping.get('logistic_type')
    if shipping_logistic_type is None:
        shipping_logistic_type = 'not_specified'

    dic = {'category_id2': json_dic_i.get('category_id'),
           'price': price,
           'available_quantity': json_dic_i.get('available_quantity'),
           'sold_quantity': json_dic_i.get('sold_quantity'),
           'buying_mode': json_dic_i.get('buying_mode'),
           'listing_type_id': json_dic_i.get('listing_type_id'),
           'condition': condition,
           'accepts_mercadopago': int(json_dic_i.get('accepts_mercadopago')),
           'descuento': descuento,
           'free_shipping': int(shipping.get('free_shipping')),
           'shipping_mode': shipping.get('mode'),
           'shipping_logistic_type': shipping_logistic_type,
           'shipping_store_pick_up': int(shipping.get('store_pick_up')),
           'seller_transactions_ratings_negative': ratings.get('negative'),
           'seller_transactions_ratings_neutral': ratings.get('neutral'),
           'seller_transactions_ratings_positive': ratings.get('positive'),
           'seller_transactions_total': transactions.get('total'),
           'seller_level_id': reputation.get('level_id'),
           'seller_power_seller_status': reputation.get('power_seller_status'),
           'seller_transactions_canceled': transactions.get('canceled'),
           'seller_transactions_completed': transactions.get('completed'),
           'seller_years_from_registration': years_between_date_and_today(json_dic_i_seller.get('registration_date'))}

    # Return the row directly; concatenating it with an empty frame added nothing.
    return pd.DataFrame({**dic, **dummies_tag_dic, **dummies_seller_dic, **dummies_ship_dic},
                        index=[json_dic_i.get('id')])

In [18]:

def get_item_data_evaluate(json_dic_i):
    '''
    Build a one-row DataFrame with the model features of a single item.

    Variant of the item extractor that always downloads the seller record
    from the ``users`` endpoint using the item's ``seller_id``. Item, seller
    and shipping tags are turned into 0/1 indicator columns and only the
    fields used by the model are kept.

    Parameters
    ----------
    json_dic_i : dict
        Item dictionary carrying ``seller_id`` and ``shipping``.

    Returns
    -------
    pandas.DataFrame
        A single row indexed by the item ``id``.
    '''
    # Tags encoded as 0/1 indicator columns (item, seller and shipping level).
    tag_list = ['ahora-12','brand_verified','cart_eligible','dragged_bids_and_visits',
                'good_quality_picture','good_quality_thumbnail','immediate_payment',
                'incomplete_technical_specs','loyalty_discount_eligible','shipping_guaranteed',
                'poor_quality_picture','poor_quality_thumbnail','catalog_listing_eligible',
                'extended_warranty_eligible','lightning_deal','under_infractions','supermarket_eligible',
                'dragged_visits','deal_of_the_day','catalog_forewarning','only_html_description','hirable']

    seller_list = ['brand','credits_active_borrower','credits_priority_2','credits_priority_4','credits_profile',
                   'developer','eshop','large_seller','medium_seller','messages_as_buyer','messages_as_seller',
                   'mshops','normal','user_info_verified','car_dealer','medium_seller_advanced','credits_priority_3',
                   'credits_priority_1','credits_open_market','ngo','from_facebook','real_estate_agency']

    ship_list = ['fs_removed_by_tagger','fulfillment','mandatory_free_shipping','self_service_in','self_service_out',
                'fbm_in_process','is_flammable','me2_blocked','me2_available', 'fbm_in_progress', 'fbm_me2_frozen',
                'adoption_required']

    # The seller record is always fetched from the API in this variant.
    seller_url = 'https://api.mercadolibre.com/users/' + str(json_dic_i.get('seller_id'))
    json_dic_i_seller = json.loads(requests.get(seller_url).content)

    shipping = json_dic_i.get('shipping')
    reputation = json_dic_i_seller.get('seller_reputation')
    transactions = reputation.get('transactions')
    ratings = transactions.get('ratings')

    # A missing tag list is treated as "no tags" instead of raising TypeError.
    item_tags = json_dic_i.get('tags') or []
    seller_tags = json_dic_i_seller.get('tags') or []
    shipping_tags = shipping.get('tags') or []

    dummies_tag_dic = {tag: int(tag in item_tags) for tag in tag_list}
    dummies_seller_dic = {tag: int(tag in seller_tags) for tag in seller_list}
    dummies_ship_dic = {tag: int(tag in shipping_tags) for tag in ship_list}

    # Relative discount of the current price with respect to the original price.
    # The previous code subtracted original_price from itself, so it was always 0.
    original_price = json_dic_i.get('original_price')
    price = json_dic_i.get('price')
    if original_price and price is not None:
        descuento = (original_price - price) / original_price
    else:
        # No original price (or a zero one): no discount can be computed.
        descuento = 0

    condition = json_dic_i.get('condition')
    if condition is None:
        condition = 'not_specified'

    shipping_logistic_type = shipping.get('logistic_type')
    if shipping_logistic_type is None:
        shipping_logistic_type = 'not_specified'

    dic = {'category_id2': json_dic_i.get('category_id'),
           'price': price,
           'available_quantity': json_dic_i.get('available_quantity'),
           'sold_quantity': json_dic_i.get('sold_quantity'),
           'buying_mode': json_dic_i.get('buying_mode'),
           'listing_type_id': json_dic_i.get('listing_type_id'),
           'condition': condition,
           'accepts_mercadopago': int(json_dic_i.get('accepts_mercadopago')),
           'descuento': descuento,
           'free_shipping': int(shipping.get('free_shipping')),
           'shipping_mode': shipping.get('mode'),
           'shipping_logistic_type': shipping_logistic_type,
           'shipping_store_pick_up': int(shipping.get('store_pick_up')),
           'seller_transactions_ratings_negative': ratings.get('negative'),
           'seller_transactions_ratings_neutral': ratings.get('neutral'),
           'seller_transactions_ratings_positive': ratings.get('positive'),
           'seller_transactions_total': transactions.get('total'),
           'seller_level_id': reputation.get('level_id'),
           'seller_power_seller_status': reputation.get('power_seller_status'),
           'seller_transactions_canceled': transactions.get('canceled'),
           'seller_transactions_completed': transactions.get('completed'),
           'seller_years_from_registration': years_between_date_and_today(json_dic_i_seller.get('registration_date'))}

    # Return the row directly; concatenating it with an empty frame added nothing.
    return pd.DataFrame({**dic, **dummies_tag_dic, **dummies_seller_dic, **dummies_ship_dic},
                        index=[json_dic_i.get('id')])

In [19]:
def create_dataframe(output_path='Data/dataset2.csv'):
    '''
    Crawl all categories and save the resulting dataset as a CSV file.

    The item ids, which ``iterate_over_categories`` returns as the index, are
    copied into an ``id`` column and the index is reset before writing.

    Parameters
    ----------
    output_path : str or path-like, optional
        Destination CSV file. Defaults to ``'Data/dataset2.csv'``. Missing
        parent directories are created.

    Returns
    -------
    None
    '''
    # Local import so this cell does not depend on changes to the import cell.
    from pathlib import Path

    # Make sure the destination exists *before* the crawl, so a missing
    # directory cannot discard the downloaded data at the very end.
    target = Path(output_path)
    target.parent.mkdir(parents=True, exist_ok=True)

    df = iterate_over_categories()
    df['id'] = df.index
    df = df.reset_index(drop=True)
    df.to_csv(target, index=False)

In [None]:
# Run the full crawl over every category and write the dataset to Data/dataset2.csv.
create_dataframe()

In [None]:
# NOTE(review): this calls the crawler directly and does not keep the returned DataFrame;
# it repeats the same category crawl that create_dataframe() performs and writes nothing to disk.
iterate_over_categories()