In [None]:
import requests
from src.common.cloud_storage_connector import CloudStorage
from src.common.bigquery_connector import BigQueryManager
from src.common.utils import batch_process, log_process, authenticate, fetch_items_from_storage
from src.config import settings
import json
import asyncio
import aiohttp
from datetime import datetime

In [None]:
import os

# Seller configuration for the "gw shop" store; later cells read it through
# `data = json` and `json['client_id']`.
# Credentials are taken from environment variables instead of being committed
# to the notebook; an unset variable yields None.
# NOTE(review): the name `json` shadows the standard-library module imported
# above. It is kept because other cells read this dict under that name.
json = {
  "access_token": os.environ.get("GW_SHOP_ACCESS_TOKEN"),
  "client_id": os.environ.get("GW_SHOP_CLIENT_ID"),
  "client_secret": os.environ.get("GW_SHOP_CLIENT_SECRET"),
  "seller_id": 569119547,
  "store_name": "gw shop"
}

# BigQuery manager created with the service-account path configured in settings.
bigquery = BigQueryManager(credentials_path=settings.PATH_SERVICE_ACCOUNT)

In [None]:
import pandas as pd 

# Function to extract sku
def extract_seller_sku(attributes):
    """Return the seller SKU stored in an attribute list.

    Args:
        attributes: Iterable of attribute dicts; ``None`` is treated as empty
            and entries that are not dicts are ignored.

    Returns:
        The ``value_name`` of the first attribute whose ``id`` is
        ``'SELLER_SKU'``, or ``None`` when no such attribute exists.
    """
    # `item.get('attributes', [])` still yields None when the key is present
    # with a null value, so the guard has to live here.
    for attribute in attributes or []:
        if isinstance(attribute, dict) and attribute.get('id') == 'SELLER_SKU':
            return attribute.get('value_name')
    return None

# Functions to get relations 
def _first_item_relation(x):
    """Return the first entry of ``x['item_relations']``, or ``{}`` if there is none."""
    # `or []` also covers the key being present with a null value.
    relations = x.get('item_relations') or []
    return relations[0] if relations else {}


def get_item_relations(x):
    """Return the ``id`` of the first item relation, or ``None`` when absent."""
    return _first_item_relation(x).get('id')


def get_variation_id_relations(x):
    """Return the ``variation_id`` of the first item relation, or ``None`` when absent."""
    return _first_item_relation(x).get('variation_id')


def get_stock_relations(x):
    """Return the ``stock_relation`` of the first item relation, or ``None`` when absent."""
    return _first_item_relation(x).get('stock_relation')

def process_details(content_details, content_variations):
    """Flatten item details (and their variations) into one row per sellable unit.

    An item that carries a ``SELLER_SKU`` attribute is treated as having no
    variations and yields a single row built from its own fields. Any other
    item yields one row per entry of ``item['variations']``; stock and SKU
    data come from the record in ``content_variations`` with the same ``id``.
    When no such record exists, the variation entry embedded in the item is
    used instead, so fields it lacks come out as ``None``.

    Args:
        content_details: List of item-detail dicts.
        content_variations: List of variation dicts, matched by ``id``.

    Returns:
        pandas.DataFrame with one row per item or variation; an empty frame
        when no rows are produced.
    """
    # Index the variations once instead of scanning the whole list for every
    # variation. `setdefault` keeps the first record per id, which is what the
    # previous `[...][0]` lookup selected.
    variations_by_id = {}
    for variation in content_variations or []:
        variations_by_id.setdefault(variation.get('id'), variation)

    records = []
    for item in content_details or []:
        seller_sku = extract_seller_sku(item.get('attributes') or [])
        # An item with its own SELLER_SKU is treated as having no variations.
        has_variation = not seller_sku

        channels = item.get('channels') or []
        shipping = item.get('shipping') or {}
        # `pictures` may be missing, null or an empty list.
        pictures = item.get('pictures') or [{}]

        # Fields shared by every row generated from this item.
        general = {
            'item_id': item.get('id'),
            'item_name': item.get('title'),
            'seller_id': item.get('seller_id'),
            'category_id': item.get('category_id'),
            'official_store_id': item.get('official_store_id'),
            'price': item.get('price'),
            'base_price': item.get('base_price'),
            'original_price': item.get('original_price'),
            'initial_quantity': item.get('initial_quantity'),
            'status': item.get('status'),
            'listing_type': item.get('listing_type_id'),
            'url': item.get('permalink'),
            'free_shipping': shipping.get('free_shipping'),
            'logistic_type': shipping.get('logistic_type'),
            'catalog_id': item.get('catalog_product_id'),
            'picture_url': pictures[0].get('url'),
            'catalog_listing': item.get('catalog_listing', ''),
            'item_health': item.get('health', ''),
            'fg_marketplace': 'marketplace' in channels,
            'fg_mshops': 'mshops' in channels,
        }

        if not has_variation:
            records.append({
                **general,
                'inventory_id': item.get('inventory_id'),
                'currency_id': item.get('currency_id'),
                'stock': item.get('available_quantity'),
                'sold_quantity': item.get('sold_quantity'),
                'seller_sku': seller_sku,
                # NaN marks "no variation"; float('nan') avoids depending on
                # numpy being imported before this cell runs.
                'variation_id': float('nan'),
                'item_relations': get_item_relations(item),
                'stock_relations': get_stock_relations(item),
                'variation_id_relations': get_variation_id_relations(item),
            })
            continue

        for var in item.get('variations') or []:
            variation_id = var['id']
            # Fall back to the embedded variation entry when no detailed
            # record was downloaded for this id.
            variation = variations_by_id.get(variation_id, var)
            records.append({
                **general,
                'inventory_id': variation.get('inventory_id'),
                'currency_id': variation.get('currency_id'),
                'stock': variation.get('available_quantity'),
                'sold_quantity': variation.get('sold_quantity'),
                'seller_sku': extract_seller_sku(variation.get('attributes') or []),
                'variation_id': variation_id,
                'item_relations': get_item_relations(item),
                'stock_relations': get_stock_relations(item),
                'variation_id_relations': get_variation_id_relations(item),
            })

    # Build the frame once; growing it with pd.concat per row is quadratic.
    return pd.DataFrame(records)

In [None]:

# Scratch run of the item-details pipeline for the store described by the
# notebook-level `json` config dict. The BigQuery write steps at the bottom
# are commented out, so this cell only reads from storage and builds a frame.
data = json
store_name = data.get('store_name')
seller_id = data.get('seller_id')
print('** Connecting to storage and BigQuery... **')
# Initialize storage and BigQuery
storage = CloudStorage(credentials_path=settings.PATH_SERVICE_ACCOUNT)
bigquery = BigQueryManager(credentials_path=settings.PATH_SERVICE_ACCOUNT)
# Define paths and table names from the config
bucket_name = settings.BUCKET_STORES
table_management = settings.TABLE_MANAGEMENT
destiny_table = settings.TABLE_DETAILS
blob_details = settings.BLOB_ITEMS_DETAILS(store_name)
blob_variations = settings.BLOB_VARIATIONS(store_name)
# Define today's date (not read again in this cell)
today_str = datetime.today().strftime('%Y-%m-%d')
# Get dates to treat
list_dates_to_process = bigquery.get_list_dates_to_process(seller_id, table_management, destiny_table)
print(f'*** Starting to process dates: {len(list_dates_to_process)} dates to process  ***')
# Fallback value for when there are no dates; the loop re-creates it per date.
df_processed_data = pd.DataFrame()
for date in list_dates_to_process:
    # Transform date to string
    date_to_process = date.strftime('%Y-%m-%d')
    print(f'Processing date: {date_to_process}')
    # Get blob with the date
    blob_prefix_details = blob_details + f'date={date_to_process}/'
    blob_prefix_variations = blob_variations + f'date={date_to_process}/'
    # List all the files
    blobs_details = storage.list_blobs(bucket_name, blob_prefix_details)
    blobs_variations = storage.list_blobs(bucket_name, blob_prefix_variations)
    
    # Reset the per-date accumulators, so after the loop `df_processed_data`
    # holds only the last processed date.
    df_processed_data = pd.DataFrame()
    content_details=[]
    content_variations=[]
    # Getting details data
    for blob_det in blobs_details:
        # Each downloaded file is concatenated onto the details list
        print(f"Reading file: {blob_det.name}")
        content_details += storage.download_json(bucket_name, blob_det.name)
    # Getting variation data
    for blob_var in blobs_variations:
        print(f"Reading file: {blob_var.name}")
        content_variations += storage.download_json(bucket_name, blob_var.name)
    df_processed_data = process_details(content_details, content_variations)
    # Stamp every row with the processed date, the processing time and the seller
    df_processed_data['correspondent_date'] = pd.to_datetime(date_to_process)
    df_processed_data['process_time'] = datetime.now()
    # Overwrites the per-item `seller_id` column produced by process_details
    df_processed_data['seller_id'] = seller_id
    print(f'*** Finished treating all data. {df_processed_data.shape[0]} products ***')
    # Write steps below are disabled in this scratch cell.
    # print('** Deleting existing data **')
    # bigquery.delete_existing_data(destiny_table, seller_id, date_to_process)
    
    # print('** Correct dataframe schema **')
    # bigquery.match_dataframe_schema(df_processed_data, destiny_table)
    # print('** Inserting data into BQ**')
    # bigquery.insert_dataframe(df_processed_data, destiny_table)
    # print('** Updating log table **')
    # bigquery.update_logs_table(seller_id, date_to_process, destiny_table, table_management)



In [None]:
# Inspect the detail-blob listing left over from the last loop iteration of
# the details scratch cell (undefined if that loop never ran).
blobs_details

In [None]:
# Authenticate with the client credentials held in the notebook-level `json`
# config dict and keep the result as `access_token` for the request cells below.
access_token = authenticate(json['client_id'], json['client_secret'])

In [None]:
# NOTE(review): called with no arguments here, while the other call in this
# notebook passes a SQL string (`bigquery.run_query(query)`); confirm the
# method accepts an empty call or remove this cell.
bigquery.run_query()

In [None]:
import requests
# Fetch the user record for a hard-coded seller id; no Authorization header is sent.
# NOTE: rebinds the notebook-level `seller_id` (an int elsewhere) to a string.
seller_id = '189643563'
url = f"https://api.mercadolibre.com/users/{seller_id}"
response = requests.get(url)
response.json()

In [None]:
# Show whichever URL was most recently bound to `url`.
print(url)

In [None]:
# Fetch a single order by id using the bearer token in `access_token`.
order_id = '2000009607285924'

order_url = f'https://api.mercadolibre.com/orders/{order_id}'
# Built for the order-costs endpoint but not requested in this cell.
costs_url = f'https://api.mercadolibre.com/orders/{order_id}/costs'

headers = {
    'Authorization': f'Bearer {access_token}',
    'Content-Type': 'application/json'
}

response = requests.get(order_url, headers=headers)
response.json()

In [None]:
# List the items attached to one catalog product.
catalog_item_id = 'MLB28017126'

url = f"https://api.mercadolibre.com/products/{catalog_item_id}/items"

# Authorization headers
headers = {'Authorization': f'Bearer {access_token}'}

# Perform the GET request
response = requests.get(url, headers=headers)
response.json()

In [None]:
# Fetch the shipping information of one item and display the raw JSON.
item_id = 'MLB4966133390'

url = f"https://api.mercadolibre.com/items/{item_id}/shipping"

# Authorization headers
headers = {'Authorization': f'Bearer {access_token}'}

# Perform the GET request
response = requests.get(url, headers=headers)
response.json()

In [None]:
# Fetch the shipping information of a second item and keep the JSON for the
# process_shipping cell that follows.
item_id = 'MLB4978023790'

url = f"https://api.mercadolibre.com/items/{item_id}/shipping"

# Authorization headers
headers = {'Authorization': f'Bearer {access_token}'}

# Perform the GET request
response = requests.get(url, headers=headers)
# NOTE: rebinds the notebook-level `data` name to this response payload.
data = response.json()

In [None]:
def process_shipping(json_data):
    """Flatten an item's shipping payload into one dict per channel.

    Every returned dict carries the payload's ``item_id`` and ``default``
    values next to the channel's own shipping fields. Any exception raised
    while reading the payload is reported with ``print`` and an empty list is
    returned instead.
    """
    # (output key, key read from each channel dict), in output order.
    channel_fields = (
        ('channel_id', 'id'),
        ('mode', 'mode'),
        ('logistic_type', 'logistic_type'),
        ('local_pick_up', 'local_pick_up'),
        ('free_shipping', 'free_shipping'),
        ('store_pick_up', 'store_pick_up'),
    )
    try:
        shared_default = json_data.get('default')
        shared_item_id = json_data.get('item_id')
        rows = []
        for channel_info in json_data.get('channels', []):
            row = {'item_id': shared_item_id}
            for out_key, src_key in channel_fields:
                row[out_key] = channel_info.get(src_key)
            row['default'] = shared_default
            rows.append(row)
        return rows
    except Exception as e:
        print(f'Error processing json: {json_data}, error: {e}')
        return []
import pandas as pd
# Preview the flattened shipping rows for the payload currently bound to `data`.
pd.DataFrame(process_shipping(data))

In [None]:
# Scratch setup for the fulfilment flow: read the seller config, make sure an
# access token is available, connect to storage/BigQuery and fetch inventory ids.
data = json
client_id = data.get('client_id')
client_secret = data.get('client_secret')
store_name = data.get('store_name')
seller_id = data.get('seller_id')
access_token = data.get('access_token')
print('** Defining authentication... **')
# Authenticate only when the config carries no (truthy) access token
if not access_token:
    access_token = authenticate(client_id, client_secret)  # You can add this to a common module
print('** Connecting to storage and BigQuery... **')
# Initialize storage and BigQuery
storage = CloudStorage(credentials_path=settings.PATH_SERVICE_ACCOUNT)
bigquery = BigQueryManager(credentials_path=settings.PATH_SERVICE_ACCOUNT)
# Define paths and table names from the config
bucket_name = settings.BUCKET_STORES
table_management = settings.TABLE_MANAGEMENT
destiny_table = settings.TABLE_FULLFILMENT
# Define today's date
today_str = datetime.today().strftime('%Y-%m-%d')

# Fetch inventory ids from today's partition of the storage bucket
# NOTE(review): the prefix spells "catelog_details"; confirm it matches the
# actual bucket layout before relying on this listing.
blob_items_prefix = f'{store_name}/meli/api_response/catelog_details/date={today_str}/'
items_id = fetch_items_from_storage(
storage, 
bucket_name, 
blob_items_prefix, 
key_names='inventory_id'
)

In [None]:
import pandas as pd
import numpy as np

from datetime import datetime, timedelta
from src.common.cloud_storage_connector import CloudStorage
from src.common.bigquery_connector import BigQueryManager
from src.config import settings
import json


def insert_bq_competitors_prices(request):
    """Load stored competitor files into BigQuery, one pending date at a time.

    For every date returned by ``bigquery.get_list_dates_to_process`` the
    function downloads the JSON files under that date's blob prefix, flattens
    each entry with ``process_prices``, stamps the rows with the date, the
    processing time and the seller id, replaces that date's rows in the
    destination table and updates the log table.

    Rows are collected per date. Previously one DataFrame was shared by all
    dates, so rows from earlier dates were restamped and inserted again under
    every later date.

    Args:
        request: Object exposing ``get_json()`` that returns a dict with the
            keys ``store_name`` and ``seller_id``.

    Returns:
        The tuple ``('Success', 200)``.
    """
    data = request.get_json()
    store_name = data.get('store_name')
    seller_id = data.get('seller_id')

    print('** Connecting to storage and BigQuery... **')
    # Initialize storage and BigQuery
    storage = CloudStorage(credentials_path=settings.PATH_SERVICE_ACCOUNT)
    bigquery = BigQueryManager(credentials_path=settings.PATH_SERVICE_ACCOUNT)

    # Define paths and table names from the config
    bucket_name = settings.BUCKET_STORES
    table_management = settings.TABLE_MANAGEMENT
    destiny_table = settings.TABLE_CATALOG_COMPETITORS
    blob_base_path = settings.BLOB_COMPETITORS_CATALOG(store_name)

    # Get dates to treat
    list_dates_to_process = bigquery.get_list_dates_to_process(seller_id, table_management, destiny_table)

    print(f'*** Starting to process dates: {len(list_dates_to_process)} dates to process  ***')

    for date in list_dates_to_process:

        # Transform date to string
        date_to_process = date.strftime('%Y-%m-%d')
        print(f'Processing date: {date_to_process}')
        # List all the files stored under this date's prefix
        blob_prefix = blob_base_path + f'date={date_to_process}/'
        blobs = storage.list_blobs(bucket_name, blob_prefix)

        # Start from an empty record list for every date so rows never leak
        # from one date into the next.
        date_records = []
        for blob in blobs:
            print(f"Reading file: {blob.name}")
            content = storage.download_json(bucket_name, blob.name)

            for entry in content:
                processed_dict = process_prices(entry)
                # Only list results are kept; anything else is skipped.
                if isinstance(processed_dict, list):
                    date_records.extend(processed_dict)

        # Build the frame once per date instead of concatenating per entry.
        df_processed_data = pd.DataFrame(date_records)
        df_processed_data['correspondent_date'] = pd.to_datetime(date_to_process)
        df_processed_data['process_time'] = datetime.now()
        df_processed_data['seller_id'] = seller_id

        print(f'*** Finished treating all data. {df_processed_data.shape[0]} products ***')

        print('** Deleting existing data **')
        bigquery.delete_existing_data(destiny_table, seller_id, date_to_process)

        print('** Correct dataframe schema **')
        bigquery.match_dataframe_schema(df_processed_data, destiny_table)

        print('** Inserting data into BQ**')
        bigquery.insert_dataframe(df_processed_data, destiny_table)

        print('** Updating log table **')
        bigquery.update_logs_table(seller_id, date_to_process, destiny_table, table_management)

    return ('Success', 200)


def process_prices(json):
    """Pick one price per sales channel from an item's price payload.

    Only prices restricted to exactly one channel are considered. The first
    price seen for a channel is kept unless a later price for the same
    channel has type ``'promotion'``, which replaces it.

    Args:
        json: Dict with an ``id`` and a ``prices`` list; every price needs
            ``conditions.context_restrictions``.

    Returns:
        A list with one dict per channel (``item_id``, ``price_id``,
        ``regular_amount``, ``price``, ``channel``, ``last_updated``), or
        ``None`` when the payload cannot be processed.
    """
    try:
        # Temporary mapping used to prioritise prices per channel.
        price_by_channel = {}
        for price in json['prices']:
            restrictions = price['conditions']['context_restrictions']
            if len(restrictions) != 1:
                continue
            channel = restrictions[0]

            # Store the price if the channel has none yet, or override it
            # when the current price is a promotion.
            if channel not in price_by_channel or price.get('type') == 'promotion':
                price_by_channel[channel] = {
                    'item_id': json.get('id'),
                    'price_id': price.get('id'),
                    'regular_amount': price.get('regular_amount'),
                    'price': price.get('amount'),
                    'channel': channel,
                    'last_updated': price.get('last_updated')
                }
        # Convert the stored values to a list.
        return list(price_by_channel.values())

    except Exception as e:
        # Catch Exception only (not KeyboardInterrupt/SystemExit) and report
        # the cause; callers detect failure through the None return value.
        print(f'Error processing json: {json}. Error: {e}')
        return None
        

In [None]:
import os

# Seller configuration for the "hubsmarthome" store, read by later cells
# through `data = json`.
# Credentials come from environment variables instead of being committed to
# the notebook; an unset variable yields None, and a missing access token
# makes the cells that need one call `authenticate`.
# NOTE(review): the name `json` shadows the standard-library module; it is
# kept because other cells read this dict under that name.
json = {
  "access_token": os.environ.get("HUBSMARTHOME_ACCESS_TOKEN"),
  "client_id": os.environ.get("HUBSMARTHOME_CLIENT_ID"),
  "client_secret": os.environ.get("HUBSMARTHOME_CLIENT_SECRET"),
  "seller_id": 189643563,
  "store_name": "hubsmarthome"
}

In [None]:
# Scratch run of the competitors-catalog pipeline: read every pending date
# from storage and build one DataFrame, without writing to BigQuery.
# NOTE: relies on `process_competitors_catalog`, which is defined in a later
# cell of this notebook; run that cell first on a fresh kernel.
data = json
store_name = data.get('store_name')
seller_id = data.get('seller_id')
print('** Connecting to storage and BigQuery... **')
# Initialize storage and BigQuery
storage = CloudStorage(credentials_path=settings.PATH_SERVICE_ACCOUNT)
bigquery = BigQueryManager(credentials_path=settings.PATH_SERVICE_ACCOUNT)
# Define paths and table names from the config
bucket_name = settings.BUCKET_STORES
table_management = settings.TABLE_MANAGEMENT
destiny_table = settings.TABLE_CATALOG_COMPETITORS
blob_shipping_cost = settings.BLOB_COMPETITORS_CATALOG(store_name)
# Define today's date
today_str = datetime.today().strftime('%Y-%m-%d')
# Get dates to treat
list_dates_to_process = bigquery.get_list_dates_to_process(seller_id, table_management, destiny_table)
print(f'*** Starting to process dates: {len(list_dates_to_process)} dates to process  ***')
# One stamped frame per date. Stamping a single shared frame would relabel
# the rows of every earlier date with the last date processed.
frames_by_date = []
for date in list_dates_to_process:
    # Transform date to string
    date_to_process = date.strftime('%Y-%m-%d')
    print(f'Processing date: {date_to_process}')
    # Get blob with the date
    blob_prefix = blob_shipping_cost + f'date={date_to_process}/'
    # List all the files
    blobs = storage.list_blobs(bucket_name, blob_prefix)
    # Processing each blob
    date_records = []
    for blob in blobs:
        print(f"Reading file: {blob.name}")
        content = storage.download_json(bucket_name, blob.name)
        # The loop variable is deliberately not called `json`: at notebook
        # level that would overwrite the config dict read at the top.
        for catalog_json in content:
            processed_dict = process_competitors_catalog(catalog_json)
            # None marks an entry that could not be processed.
            if isinstance(processed_dict, list):
                date_records.extend(processed_dict)
    df_date = pd.DataFrame(date_records)
    df_date['correspondent_date'] = pd.to_datetime(date_to_process)
    df_date['process_time'] = datetime.now()
    df_date['seller_id'] = seller_id
    frames_by_date.append(df_date)

# pd.concat rejects an empty list, so fall back to an empty frame.
df_processed_data = pd.concat(frames_by_date, ignore_index=True) if frames_by_date else pd.DataFrame()

In [None]:
# Display the frame built by the competitors scratch cell.
df_processed_data

In [None]:
# `item_id` of the first record in the most recently downloaded file.
content[0]['item_id']

In [None]:
# NOTE: rebinds `data` (the seller config dict in other cells) to the
# `results` list of the first downloaded record, then shows its first entry.
data = content[0]['results']
data[0]

In [None]:
# `category_id` of the first result inside the first downloaded record.
content[0]['results'][0].get('category_id')

In [None]:
# Preview the flattened rows for the first downloaded record.
# NOTE: `process_competitors_catalog` is defined in the cell after this one.
list_proc = process_competitors_catalog(content[0])
pd.DataFrame(list_proc)

In [None]:
def process_competitors_catalog(json):
    """Flatten one catalog-competitors record into a list of row dicts.

    Args:
        json: Dict with the catalog product id under ``item_id`` and the
            competing listings under ``results``.

    Returns:
        A list with one dict per entry of ``json['results']``, or ``None``
        when the record cannot be processed (the error is printed).
    """
    try:
        # Read inside the try block so a record without `item_id` is reported
        # and skipped like any other malformed record instead of raising.
        catalog_id = json['item_id']
        return [
            {
                'catalog_product_id': catalog_id,
                'item_id': item.get('item_id'),
                'competitors_type': 'catalog',
                'category_id': item.get('category_id'),
                'official_store_id': item.get('official_store_id'),
                'competitor_seller_id': item.get('seller_id'),
                'listing_type_id': item.get('listing_type_id'),
                'condition': item.get('condition'),
            }
            for item in json['results']
        ]

    except Exception as e:
        print(f'Error processing json: {json}. Error: {str(e)}')
        return None  # Callers keep only list results


In [None]:
# Improve costs
import os

# Seller configuration for the "hubsmarthome" store, bound to both `data` and
# `json` as before.
# Credentials come from environment variables instead of being committed to
# the notebook; an unset variable yields None, and a missing access token
# makes the next cell call `authenticate`.
data = json = {
  "access_token": os.environ.get("HUBSMARTHOME_ACCESS_TOKEN"),
  "client_id": os.environ.get("HUBSMARTHOME_CLIENT_ID"),
  "client_secret": os.environ.get("HUBSMARTHOME_CLIENT_SECRET"),
  "seller_id": 189643563,
  "store_name": "hubsmarthome"
}

In [None]:
from src.common.cloud_storage_connector import CloudStorage
from src.common.bigquery_connector import BigQueryManager
from src.common.utils import batch_process, log_process, authenticate, fetch_items_from_storage
from src.config import settings
import json
import asyncio
import aiohttp
from datetime import datetime
import requests

semaphore = asyncio.Semaphore(100)  # Control the number of simultaneous requests


# Parsing request data
# data = request.get_json()
client_id = data.get('client_id')
client_secret = data.get('client_secret')
store_name = data.get('store_name')
seller_id = data.get('seller_id')
access_token = data.get('access_token')
print('** Defining authentication... **')
# Authenticate (assuming this is now centralized in utils.py or a similar file)
if not access_token:
    access_token = authenticate(client_id, client_secret)  # You can add this to a common module
print('** Connecting to storage and BigQuery... **')
# Initialize storage and BigQuery
storage = CloudStorage(credentials_path=settings.PATH_SERVICE_ACCOUNT)
bigquery = BigQueryManager(credentials_path=settings.PATH_SERVICE_ACCOUNT)
# Define paths and table names from the config
bucket_name = settings.BUCKET_STORES
table_management = settings.TABLE_MANAGEMENT
destiny_table = settings.TABLE_COSTS
# Define today's date
today_str = datetime.today().strftime('%Y-%m-%d')
# Getting params to see costs
query = f'''
    with items_details as (
    select distinct
        item_id,
        listing_type,
        category_id
    from datalake-v2-424516.datalake_v2.items_details
    where
        1=1
        and date(correspondent_date) = current_date()
        and seller_id = {seller_id}
    )
    select 
    p.item_id as id,
    d.listing_type as listing_type_id,
    d.category_id,
    p.price,
    p.channel
    from datalake-v2-424516.datalake_v2.items_prices p
    inner join items_details d
    on p.item_id = d.item_id
    where 
        1=1
        and date(p.correspondent_date) = current_date()
        and channel is not null
'''
# blob_items_prefix = f'{store_name}/meli/api_response/item_detail/date={today_str}/'
# items_id = fetch_items_from_storage(
# storage, 
# bucket_name, 
# blob_items_prefix, 
# key_names=['id','price', 'category_id', 'listing_type_id']
# )

df_params = bigquery.run_query(query)
# items = df_params[['id','channel']].to_dict(orient='records')
df_params['channel'] = df_params['channel'].apply(lambda x : x.replace('channel_', '')).drop(columns = 'channel')
items_id = df_params.to_dict(orient='records')

print(f'** Items found: {len(items_id)}**')
print(f'** Cleaning blob **')
# Path for saving 
blob_basic_path = settings.BLOB_COSTS(store_name)
date_blob_path = f'{blob_basic_path}date={today_str}/'
# Clean existing files in the storage bucket
storage.clean_blobs(bucket_name, date_blob_path)
print(f'** Starting API requests for {len(items_id)} items**')
# URL function for API
url = settings.URL_COST
headers = {'Authorization': f'Bearer {access_token}'}

# Batch processing the API requests
async with aiohttp.ClientSession() as session:
    await batch_process(session, items_id, url, headers, 
                        bucket_name, date_blob_path, storage, 
                        params = items_id, add_item_id = True)
    
log_process(seller_id, destiny_table, today_str, table_management, processed_to_bq=False)



In [None]:
# Same call as the last line of the costs cell, kept in its own cell so it can
# be run on its own.
log_process(seller_id, destiny_table, today_str, table_management, processed_to_bq=False)


# Visitas

In [None]:
from src.common.cloud_storage_connector import CloudStorage
from src.common.bigquery_connector import BigQueryManager
from src.common.utils import batch_process, log_process, authenticate, fetch_items_from_storage
from src.config import settings
import json
import asyncio
import aiohttp
from datetime import datetime

semaphore = asyncio.Semaphore(100)  # Control the number of simultaneous requests

async def main_async(request):
    """Prepare a visits extraction run for the store described in the request.

    Reads the seller config from ``request.get_json()``, authenticates when no
    access token is supplied, connects to storage and BigQuery, fetches the
    item ids stored under today's ``items`` prefix and cleans today's visits
    prefix in the bucket.

    NOTE(review): the function stops after preparing ``url``. It issues no API
    requests, contains no ``await``, and ``bigquery``, ``table_management``,
    ``destiny_table`` and ``url`` are never used, yet it has already cleaned
    the visits prefix by the time it returns.

    Args:
        request: Object exposing ``get_json()`` that returns a dict with
            ``client_id``, ``client_secret``, ``store_name``, ``seller_id``
            and optionally ``access_token``.

    Returns:
        None.
    """
    # Parsing request data
    data = request.get_json()
    client_id = data.get('client_id')
    client_secret = data.get('client_secret')
    store_name = data.get('store_name')
    seller_id = data.get('seller_id')
    access_token = data.get('access_token')

    print('** Defining authentication... **')
    # Authenticate only when the request carries no (truthy) access token
    if not access_token:
        access_token = authenticate(client_id, client_secret)  # You can add this to a common module

    print('** Connecting to storage and BigQuery... **')
    # Initialize storage and BigQuery
    storage = CloudStorage(credentials_path=settings.PATH_SERVICE_ACCOUNT)
    bigquery = BigQueryManager(credentials_path=settings.PATH_SERVICE_ACCOUNT)

    # Define paths and table names from the config
    bucket_name = settings.BUCKET_STORES
    table_management = settings.TABLE_MANAGEMENT
    destiny_table = settings.TABLE_VISITS

    # Define today's date
    today_str = datetime.today().strftime('%Y-%m-%d')
    
    # Fetch item IDs from the storage bucket
    blob_items_prefix = f'{store_name}/meli/api_response/items/date={today_str}/'
    items_id = fetch_items_from_storage(
    storage, 
    bucket_name, 
    blob_items_prefix, 
    key_names='results'
    )

    print(f'** Items found: {len(items_id)}**')

    print(f'** Cleaning blob **')
    # Path for saving 
    blob_basic_path = settings.BLOB_VISITS(store_name)
    date_blob_path = f'{blob_basic_path}date={today_str}/'

    # Clean existing files in the storage bucket
    storage.clean_blobs(bucket_name, date_blob_path)

    print(f'** Starting API requests for {len(items_id)} items**')
    # URL function for API (assigned but not used before returning)
    url = settings.URL_ITEM_DETAIL

    return 

In [None]:
import os

# Seller configuration for the "hubsmarthome" store, read by later cells
# through `data = args`.
# Credentials come from environment variables instead of being committed to
# the notebook; an unset variable yields None, and a missing access token
# makes the next cell call `authenticate`.
args = {
  "access_token": os.environ.get("HUBSMARTHOME_ACCESS_TOKEN"),
  "client_id": os.environ.get("HUBSMARTHOME_CLIENT_ID"),
  "client_secret": os.environ.get("HUBSMARTHOME_CLIENT_SECRET"),
  "seller_id": 189643563,
  "store_name": "hubsmarthome"
}

In [None]:
# Scratch setup for the visits flow, mirroring the body of `main_async` at
# notebook level so the intermediate values stay available to later cells.
data = args
client_id = data.get('client_id')
client_secret = data.get('client_secret')
store_name = data.get('store_name')
seller_id = data.get('seller_id')
access_token = data.get('access_token')
print('** Defining authentication... **')
# Authenticate only when the config carries no (truthy) access token
if not access_token:
    access_token = authenticate(client_id, client_secret)  # You can add this to a common module
print('** Connecting to storage and BigQuery... **')
# Initialize storage and BigQuery
storage = CloudStorage(credentials_path=settings.PATH_SERVICE_ACCOUNT)
bigquery = BigQueryManager(credentials_path=settings.PATH_SERVICE_ACCOUNT)
# Define paths and table names from the config
bucket_name = settings.BUCKET_STORES
table_management = settings.TABLE_MANAGEMENT
destiny_table = settings.TABLE_VISITS
# Define today's date
today_str = datetime.today().strftime('%Y-%m-%d')

# Fetch item IDs from the storage bucket
blob_items_prefix = f'{store_name}/meli/api_response/items/date={today_str}/'
items_id = fetch_items_from_storage(
storage, 
bucket_name, 
blob_items_prefix, 
key_names='results'
)
print(f'** Items found: {len(items_id)}**')
print(f'** Cleaning blob **')
# Path for saving 
blob_basic_path = settings.BLOB_VISITS(store_name)
date_blob_path = f'{blob_basic_path}date={today_str}/'
# Clean existing files in the storage bucket.
# NOTE: this cell modifies storage every time it runs.
storage.clean_blobs(bucket_name, date_blob_path)
print(f'** Starting API requests for {len(items_id)} items**')
# URL function for API
url = settings.URL_ITEM_DETAIL

In [None]:
def visits_to_dataframe(json_visit):
    """Turn one item's visits payload into a DataFrame with one row per result.

    The payload must provide ``item_id`` and ``results``; every result needs
    ``date``, ``total`` and ``visits_detail`` (a list of dicts with a
    ``company`` key). Company names of a result are joined with ``", "``.
    The returned columns are ``item_id``, ``date``, ``total_visits`` and
    ``companies``.
    """
    owner_id = json_visit['item_id']

    # One (date, total, companies) tuple per result, read in payload order.
    rows = [
        (
            entry['date'],
            entry['total'],
            ", ".join(detail['company'] for detail in entry['visits_detail']),
        )
        for entry in json_visit['results']
    ]

    # Column-wise construction: the scalar item id is broadcast to every row.
    return pd.DataFrame({
        'item_id': owner_id,
        'date': [row[0] for row in rows],
        'total_visits': [row[1] for row in rows],
        'companies': [row[2] for row in rows],
    })


In [None]:
# Request the last day of visits for a single hard-coded item.
# NOTE: rebinds `url` from a settings value to a one-argument URL builder.
url = lambda item_id : f'https://api.mercadolibre.com/items/{item_id}/visits/time_window?last=1&unit=day'
headers = {'Authorization': f'Bearer {access_token}'}

item = 'MLB3326162963'
response = requests.get(url(item), headers=headers)
response.json()

In [None]:
# Check whether the store's visits base path exists in the bucket.
# NOTE(review): the name suggests "first run", but the value stored is the
# result of `blob_exists`; confirm which polarity callers expect.
blob_basic_path = settings.BLOB_VISITS(store_name)
bool_first_time = storage.blob_exists(bucket_name, blob_basic_path)
bool_first_time

In [None]:
import time  # needed for the throttling pause below; not imported elsewhere in the notebook

from tqdm import tqdm

# Backfill: request a 150-day visits window (ending 2024-09-07) for every item
# in `items_id` and collect the results in `df_visitas`.

# access_token = authenticate(client_id, client_secret)

url = lambda item_id : f'https://api.mercadolibre.com/items/{item_id}/visits/time_window?last=150&unit=day&ending=2024-09-07'
headers = {'Authorization': f'Bearer {access_token}'}

visit_frames = []

# Wrap the iterable itself (not the enumerate object) so tqdm can read its
# length; counting from 1 makes the pause fire after every 50th request
# instead of right after the first one.
for i, item in enumerate(tqdm(items_id), start=1):

    response = requests.get(url(item), headers=headers)
    print(response.status_code)
    daily_visits = response.json()

    df_ = visits_to_dataframe(daily_visits)
    visit_frames.append(df_)

    if i % 50 == 0:
        print('Pause')
        time.sleep(15)

# Concatenate once at the end; pd.concat rejects an empty list, so fall back
# to an empty frame when there were no items.
df_visitas = pd.concat(visit_frames, ignore_index=True) if visit_frames else pd.DataFrame()


In [None]:
import pandas as pd
import numpy as np

from datetime import datetime, timedelta
from src.common.cloud_storage_connector import CloudStorage
from src.common.bigquery_connector import BigQueryManager
from src.config import settings
import json


def insert_bq_visits(request):
    """Load stored visits files into BigQuery, one pending date at a time.

    For every date returned by ``bigquery.get_list_dates_to_process`` the
    function downloads the JSON files under that date's visits prefix,
    flattens each entry, stamps the rows with the date, the processing time
    and the seller id, replaces that date's rows in the destination table and
    updates the log table.

    Rows are collected per date. Previously one DataFrame was shared by all
    dates, so rows from earlier dates were restamped and inserted again under
    every later date.

    NOTE(review): entries are flattened with ``process_shipping``, which reads
    ``channels``/``default`` keys. Confirm the visits files really have that
    shape, or switch to a visits-specific processor.

    Args:
        request: Object exposing ``get_json()`` that returns a dict with the
            keys ``store_name`` and ``seller_id``.

    Returns:
        The tuple ``('Success', 200)``.
    """
    data = request.get_json()
    store_name = data.get('store_name')
    seller_id = data.get('seller_id')

    print('** Connecting to storage and BigQuery... **')
    # Initialize storage and BigQuery
    storage = CloudStorage(credentials_path=settings.PATH_SERVICE_ACCOUNT)
    bigquery = BigQueryManager(credentials_path=settings.PATH_SERVICE_ACCOUNT)

    # Define paths and table names from the config
    bucket_name = settings.BUCKET_STORES
    table_management = settings.TABLE_MANAGEMENT
    destiny_table = settings.TABLE_VISITS
    blob_base_path = settings.BLOB_VISITS(store_name)

    # Get dates to treat
    list_dates_to_process = bigquery.get_list_dates_to_process(seller_id, table_management, destiny_table)

    print(f'*** Starting to process dates: {len(list_dates_to_process)} dates to process  ***')

    for date in list_dates_to_process:

        # Transform date to string
        date_to_process = date.strftime('%Y-%m-%d')
        print(f'Processing date: {date_to_process}')
        # List all the files stored under this date's prefix
        blob_prefix = blob_base_path + f'date={date_to_process}/'
        blobs = storage.list_blobs(bucket_name, blob_prefix)

        # Start from an empty record list for every date so rows never leak
        # from one date into the next.
        date_records = []
        for blob in blobs:
            print(f"Reading file: {blob.name}")
            content = storage.download_json(bucket_name, blob.name)

            for entry in content:
                processed_dict = process_shipping(entry)
                # Only list results are kept; anything else is skipped.
                if isinstance(processed_dict, list):
                    date_records.extend(processed_dict)

        # Build the frame once per date instead of concatenating per entry.
        df_processed_data = pd.DataFrame(date_records)
        df_processed_data['correspondent_date'] = pd.to_datetime(date_to_process)
        df_processed_data['process_time'] = datetime.now()
        df_processed_data['seller_id'] = seller_id

        print(f'*** Finished treating all data. {df_processed_data.shape[0]} products ***')

        print('** Deleting existing data **')
        bigquery.delete_existing_data(destiny_table, seller_id, date_to_process)

        print('** Correct dataframe schema **')
        bigquery.match_dataframe_schema(df_processed_data, destiny_table)

        print('** Inserting data into BQ**')
        bigquery.insert_dataframe(df_processed_data, destiny_table)

        print('** Updating log table **')
        bigquery.update_logs_table(seller_id, date_to_process, destiny_table, table_management)

    return ('Success', 200)

def process_shipping(json_data):
    """Flatten one shipping payload into one row per channel.

    Args:
        json_data: dict-like payload read with ``.get``; the keys used are
            ``item_id``, ``default`` and ``channels`` (a list of dict-like
            channel entries).

    Returns:
        A list of dicts, one per entry in ``channels``. Each row repeats the
        item id and the payload-level ``default`` value. Returns ``[]`` when
        there are no channels or when anything goes wrong while reading the
        payload (the error is printed, not raised).
    """
    try:
        item_id = json_data.get('item_id')
        default_value = json_data.get('default')
        return [
            {
                'item_id': item_id,
                'channel_id': channel.get('id'),
                'mode': channel.get('mode'),
                'logistic_type': channel.get('logistic_type'),
                'local_pick_up': channel.get('local_pick_up'),
                'free_shipping': channel.get('free_shipping'),
                'store_pick_up': channel.get('store_pick_up'),
                'default_shipping': default_value
            }
            for channel in json_data.get('channels', [])
        ]
    except Exception as e:
        # Best effort: a malformed payload yields no rows instead of aborting the caller.
        print(f'Error processing json: {json_data}, error: {e}')
        return []

        
                        



In [None]:
json

In [None]:
# Scratch cell: runs the visits loading steps inline so the intermediate
# variables can be inspected in the following cells.
# NOTE(review): `args` is not defined in this cell and `process_visits` is defined
# in a later cell, so this cell depends on kernel state (it will not survive
# Restart & Run All as ordered).
data=args
store_name = data.get('store_name')
seller_id = data.get('seller_id')
print('** Connecting to storage and BigQuery... **')
# Initialize storage and BigQuery
storage = CloudStorage(credentials_path=settings.PATH_SERVICE_ACCOUNT)
bigquery = BigQueryManager(credentials_path=settings.PATH_SERVICE_ACCOUNT)
# Define paths and table names from the config
bucket_name = settings.BUCKET_STORES
table_management = settings.TABLE_MANAGEMENT
destiny_table = settings.TABLE_VISITS
# Despite the variable name, this holds the visits blob prefix (settings.BLOB_VISITS).
blob_shipping_cost = settings.BLOB_VISITS(store_name)
# Define today's date
today_str = datetime.today().strftime('%Y-%m-%d')
# Get dates to treat
list_dates_to_process = bigquery.get_list_dates_to_process(seller_id, table_management, destiny_table)
print(f'*** Starting to process dates: {len(list_dates_to_process)} dates to process  ***')
# NOTE(review): created once, outside the date loop, so rows from every date
# accumulate in the same frame.
df_processed_data = pd.DataFrame()
for date in list_dates_to_process:
    # Transform date to string
    date_to_process = date.strftime('%Y-%m-%d')
    print(f'Processing date: {date_to_process}')
    # Get blob with the date
    blob_prefix = blob_shipping_cost + f'date={date_to_process}/'
    # List all the files
    blobs = storage.list_blobs(bucket_name, blob_prefix)
    # Processing each blob
    for blob in blobs:
        print(f"Reading file: {blob.name}")
        content = storage.download_json(bucket_name, blob.name)
        # NOTE(review): the loop variable rebinds the module-level name `json`.
        for json in content:
            processed_dict = process_visits(json)
            # Only list results are appended; anything else is skipped.
            if isinstance(processed_dict, list):
                df_processed_data = pd.concat([df_processed_data, pd.DataFrame(processed_dict)], ignore_index = True)
            else:
                continue

    # These assignments overwrite the columns for ALL accumulated rows, so after
    # the loop every row carries the values of the last processed date.
    df_processed_data['correspondent_date'] = pd.to_datetime(date_to_process)
    df_processed_data['process_time'] = datetime.now()
    df_processed_data['seller_id'] = seller_id

In [None]:
df_processed_data

In [None]:
data = content[0]
data

In [None]:
def process_visits(json_data):
    """Flatten one visits payload into one row per entry of ``results``.

    Args:
        json_data: dict-like payload read with ``.get``; the keys used are
            ``item_id`` and ``results`` (a list of dict-like entries with
            ``total`` and ``date``).

    Returns:
        A list of ``{"item_id", "num_visits", "date"}`` dicts. Returns ``[]``
        when ``results`` is absent/empty or when reading the payload fails
        (the error is printed, not raised).
    """
    try:
        item_id = json_data.get("item_id")
        return [
            {
                "item_id": item_id,
                "num_visits": daily.get('total'),
                "date": daily.get('date')
            }
            for daily in json_data.get('results',[])
        ]
    except Exception as e:
        # Best effort: a malformed payload yields no rows instead of aborting the caller.
        print(f'Error processing json: {json_data}, error: {e}')
        return []

In [None]:
from src.common.firestore_connector import FirestoreManager
from src.config import settings
# Connects to Firestore in the hardcoded project 'datalake-meli-dev' using the
# service-account path from settings.
firestore = FirestoreManager(credentials_path=settings.PATH_SERVICE_ACCOUNT, project_id='datalake-meli-dev')

# NOTE(review): presumably clears the 'query_cache' collection — confirm against
# FirestoreManager.clean_cache before re-running, as this looks destructive.
firestore.clean_cache('query_cache')


In [None]:
from datetime import datetime
import json
import logging
import requests
from src.common.bigquery_connector import BigQueryManager
from src.config import settings
import pandas as pd


def main_fetch_sellers_information():
    """Fetch and store details for competitor sellers not yet in the details table.

    Steps:
        1. Query BigQuery for competitor seller ids that appear in the catalog or
           details item tables but have no row in ``sellers_competitors_details``.
        2. Fetch each seller through ``fetch_seller_details``.
        3. Build a DataFrame with ``product_to_save``, align it with the target
           table schema and insert it into ``settings.TABLE_SELLER_INFORMATION``.

    A seller whose request fails is logged and skipped, so a single bad id does
    not discard the work done for the rest of the batch; it stays absent from
    the details table and is therefore picked up again by the next run.

    Returns:
        None.
    """

    bigquery = BigQueryManager(credentials_path=settings.PATH_SERVICE_ACCOUNT)
    table_id = settings.TABLE_SELLER_INFORMATION

    # Getting list of sellers to update
    query = """
    WITH sellers_ids AS (
        SELECT DISTINCT competitor_seller_id
        FROM `datalake-v2-424516.datalake_v2.items_competitors_catalog`

        UNION ALL

        SELECT DISTINCT competitor_seller_id
        FROM `datalake-v2-424516.datalake_v2.items_competitors_details`
    )

    SELECT DISTINCT si.competitor_seller_id
    FROM sellers_ids si 
    LEFT JOIN `datalake-v2-424516.datalake_v2.sellers_competitors_details` sc
    ON CAST(sc.competitor_seller_id AS INT64) = si.competitor_seller_id
    WHERE sc.competitor_seller_id IS NULL
    """

    sellers_df = bigquery.run_query(query)
    sellers_list = sellers_df['competitor_seller_id'].to_list()

    if len(sellers_list) == 0:
        print('Zero novos sellers para processar')
        return

    seller_details_list = []
    for seller_id in sellers_list:
        try:
            seller_details_list.append(fetch_seller_details(seller_id))
        except (requests.RequestException, ValueError) as err:
            # HTTP/network failure or an undecodable body: skip this seller only.
            print(f'Failed to fetch seller {seller_id}: {err}')

    # Creates a dataframe with all the information
    print('Creating dataframe')
    df_to_save = product_to_save(seller_details_list)

    print(f'{df_to_save.shape[0]} sellers encontrados')

    if df_to_save.empty:
        # Every fetch failed or returned nothing: there is nothing to insert.
        return

    # Saving dataframe
    print('Match schema dataframe')
    df_to_save = bigquery.match_dataframe_schema(df_to_save, table_id)

    print('Inserting dataframe')
    bigquery.insert_dataframe(df_to_save, table_id)



def fetch_seller_details(seller_id, timeout=30):
    """Fetch a user's record from the Mercado Libre ``/users/{id}`` endpoint.

    Args:
        seller_id: Identifier interpolated into the request URL.
        timeout: Seconds to wait for the API before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    url = f"https://api.mercadolibre.com/users/{seller_id}"

    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    return response.json()
    

def product_to_save(product_details_list):
    """Build the competitor-seller DataFrame from raw seller payloads.

    Args:
        product_details_list: Iterable of dict-like seller payloads. Falsy
            entries (``None``, ``{}``) are skipped.

    Returns:
        A ``pandas.DataFrame`` with one row per non-empty payload and the
        columns ``process_time``, ``competitor_seller_id``,
        ``competitor_seller_nickname``, ``competitor_seller_level_id``,
        ``competitor_power_seller_status``, ``competitor_transactions_period``,
        ``competitor_transactions_total``, ``competitor_site_status`` and
        ``competitor_permalink``. An empty input gives an empty DataFrame.
    """
    competitor_seller_list = []
    # One timestamp for the whole batch so every row shares the same process_time.
    process_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    for product_data in product_details_list:
        if not product_data:
            continue

        # `.get(key, {})` only covers a missing key; an explicit null in the
        # payload would come back as None and break the chained `.get` below,
        # so fall back to an empty dict in both cases.
        seller_reputation = product_data.get("seller_reputation") or {}
        transactions = seller_reputation.get("transactions") or {}
        site_status = product_data.get("status") or {}

        competitor_seller_list.append({
            'process_time': process_time,
            "competitor_seller_id": product_data.get("id"),
            "competitor_seller_nickname": product_data.get("nickname"),
            "competitor_seller_level_id": seller_reputation.get("level_id", ""),
            "competitor_power_seller_status": seller_reputation.get("power_seller_status", ""),
            "competitor_transactions_period": transactions.get("period", ""),
            "competitor_transactions_total": transactions.get("total", 0),
            "competitor_site_status": site_status.get("site_status", ""),
            "competitor_permalink": product_data.get("permalink")
        })

    return pd.DataFrame(competitor_seller_list)


In [None]:
# Scratch cell: inline copy of the body of main_fetch_sellers_information,
# limited to the first five sellers (see the `[:5]` slice below).
# NOTE(review): unlike a dry run, this still inserts into
# settings.TABLE_SELLER_INFORMATION at the end.
bigquery = BigQueryManager(credentials_path=settings.PATH_SERVICE_ACCOUNT)
table_id = settings.TABLE_SELLER_INFORMATION
# Getting list of sellers to update
query = """
WITH sellers_ids AS (
    SELECT DISTINCT competitor_seller_id
    FROM `datalake-v2-424516.datalake_v2.items_competitors_catalog`
    UNION ALL
    SELECT DISTINCT competitor_seller_id
    FROM `datalake-v2-424516.datalake_v2.items_competitors_details`
)
SELECT DISTINCT si.competitor_seller_id
FROM sellers_ids si 
LEFT JOIN `datalake-v2-424516.datalake_v2.sellers_competitors_details` sc
ON CAST(sc.competitor_seller_id AS INT64) = si.competitor_seller_id
WHERE sc.competitor_seller_id IS NULL
"""
sellers_df = bigquery.run_query(query)
sellers_list = sellers_df['competitor_seller_id'].to_list()
if len(sellers_list) == 0:
    print('Zero novos sellers para processar')

else:
    seller_details_list = []
    # Only the first five sellers are fetched in this scratch version.
    for seller_id in sellers_list[:5]:
        details = fetch_seller_details(seller_id)
        seller_details_list.append(details)
    # Creates a dataframe with all the information
    print('Creating dataframe')
    df_to_save = product_to_save(seller_details_list)
    print(f'{df_to_save.shape[0]} sellers encontrados')
    # Saving dataframe
    print('Match schema dataframe')
    df_to_save = bigquery.match_dataframe_schema(df_to_save, table_id)
    print('Inserting dataframe')
    bigquery.insert_dataframe(df_to_save, table_id)

In [None]:

def get_seller_id_and_store_name(client_id, client_secret, access_token, timeout=30):
    """Resolve the store name and seller id behind a Mercado Libre credential.

    Args:
        client_id: Application id, used only when a token must be requested.
        client_secret: Application secret, used only when a token must be requested.
        access_token: Bearer token for ``/users/me``. When falsy, a token is
            requested first with the ``client_credentials`` grant.
        timeout: Seconds to wait for each HTTP call.

    Returns:
        A ``(store_name, seller_id)`` tuple — note the order is the reverse of
        what the function name suggests. ``store_name`` is the user's nickname
        up to the first ``.`` (``'N/A'`` when the nickname is missing).

    Raises:
        requests.HTTPError: If either endpoint answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    if not access_token:
        print("Getting access_token")
        token_url = 'https://api.mercadolibre.com/oauth/token'

        token_data = {
            'grant_type': 'client_credentials',
            'client_id': client_id,
            'client_secret': client_secret
        }

        response = requests.post(token_url, data=token_data, timeout=timeout)
        # Fail with the HTTP error instead of a KeyError on the missing token below.
        response.raise_for_status()
        access_token = response.json()['access_token']

    # Step 2: Retrieve User Information
    user_info_url = 'https://api.mercadolibre.com/users/me'
    headers = {
        'Authorization': f'Bearer {access_token}'
    }

    user_response = requests.get(user_info_url, headers=headers, timeout=timeout)
    # An expired/invalid token would otherwise surface as a KeyError on 'id'.
    user_response.raise_for_status()
    user_info = user_response.json()

    # Extract seller ID and store name
    seller_id = user_info['id']
    store_name = user_info.get('nickname', 'N/A').split('.')[0]  # Using 'nickname' as store name

    return store_name, seller_id

In [None]:
# NOTE(review): the application secret below is hardcoded in the notebook; it
# should be loaded from the environment or a secret manager and rotated.
# NOTE(review): `access_token` is only assigned in the commented-out line, so the
# call at the bottom relies on a value left in the kernel by another cell.
# access_token = 'TG-673604f2cda3960001605660-1904654004'
client_id = '2951712600123976'
client_secret = 'QprAIl8ydXzcxFVHjnIHT6fUQ8KpzADV'

# Returns (store_name, seller_id); the tuple is displayed as the cell output.
get_seller_id_and_store_name(client_id, client_secret, access_token)

In [None]:
# Exchanges a refresh token for a new access token at the OAuth token endpoint.
url = "https://api.mercadolibre.com/oauth/token"

# NOTE(review): the value sent as `refresh_token` is whatever `access_token`
# currently holds in the kernel; it is then overwritten below, so running this
# cell twice sends the new access token as the refresh token.
payload = {
    "grant_type": "refresh_token",
    "client_id": f"{client_id}",
    "client_secret": f"{client_secret}",
    "refresh_token": f"{access_token}"
}
headers = {
    "Content-Type": "application/x-www-form-urlencoded"
}
response = requests.post(url, data=payload, headers=headers)
# No status check: on an error response `tokens.get` simply yields None.
tokens = response.json()
access_token = tokens.get("access_token")

In [None]:
access_token

In [2]:
from datetime import datetime
import json
import logging
import requests
from src.common.bigquery_connector import BigQueryManager
from src.config import settings
import pandas as pd

In [3]:
bigquery = BigQueryManager(credentials_path=settings.PATH_SERVICE_ACCOUNT)


Using local credentials from: C:/Users/User/Documents/papa preco/service account/service_account_datalakev2.json


In [4]:
import mysql.connector
import pandas as pd
import pandas as pd
from sqlalchemy import create_engine, text
from sqlalchemy.types import String, Integer, Float, DateTime
import numpy as np
from urllib.parse import quote_plus

In [5]:
# NOTE(review): database password, user and host are hardcoded here; load them
# from the environment or a secret manager and rotate the password.
# quote_plus escapes the '@' in the password so it does not break the URL.
password = quote_plus('Glm@mysql24')  # Your actual password

# Create the SQLAlchemy engine
engine = create_engine(f'mysql+pymysql://geraldo-papa:{password}@34.123.250.92/glm')

In [15]:
from sqlalchemy import create_engine, text
from urllib.parse import quote_plus

# Database connection
# NOTE(review): credentials are hardcoded; see the engine cell — move to env/secret manager.
password = quote_plus('Glm@mysql24')
engine = create_engine(f'mysql+pymysql://geraldo-papa:{password}@34.123.250.92/glm')

# Increase VARCHAR size or set it to TEXT
# One-off schema change: converts suggested_items.item_id to TEXT.
with engine.connect() as conn:
    conn.execute(text("ALTER TABLE suggested_items MODIFY COLUMN item_id TEXT;"))

# Printed unconditionally once the statement above returns without raising.
print("Column 'item_id' updated successfully!")


Column 'item_id' updated successfully!


In [None]:
cd

In [7]:
import time

# tables_list = ['competitor', 'general', 'performance_table', 'stock_seller', 'suggested_items']

# MySQL table names to refresh from BigQuery.
tables_list = ['competitor']

for mysql_table in tables_list:

    # The MySQL table is 'competitor' but its BigQuery source is 'competitors';
    # every other table uses the same name on both sides. `table_name` keeps the
    # BigQuery name, as before, because later cells read it.
    table_name = 'competitors' if mysql_table == 'competitor' else mysql_table

    # Read from BigQuery first so a failed query does not leave MySQL emptied.
    df= bigquery.run_query(f'select * from datalake-v2-424516.tables_frontend.{table_name}')
    df['created_at'] = datetime.now()
    df['updated_at'] = datetime.now()

    memory_usage = df.memory_usage(deep=True).sum()/ (1024 ** 2)
    print(f"Tabela: {table_name} / Tamanho em memória: {memory_usage:.2f} MB" )

    # Empty the destination table, then append into that SAME table. Previously
    # 'competitor' was truncated while rows were appended into 'competitors'.
    with engine.connect() as conn:
        conn.execute(text(f"TRUNCATE TABLE {mysql_table};"))

    start_time = time.time()
    df.to_sql(
            name=mysql_table,
            con=engine,
            if_exists='append',
            index=False,
            chunksize=1000,
            method='multi',
            # dtype=data_types  # Specify data types
        )

    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Tempo decorrido: {elapsed_time:.2f} segundos")
    print('-----------------------------------')


Tabela: competitors / Tamanho em memória: 122.72 MB
Tempo decorrido: 133.28 segundos
-----------------------------------


In [None]:
import time

# Full list kept for reference; it is immediately overridden by the next line.
tables_list = ['competitors', 'general', 'performance_table', 'stock_seller', 'suggested_items']
tables_list = ['competitors']
# Columns intended to be indexed per table, passed to upload_data_to_mysql.
index_list = {
    'competitors': ['channel', 'glm_id', 'seller_id', 'seller_sku'],
    'general': ['glm_id', 'seller_id', 'seller_sku', 'item_id'],
    'performance_table': ['channel', 'seller_id', 'item_id'],
    'stock_seller': ['glm_id', 'seller_id', 'seller_sku'],
    'suggested_items': ['seller_sku'],
}

# NOTE(review): upload_data_to_mysql is defined in a later cell, so this cell
# depends on execution order.
for table in tables_list:
    
    df= bigquery.run_query(f'select * from datalake-v2-424516.tables_frontend.{table}')
    df['created_at'] = datetime.now()
    df['updated_at'] = datetime.now()
    # Deep memory footprint in MiB, printed for a rough size estimate.
    memory_usage = df.memory_usage(deep=True).sum()/ (1024 ** 2)
    print(f"Tabela: {table} / Tamanho em memória: {memory_usage:.2f} MB" )

    # Time only the MySQL upload, not the BigQuery read.
    start_time = time.time()
    upload_data_to_mysql(df, table_name= f'{table}', index_list= index_list[table])
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Tempo decorrido: {elapsed_time:.2f} segundos")
    print('-----------------------------------')

In [None]:
# Smoke test: appends only the first 15 rows of the BigQuery table to MySQL.
# NOTE(review): `table_name` and `engine` are not set in this cell; they come
# from whichever earlier cell last assigned them.
df= bigquery.run_query(f'select * from datalake-v2-424516.tables_frontend.{table_name}')
df['created_at'] = datetime.now()
df['updated_at'] = datetime.now()

# The cell output is the return value of to_sql.
df.head(15).to_sql(
        name=table_name,
        con=engine,
        if_exists='append',
        index=False,
        chunksize=1000,
        method='multi',
        # dtype=data_types  # Specify data types
    )

15

In [None]:
import pandas as pd
from sqlalchemy import create_engine, text
from sqlalchemy.types import String, Integer, Float, DateTime
import numpy as np
from urllib.parse import quote_plus

def upload_data_to_mysql(df, table_name, index_list=None):
    """Append a DataFrame to a MySQL table through SQLAlchemy.

    Args:
        df: DataFrame to upload. Null values are replaced with ``None`` first.
        table_name: Destination table; rows are appended (``if_exists='append'``).
        index_list: Accepted for call compatibility. No indexes are created by
            this function.

    Notes:
        A later cell in this notebook defines another ``upload_data_to_mysql(df)``
        with a different signature; whichever cell ran last wins.
    """
    # Replace pandas.NA and np.nan with None
    df = df.where(pd.notnull(df), None)

    # NOTE(review): hardcoded credential — move to an environment variable or a
    # secret manager and rotate the password.
    password = quote_plus('Glm@mysql24')  # Your actual password

    # Create the SQLAlchemy engine
    engine = create_engine(f'mysql+pymysql://geraldo-papa:{password}@34.123.250.92/glm')

    # Map each column's pandas dtype to a SQLAlchemy type; columns matching none
    # of the branches are left for pandas to infer.
    data_types = {}
    for col in df.columns:
        if df[col].dtype == object:
            data_types[col] = String(255)  # Set VARCHAR(255) for object columns
        elif pd.api.types.is_integer_dtype(df[col].dtype):
            data_types[col] = Integer()
        elif pd.api.types.is_float_dtype(df[col].dtype):
            data_types[col] = Float()
        elif pd.api.types.is_datetime64_any_dtype(df[col].dtype):
            data_types[col] = DateTime()

    try:
        # Upload the data to MySQL with specified data types
        df.to_sql(
            name=table_name,
            con=engine,
            if_exists='append',
            index=False,
            chunksize=1000,
            method='multi',
            dtype=data_types  # Specify data types
        )
    finally:
        # A new engine is created on every call; release its pooled connections
        # so repeated uploads do not accumulate open connections.
        engine.dispose()

    # Index creation is not performed here, so the message no longer claims it.
    print("Data uploaded!")


In [None]:
import time

# Full list kept for reference; it is immediately overridden by the next line.
tables_list = ['competitors', 'general', 'performance_table', 'stock_seller', 'suggested_items']
tables_list = ['general']
# Columns intended to be indexed per table, passed to upload_data_to_mysql.
index_list = {
    'competitors': ['channel', 'glm_id', 'seller_id', 'seller_sku'],
    'general': ['glm_id', 'seller_id', 'seller_sku', 'item_id'],
    'performance_table': ['channel', 'seller_id', 'item_id'],
    'stock_seller': ['glm_id', 'seller_id', 'seller_sku'],
    'suggested_items': ['seller_sku'],
}

# NOTE(review): this cell duplicates an earlier upload loop and differs only in
# the selected table; a shared function taking the table list would avoid the copy.
for table in tables_list:
    
    df= bigquery.run_query(f'select * from datalake-v2-424516.tables_frontend.{table}')
    df['created_at'] = datetime.now()
    df['updated_at'] = datetime.now()
    # Deep memory footprint in MiB, printed for a rough size estimate.
    memory_usage = df.memory_usage(deep=True).sum()/ (1024 ** 2)
    print(f"Tabela: {table} / Tamanho em memória: {memory_usage:.2f} MB" )

    # Time only the MySQL upload, not the BigQuery read.
    start_time = time.time()
    upload_data_to_mysql(df, table_name= f'{table}', index_list= index_list[table])
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Tempo decorrido: {elapsed_time:.2f} segundos")
    print('-----------------------------------')

In [None]:
import mysql.connector
import pandas as pd
import numpy as np

def recreate_table(df):
    """Drop and re-create the ``test_general`` MySQL table to match ``df``.

    One column is declared per DataFrame column, with the MySQL type looked up
    from the column's pandas dtype name (``TEXT`` when the dtype is not in the
    mapping). MySQL errors are printed and not raised; the cursor and the
    connection are always closed.

    Args:
        df: DataFrame whose column names and dtypes define the table schema.
    """
    # pandas dtype name -> MySQL column type
    mysql_type_by_dtype = {
        'int64': 'BIGINT',
        'float64': 'DOUBLE',
        'object': 'TEXT',
        'datetime64[ns]': 'DATETIME',
        'bool': 'BOOLEAN'
    }

    # One "`name` TYPE" fragment per column; unknown dtypes fall back to TEXT.
    column_definitions = [
        f"`{name}` {mysql_type_by_dtype.get(str(df[name].dtype), 'TEXT')}"
        for name in df.columns.tolist()
    ]

    create_table_query = f"""
    CREATE TABLE IF NOT EXISTS test_general (
        {', '.join(column_definitions)}
    );
    """

    # NOTE(review): hardcoded credentials — move to env/secret manager and rotate.
    connection_settings = dict(
        host="34.123.250.92",
        user="geraldo-papa",
        password="Glm@mysql24",
        database="test_general_table",
    )
    conn = mysql.connector.connect(**connection_settings)
    cursor = conn.cursor()

    try:
        # Start from a clean slate so the new schema always applies.
        cursor.execute("DROP TABLE IF EXISTS test_general;")
        print("Existing table dropped.")

        cursor.execute(create_table_query)
        print("New table created with the following schema:")
        print(create_table_query)
    except mysql.connector.Error as err:
        print("Error: {}".format(err))
        conn.rollback()
    finally:
        cursor.close()
        conn.close()

def upload_data_to_mysql(df):
    """Insert every row of ``df`` into the ``test_general`` MySQL table.

    Null values are sent as SQL NULL. All rows go through a single
    ``executemany`` followed by one commit; on a MySQL error the error is
    printed and the transaction rolled back. The cursor and the connection are
    always closed.

    Note:
        This definition replaces any earlier ``upload_data_to_mysql`` in the
        kernel, including the three-argument variant defined in another cell.

    Args:
        df: DataFrame whose columns match the columns of ``test_general``.
    """
    # Replace pandas.NA and np.nan with None
    df = df.where(pd.notnull(df), None)
    # Per-column null counts, printed for inspection before uploading.
    print(df.isnull().sum())

    columns = df.columns.tolist()

    # NOTE(review): hardcoded credentials — move to env/secret manager and rotate.
    connection_settings = dict(
        host="34.123.250.92",
        user="geraldo-papa",
        password="Glm@mysql24",
        database="test_general_table",
    )
    conn = mysql.connector.connect(**connection_settings)
    cursor = conn.cursor()

    # INSERT with back-quoted column names and one %s placeholder per column.
    quoted_columns = ", ".join(f"`{col}`" for col in columns)
    placeholders = ", ".join(["%s"] * len(columns))
    insert_query = "INSERT INTO test_general (" + quoted_columns + ") VALUES (" + placeholders + ")"

    # One tuple per row, with any remaining NA-like value mapped to None.
    data_to_insert = [
        tuple(None if pd.isna(row[col]) else row[col] for col in columns)
        for _, row in df.iterrows()
    ]

    try:
        cursor.executemany(insert_query, data_to_insert)

        conn.commit()  # Single commit after all insertions
        print("Data uploaded successfully to MySQL.")
    except mysql.connector.Error as err:
        print("Error: {}".format(err))
        conn.rollback()
    finally:
        cursor.close()
        conn.close()

# Usage
recreate_table(df)
upload_data_to_mysql(df)


In [None]:
upload_data_to_mysql(df)

In [33]:
from src.common.trigger_cloud_function import TriggerCloudFunction


In [2]:

import importlib

from src.common.cloud_storage_connector import CloudStorage
from src.common.bigquery_connector import BigQueryManager
from src.common.trigger_cloud_function import TriggerCloudFunction

from src.common.utils import authenticate, fetch_sales_for_day, log_process
from src.config import settings
from datetime import datetime, timedelta
from flask import jsonify

In [3]:
# Request payload later passed as `params` when triggering the cloud function.
# NOTE(review): a live access token and the application secret are hardcoded
# here; load them from the environment or a secret manager and rotate them.
data = {
  "access_token": "APP_USR-2951712600123976-112403-2daaae6d069355aea298d9f75a5000ff-354359142",
  "client_id": "2951712600123976",
  "client_secret": "QprAIl8ydXzcxFVHjnIHT6fUQ8KpzADV",
  "seller_id": 354359142,
  "store_name": "lojamercadoobra"
}

In [4]:
# Cloud Storage client built from the service-account path in settings.
storage = CloudStorage(credentials_path=settings.PATH_SERVICE_ACCOUNT)

# NOTE(review): this store name differs from the "store_name" in the `data`
# payload defined in the previous cell ('lojamercadoobra') — confirm which is intended.
store_name = 'avelar_shop'
bucket_name = settings.BUCKET_STORES
# Orders blob prefix for the store above.
blob_basic_path = settings.BLOB_ORDERS(store_name)

Using local credentials from: C:/Users/User/Documents/papa preco/service account/service_account_datalakev2.json


In [5]:
bool_first_time = storage.blob_exists(bucket_name, blob_basic_path)

In [None]:

trigger_functions = TriggerCloudFunction(credentials_path=settings.PATH_SERVICE_ACCOUNT)
trigger_functions.trigger_function(function_url='https://southamerica-east1-datalake-v2-424516.cloudfunctions.net/fetch_historic_orders',
                                           params= data) 



Using local credentials from: C:/Users/User/Documents/papa preco/service account/service_account_datalakev2.json
Bad response for function: HTTPSConnectionPool(host='southamerica-east1-datalake-v2-424516.cloudfunctions.net', port=443): Max retries exceeded with url: /fetch_historic_orders (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1000)')))


In [7]:
import time

In [9]:
# MySQL tables to refresh from BigQuery in this run.
tables_list = ['general']  #performance_table, suggested_items
# Database connection
# NOTE(review): credentials are hardcoded; move to env/secret manager and rotate.
password = quote_plus('Glm@mysql24')
engine = create_engine(f'mysql+pymysql://geraldo-papa:{password}@34.123.250.92/glm')
for table_name in tables_list:
    # NOTE(review): the table is emptied BEFORE the BigQuery read, so a failure
    # in run_query below leaves the MySQL table empty.
    with engine.connect() as conn:
        conn.execute(text(f"TRUNCATE TABLE {table_name};"))
    # The BigQuery source of MySQL 'competitor' is named 'competitors'.
    if table_name == 'competitor':
        table_name = 'competitors'
        
    df= bigquery.run_query(f'select * from datalake-v2-424516.tables_frontend.{table_name}')
    df['created_at'] = datetime.now()
    df['updated_at'] = datetime.now()
    # Deep memory footprint in MiB, printed for a rough size estimate.
    memory_usage = df.memory_usage(deep=True).sum()/ (1024 ** 2)
    print(f"Tabela: {table_name} / Tamanho em memória: {memory_usage:.2f} MB" )
    # Switch back to the MySQL name so rows land in the table truncated above.
    if table_name == 'competitors':
        table_name = 'competitor'
        
    # Time only the MySQL upload.
    start_time = time.time()
    df.to_sql(
            name=table_name,
            con=engine,
            if_exists='append',
            index=False,
            chunksize=1000,
            method='multi',
            # dtype=data_types  # Specify data types
        )
    
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Tempo decorrido: {elapsed_time:.2f} segundos")
    print('-----------------------------------')

Tabela: general / Tamanho em memória: 37.63 MB
Tempo decorrido: 78.35 segundos
-----------------------------------


In [1]:
import requests

In [19]:
# NOTE(review): tokens and the application secret are hardcoded in both dicts;
# load them from the environment or a secret manager and rotate them.
# NOTE(review): args_aventure has the same seller_id and store_name as
# args_avelar, while its token ends in a different number (548409917) — this
# looks like a copy-paste leftover; confirm the intended seller_id/store_name.
args_aventure = {
  "access_token": "TG-673604cb0656130001f7240a-548409917",
  "client_id": "2951712600123976",
  "client_secret": "QprAIl8ydXzcxFVHjnIHT6fUQ8KpzADV",
  "seller_id": 1904654004,
  "store_name": "avelar_shop"
}


# The "access_token" value here is the one sent as `refresh_token` in the next cell.
args_avelar = {
  "access_token": "TG-673604f2cda3960001605660-1904654004",
  "client_id": "2951712600123976",
  "client_secret": "QprAIl8ydXzcxFVHjnIHT6fUQ8KpzADV",
  "seller_id": 1904654004,
  "store_name": "avelar_shop"
}

In [20]:
# Exchanges the token stored in args_avelar for a new access token.
url = "https://api.mercadolibre.com/oauth/token"

# The dict's "access_token" entry is sent as the OAuth refresh token.
payload = {
    "grant_type": "refresh_token",
    "client_id": f"{args_avelar['client_id']}",
    "client_secret": f"{args_avelar['client_secret']}",
    "refresh_token": f"{args_avelar['access_token']}"
}

headers = {
    "Content-Type": "application/x-www-form-urlencoded"
}

response = requests.post(url, data=payload, headers=headers)
# No status check: on an error response `tokens.get` simply yields None.
# NOTE(review): the recorded response also contains a new refresh_token, which
# is not stored anywhere by this cell.
tokens = response.json()
access_token = tokens.get("access_token")

In [21]:
response.json()

{'access_token': 'APP_USR-2951712600123976-120410-8892b015182934a37d9355d27b3ac095-1904654004',
 'token_type': 'Bearer',
 'expires_in': 21600,
 'scope': 'offline_access read',
 'user_id': 1904654004,
 'refresh_token': 'TG-67506e54202d530001723375-1904654004'}

In [2]:
client_secret = 'Hw9wWSydd8PMvMEJewWoMvKGYMAWyKEw'
client_id = '4959083987776428'

def authenticate(client_id, client_secret, timeout=30):
  """Request an application access token with the client-credentials grant.

  Note: this notebook also imports an ``authenticate`` from ``src.common.utils``;
  running this cell replaces that name in the kernel.

  Args:
    client_id: Mercado Libre application id.
    client_secret: Mercado Libre application secret.
    timeout: Seconds to wait for the token endpoint before giving up.

  Returns:
    The ``access_token`` string from the token response.

  Raises:
    Exception: If the endpoint does not answer with HTTP 200. The message
      carries the status code and response body to make failures diagnosable.
    requests.RequestException: On connection errors or timeouts.
  """
  url = "https://api.mercadolibre.com/oauth/token"
  payload = {
      'grant_type': 'client_credentials',
      'client_id': client_id,
      'client_secret': client_secret
  }
  response = requests.post(url, data=payload, timeout=timeout)
  if response.status_code != 200:
    raise Exception(
        f"Authentication failed (HTTP {response.status_code}): {response.text}"
    )
  return response.json()['access_token']
  
access_token= authenticate(client_id, client_secret)

access_token

'APP_USR-4959083987776428-120510-845986165e780d9dbf7e1ef261163cfa-189643563'

In [34]:
headers = {
    'Authorization': f'Bearer {access_token}',
    'Content-Type': 'application/json'
}
promotion_id = 'P-MLB14345076'
url = f'https://api.mercadolibre.com/seller-promotions/promotions/{promotion_id}?promotion_type=MARKETPLACE_CAMPAIGN&app_version=v2'

requests.get(url, headers=headers).json()

{'id': 'P-MLB14345076',
 'type': 'MARKETPLACE_CAMPAIGN',
 'status': 'finished',
 'start_date': '2024-11-01T03:00:00Z',
 'finish_date': '2024-11-18T02:00:00Z',
 'deadline_date': '2024-11-18T01:00:00Z',
 'name': 'VENDA+ PRODUTOS POR ATÉ R$200',
 'benefits': {'type': 'REBATE', 'meli_percent': 6, 'seller_percent': 13}}

# Fix insert orders

In [11]:
import asyncio
import pandas as pd
import numpy as np
from datetime import datetime
import concurrent.futures
import os
from src.common.cloud_storage_connector import CloudStorage
from src.common.bigquery_connector import BigQueryManager
from src.config import settings
import json

def insert_bq_orders(request):
    """Synchronous entry point: run ``main_async(request)`` to completion.

    Args:
        request: Passed through unchanged to ``main_async``.

    Returns:
        Whatever ``main_async`` returns.
    """
    coroutine = main_async(request)
    return asyncio.run(coroutine)

async def main_async(request):
    """Load pending order dates from Cloud Storage into BigQuery.

    Reads ``store_name`` and ``seller_id`` from the request JSON, asks BigQuery
    which dates still need processing, processes those dates concurrently with
    ``process_date``, then replaces the rows for those dates in the orders
    table and updates the log table.

    Args:
        request: Object exposing ``get_json()`` (presumably a Flask request —
            confirm against the caller).

    Returns:
        The tuple ``('Success', 200)``.
    """
    data = request.get_json()
    store_name = data.get('store_name')
    seller_id = data.get('seller_id')

    print('** Connecting to storage and BigQuery... **')
    # Initialize storage and BigQuery
    storage = CloudStorage(credentials_path=settings.PATH_SERVICE_ACCOUNT)
    bigquery = BigQueryManager(credentials_path=settings.PATH_SERVICE_ACCOUNT)

    # Define paths and table names from the config
    bucket_name = settings.BUCKET_STORES
    table_management = settings.TABLE_MANAGEMENT
    destiny_table = settings.TABLE_ORDERS
    # Despite the variable name, this holds the orders blob prefix.
    blob_shipping_cost = settings.BLOB_ORDERS(store_name)

    # Get dates to process. get_running_loop() is the documented way to obtain
    # the loop from inside a coroutine.
    loop = asyncio.get_running_loop()
    list_dates_to_process = await loop.run_in_executor(
        None,
        bigquery.get_list_dates_to_process,
        seller_id,
        table_management,
        destiny_table
    )

    list_dates_to_process = [date.strftime('%Y-%m-%d') for date in list_dates_to_process]

    print(f'*** Starting to process dates: {len(list_dates_to_process)} dates to process ***')

    if len(list_dates_to_process) == 0:
        print('** 0 dates to process**')
        return ('Success', 200)

    # Create a semaphore to limit concurrency
    semaphore = asyncio.Semaphore(20)  # Adjust the value as needed

    # Use asyncio.gather to process dates asynchronously
    tasks = [process_date(date, storage, bucket_name, blob_shipping_cost, semaphore) for date in list_dates_to_process]
    results = await asyncio.gather(*tasks)

    # Combine all DataFrames
    df_all_processed_data = pd.concat(results, ignore_index=True)

    print(f'*** Finished processing all dates. Total sales: {df_all_processed_data.shape[0]} ***')

    # NOTE(review): process_date returns an empty frame when a date fails, yet
    # the steps below still delete and log EVERY requested date; a failed date
    # is therefore marked as processed with no rows. Consider tracking failures.

    # The following steps are synchronous and don't need to be async
    print('** Deleting existing data **')
    bigquery.delete_existing_data(destiny_table, seller_id, list_dates_to_process, 'processed_json')

    print('** Correcting dataframe schema **')
    # Other callers in this notebook use the RETURN value of
    # match_dataframe_schema; keep it when one is provided so the corrected
    # frame is what gets inserted, and fall back to the original frame otherwise.
    df_matched = bigquery.match_dataframe_schema(df_all_processed_data, destiny_table)
    if df_matched is not None:
        df_all_processed_data = df_matched

    print('** Inserting data into BigQuery **')
    bigquery.insert_dataframe(df_all_processed_data, destiny_table)

    print('** Updating log table **')
    bigquery.update_logs_table(seller_id, list_dates_to_process, destiny_table, table_management)

    return ('Success', 200)
import re
from datetime import datetime

async def process_date(date, storage, bucket_name, blob_shipping_cost, semaphore):
    """Process the most recent orders blob stored for one date.

    Lists the blobs under ``<blob_shipping_cost>date=<date>/``, picks the one
    whose name carries the latest ``processing-time=<iso>.json`` stamp and
    processes it with ``process_blob``.

    Args:
        date: Date string used to build the blob prefix.
        storage: Storage client exposing ``list_blobs(bucket, prefix)``.
        bucket_name: Bucket to read from.
        blob_shipping_cost: Blob path prefix that precedes ``date=<date>/``.
        semaphore: ``asyncio.Semaphore`` limiting how many dates run at once.

    Returns:
        A DataFrame of processed orders. When it is not empty it also carries
        ``processed_json`` (``date_created`` parsed as datetime) and
        ``process_time``. An empty DataFrame is returned when no blob exists
        or when any error occurs (the error is printed, not raised).
    """
    async with semaphore:
        try:
            print(f'Processing date: {date}')
            blob_prefix = blob_shipping_cost + f'date={date}/'

            loop = asyncio.get_running_loop()
            blobs = await loop.run_in_executor(None, storage.list_blobs, bucket_name, blob_prefix)
            # Materialise the listing: a lazy iterator is always truthy, which
            # would defeat the emptiness check below and make max() fail on it.
            blobs = list(blobs) if blobs else []

            if not blobs:
                print(f"No blobs found for date {date}. Skipping...")
                return pd.DataFrame()

            # Extract processing-time from each blob name
            def extract_processing_time(blob_name):
                match = re.search(r'processing-time=(.*?)\.json', blob_name)
                if match:
                    try:
                        return datetime.fromisoformat(match.group(1))
                    except ValueError:
                        # Unparseable stamp: rank it oldest instead of failing the date.
                        return datetime.min
                return datetime.min

            # Sort blobs by processing-time and take the most recent one
            most_recent_blob = max(blobs, key=lambda b: extract_processing_time(b.name))

            print(f"Most recent blob for date {date}: {most_recent_blob.name}")

            # Process only the most recent blob
            blob_semaphore = asyncio.Semaphore(25)
            df_processed_data = await process_blob(most_recent_blob, storage, bucket_name, blob_semaphore)

            if not df_processed_data.empty:
                df_processed_data['processed_json'] = pd.to_datetime(df_processed_data['date_created'])
                df_processed_data['process_time'] = datetime.now()

            print(f'*** Finished processing data for date {date}. {df_processed_data.shape[0]} sales ***')
            return df_processed_data

        except Exception as e:
            # Best effort: a failing date yields an empty frame so the other dates continue.
            print(f'Error processing date {date}: {e}')
            return pd.DataFrame()

async def process_blob(blob, storage, bucket_name, semaphore):
    """Download one blob and turn its order pages into a single DataFrame.

    Args:
        blob: Object with a ``name`` attribute identifying the blob.
        storage: Storage client exposing ``download_json(bucket, name)``.
        bucket_name: Bucket to read from.
        semaphore: ``asyncio.Semaphore`` limiting concurrent blob processing.

    Returns:
        The concatenation of ``process_orders_sync(page['results'])`` over every
        page in the blob (presumably DataFrames, since they are combined with
        ``pd.concat``). An empty DataFrame is returned when the blob has no
        content or when any error occurs (the error is printed, not raised).
    """
    async with semaphore:
        try:
            print(f"Reading file: {blob.name}")
            loop = asyncio.get_running_loop()
            content = await loop.run_in_executor(None, storage.download_json, bucket_name, blob.name)

            if not content:
                # pd.concat([]) raises; an empty blob is not an error condition.
                print(f"No content in blob {blob.name}. Skipping...")
                return pd.DataFrame()

            # Use ThreadPoolExecutor for compatibility in async context
            with concurrent.futures.ThreadPoolExecutor() as executor:
                df_list = list(executor.map(process_orders_sync, [json_content['results'] for json_content in content]))

            return pd.concat(df_list, ignore_index=True)
        except Exception as e:
            # Best effort: a failing blob yields an empty frame.
            print(f'Error processing blob {blob.name}: {e}')
            return pd.DataFrame()

def _dict_or_empty(value):
    """Return ``value`` when it is a dict, otherwise an empty dict (covers missing keys and nulls)."""
    return value if isinstance(value, dict) else {}


def _first_dict_or_empty(records):
    """Return the first element of a non-empty list when it is a dict, otherwise an empty dict."""
    if isinstance(records, list) and records:
        return _dict_or_empty(records[0])
    return {}


def _flatten_sale(sale):
    """Flatten one order dict into a single-level dict of columns.

    Only the first payment and the first order item are read; further entries
    in ``payments`` / ``order_items`` are ignored. Missing or null nested
    objects yield ``None`` for the columns derived from them.
    """
    payment = _first_dict_or_empty(sale.get('payments'))
    order_item = _first_dict_or_empty(sale.get('order_items'))
    item = _dict_or_empty(order_item.get('item'))
    atm_reference = _dict_or_empty(payment.get('atm_transfer_reference'))
    requested_quantity = _dict_or_empty(order_item.get('requested_quantity'))
    coupon = _dict_or_empty(sale.get('coupon'))
    context = _dict_or_empty(sale.get('context'))

    return {
        # --- first payment ---
        'reason': payment.get('reason'),
        'status_code': payment.get('status_code'),
        'total_paid_amount': payment.get('total_paid_amount'),
        'operation_type': payment.get('operation_type'),
        'transaction_amount': payment.get('transaction_amount'),
        'transaction_amount_refunded': payment.get('transaction_amount_refunded'),
        'date_approved': payment.get('date_approved'),
        'collector_id': _dict_or_empty(payment.get('collector')).get('id'),
        # 'coupon_id' / 'coupon_amount' used to be listed twice in this dict
        # (payment-level first, order-level later). In a dict literal the later
        # value wins while the key keeps its first position, so the effective
        # output was always the ORDER-level coupon at this column position.
        # That behavior is kept so the resulting schema does not change.
        # NOTE(review): confirm the payment-level coupon is not needed; if it
        # is, it requires its own column name.
        'coupon_id': coupon.get('id'),
        'installments': payment.get('installments'),
        'authorization_code': payment.get('authorization_code'),
        'taxes_amount': payment.get('taxes_amount'),
        'payment_id': payment.get('id'),
        'date_last_modified': payment.get('date_last_modified'),
        'coupon_amount': coupon.get('amount'),
        'installment_amount': payment.get('installment_amount'),
        'activation_uri': payment.get('activation_uri'),
        'overpaid_amount': payment.get('overpaid_amount'),
        'card_id': payment.get('card_id'),
        'issuer_id': payment.get('issuer_id'),
        'payment_method_id': payment.get('payment_method_id'),
        'payment_type': payment.get('payment_type'),
        'deferred_period': payment.get('deferred_period'),
        'atm_transfer_reference_transaction_id': atm_reference.get('transaction_id'),
        'atm_transfer_reference_company_id': atm_reference.get('company_id'),
        'site_id': payment.get('site_id'),
        'payer_id': payment.get('payer_id'),
        'order_id': payment.get('order_id'),
        'currency_id': payment.get('currency_id'),
        'payment_status': payment.get('status'),
        # --- parties / shipping ---
        'shipping_id': _dict_or_empty(sale.get('shipping')).get('id'),
        'fulfilled': sale.get('fulfilled'),
        'seller_id': _dict_or_empty(sale.get('seller')).get('id'),
        'buyer_id': _dict_or_empty(sale.get('buyer')).get('id'),
        # --- first order item: item description ---
        'item_id': item.get('id'),
        'item_title': item.get('title'),
        'item_category_id': item.get('category_id'),
        'item_variation_id': item.get('variation_id'),
        'seller_custom_field': item.get('seller_custom_field'),
        'global_price': item.get('global_price'),
        'net_weight': item.get('net_weight'),
        'warranty': item.get('warranty'),
        'condition': item.get('condition'),
        'seller_sku': item.get('seller_sku'),
        # --- first order item: line details ---
        'quantity': order_item.get('quantity'),
        'unit_price': order_item.get('unit_price'),
        'full_unit_price': order_item.get('full_unit_price'),
        'manufacturing_days': order_item.get('manufacturing_days'),
        'requested_quantity_measure': requested_quantity.get('measure'),
        'requested_quantity_value': requested_quantity.get('value'),
        'sale_fee': order_item.get('sale_fee'),
        'listing_type_id': order_item.get('listing_type_id'),
        'base_exchange_rate': order_item.get('base_exchange_rate'),
        'base_currency_id': order_item.get('base_currency_id'),
        'bundle': order_item.get('bundle'),
        'element_id': order_item.get('element_id'),
        # --- order-level fields ---
        'date_created': sale.get('date_created'),
        'date_closed': sale.get('date_closed'),
        'status': sale.get('status'),
        'expiration_date': sale.get('expiration_date'),
        'date_last_updated': sale.get('date_last_updated'),
        'last_updated': sale.get('last_updated'),
        'comment': sale.get('comment'),
        'pack_id': sale.get('pack_id'),
        'shipping_cost': sale.get('shipping_cost'),
        'pickup_id': sale.get('pickup_id'),
        'status_detail': sale.get('status_detail'),
        'total_amount': sale.get('total_amount'),
        'paid_amount': sale.get('paid_amount'),
        'context_application': context.get('application'),
        'context_product_id': context.get('product_id'),
        'context_channel': context.get('channel'),
        'context_site': context.get('site'),
    }


def process_orders_sync(json_data):
    """Flatten a list of order dicts into a DataFrame with one row per order.

    Args:
        json_data: Iterable of order dicts (one page of ``results``).

    Returns:
        pandas.DataFrame: One row per order that could be flattened, with the
        columns produced by ``_flatten_sale``. An order that cannot be
        flattened is reported and skipped instead of discarding the whole
        page. An empty DataFrame is returned when ``json_data`` is empty or
        cannot be iterated.
    """
    try:
        structured_sales = []  # Row dicts, turned into a DataFrame in one go
        for sale in json_data:
            try:
                structured_sales.append(_flatten_sale(sale))
            except Exception as e:
                # Keep the rest of the page; only this record is lost.
                print(f'Error processing sale, skipping it: {e}')
        return pd.DataFrame(structured_sales)
    except Exception as e:
        print(f'Error processing JSON data: {e}')
        return pd.DataFrame()


In [12]:
import os

# Credentials must not live in the notebook: the notebook file (and its saved
# outputs) gets shared and committed. The access token and client secret are
# read from the environment instead; export MELI_ACCESS_TOKEN and
# MELI_CLIENT_SECRET before starting the kernel.
# NOTE(review): the values that used to be hardcoded here should be treated as
# leaked and rotated.
request = {
  "access_token": os.environ.get("MELI_ACCESS_TOKEN"),
  "client_id": "2951712600123976",
  "client_secret": os.environ.get("MELI_CLIENT_SECRET"),
  "seller_id": 1009098057,
  "store_name": "smart magenta"
}

In [13]:
data = request
store_name = data.get('store_name')
seller_id = data.get('seller_id')
print('** Connecting to storage and BigQuery... **')
# Initialize storage and BigQuery
storage = CloudStorage(credentials_path=settings.PATH_SERVICE_ACCOUNT)
bigquery = BigQueryManager(credentials_path=settings.PATH_SERVICE_ACCOUNT)
# Define paths and table names from the config
bucket_name = settings.BUCKET_STORES
table_management = settings.TABLE_MANAGEMENT
destiny_table = settings.TABLE_ORDERS
blob_shipping_cost = settings.BLOB_ORDERS(store_name)
# Get dates to process
loop = asyncio.get_event_loop()
list_dates_to_process = await loop.run_in_executor(
    None,
    bigquery.get_list_dates_to_process,
    seller_id,
    table_management,
    destiny_table
)

list_dates_to_process = [date.strftime('%Y-%m-%d') for date in list_dates_to_process]

print(f'*** Starting to process dates: {len(list_dates_to_process)} dates to process ***')
# Create a semaphore to limit concurrency
semaphore = asyncio.Semaphore(20)  # Adjust the value as needed
if len(list_dates_to_process) != 0:
    # Use asyncio.gather to process dates asynchronously
    tasks = [process_date(date, storage, bucket_name, blob_shipping_cost, semaphore) for date in list_dates_to_process]
    results = await asyncio.gather(*tasks)

    df_all_processed_data = pd.concat(results, ignore_index=True)

else:
    print('** 0 dates to process**')

# The following steps are synchronous and don't need to be async
print('** Deleting existing data **')
bigquery.delete_existing_data(destiny_table, seller_id, list_dates_to_process, 'processed_json')
print('** Correcting dataframe schema **')
bigquery.match_dataframe_schema(df_all_processed_data, destiny_table)
print('** Inserting data into BigQuery **')
bigquery.insert_dataframe(df_all_processed_data, destiny_table)
print('** Updating log table **')
bigquery.update_logs_table(seller_id, list_dates_to_process, destiny_table, table_management)


** Connecting to storage and BigQuery... **
Using local credentials from: C:/Users/User/Documents/papa preco/service account/service_account_datalakev2.json
Using local credentials from: C:/Users/User/Documents/papa preco/service account/service_account_datalakev2.json
*** Starting to process dates: 387 dates to process ***
Processing date: 2023-11-25
Processing date: 2023-11-26
Processing date: 2023-11-27
Processing date: 2023-11-28
Processing date: 2023-11-29
Processing date: 2023-11-30
Processing date: 2023-12-01
Processing date: 2023-12-02
Processing date: 2023-12-03
Processing date: 2023-12-04
Processing date: 2023-12-05
Processing date: 2023-12-06
Processing date: 2023-12-07
Processing date: 2023-12-08
Processing date: 2023-12-09
Processing date: 2023-12-10
Processing date: 2023-12-11
Processing date: 2023-12-12
Processing date: 2023-12-13
Processing date: 2023-12-14
Most recent blob for date 2023-11-30: smart magenta/meli/api_response/orders/date=2023-11-30/total_sales=13__data=

  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2023-12-06. 59 sales ***
Processing date: 2024-01-06
*** Finished processing data for date 2023-12-18. 31 sales ***
*** Finished processing data for date 2023-12-19. 34 sales ***
Processing date: 2024-01-07
Processing date: 2024-01-08
Most recent blob for date 2023-12-30: smart magenta/meli/api_response/orders/date=2023-12-30/total_sales=15__data=2023-12-30__processing-time=2024-11-24T14:50:50.887830-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2023-12-30/total_sales=15__data=2023-12-30__processing-time=2024-11-24T14:50:50.887830-03:00.json
Most recent blob for date 2023-12-31: smart magenta/meli/api_response/orders/date=2023-12-31/total_sales=8__data=2023-12-31__processing-time=2024-11-24T14:50:50.904107-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2023-12-31/total_sales=8__data=2023-12-31__processing-time=2024-11-24T14:50:50.904107-03:00.json
Most recent blob for date 2024-01-02: smart magenta/meli/

  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-01-10. 53 sales ***
Processing date: 2024-01-28
Most recent blob for date 2024-01-23: smart magenta/meli/api_response/orders/date=2024-01-23/total_sales=67__data=2024-01-23__processing-time=2024-11-24T14:50:50.305900-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-01-23/total_sales=67__data=2024-01-23__processing-time=2024-11-24T14:50:50.305900-03:00.json
Most recent blob for date 2024-01-24: smart magenta/meli/api_response/orders/date=2024-01-24/total_sales=49__data=2024-01-24__processing-time=2024-11-24T14:50:50.878630-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-01-24/total_sales=49__data=2024-01-24__processing-time=2024-11-24T14:50:50.878630-03:00.json
*** Finished processing data for date 2024-01-14. 31 sales ***
*** Finished processing data for date 2024-01-15. 40 sales ***
Processing date: 2024-01-29
Processing date: 2024-01-30
Most recent blob for date 2024-01-25: smart magenta/mel

  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-01-11. 46 sales ***
*** Finished processing data for date 2024-01-17. 55 sales ***
*** Finished processing data for date 2024-01-16. 47 sales ***
Most recent blob for date 2024-01-27: smart magenta/meli/api_response/orders/date=2024-01-27/total_sales=32__data=2024-01-27__processing-time=2024-11-24T14:50:50.745417-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-01-27/total_sales=32__data=2024-01-27__processing-time=2024-11-24T14:50:50.745417-03:00.json
Most recent blob for date 2024-01-28: smart magenta/meli/api_response/orders/date=2024-01-28/total_sales=28__data=2024-01-28__processing-time=2024-11-24T14:50:50.855782-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-01-28/total_sales=28__data=2024-01-28__processing-time=2024-11-24T14:50:50.855782-03:00.json
*** Finished processing data for date 2024-01-12. 44 sales ***
*** Finished processing data for date 2024-01-18. 54 sales ***
*** Finished 

  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-02-06. 57 sales ***
Processing date: 2024-02-18
Most recent blob for date 2024-02-15: smart magenta/meli/api_response/orders/date=2024-02-15/total_sales=37__data=2024-02-15__processing-time=2024-11-24T14:50:50.996035-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-02-15/total_sales=37__data=2024-02-15__processing-time=2024-11-24T14:50:50.996035-03:00.json
Most recent blob for date 2024-02-14: smart magenta/meli/api_response/orders/date=2024-02-14/total_sales=17__data=2024-02-14__processing-time=2024-11-24T14:50:50.979862-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-02-14/total_sales=17__data=2024-02-14__processing-time=2024-11-24T14:50:50.979862-03:00.json
*** Finished processing data for date 2024-02-04. 44 sales ***
Processing date: 2024-02-19
*** Finished processing data for date 2024-01-30. 53 sales ***
Processing date: 2024-02-20
*** Finished processing data for date 2024-02-07. 49 sa

  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-02-01. 53 sales ***
*** Finished processing data for date 2024-02-02. 43 sales ***
*** Finished processing data for date 2024-01-31. 64 sales ***
*** Finished processing data for date 2024-02-09. 18 sales ***
*** Finished processing data for date 2024-02-13. 15 sales ***
Processing date: 2024-02-26
Processing date: 2024-02-27
Processing date: 2024-02-28
Processing date: 2024-02-29
Processing date: 2024-03-01
*** Finished processing data for date 2024-02-08. 43 sales ***
Most recent blob for date 2024-02-19: smart magenta/meli/api_response/orders/date=2024-02-19/total_sales=35__data=2024-02-19__processing-time=2024-11-24T14:50:50.607103-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-02-19/total_sales=35__data=2024-02-19__processing-time=2024-11-24T14:50:50.607103-03:00.json
*** Finished processing data for date 2024-02-12. 14 sales ***
*** Finished processing data for date 2024-02-10. 9 sales ***
Most recent blob for 

  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-04-06. 27 sales ***
Processing date: 2024-04-22
*** Finished processing data for date 2024-04-08. 43 sales ***
Processing date: 2024-04-23
*** Finished processing data for date 2024-04-04. 51 sales ***
Processing date: 2024-04-24
Most recent blob for date 2024-04-16: smart magenta/meli/api_response/orders/date=2024-04-16/total_sales=48__data=2024-04-16__processing-time=2024-11-24T14:50:50.228804-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-04-16/total_sales=48__data=2024-04-16__processing-time=2024-11-24T14:50:50.228804-03:00.json
Most recent blob for date 2024-04-15: smart magenta/meli/api_response/orders/date=2024-04-15/total_sales=44__data=2024-04-15__processing-time=2024-11-24T14:50:50.460988-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-04-15/total_sales=44__data=2024-04-15__processing-time=2024-11-24T14:50:50.460988-03:00.json
*** Finished processing data for date 2024-04-01. 46 sa

  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-04-10. 53 sales ***
Most recent blob for date 2024-04-25: smart magenta/meli/api_response/orders/date=2024-04-25/total_sales=49__data=2024-04-25__processing-time=2024-11-24T14:50:50.539355-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-04-25/total_sales=49__data=2024-04-25__processing-time=2024-11-24T14:50:50.539355-03:00.json
Most recent blob for date 2024-04-26: smart magenta/meli/api_response/orders/date=2024-04-26/total_sales=47__data=2024-04-26__processing-time=2024-11-24T14:50:50.554113-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-04-26/total_sales=47__data=2024-04-26__processing-time=2024-11-24T14:50:50.554113-03:00.json
Most recent blob for date 2024-04-23: smart magenta/meli/api_response/orders/date=2024-04-23/total_sales=61__data=2024-04-23__processing-time=2024-11-24T14:50:50.709278-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-04-23/total_sales=61__

  df_blob = pd.concat(df_list, ignore_index=True)


Most recent blob for date 2024-05-24: smart magenta/meli/api_response/orders/date=2024-05-24/total_sales=63__data=2024-05-24__processing-time=2024-11-24T14:50:50.742425-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-05-24/total_sales=63__data=2024-05-24__processing-time=2024-11-24T14:50:50.742425-03:00.json
Most recent blob for date 2024-05-23: smart magenta/meli/api_response/orders/date=2024-05-23/total_sales=73__data=2024-05-23__processing-time=2024-11-24T14:50:50.471657-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-05-23/total_sales=73__data=2024-05-23__processing-time=2024-11-24T14:50:50.471657-03:00.json
Most recent blob for date 2024-05-22: smart magenta/meli/api_response/orders/date=2024-05-22/total_sales=50__data=2024-05-22__processing-time=2024-11-24T14:50:50.878788-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-05-22/total_sales=50__data=2024-05-22__processing-time=2024-11-24T14:50:50.878788-03:

  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-05-13. 82 sales ***
*** Finished processing data for date 2024-05-11. 38 sales ***
*** Finished processing data for date 2024-05-10. 64 sales ***
Processing date: 2024-05-29
Processing date: 2024-05-30
Processing date: 2024-05-31
Processing date: 2024-06-01
Most recent blob for date 2024-05-25: smart magenta/meli/api_response/orders/date=2024-05-25/total_sales=32__data=2024-05-25__processing-time=2024-11-24T14:50:50.302880-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-05-25/total_sales=32__data=2024-05-25__processing-time=2024-11-24T14:50:50.302880-03:00.json
*** Finished processing data for date 2024-05-09. 73 sales ***
Processing date: 2024-06-02
*** Finished processing data for date 2024-05-16. 61 sales ***
Processing date: 2024-06-03
*** Finished processing data for date 2024-05-14. 92 sales ***
Most recent blob for date 2024-05-26: smart magenta/meli/api_response/orders/date=2024-05-26/total_sales=62__data=2024

  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-05-26. 62 sales ***
Processing date: 2024-06-16
Processing date: 2024-06-17
Processing date: 2024-06-18
*** Finished processing data for date 2024-05-31. 54 sales ***
Processing date: 2024-06-19
Most recent blob for date 2024-06-11: smart magenta/meli/api_response/orders/date=2024-06-11/total_sales=91__data=2024-06-11__processing-time=2024-11-24T14:51:51.678795-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-06-11/total_sales=91__data=2024-06-11__processing-time=2024-11-24T14:51:51.678795-03:00.json
Most recent blob for date 2024-06-10: smart magenta/meli/api_response/orders/date=2024-06-10/total_sales=89__data=2024-06-10__processing-time=2024-11-24T14:51:51.723624-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-06-10/total_sales=89__data=2024-06-10__processing-time=2024-11-24T14:51:51.723624-03:00.json
*** Finished processing data for date 2024-06-02. 45 sales ***
Processing date: 2024-06-20

  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-06-10. 89 sales ***
Processing date: 2024-07-02
Most recent blob for date 2024-06-26: smart magenta/meli/api_response/orders/date=2024-06-26/total_sales=69__data=2024-06-26__processing-time=2024-11-24T14:51:51.144674-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-06-26/total_sales=69__data=2024-06-26__processing-time=2024-11-24T14:51:51.144674-03:00.json
*** Finished processing data for date 2024-06-12. 53 sales ***
*** Finished processing data for date 2024-06-13. 62 sales ***
Processing date: 2024-07-03
Processing date: 2024-07-04
*** Finished processing data for date 2024-06-16. 57 sales ***
*** Finished processing data for date 2024-06-15. 49 sales ***
Most recent blob for date 2024-06-27: smart magenta/meli/api_response/orders/date=2024-06-27/total_sales=64__data=2024-06-27__processing-time=2024-11-24T14:51:51.608533-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-06-27/total_sales=64__

  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-06-21. 79 sales ***
Processing date: 2024-07-08
Processing date: 2024-07-09
Processing date: 2024-07-10
Most recent blob for date 2024-07-04: smart magenta/meli/api_response/orders/date=2024-07-04/total_sales=74__data=2024-07-04__processing-time=2024-11-24T14:51:51.421699-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-07-04/total_sales=74__data=2024-07-04__processing-time=2024-11-24T14:51:51.421699-03:00.json
Most recent blob for date 2024-07-02: smart magenta/meli/api_response/orders/date=2024-07-02/total_sales=106__data=2024-07-02__processing-time=2024-11-24T14:51:51.509791-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-07-02/total_sales=106__data=2024-07-02__processing-time=2024-11-24T14:51:51.509791-03:00.json
Most recent blob for date 2024-07-03: smart magenta/meli/api_response/orders/date=2024-07-03/total_sales=92__data=2024-07-03__processing-time=2024-11-24T14:51:51.585329-03:00.json

  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-07-01. 73 sales ***
*** Finished processing data for date 2024-07-02. 106 sales ***
Processing date: 2024-07-18
Processing date: 2024-07-19
Processing date: 2024-07-20
Processing date: 2024-07-21
Processing date: 2024-07-22
*** Finished processing data for date 2024-07-04. 74 sales ***
*** Finished processing data for date 2024-07-05. 78 sales ***
*** Finished processing data for date 2024-07-06. 62 sales ***
Most recent blob for date 2024-07-12: smart magenta/meli/api_response/orders/date=2024-07-12/total_sales=105__data=2024-07-12__processing-time=2024-11-24T14:51:51.179019-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-07-12/total_sales=105__data=2024-07-12__processing-time=2024-11-24T14:51:51.179019-03:00.json
Most recent blob for date 2024-07-17: smart magenta/meli/api_response/orders/date=2024-07-17/total_sales=77__data=2024-07-17__processing-time=2024-11-24T14:51:51.055390-03:00.json
Reading file: smart magent

  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-07-10. 147 sales ***
Processing date: 2024-07-29
*** Finished processing data for date 2024-07-09. 120 sales ***
Processing date: 2024-07-30
Most recent blob for date 2024-07-25: smart magenta/meli/api_response/orders/date=2024-07-25/total_sales=113__data=2024-07-25__processing-time=2024-11-24T14:51:51.176425-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-07-25/total_sales=113__data=2024-07-25__processing-time=2024-11-24T14:51:51.176425-03:00.json
*** Finished processing data for date 2024-07-15. 85 sales ***
Processing date: 2024-07-31
Most recent blob for date 2024-07-23: smart magenta/meli/api_response/orders/date=2024-07-23/total_sales=138__data=2024-07-23__processing-time=2024-11-24T14:51:51.070341-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-07-23/total_sales=138__data=2024-07-23__processing-time=2024-11-24T14:51:51.070341-03:00.json
Most recent blob for date 2024-07-24: smart magen

  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


Most recent blob for date 2024-07-28: smart magenta/meli/api_response/orders/date=2024-07-28/total_sales=64__data=2024-07-28__processing-time=2024-11-24T14:51:51.611047-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-07-28/total_sales=64__data=2024-07-28__processing-time=2024-11-24T14:51:51.611047-03:00.json
Most recent blob for date 2024-07-29: smart magenta/meli/api_response/orders/date=2024-07-29/total_sales=128__data=2024-07-29__processing-time=2024-11-24T14:51:51.168224-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-07-29/total_sales=128__data=2024-07-29__processing-time=2024-11-24T14:51:51.168224-03:00.json
*** Finished processing data for date 2024-07-16. 72 sales ***
*** Finished processing data for date 2024-07-17. 77 sales ***
Most recent blob for date 2024-07-30: smart magenta/meli/api_response/orders/date=2024-07-30/total_sales=92__data=2024-07-30__processing-time=2024-11-24T14:51:51.929040-03:00.json
Reading file: smart 

  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-07-24. 82 sales ***
Most recent blob for date 2024-08-07: smart magenta/meli/api_response/orders/date=2024-08-07/total_sales=153__data=2024-08-07__processing-time=2024-11-24T14:51:51.093581-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-08-07/total_sales=153__data=2024-08-07__processing-time=2024-11-24T14:51:51.093581-03:00.json
Most recent blob for date 2024-08-05: smart magenta/meli/api_response/orders/date=2024-08-05/total_sales=143__data=2024-08-05__processing-time=2024-11-24T14:51:51.020250-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-08-05/total_sales=143__data=2024-08-05__processing-time=2024-11-24T14:51:51.020250-03:00.json
Most recent blob for date 2024-08-06: smart magenta/meli/api_response/orders/date=2024-08-06/total_sales=150__data=2024-08-06__processing-time=2024-11-24T14:51:51.910496-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-08-06/total_sales

  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-07-23. 138 sales ***
Most recent blob for date 2024-08-10: smart magenta/meli/api_response/orders/date=2024-08-10/total_sales=68__data=2024-08-10__processing-time=2024-11-24T14:51:51.016339-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-08-10/total_sales=68__data=2024-08-10__processing-time=2024-11-24T14:51:51.016339-03:00.json
Most recent blob for date 2024-08-13: smart magenta/meli/api_response/orders/date=2024-08-13/total_sales=129__data=2024-08-13__processing-time=2024-11-24T14:51:51.931649-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-08-13/total_sales=129__data=2024-08-13__processing-time=2024-11-24T14:51:51.931649-03:00.json
Processing date: 2024-08-18
*** Finished processing data for date 2024-07-30. 92 sales ***
*** Finished processing data for date 2024-07-29. 128 sales ***
Most recent blob for date 2024-08-15: smart magenta/meli/api_response/orders/date=2024-08-15/total_sales=95

  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-08-15. 95 sales ***
Processing date: 2024-08-27
*** Finished processing data for date 2024-08-10. 68 sales ***
*** Finished processing data for date 2024-08-17. 33 sales ***
Most recent blob for date 2024-08-21: smart magenta/meli/api_response/orders/date=2024-08-21/total_sales=100__data=2024-08-21__processing-time=2024-11-24T14:51:51.308332-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-08-21/total_sales=100__data=2024-08-21__processing-time=2024-11-24T14:51:51.308332-03:00.json
*** Finished processing data for date 2024-08-14. 104 sales ***
Processing date: 2024-08-28
Processing date: 2024-08-29
Processing date: 2024-08-30
*** Finished processing data for date 2024-08-12. 122 sales ***
*** Finished processing data for date 2024-08-16. 65 sales ***
Most recent blob for date 2024-08-22: smart magenta/meli/api_response/orders/date=2024-08-22/total_sales=84__data=2024-08-22__processing-time=2024-11-24T14:51:51.512370-0

  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-08-06. 150 sales ***
*** Finished processing data for date 2024-08-07. 153 sales ***
*** Finished processing data for date 2024-08-09. 90 sales ***
Most recent blob for date 2024-08-28: smart magenta/meli/api_response/orders/date=2024-08-28/total_sales=97__data=2024-08-28__processing-time=2024-11-24T14:51:51.947128-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-08-28/total_sales=97__data=2024-08-28__processing-time=2024-11-24T14:51:51.947128-03:00.json
Most recent blob for date 2024-08-29: smart magenta/meli/api_response/orders/date=2024-08-29/total_sales=100__data=2024-08-29__processing-time=2024-11-24T14:51:51.796715-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-08-29/total_sales=100__data=2024-08-29__processing-time=2024-11-24T14:51:51.796715-03:00.json
Most recent blob for date 2024-08-30: smart magenta/meli/api_response/orders/date=2024-08-30/total_sales=71__data=2024-08-30__processin

  df_blob = pd.concat(df_list, ignore_index=True)


Most recent blob for date 2024-09-04: smart magenta/meli/api_response/orders/date=2024-09-04/total_sales=75__data=2024-09-04__processing-time=2024-11-24T14:51:51.089518-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-09-04/total_sales=75__data=2024-09-04__processing-time=2024-11-24T14:51:51.089518-03:00.json
*** Finished processing data for date 2024-08-30. 71 sales ***
Processing date: 2024-09-11
Most recent blob for date 2024-09-05: smart magenta/meli/api_response/orders/date=2024-09-05/total_sales=87__data=2024-09-05__processing-time=2024-11-24T14:51:51.719518-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-09-05/total_sales=87__data=2024-09-05__processing-time=2024-11-24T14:51:51.719518-03:00.json
Most recent blob for date 2024-09-06: smart magenta/meli/api_response/orders/date=2024-09-06/total_sales=72__data=2024-09-06__processing-time=2024-11-24T14:51:51.737900-03:00.json
Reading file: smart magenta/meli/api_response/orders/date

  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-09-01. 57 sales ***
*** Finished processing data for date 2024-08-19. 114 sales ***
Processing date: 2024-09-15
Processing date: 2024-09-16
*** Finished processing data for date 2024-08-23. 76 sales ***
Most recent blob for date 2024-09-11: smart magenta/meli/api_response/orders/date=2024-09-11/total_sales=51__data=2024-09-11__processing-time=2024-11-24T14:51:51.331914-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-09-11/total_sales=51__data=2024-09-11__processing-time=2024-11-24T14:51:51.331914-03:00.json
*** Finished processing data for date 2024-08-22. 84 sales ***
*** Finished processing data for date 2024-08-31. 51 sales ***
*** Finished processing data for date 2024-09-02. 65 sales ***
Processing date: 2024-09-17
Processing date: 2024-09-18
Processing date: 2024-09-19
Processing date: 2024-09-20
*** Finished processing data for date 2024-09-06. 72 sales ***
*** Finished processing data for date 2024-08-26. 104 

  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-09-13. 72 sales ***
Processing date: 2024-09-29
*** Finished processing data for date 2024-09-11. 51 sales ***
Most recent blob for date 2024-09-24: smart magenta/meli/api_response/orders/date=2024-09-24/total_sales=119__data=2024-09-24__processing-time=2024-11-24T14:51:51.542077-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-09-24/total_sales=119__data=2024-09-24__processing-time=2024-11-24T14:51:51.542077-03:00.json
Most recent blob for date 2024-09-23: smart magenta/meli/api_response/orders/date=2024-09-23/total_sales=151__data=2024-09-23__processing-time=2024-11-24T14:51:51.599990-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-09-23/total_sales=151__data=2024-09-23__processing-time=2024-11-24T14:51:51.599990-03:00.json
*** Finished processing data for date 2024-09-05. 87 sales ***
*** Finished processing data for date 2024-09-09. 85 sales ***
Processing date: 2024-09-30
Processing date:

  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-09-20. 68 sales ***
Processing date: 2024-10-06
*** Finished processing data for date 2024-09-16. 88 sales ***
Most recent blob for date 2024-09-28: smart magenta/meli/api_response/orders/date=2024-09-28/total_sales=97__data=2024-09-28__processing-time=2024-11-24T14:51:51.169070-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-09-28/total_sales=97__data=2024-09-28__processing-time=2024-11-24T14:51:51.169070-03:00.json
Most recent blob for date 2024-09-29: smart magenta/meli/api_response/orders/date=2024-09-29/total_sales=82__data=2024-09-29__processing-time=2024-11-24T14:51:51.318685-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-09-29/total_sales=82__data=2024-09-29__processing-time=2024-11-24T14:51:51.318685-03:00.json
Processing date: 2024-10-07
*** Finished processing data for date 2024-09-12. 82 sales ***
*** Finished processing data for date 2024-09-21. 36 sales ***
*** Finished process

  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-09-18. 106 sales ***
Processing date: 2024-10-11
Most recent blob for date 2024-10-05: smart magenta/meli/api_response/orders/date=2024-10-05/total_sales=70__data=2024-10-05__processing-time=2024-11-24T14:51:51.908149-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-10-05/total_sales=70__data=2024-10-05__processing-time=2024-11-24T14:51:51.908149-03:00.json
Most recent blob for date 2024-10-06: smart magenta/meli/api_response/orders/date=2024-10-06/total_sales=49__data=2024-10-06__processing-time=2024-11-24T14:51:51.732954-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-10-06/total_sales=49__data=2024-10-06__processing-time=2024-11-24T14:51:51.732954-03:00.json
*** Finished processing data for date 2024-09-19. 86 sales ***
*** Finished processing data for date 2024-09-26. 126 sales ***
Most recent blob for date 2024-10-07: smart magenta/meli/api_response/orders/date=2024-10-07/total_sales=114_

  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-09-23. 151 sales ***
Most recent blob for date 2024-10-08: smart magenta/meli/api_response/orders/date=2024-10-08/total_sales=137__data=2024-10-08__processing-time=2024-11-24T14:51:51.200559-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-10-08/total_sales=137__data=2024-10-08__processing-time=2024-11-24T14:51:51.200559-03:00.json
Processing date: 2024-10-12
Processing date: 2024-10-13
Processing date: 2024-10-14
Most recent blob for date 2024-10-09: smart magenta/meli/api_response/orders/date=2024-10-09/total_sales=140__data=2024-10-09__processing-time=2024-11-24T14:51:51.109699-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-10-09/total_sales=140__data=2024-10-09__processing-time=2024-11-24T14:51:51.109699-03:00.json
*** Finished processing data for date 2024-09-24. 119 sales ***
Most recent blob for date 2024-10-10: smart magenta/meli/api_response/orders/date=2024-10-10/total_sales=125__da

  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-10-05. 70 sales ***
*** Finished processing data for date 2024-10-06. 49 sales ***
Most recent blob for date 2024-10-12: smart magenta/meli/api_response/orders/date=2024-10-12/total_sales=67__data=2024-10-12__processing-time=2024-11-24T14:51:51.768220-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-10-12/total_sales=67__data=2024-10-12__processing-time=2024-11-24T14:51:51.768220-03:00.json
Most recent blob for date 2024-10-13: smart magenta/meli/api_response/orders/date=2024-10-13/total_sales=70__data=2024-10-13__processing-time=2024-11-24T14:51:51.885446-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-10-13/total_sales=70__data=2024-10-13__processing-time=2024-11-24T14:51:51.885446-03:00.json
Most recent blob for date 2024-10-14: smart magenta/meli/api_response/orders/date=2024-10-14/total_sales=131__data=2024-10-14__processing-time=2024-11-24T14:51:51.021254-03:00.json
Reading file: smart m

  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-10-16. 99 sales ***
*** Finished processing data for date 2024-10-17. 76 sales ***
Processing date: 2024-11-04
Processing date: 2024-11-05
*** Finished processing data for date 2024-10-15. 97 sales ***
Processing date: 2024-11-06
Most recent blob for date 2024-10-31: smart magenta/meli/api_response/orders/date=2024-10-31/total_sales=75__data=2024-10-31__processing-time=2024-11-24T14:51:51.694792-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-10-31/total_sales=75__data=2024-10-31__processing-time=2024-11-24T14:51:51.694792-03:00.json
Most recent blob for date 2024-11-01: smart magenta/meli/api_response/orders/date=2024-11-01/total_sales=70__data=2024-11-01__processing-time=2024-11-24T14:51:51.805917-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-11-01/total_sales=70__data=2024-11-01__processing-time=2024-11-24T14:51:51.805917-03:00.json
*** Finished processing data for date 2024-10-22. 109 s

  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-10-25. 97 sales ***
Processing date: 2024-11-09
*** Finished processing data for date 2024-10-20. 66 sales ***
Processing date: 2024-11-10
*** Finished processing data for date 2024-10-18. 74 sales ***
Most recent blob for date 2024-11-03: smart magenta/meli/api_response/orders/date=2024-11-03/total_sales=46__data=2024-11-03__processing-time=2024-11-24T14:51:51.152876-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-11-03/total_sales=46__data=2024-11-03__processing-time=2024-11-24T14:51:51.152876-03:00.json
*** Finished processing data for date 2024-10-24. 94 sales ***
Processing date: 2024-11-11
Processing date: 2024-11-12
Most recent blob for date 2024-11-04: smart magenta/meli/api_response/orders/date=2024-11-04/total_sales=122__data=2024-11-04__processing-time=2024-11-24T14:51:51.024365-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-11-04/total_sales=122__data=2024-11-04__processing-time=

  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-10-23. 131 sales ***
Processing date: 2024-11-13
Processing date: 2024-11-14
Processing date: 2024-11-15
Processing date: 2024-11-16
Processing date: 2024-11-17
Processing date: 2024-11-18
*** Finished processing data for date 2024-10-31. 75 sales ***
Most recent blob for date 2024-11-12: smart magenta/meli/api_response/orders/date=2024-11-12/total_sales=91__data=2024-11-12__processing-time=2024-11-24T14:51:51.873853-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-11-12/total_sales=91__data=2024-11-12__processing-time=2024-11-24T14:51:51.873853-03:00.json
Most recent blob for date 2024-11-11: smart magenta/meli/api_response/orders/date=2024-11-11/total_sales=82__data=2024-11-11__processing-time=2024-11-24T14:51:51.541640-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-11-11/total_sales=82__data=2024-11-11__processing-time=2024-11-24T14:51:51.541640-03:00.json
Most recent blob for date 2024-10

  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-11-03. 46 sales ***
Processing date: 2024-11-19
Processing date: 2024-11-20
Processing date: 2024-11-21
Processing date: 2024-11-22
*** Finished processing data for date 2024-11-04. 122 sales ***
*** Finished processing data for date 2024-11-05. 127 sales ***
Most recent blob for date 2024-11-14: smart magenta/meli/api_response/orders/date=2024-11-14/total_sales=74__data=2024-11-14__processing-time=2024-11-24T14:51:51.787682-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-11-14/total_sales=74__data=2024-11-14__processing-time=2024-11-24T14:51:51.787682-03:00.json
Most recent blob for date 2024-11-15: smart magenta/meli/api_response/orders/date=2024-11-15/total_sales=62__data=2024-11-15__processing-time=2024-11-24T14:51:51.836378-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-11-15/total_sales=62__data=2024-11-15__processing-time=2024-11-24T14:51:51.836378-03:00.json
Most recent blob for date

  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-11-12. 91 sales ***
Processing date: 2024-11-23
Processing date: 2024-11-24
Processing date: 2024-11-25
Processing date: 2024-11-26
Processing date: 2024-11-27
Processing date: 2024-11-28
Processing date: 2024-11-29
*** Finished processing data for date 2024-10-26. 67 sales ***
*** Finished processing data for date 2024-11-11. 82 sales ***
Most recent blob for date 2024-11-20: smart magenta/meli/api_response/orders/date=2024-11-20/total_sales=60__data=2024-11-20__processing-time=2024-11-24T14:51:51.114500-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-11-20/total_sales=60__data=2024-11-20__processing-time=2024-11-24T14:51:51.114500-03:00.json
Most recent blob for date 2024-11-19: smart magenta/meli/api_response/orders/date=2024-11-19/total_sales=121__data=2024-11-19__processing-time=2024-11-24T14:51:51.219074-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-11-19/total_sales=121__data=2024-11

  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-11-06. 107 sales ***
*** Finished processing data for date 2024-11-15. 62 sales ***
Most recent blob for date 2024-11-26: smart magenta/meli/api_response/orders/date=2024-11-26/total_sales=142__data=2024-11-26__processing-time=2024-11-27T07:00:00.216150-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-11-26/total_sales=142__data=2024-11-26__processing-time=2024-11-27T07:00:00.216150-03:00.json
Most recent blob for date 2024-11-27: smart magenta/meli/api_response/orders/date=2024-11-27/total_sales=104__data=2024-11-27__processing-time=2024-11-28T12:17:17.300209-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-11-27/total_sales=104__data=2024-11-27__processing-time=2024-11-28T12:17:17.300209-03:00.json
*** Finished processing data for date 2024-11-17. 70 sales ***
Most recent blob for date 2024-11-29: smart magenta/meli/api_response/orders/date=2024-11-29/total_sales=127__data=2024-11-29__process

  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-11-27. 104 sales ***
Most recent blob for date 2024-12-07: smart magenta/meli/api_response/orders/date=2024-12-07/total_sales=65__data=2024-12-07__processing-time=2024-12-08T07:00:00.453367-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-12-07/total_sales=65__data=2024-12-07__processing-time=2024-12-08T07:00:00.453367-03:00.json
Most recent blob for date 2024-12-04: smart magenta/meli/api_response/orders/date=2024-12-04/total_sales=127__data=2024-12-04__processing-time=2024-12-05T07:00:00.472115-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-12-04/total_sales=127__data=2024-12-04__processing-time=2024-12-05T07:00:00.472115-03:00.json
Most recent blob for date 2024-12-08: smart magenta/meli/api_response/orders/date=2024-12-08/total_sales=68__data=2024-12-08__processing-time=2024-12-09T13:57:57.105488-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-12-08/total_sales=6

  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-12-05. 94 sales ***
*** Finished processing data for date 2024-12-08. 68 sales ***
*** Finished processing data for date 2024-12-06. 72 sales ***
*** Finished processing data for date 2024-12-04. 127 sales ***
*** Finished processing data for date 2024-12-07. 65 sales ***
Most recent blob for date 2024-12-14: smart magenta/meli/api_response/orders/date=2024-12-14/total_sales=66__data=2024-12-14__processing-time=2024-12-15T07:00:00.712229-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-12-14/total_sales=66__data=2024-12-14__processing-time=2024-12-15T07:00:00.712229-03:00.json
Most recent blob for date 2024-12-15: smart magenta/meli/api_response/orders/date=2024-12-15/total_sales=70__data=2024-12-15__processing-time=2024-12-16T07:00:00.338372-03:00.json
Reading file: smart magenta/meli/api_response/orders/date=2024-12-15/total_sales=70__data=2024-12-15__processing-time=2024-12-16T07:00:00.338372-03:00.json


  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-12-13. 93 sales ***
*** Finished processing data for date 2024-12-10. 110 sales ***
*** Finished processing data for date 2024-12-12. 83 sales ***
*** Finished processing data for date 2024-12-11. 96 sales ***


  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)
  df_blob = pd.concat(df_list, ignore_index=True)


*** Finished processing data for date 2024-12-09. 138 sales ***
*** Finished processing data for date 2024-11-28. 114 sales ***
*** Finished processing data for date 2024-12-14. 66 sales ***
*** Finished processing data for date 2024-12-15. 70 sales ***


  df_all_processed_data = pd.concat(results, ignore_index=True)


** Deleting existing data **
Existing data deleted from datalake-v2-424516.datalake_v2.orders for dates ['2023-11-25', '2023-11-26', '2023-11-27', '2023-11-28', '2023-11-29', '2023-11-30', '2023-12-01', '2023-12-02', '2023-12-03', '2023-12-04', '2023-12-05', '2023-12-06', '2023-12-07', '2023-12-08', '2023-12-09', '2023-12-10', '2023-12-11', '2023-12-12', '2023-12-13', '2023-12-14', '2023-12-15', '2023-12-16', '2023-12-17', '2023-12-18', '2023-12-19', '2023-12-20', '2023-12-21', '2023-12-22', '2023-12-23', '2023-12-24', '2023-12-25', '2023-12-26', '2023-12-27', '2023-12-28', '2023-12-29', '2023-12-30', '2023-12-31', '2024-01-01', '2024-01-02', '2024-01-03', '2024-01-04', '2024-01-05', '2024-01-06', '2024-01-07', '2024-01-08', '2024-01-09', '2024-01-10', '2024-01-11', '2024-01-12', '2024-01-13', '2024-01-14', '2024-01-15', '2024-01-16', '2024-01-17', '2024-01-18', '2024-01-19', '2024-01-20', '2024-01-21', '2024-01-22', '2024-01-23', '2024-01-24', '2024-01-25', '2024-01-26', '2024-01-27',

100%|██████████| 1/1 [00:00<?, ?it/s]


Data inserted into datalake-v2-424516.datalake_v2.orders.
** Updating log table **
Logs table datalake-v2-424516.datalake_v2.datalake_management updated for seller_id 1009098057 and dates ['2023-11-25', '2023-11-26', '2023-11-27', '2023-11-28', '2023-11-29', '2023-11-30', '2023-12-01', '2023-12-02', '2023-12-03', '2023-12-04', '2023-12-05', '2023-12-06', '2023-12-07', '2023-12-08', '2023-12-09', '2023-12-10', '2023-12-11', '2023-12-12', '2023-12-13', '2023-12-14', '2023-12-15', '2023-12-16', '2023-12-17', '2023-12-18', '2023-12-19', '2023-12-20', '2023-12-21', '2023-12-22', '2023-12-23', '2023-12-24', '2023-12-25', '2023-12-26', '2023-12-27', '2023-12-28', '2023-12-29', '2023-12-30', '2023-12-31', '2024-01-01', '2024-01-02', '2024-01-03', '2024-01-04', '2024-01-05', '2024-01-06', '2024-01-07', '2024-01-08', '2024-01-09', '2024-01-10', '2024-01-11', '2024-01-12', '2024-01-13', '2024-01-14', '2024-01-15', '2024-01-16', '2024-01-17', '2024-01-18', '2024-01-19', '2024-01-20', '2024-01-21',

# Correct management table

In [None]:
import os

# Seller/app configuration used by the cells below.
# Secrets are read from the environment instead of being committed in the
# notebook; the values resolve to None when the variables are not set.
# NOTE(review): the access token and client secret that were previously
# hardcoded here should be considered leaked and rotated.
request = {
  "access_token": os.environ.get("MELI_ACCESS_TOKEN"),
  "client_id": "2951712600123976",
  "client_secret": os.environ.get("MELI_CLIENT_SECRET"),
  "seller_id": 548409917,
  "store_name": "adventure comercio"
}

# Convenience aliases consumed by later cells.
store_name = request['store_name']
seller_id = request['seller_id']

In [None]:
import re
import pandas as pd
from datetime import datetime
from src.common.cloud_storage_connector import CloudStorage
from src.common.bigquery_connector import BigQueryManager
from src.config import settings

def insert_dates_to_bq(seller_id, store_name):
    """Rebuild the seller's 'orders' rows in the management table from storage.

    Lists the blobs under ``settings.BLOB_ORDERS(store_name)`` in the
    ``settings.BUCKET_STORES`` bucket, collects every distinct
    ``date=YYYY-MM-DD`` found in the blob names, deletes the seller's existing
    rows in ``settings.TABLE_MANAGEMENT`` whose ``table_name`` ends in
    ``orders``, and inserts one row per date with ``processed_to_bq=False``.

    If no dated blob is found, nothing is deleted or inserted, so a wrong
    prefix or an empty bucket cannot wipe the existing management rows.

    Args:
        seller_id: Seller identifier written to the ``seller_id`` column and
            interpolated into the delete statement.
        store_name: Store name passed to ``settings.BLOB_ORDERS`` to build the
            blob prefix.

    Returns:
        None. Progress is reported via ``print``.
    """
    # Initialize Storage and BigQuery connectors
    storage = CloudStorage(credentials_path=settings.PATH_SERVICE_ACCOUNT)
    bigquery = BigQueryManager(credentials_path=settings.PATH_SERVICE_ACCOUNT)

    # Config variables
    bucket_name = settings.BUCKET_STORES
    blob_prefix = settings.BLOB_ORDERS(store_name)

    # Step 1: List all blobs under the store's orders prefix
    blobs = storage.list_blobs(bucket_name, prefix=blob_prefix)

    # Step 2: Extract the distinct partition dates from the blob names
    date_pattern = re.compile(r"date=(\d{4}-\d{2}-\d{2})")
    unique_dates = set()
    for blob in blobs:
        match = date_pattern.search(blob.name)
        if match:
            unique_dates.add(match.group(1))

    # Guard: without any dates the delete below would remove the seller's rows
    # and leave nothing in their place.
    if not unique_dates:
        print(f"No dated blobs found under '{blob_prefix}'; management table left unchanged.")
        return

    table_name = "datalake-v2-424516.datalake_v2.orders"

    # Step 3: One management row per date; scalar values are broadcast to
    # every row by the DataFrame constructor.
    df_dates = pd.DataFrame({
        "seller_id": seller_id,
        "table_name": table_name,
        "process_date": [f"{date}T00:00:00" for date in sorted(unique_dates)],
        "processed_to_bq": False,
        "last_bq_processing": None  # not processed yet
    })

    # Step 4: Replace the seller's existing 'orders' rows
    bigquery.run_query(f"delete from {settings.TABLE_MANAGEMENT} where seller_id = {seller_id} and table_name like '%orders'")
    # NOTE(review): the return value is ignored — presumably this adjusts
    # df_dates in place; confirm against BigQueryManager.
    bigquery.match_dataframe_schema(df_dates, settings.TABLE_MANAGEMENT)
    bigquery.insert_dataframe(df_dates, settings.TABLE_MANAGEMENT)

    print("Dates successfully inserted into BigQuery.")

# Rebuild the management rows for the seller/store configured above
insert_dates_to_bq(seller_id=seller_id, store_name=store_name)


Using local credentials from: C:/Users/User/Documents/papa preco/service account/service_account_datalakev2.json
Using local credentials from: C:/Users/User/Documents/papa preco/service account/service_account_datalakev2.json
Schema adjusted to match BigQuery table.


100%|██████████| 1/1 [00:00<?, ?it/s]

Data inserted into datalake-v2-424516.datalake_v2.datalake_management.
Dates successfully inserted into BigQuery.





In [4]:
# Inspect the column dtypes of `df`.
# NOTE(review): `df` is not defined in the cells visible here — presumably it
# is left over from earlier kernel state; confirm before relying on this cell.
df.dtypes

seller_id              int64
table_name            object
process_date          object
processed_to_bq         bool
last_bq_processing    object
dtype: object

In [18]:
# Shape (rows, columns) of the first element of `results` loaded as a DataFrame.
# NOTE(review): `results` is not defined in the cells visible here — confirm
# which earlier cell produces it.
pd.DataFrame(results[0]).shape

(49, 75)

In [19]:
# Show the `date_approved` column of the first element of `results`.
# NOTE(review): `results` is not defined in the cells visible here — confirm
# which earlier cell produces it.
pd.DataFrame(results[0])['date_approved']

0     2024-12-06T23:15:10.000-04:00
1     2024-12-06T23:15:11.000-04:00
2     2024-12-07T07:04:53.000-04:00
3     2024-12-07T07:34:55.000-04:00
4     2024-12-07T07:34:56.000-04:00
5     2024-12-07T07:35:27.000-04:00
6     2024-12-07T07:36:17.000-04:00
7     2024-12-07T07:47:10.000-04:00
8     2024-12-07T07:48:51.000-04:00
9     2024-12-07T07:49:50.000-04:00
10    2024-12-07T08:18:26.000-04:00
11    2024-12-07T08:19:19.000-04:00
12    2024-12-07T08:48:39.000-04:00
13    2024-12-07T08:49:23.000-04:00
14    2024-12-07T09:43:10.000-04:00
15    2024-12-07T10:37:44.000-04:00
16    2024-12-07T10:49:02.000-04:00
17    2024-12-07T11:42:55.000-04:00
18    2024-12-07T12:18:50.000-04:00
19    2024-12-07T12:29:42.000-04:00
20    2024-12-07T12:45:49.000-04:00
21    2024-12-07T12:50:20.000-04:00
22    2024-12-07T13:09:44.000-04:00
23    2024-12-07T13:46:03.000-04:00
24    2024-12-07T14:07:17.000-04:00
25    2024-12-07T14:45:47.000-04:00
26    2024-12-07T14:45:52.000-04:00
27    2024-12-07T15:21:15.00

In [2]:
import json
import requests
import traceback
import time
from src.common.bigquery_connector import BigQueryManager
from src.config import settings
from src.common.trigger_cloud_function import TriggerCloudFunction
from sqlalchemy import create_engine, text
from urllib.parse import quote_plus
import pandas as pd
import time
from datetime import datetime

In [5]:

import os

bigquery = BigQueryManager(credentials_path=settings.PATH_SERVICE_ACCOUNT)

print('Uploading data to mysql')
# MySQL tables that receive the frontend data
tables_list = ['general', 'competitor']
# BigQuery source table for each MySQL table whose name differs
bq_table_names = {'competitor': 'competitors'}

# Database connection. The password comes from the environment so it is not
# committed with the notebook; a missing variable raises KeyError immediately.
# NOTE(review): the password previously hardcoded here should be rotated.
password = quote_plus(os.environ['MYSQL_PASSWORD'])
engine = create_engine(f'mysql+pymysql://geraldo-papa:{password}@34.123.250.92/glm')

for table_name in tables_list:
    bq_table = bq_table_names.get(table_name, table_name)

    # Read from BigQuery first: if this fails, the MySQL table has not been
    # truncated yet and keeps its current contents.
    df = bigquery.run_query(f'select * from datalake-v2-424516.tables_frontend.{bq_table}')

    # Single timestamp so created_at and updated_at are identical for this load
    load_time = datetime.now()
    df['created_at'] = load_time
    df['updated_at'] = load_time

    memory_usage = df.memory_usage(deep=True).sum() / (1024 ** 2)
    print(f"Tabela: {bq_table} / Tamanho em memória: {memory_usage:.2f} MB")

    # Empty the target table only once the replacement data is in memory
    with engine.begin() as conn:
        conn.execute(text(f"TRUNCATE TABLE {table_name};"))

    # Time only the bulk insert
    start_time = time.time()
    df.to_sql(
        name=table_name,
        con=engine,
        if_exists='append',
        index=False,
        chunksize=1000,
        method='multi',
    )
    elapsed_time = time.time() - start_time

    print(f"Tempo decorrido: {elapsed_time:.2f} segundos")
    print('-----------------------------------')

Using local credentials from: C:/Users/User/Documents/papa preco/service account/service_account_datalakev2.json
Uploading data to mysql
Tabela: general / Tamanho em memória: 42.59 MB
Tempo decorrido: 56.51 segundos
-----------------------------------
Tabela: competitors / Tamanho em memória: 144.69 MB
Tempo decorrido: 203.16 segundos
-----------------------------------
