In [1]:
from src.cloud_functions._1_fetch_data._1_13_fetch_items_promotions.main import fetch_promotions_data


In [2]:
class MockRequest:
    
    def __init__(self, json_data):
        self._json_data = json_data

    def get_json(self):
        return self._json_data

In [3]:
test_data = {
 "access_token": None,
 "client_id": "4959083987776428",
 "client_secret": "Hw9wWSydd8PMvMEJewWoMvKGYMAWyKEw",
 "seller_id": 189643563,
 "store_name": "hubsmarthome"
}

mock_request = MockRequest(test_data)

In [12]:
from src.common.cloud_storage_connector import CloudStorage
from src.common.bigquery_connector import BigQueryManager
from src.common.utils import batch_process, log_process, authenticate, fetch_items_from_storage
from src.config import settings
import json
import asyncio
import aiohttp
from datetime import datetime
import time

semaphore = asyncio.Semaphore(100)  # Control the number of simultaneous requests

async def main_async(request):
    # Parsing request data
    data = request.get_json()
    client_id = data.get('client_id')
    client_secret = data.get('client_secret')
    store_name = data.get('store_name')
    seller_id = data.get('seller_id')
    access_token = data.get('access_token')
    print('** Defining authentication... **')
    
    # Authenticate (assuming this is now centralized in utils.py or a similar file)
    if not access_token:
        access_token = authenticate(client_id, client_secret)  # You can add this to a common module
    print('** Connecting to storage and BigQuery... **')

    # Initialize storage and BigQuery
    storage = CloudStorage(credentials_path=settings.PATH_SERVICE_ACCOUNT)
    #bigquery = BigQueryManager(credentials_path=settings.PATH_SERVICE_ACCOUNT)
    
    # Define paths and table names from the config
    bucket_name = settings.BUCKET_STORES
    table_management = settings.TABLE_MANAGEMENT
    destiny_table = settings.TABLE_ITEM_PROMOTION
    # Define today's date
    today_str = datetime.today().strftime('%Y-%m-%d')
    
    # Fetch item IDs from the storage bucket
    blob_items_prefix = f'{store_name}/meli/api_response/item_detail/date={today_str}/'
    items_id = fetch_items_from_storage(storage, settings.BUCKET_STORES, blob_items_prefix, 'id')
    print(f'** Items found: {len(items_id)}**')
    
    print(f'** Cleaning blob **')
    # Path for saving promotions details
    # marketplace
    blob_basic_path_marketplace = settings.BLOB_PROMOTIONS(store_name)
    date_blob_path_marketplace = f'{blob_basic_path_marketplace}date={today_str}/'
    
    # mshops
    blob_basic_path_mshops = settings.BLOB_PROMOTIONS_MSHOPS(store_name)
    date_blob_path_mshops = f'{blob_basic_path_mshops}date={today_str}/'
    
    # Clean existing files in the storage bucket
    storage.clean_blobs(bucket_name, date_blob_path_marketplace)
    storage.clean_blobs(bucket_name, date_blob_path_mshops)
    print(f'** Starting API requests for {len(items_id)} items**')
    # URL function for API
    url_marketplace = settings.URL_PROMOTIONS_MARKETPLACE
    url_mshops = settings.URL_PROMOTIONS_MSHOPS
    headers = {'Authorization': f'Bearer {access_token}'}
    
    # Batch processing the API requests
    # PROMOTIONS MELI
    async with aiohttp.ClientSession() as session:
        await batch_process(session, items_id, url_marketplace, headers, bucket_name, date_blob_path_marketplace, storage, add_item_id = True, sleep_time=15)
    
    time.sleep(60)
    
    # Batch processing the API requests
    # PROMOTIONS SHOPS
    async with aiohttp.ClientSession() as session:
        await batch_process(session, items_id, url_mshops, headers, bucket_name, date_blob_path_mshops, storage, add_item_id = True, sleep_time=15)


    
    print('** Logging process in management table... **')
    # Log the process in BigQuery
    log_process(seller_id, destiny_table, today_str, table_management, processed_to_bq=False)

    return ('Success', 200)

def fetch_promotions_data(request):
    return (main_async(request))



In [13]:
result = await fetch_promotions_data(mock_request)


** Defining authentication... **
** Connecting to storage and BigQuery... **
Using local credentials from: D:/bacar/Savvi/GLM/Desenvolvimento/service_account/service_account_datalakev2.json
Reading file: hubsmarthome/meli/api_response/item_detail/date=2024-11-02/batch_0__process_time=2024-11-02T07:00:00.589121-03:00.json
Reading file: hubsmarthome/meli/api_response/item_detail/date=2024-11-02/batch_10__process_time=2024-11-02T07:00:00.278753-03:00.json
Reading file: hubsmarthome/meli/api_response/item_detail/date=2024-11-02/batch_1__process_time=2024-11-02T07:00:00.054162-03:00.json
Reading file: hubsmarthome/meli/api_response/item_detail/date=2024-11-02/batch_2__process_time=2024-11-02T07:00:00.423752-03:00.json
Reading file: hubsmarthome/meli/api_response/item_detail/date=2024-11-02/batch_3__process_time=2024-11-02T07:00:00.699642-03:00.json
Reading file: hubsmarthome/meli/api_response/item_detail/date=2024-11-02/batch_4__process_time=2024-11-02T07:00:00.056926-03:00.json
Reading fil

In [None]:
import pandas as pd
from datetime import datetime
from src.common.cloud_storage_connector import CloudStorage
from src.common.bigquery_connector import BigQueryManager
from src.config import settings


def insert_bq_promotions(request):
    data = request.get_json()
    store_name = data.get('store_name')
    seller_id = data.get('seller_id')

    print('** Connecting to storage and BigQuery... **')
    # Initialize storage and BigQuery
    storage = CloudStorage(credentials_path=settings.PATH_SERVICE_ACCOUNT)
    bigquery = BigQueryManager(credentials_path=settings.PATH_SERVICE_ACCOUNT)

    # Define paths and table names from the config
    bucket_name = settings.BUCKET_STORES
    table_management = settings.TABLE_MANAGEMENT
    destiny_table = settings.TABLE_ITEM_PROMOTION
    blob_promotions = settings.BLOB_PROMOTIONS(store_name)
    blob_promotions_mshops = settings.BLOB_PROMOTIONS_MSHOPS(store_name)

    # Define today's date
    today_str = datetime.today().strftime('%Y-%m-%d')

    # Get dates to treat
    list_dates_to_process = bigquery.get_list_dates_to_process(seller_id, table_management, destiny_table)

    print(f'*** Starting to process dates: {len(list_dates_to_process)} dates to process ***')

    for date in list_dates_to_process:
        # Transform date to string
        date_to_process = date.strftime('%Y-%m-%d')
        print(f'Processing date: {date_to_process}')

        # Collect processed data for this date in a list
        processed_data = []

        # Channel 'marketplace' processing
        blob_prefix = blob_promotions + f'date={date_to_process}/'
        blobs = storage.list_blobs(bucket_name, blob_prefix)
        for blob in blobs:
            print(f"Reading file: {blob.name}")
            content = storage.download_json(bucket_name, blob.name)
            for json_item in content:
                processed_dict = process_response(json_item, 'Marketplace')
                if processed_dict:  # Add only valid data
                    processed_data.append(processed_dict)

        # Channel 'mshops' processing
        blob_prefix_mshops = blob_promotions_mshops + f'date={date_to_process}/'
        blobs = storage.list_blobs(bucket_name, blob_prefix_mshops)
        for blob in blobs:
            print(f"Reading file: {blob.name}")
            content = storage.download_json(bucket_name, blob.name)
            for json_item in content:
                processed_dict = process_response(json_item, 'mshops')
                if processed_dict:  # Add only valid data
                    processed_data.append(processed_dict)

        # Convert the processed data list to a DataFrame
        df_processed_data = pd.DataFrame(processed_data)

        if df_processed_data.empty:
            print(f'Nenhum dado processado para a data {date_to_process}, pulando inserção...')
            continue  # Passa para a próxima d

        # Set static columns once after data collection
        df_processed_data['correspondent_date'] = pd.to_datetime(date_to_process)
        df_processed_data['process_time'] = datetime.now()
        df_processed_data['seller_id'] = seller_id

        print(f'*** Finished treating all data. {df_processed_data.shape[0]} products ***')

        # Create the table if it does not exist
        if not bigquery.table_exists(destiny_table):
            print(f'Table {destiny_table} does not exist. Creating table...')
            bigquery.create_table(destiny_table, df_processed_data)

        print('** Deleting existing data **')
        bigquery.delete_existing_data(destiny_table, seller_id, date_to_process)

        print('** Correcting dataframe schema **')
        df_processed_data = bigquery.match_dataframe_schema(df_processed_data, destiny_table)

        print('** Inserting data into BQ **')
        bigquery.insert_dataframe(df_processed_data, destiny_table)

        print('** Updating log table **')
        bigquery.update_logs_table(seller_id, date_to_process, destiny_table, table_management)

    return ('Success', 200)


def process_response(json_item, channel):
    try:
        if channel == "Marketplace":
            # Process specific data for the Marketplace channel
            data = {
                'item_id': json_item.get('item_id'),
                'promotion_id': json_item.get('id'),
                'status': json_item.get('status'),
                'type': json_item.get('type'),
                'name': json_item.get('name'),
                'meli_percent': json_item.get('benefits', {}).get('meli_percent'),
                'seller_percent': json_item.get('benefits', {}).get('seller_percent'),
                'start_date': json_item.get('start_date'),
                'finish_date': json_item.get('finish_date'),
                'channel': channel,
            }
        
        elif channel == "mshops":
            # Process specific data for the mshops channel
            data = {
                'item_id': json_item.get('item_id'),
                'promotion_id': json_item.get('id'),
                'status': json_item.get('status'),
                'type': json_item.get('type'),
                'name': json_item.get('name'),
                'target': json_item.get('target'),
                'buy_quantity': json_item.get('buy_quantity'),
                'start_date': json_item.get('start_date'),
                'finish_date': json_item.get('finish_date'),
                'channel': channel,
            }
        
        else:
            print(f'Unknown channel: {channel}')
            return None  # Return None for unknown channels

        return data  # Return the processed data dictionary

    except Exception as e:
        print(f'Error processing json item: {json_item} | Exception: {e}')
        return None  # Return None in case of an exception to avoid errors in the main flow


In [18]:
result = insert_bq_promotions(mock_request)

** Connecting to storage and BigQuery... **
Using local credentials from: D:/bacar/Savvi/GLM/Desenvolvimento/service_account/service_account_datalakev2.json
Using local credentials from: D:/bacar/Savvi/GLM/Desenvolvimento/service_account/service_account_datalakev2.json
*** Starting to process dates: 1 dates to process ***
Processing date: 2024-11-02
Reading file: hubsmarthome/meli/api_response/items_promotions/date=2024-11-02/batch_0__process_time=2024-11-02T18:21:22.549588-03:00.json
Error processing json item: [{'type': 'PRICE_DISCOUNT', 'status': 'candidate', 'item_id': 'MLB3634096021'}, {'id': 'P-MLB14223240', 'type': 'DEAL', 'status': 'started', 'start_date': '2024-10-21T00:00:00-03:00', 'finish_date': '2024-11-18T00:00:00-03:00', 'deadline_date': '2024-11-04T03:00:00Z', 'name': 'Black Friday Esquenta', 'item_id': 'MLB3634096021'}, {'id': 'P-MLB14279052', 'type': 'DEAL', 'status': 'pending', 'start_date': '2024-11-18T00:00:00-03:00', 'finish_date': '2024-12-03T00:00:00-03:00', 'de

In [11]:
import os
from src.config.settings import *

path_credentials = PATH_SERVICE_ACCOUNT

if os.path.exists(path_credentials):
    print("O arquivo existe.")
else:
    print("O arquivo não foi encontrado.")


O arquivo existe.
