In [0]:
%run ../dao/coingeckoapi_dao/

In [0]:
%run ../utils/save_helper/

In [0]:
import time

from pyspark.sql import functions as F
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType, LongType, TimestampType

In [0]:
# Data-access object whose `make_request` method is used further down to call
# the `coins/markets` endpoint. CoinGeckoAPI_DAO is not imported in this
# notebook; presumably it is defined by the `../dao/coingeckoapi_dao/`
# notebook executed via %run above — verify.
cgk_api = CoinGeckoAPI_DAO()

In [0]:
# Spark schema applied to each record returned by the `coins/markets` request.
# Built with StructType.add(), whose `nullable` argument defaults to True, so
# only the four identifying columns spell out nullable=False explicitly.
schema = (
    StructType()
    # Identification columns (required).
    .add("id", StringType(), False)
    .add("symbol", StringType(), False)
    .add("name", StringType(), False)
    .add("image", StringType(), False)
    # Current price and market figures.
    .add("current_price", DoubleType())
    .add("market_cap", DoubleType())
    .add("market_cap_rank", IntegerType())
    .add("fully_diluted_valuation", LongType())
    .add("total_volume", DoubleType())
    # 24-hour window.
    .add("high_24h", DoubleType())
    .add("low_24h", DoubleType())
    .add("price_change_24h", DoubleType())
    .add("price_change_percentage_24h", DoubleType())
    .add("market_cap_change_24h", DoubleType())
    .add("market_cap_change_percentage_24h", DoubleType())
    # Supply.
    .add("circulating_supply", DoubleType())
    .add("total_supply", DoubleType())
    .add("max_supply", DoubleType())
    # All-time high / low; the *_date columns are kept as raw strings.
    .add("ath", DoubleType())
    .add("ath_change_percentage", DoubleType())
    .add("ath_date", StringType())
    .add("atl", DoubleType())
    .add("atl_change_percentage", DoubleType())
    .add("atl_date", StringType())
    # `roi` is declared as a double; the fetch loop nulls it out when the API
    # sends a dict instead.
    .add("roi", DoubleType())
    # Kept as a raw string, not parsed into a timestamp here.
    .add("last_updated", StringType())
)

In [0]:
# Pagination / throttling settings for the `coins/markets` requests below.
MAX_PAGES = 4                  # pages 1..MAX_PAGES are requested
PER_PAGE = 250                 # records requested per page
REQUEST_DELAY_SECONDS = 3      # pause between two successful requests
RATE_LIMIT_WAIT_SECONDS = 60   # pause before retrying after a rate-limit error
MAX_RATE_LIMIT_RETRIES = 5     # consecutive rate-limit retries allowed per page

# Start from an empty, correctly-typed DataFrame and union each page into it.
df_market_data = spark.createDataFrame([], schema)

page = 1
rate_limit_retries = 0
while page <= MAX_PAGES:
    try:
        market_data = cgk_api.make_request(
            'coins/markets',
            params={'vs_currency': 'usd', 'page': page, 'per_page': PER_PAGE, 'order': 'market_cap_desc'},
        )

        # An empty response means there is nothing left to fetch.
        if not market_data:
            break

        # `roi` may arrive as a dict, which does not fit the DoubleType column
        # declared in `schema`; such values are replaced by None.
        for record in market_data:
            if 'roi' in record and isinstance(record['roi'], dict):
                record['roi'] = None

        # `market_data` is known to be non-empty at this point, so no Spark
        # action (e.g. count()) is needed to decide whether to keep the page.
        df_request = spark.createDataFrame(market_data, schema=schema)
        df_market_data = df_market_data.union(df_request)
        print(f"Página {page} processada com sucesso")

        page += 1
        rate_limit_retries = 0

        # Throttle only when another request is actually going to be made.
        if page <= MAX_PAGES:
            time.sleep(REQUEST_DELAY_SECONDS)

    except Exception as e:
        message = str(e)
        if "rate limit" in message.lower() or "429" in message:
            # Retry the same page (page is not incremented), but give up after
            # MAX_RATE_LIMIT_RETRIES consecutive attempts instead of looping
            # forever when the API keeps rejecting the request.
            rate_limit_retries += 1
            if rate_limit_retries > MAX_RATE_LIMIT_RETRIES:
                print(f"Rate limit persistente após {MAX_RATE_LIMIT_RETRIES} tentativas. Interrompendo a coleta.")
                break
            print(f"Rate limit atingido. Aguardando {RATE_LIMIT_WAIT_SECONDS} segundos...")
            time.sleep(RATE_LIMIT_WAIT_SECONDS)
        else:
            # NOTE(review): the loop stops without raising, so df_market_data
            # may hold only the pages fetched so far (possibly none) when the
            # following cell runs.
            print(f"Erro inesperado: {e}")
            break

In [0]:
# Hand the accumulated DataFrame to SaveHelper and run it. SaveHelper is not
# defined in this notebook; presumably it comes from the
# `../utils/save_helper/` notebook executed via %run above.
# Positional arguments: the DataFrame, 'bronze', the target name
# 'workspace.api_coingecko.bronze_market_data_coins' and 'overwrite' —
# presumably layer, destination table and write mode; confirm against SaveHelper.
# NOTE(review): the fetch loop stops on errors without raising, so this cell
# can run with a partial or empty df_market_data; if 'overwrite' replaces the
# existing table contents, confirm that is acceptable.
save_helper = SaveHelper(df_market_data, 'bronze', 'workspace.api_coingecko.bronze_market_data_coins', 'overwrite')
save_helper.execute()