In [211]:
import requests
import pandas as pd
from datetime import datetime, timedelta, timezone
from deltalake import DeltaTable, write_deltalake
from deltalake.exceptions import TableNotFoundError
import pyarrow as pa
from pprint import pprint

In [212]:
def get_data(base_url, endpoint, data_field, params=None, headers=None):
        try:
            endpoint_url = f"{base_url}/{endpoint}"
            response = requests.get(endpoint_url, params=params, headers=headers)
            response.raise_for_status()  # Levanta una excepción si hay un error en la respuesta HTTP.

        # Verificar si los datos están en formato JSON.
            try:
                data = response.json()
                data = data[data_field]
            except:
                print("El formato de respuesta no es el esperado")
                return None
            return data

        except requests.exceptions.RequestException as e:
            # Capturar cualquier error de solicitud, como errores HTTP.
            print(f"La petición ha fallado. Código de error : {e}")
        return None

def build_table(json_data):
    try:
        df = pd.json_normalize(json_data)
        return df
    except:
        print("Los datos no están en el formato esperado")
        return None

In [213]:

# Usar la función para hacer la petición a la API de CoinCap
base_url = "https://api.coincap.io/v2"

In [214]:

endpoint = "assets"
endpoint2 = "markets"

json_data = get_data(base_url, endpoint, data_field="data")
json_data_times =get_data(base_url, endpoint, data_field="timestamp")

json_data2 = get_data(base_url, endpoint2, data_field="data")
json_data_times2 =get_data(base_url, endpoint2, data_field="timestamp")


In [215]:
#pprint(json_data)

#pprint(json_data2)

In [216]:
df_assets = build_table(json_data)

df_markets = build_table(json_data2)

In [217]:
df_assets.head()

df_markets.head()


Unnamed: 0,exchangeId,rank,baseSymbol,baseId,quoteSymbol,quoteId,priceQuote,priceUsd,volumeUsd24Hr,percentExchangeVolume,tradesCount24Hr,updated
0,alterdice,1,BTC,bitcoin,USDT,tether,96924.74,97038.07364400568,62520857.61064683,100.0,7.0,1733442669467
1,bibox,1,ETH,ethereum,USDT,tether,3704.51,3708.841666172697,13947486.288712377,7.32983607391675,,1733442516296
2,bibox,2,LINK,chainlink,USDT,tether,23.106,23.133017737456868,13646531.739685347,7.171675136210512,,1733440192073
3,bibox,3,BNB,binance-coin,USDT,tether,718.52,719.3601620668877,5785090.394925004,3.0402441981181743,,1733441532544
4,bibox,4,LPT,livepeer,USDT,tether,18.217797,18.23909898460956,5355985.660906924,2.81473636869377,,1733430695356


In [218]:
# Si los datos de los activos y el timestamp son válidos
if df_assets is not None and json_data_times is not None:
    # Convertir el timestamp de milisegundos a segundos y luego a una fecha legible
    timestamp_datetime = datetime.fromtimestamp(json_data_times / 1000, tz=timezone.utc)

    # Dividir el timestamp en columnas de fecha y hora
    df_assets['date'] = timestamp_datetime.date()  
    df_assets['time'] = timestamp_datetime.time()

    # Convertir la columna 'date' a un tipo compatible con Delta Lake
    df_assets['date'] = pd.to_datetime(df_assets['date'], errors='coerce')

    # Convertir la columna 'time' a un tipo compatible (por ejemplo, string)
    df_assets['time'] = df_assets['time'].astype(str)
# Si los datos de los activos y el timestamp son válidos
if df_markets is not None and json_data_times2 is not None:
    # Convertir el timestamp de milisegundos a segundos y luego a una fecha legible
    timestamp_datetime2 = datetime.fromtimestamp(json_data_times2 / 1000, tz=timezone.utc)

# Asignar el timestamp al DataFrame
    # Dividir el timestamp en columnas de fecha y hora
    df_markets['date'] = timestamp_datetime2.date()  
    df_markets['time'] = timestamp_datetime2.time()
    
    # Convertir la columna 'date' a un tipo compatible con Delta Lake
    df_markets['date'] = pd.to_datetime(df_markets['date'], errors='coerce')

    # Convertir la columna 'time' a un tipo compatible (por ejemplo, string)
    df_markets['time'] = df_markets['time'].astype(str)



In [219]:
# Aquí debería aparecer la columna 'timestamp' en el DataFrame
# DF temporal
df_assets.head()

Unnamed: 0,id,rank,symbol,name,supply,maxSupply,marketCapUsd,volumeUsd24Hr,priceUsd,changePercent24Hr,vwap24Hr,explorer,date,time
0,bitcoin,1,BTC,Bitcoin,19790568.0,21000000.0,1918910062572.4917,46059584752.32312,96960.8382423633,-1.9622262132228756,100983.512541216,https://blockchain.info/,2024-12-05,23:52:03.973000
1,ethereum,2,ETH,Ethereum,120441887.62048264,,456925069794.4052,21247716860.50368,3793.7388629626503,-1.1855778996849213,3873.7655878925857,https://etherscan.io/,2024-12-05,23:52:03.973000
2,tether,3,USDT,Tether,135691101232.32043,,135849764218.74324,82737029696.51674,1.0011692954437088,0.0362115845310114,1.000543509699584,https://www.omniexplorer.info/asset/31,2024-12-05,23:52:03.973000
3,binance-coin,4,BNB,BNB,166801148.0,166801148.0,119959293572.73174,1234687406.1171756,719.1754673818657,-2.4567955885956647,720.2092740201016,https://etherscan.io/token/0xB8c77482e45F1F44d...,2024-12-05,23:52:03.973000
4,solana,5,SOL,Solana,475338088.0108557,,112642479340.93433,3162998635.359764,236.973392585283,3.369836422249804,235.7824047865852,https://explorer.solana.com/,2024-12-05,23:52:03.973000


In [220]:
# DF estatico
df_markets.head()


Unnamed: 0,exchangeId,rank,baseSymbol,baseId,quoteSymbol,quoteId,priceQuote,priceUsd,volumeUsd24Hr,percentExchangeVolume,tradesCount24Hr,updated,date,time
0,alterdice,1,BTC,bitcoin,USDT,tether,96924.74,97038.07364400568,62520857.61064683,100.0,7.0,1733442669467,2024-12-05,23:52:05.763000
1,bibox,1,ETH,ethereum,USDT,tether,3704.51,3708.841666172697,13947486.288712377,7.32983607391675,,1733442516296,2024-12-05,23:52:05.763000
2,bibox,2,LINK,chainlink,USDT,tether,23.106,23.133017737456868,13646531.739685347,7.171675136210512,,1733440192073,2024-12-05,23:52:05.763000
3,bibox,3,BNB,binance-coin,USDT,tether,718.52,719.3601620668877,5785090.394925004,3.0402441981181743,,1733441532544,2024-12-05,23:52:05.763000
4,bibox,4,LPT,livepeer,USDT,tether,18.217797,18.23909898460956,5355985.660906924,2.81473636869377,,1733430695356,2024-12-05,23:52:05.763000


## Delta lake

In [221]:
def save_data_as_delta(df, path, mode="overwrite", partition_cols=None):
    #Guardar datos (opcionalmente particionados)|Escribe (write_deltalake)

    write_deltalake(
        path, df, mode=mode, partition_by=partition_cols
    )

def save_new_data_as_delta(new_data, data_path, predicate, partition_cols=None):
    #Guardar solo datos nuevos evitando duplicados	| MERGE (inserción condicional)
    try:
        dt = DeltaTable(data_path)
        # Convertir los nuevos datos a formato Arrow para trabajar con Delta
        new_data_pa = pa.Table.from_pandas(new_data)
        # Se insertan en target, datos de source que no existen en target
        dt.merge(
            source=new_data_pa,
            source_alias="source",
            target_alias="target",
            predicate=predicate
        ) \
        .when_not_matched_insert_all() \
        .execute()

    # sino se guarda como nueva
    except TableNotFoundError:
        save_data_as_delta(new_data, data_path, partition_cols=partition_cols)

def upsert_data_as_delta(data, data_path, predicate):
    #Actualizar o insertar registros dependiendo de coincidencias | MERGE (actualización/inserción)
    try:
        dt = DeltaTable(data_path)
        data_pa = pa.Table.from_pandas(data)
        dt.merge(
            source=data_pa,
            source_alias="source",
            target_alias="target",
            predicate=predicate
        ) \
        .when_matched_update_all() \
        .when_not_matched_insert_all() \
        .execute()
    except TableNotFoundError:
        save_data_as_delta(data, data_path)
    except Exception as e:
        # Capturar cualquier otro error
        print(f"Ocurrió un error al realizar el upsert: {e}")

In [222]:
#Ruta de guardado
bronze_dir = "datalake/bronze/api_coincap"
assets_raw_dir = f"{bronze_dir}/assets"

markets_raw_dir = f"{bronze_dir}/markets"

### Assets
https://api.coincap.io/v2/assets

In [223]:
# Guardar solo los nuevos datos (evitar duplicados) usando el predicado basado en el id
try:
    upsert_data_as_delta(
        df_assets,
        assets_raw_dir,
        "target.id = source.id"
    )
except Exception as e:
    print(f"Ocurrió un error al guardar los datos: {e}")

In [224]:
canRow = DeltaTable(assets_raw_dir)
print(f"Cant de filas: {canRow.to_pandas().shape[0]}")

Cant de filas: 100


In [225]:
#.sort_values("rank")
DeltaTable(assets_raw_dir).to_pandas()
# Cargar la tabla Delta como un DataFrame de Pandas
#df = DeltaTable(f"{bronze_dir}/assets").to_pandas()

# Filtrar las filas donde el id es "bitcoin"
#df_bitcoin = df[df['id'] == 'bitcoin']

# Mostrar las primeras 10 filas del DataFrame filtrado
#df_bitcoin.head(10)


Unnamed: 0,id,rank,symbol,name,supply,maxSupply,marketCapUsd,volumeUsd24Hr,priceUsd,changePercent24Hr,vwap24Hr,explorer,date,time
0,near-protocol,20,NEAR,NEAR Protocol,1217906155.0000000000000000,,9355430971.4468810443722355,349647623.4134561937116089,7.6815696620294041,3.0315887128672234,7.6143633463100621,https://explorer.nearprotocol.com/,2024-12-05,23:52:03.973000
1,internet-computer,23,ICP,Internet Computer,475416669.9987201700000000,,6820134483.2764641997445493,167905917.0961184336146508,14.3455939045949401,-2.4727377831923153,14.5589588322840011,https://www.dfinityexplorer.org/#/,2024-12-05,23:52:03.973000
2,superfarm,81,SUPER,SuperVerse,487976093.4169172600000000,1000000000.0000000000000000,835772720.0492422061000801,18155355.4963606527129051,1.7127329213957502,3.8121019975227144,1.7154827858601999,https://etherscan.io/token/0xe53ec727dbdeb9e2d...,2024-12-05,23:52:03.973000
3,zilliqa,90,ZIL,Zilliqa,19242163422.3206630000000000,,678901747.4903832486806382,69408588.8850953954970335,0.0352819863645304,4.2972549985653162,0.0341712871983576,https://etherscan.io/token/0x05f4a42e251f2d52b...,2024-12-05,23:52:03.973000
4,enjin-coin,91,ENJ,Enjin Coin,1781482552.7055097000000000,,673620637.3357083825144632,33138359.0373701455502530,0.3781236231096910,0.5284013238983048,0.3760434390384362,https://etherscan.io/token/0xf629cbd94d3791c92...,2024-12-05,23:52:03.973000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,synthetix-network-token,89,SNX,Synthetix,233716733.4900000000000000,212424133.0000000000000000,685770198.1471310937533554,63604707.4342375016117696,2.9341938333075027,-3.1552859653153411,2.9915613551556801,https://etherscan.io/token/0xc011a72400e58ecd9...,2024-12-05,23:52:03.973000
96,polygon,61,MATIC,Polygon,1964506878.5117414000000000,,1331888889.7272047892630242,27438243.7244490711243199,0.6779761905115897,-3.1488424803710700,0.7011477379859512,https://etherscan.io/token/0x7D1AfA7B718fb893d...,2024-12-05,23:52:03.973000
97,dogecoin,7,DOGE,Dogecoin,147080056383.7052300000000000,,63129533158.7978064105026035,6000919558.9616223831422963,0.4292188533984804,0.2983712713294922,0.4401487017784374,http://dogechain.info/chain/Dogecoin,2024-12-05,23:52:03.973000
98,vechain,26,VET,VeChain,80985041177.0000000000000000,86712634466.0000000000000000,5283561720.9433205127639870,190642184.9709318437206220,0.0652412055875310,-3.9831079913484601,0.0693823109693123,https://explore.veforge.com/,2024-12-05,23:52:03.973000


### markets
https://api.coincap.io/v2/markets

In [226]:

save_new_data_as_delta(
    df_markets,
    markets_raw_dir,
    """target.date = source.date""",
    partition_cols=["date"]
    )

In [227]:
canRowTwo = DeltaTable(markets_raw_dir)
print(f"Cant de filas: {canRowTwo.to_pandas().shape[0]}")

Cant de filas: 100


In [228]:
DeltaTable(markets_raw_dir).to_pandas()


Unnamed: 0,exchangeId,rank,baseSymbol,baseId,quoteSymbol,quoteId,priceQuote,priceUsd,volumeUsd24Hr,percentExchangeVolume,tradesCount24Hr,updated,date,time
0,alterdice,1,BTC,bitcoin,USDT,tether,97105.5200000000000000,97264.5730023624463082,63154115.7638217481382352,100.0000000000000000,7,1733441384109,2024-12-05,23:31:10.928000
1,bibox,1,ETH,ethereum,USDT,tether,3704.5000000000000000,3710.5677482315287777,13997638.1669639204464723,7.3769522036240596,,1733441293876,2024-12-05,23:31:10.928000
2,bibox,2,LINK,chainlink,USDT,tether,23.1060000000000000,23.1438462385308959,13652919.6431260947502457,7.1952806927789518,,1733440192073,2024-12-05,23:31:10.928000
3,bibox,3,BNB,binance-coin,USDT,tether,709.9900000000000000,711.1529209250649526,5697261.4577504435373166,3.0025369254483664,,1733437274382,2024-12-05,23:31:10.928000
4,bibox,4,LPT,livepeer,USDT,tether,18.2177970000000000,18.2476366559668242,5358492.7828544297066274,2.8240010686856822,,1733430695356,2024-12-05,23:31:10.928000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,bibox,95,MKR,maker,ETH,ethereum,0.5185691500000000,1979.3272139114297329,302014.4553383471589309,0.1591658660739464,,1733441348389,2024-12-05,23:31:10.928000
96,bibox,96,PHA,phala-network,ETH,ethereum,0.0000471500000000,0.1799668918521742,234853.7065710510139058,0.1237712067959799,,1733433696628,2024-12-05,23:31:10.928000
97,bibox,97,QTUM,qtum,BTC,bitcoin,0.0000521700000000,5.0802936321380402,234494.2512759720925370,0.1235817687994046,,1733441364855,2024-12-05,23:31:10.928000
98,bibox,98,ALGO,algorand,USDC,usd-coin,0.4422000000000000,0.4423352919450511,216081.6969025059269112,0.1138780936551034,,1733440181853,2024-12-05,23:31:10.928000


## TP 2 final


In [229]:
# Leer los datos en DataFrames
assets_for_modify = DeltaTable(assets_raw_dir).to_pandas()
markets_for_modify = DeltaTable(markets_raw_dir).to_pandas()

In [230]:
# Renombrar columnas para mayor claridad
assets_for_modify = assets_for_modify.rename(columns={
    'id': 'asset_id',
    'rank': 'asset_rank',
    'name': 'asset_name'
})
markets_for_modify = markets_for_modify.rename(columns={
    'rank': 'market_rank',
    'baseSymbol': 'base_asset',
    'quoteSymbol': 'quote_asset'
})

In [231]:
# Convertir columnas a sus tipos correctos
assets_for_modify['supply'] = assets_for_modify['supply'].astype(float)  
assets_for_modify['priceUsd'] = assets_for_modify['priceUsd'].astype(float)  

markets_for_modify['volumeUsd24Hr'] = markets_for_modify['volumeUsd24Hr'].astype(float)
markets_for_modify['percentExchangeVolume'] = markets_for_modify['percentExchangeVolume'].astype(float)

#modificar valores nulos

markets_for_modify['volumeUsd24Hr'] = markets_for_modify['volumeUsd24Hr'].fillna(0)
assets_for_modify['maxSupply'] = assets_for_modify['maxSupply'].fillna(0)
assets_for_modify['explorer'] = assets_for_modify['explorer'].fillna("Not Data")

##
# Asegurarse de que las columnas en assets_for_modify tengan los tipos correctos
assets_for_modify['volumeUsd24Hr'] = pd.to_numeric(assets_for_modify['volumeUsd24Hr'], errors='coerce')
assets_for_modify['marketCapUsd'] = pd.to_numeric(assets_for_modify['marketCapUsd'], errors='coerce')

# Para aquellas columnas como 'maxSupply' que ya son numéricas, asegurarse de que no haya problemas con el tipo
assets_for_modify['maxSupply'] = pd.to_numeric(assets_for_modify['maxSupply'], errors='coerce')

# Asegurarse de que 'priceUsd' en assets_for_modify sea numérica
assets_for_modify['priceUsd'] = pd.to_numeric(assets_for_modify['priceUsd'], errors='coerce')

# Rellenar NaN en columnas numéricas con 0
assets_for_modify['volumeUsd24Hr'] = assets_for_modify['volumeUsd24Hr'].fillna(0)
assets_for_modify['marketCapUsd'] = assets_for_modify['marketCapUsd'].fillna(0)
assets_for_modify['maxSupply'] = assets_for_modify['maxSupply'].fillna(0)
assets_for_modify['priceUsd'] = assets_for_modify['priceUsd'].fillna(0)

# Asegurarse de que 'tradesCount24Hr' en markets_for_modify sea numérico (convertir a int64)
markets_for_modify['tradesCount24Hr'] = pd.to_numeric(markets_for_modify['tradesCount24Hr'], errors='coerce', downcast='integer')

# Rellenar NaN en 'tradesCount24Hr' con 0
markets_for_modify['tradesCount24Hr'] = markets_for_modify['tradesCount24Hr'].fillna(0).astype(int)

# Verificar los tipos de datos después de la conversión
print(markets_for_modify.dtypes)


# Asegurarse de que 'priceUsd' y 'priceQuote' en markets_for_modify sean numéricas
markets_for_modify['priceUsd'] = pd.to_numeric(markets_for_modify['priceUsd'], errors='coerce')
markets_for_modify['priceQuote'] = pd.to_numeric(markets_for_modify['priceQuote'], errors='coerce')

# Rellenar NaN en las columnas de mercado con 0
markets_for_modify['priceUsd'] = markets_for_modify['priceUsd'].fillna(0)
markets_for_modify['priceQuote'] = markets_for_modify['priceQuote'].fillna(0)




exchangeId                       object
market_rank                      object
base_asset                       object
baseId                           object
quote_asset                      object
quoteId                          object
priceQuote                       object
priceUsd                         object
volumeUsd24Hr                   float64
percentExchangeVolume           float64
tradesCount24Hr                   int64
updated                           int64
date                     datetime64[us]
time                             object
dtype: object


In [232]:
markets_for_modify.head(10)

Unnamed: 0,exchangeId,market_rank,base_asset,baseId,quote_asset,quoteId,priceQuote,priceUsd,volumeUsd24Hr,percentExchangeVolume,tradesCount24Hr,updated,date,time
0,alterdice,1,BTC,bitcoin,USDT,tether,97105.52,97264.573002,63154120.0,100.0,7,1733441384109,2024-12-05,23:31:10.928000
1,bibox,1,ETH,ethereum,USDT,tether,3704.5,3710.567748,13997640.0,7.376952,0,1733441293876,2024-12-05,23:31:10.928000
2,bibox,2,LINK,chainlink,USDT,tether,23.106,23.143846,13652920.0,7.195281,0,1733440192073,2024-12-05,23:31:10.928000
3,bibox,3,BNB,binance-coin,USDT,tether,709.99,711.152921,5697261.0,3.002537,0,1733437274382,2024-12-05,23:31:10.928000
4,bibox,4,LPT,livepeer,USDT,tether,18.217797,18.247637,5358493.0,2.824001,0,1733430695356,2024-12-05,23:31:10.928000
5,bibox,5,DOT,polkadot,USDT,tether,10.6369,10.654323,4768450.0,2.51304,0,1733437189293,2024-12-05,23:31:10.928000
6,bibox,6,LTC,litecoin,ETH,ethereum,0.037139,141.75536,4503391.0,2.37335,0,1733441324211,2024-12-05,23:31:10.928000
7,bibox,7,GNO,gnosis-gno,USDT,tether,283.3785,283.842657,4471712.0,2.356655,0,1733430811247,2024-12-05,23:31:10.928000
8,bibox,8,BTC,bitcoin,USDT,tether,97248.0,97407.286376,4003275.0,2.109782,0,1733441278463,2024-12-05,23:31:10.928000
9,bibox,9,SUSHI,sushiswap,USDT,tether,1.4832,1.485629,3835021.0,2.02111,0,1733425404232,2024-12-05,23:31:10.928000


In [233]:
# Crear una columna booleana que indique si el precio supera cierto límite
assets_for_modify['high_value'] = assets_for_modify['priceUsd'] > 1000

# Crear una columna que indique si el volumen del mercado es significativo
markets_for_modify['high_volume'] = markets_for_modify['volumeUsd24Hr'] > 1_000_000



In [234]:
assets_for_modify.head(10)

Unnamed: 0,asset_id,asset_rank,symbol,asset_name,supply,maxSupply,marketCapUsd,volumeUsd24Hr,priceUsd,changePercent24Hr,vwap24Hr,explorer,date,time,high_value
0,near-protocol,20,NEAR,NEAR Protocol,1217906000.0,0.0,9355431000.0,349647600.0,7.68157,3.0315887128672236,7.614363346310062,https://explorer.nearprotocol.com/,2024-12-05,23:52:03.973000,False
1,internet-computer,23,ICP,Internet Computer,475416700.0,0.0,6820134000.0,167905900.0,14.345594,-2.4727377831923154,14.558958832284,https://www.dfinityexplorer.org/#/,2024-12-05,23:52:03.973000,False
2,superfarm,81,SUPER,SuperVerse,487976100.0,1000000000.0,835772700.0,18155360.0,1.712733,3.8121019975227135,1.7154827858602,https://etherscan.io/token/0xe53ec727dbdeb9e2d...,2024-12-05,23:52:03.973000,False
3,zilliqa,90,ZIL,Zilliqa,19242160000.0,0.0,678901700.0,69408590.0,0.035282,4.297254998565316,0.0341712871983576,https://etherscan.io/token/0x05f4a42e251f2d52b...,2024-12-05,23:52:03.973000,False
4,enjin-coin,91,ENJ,Enjin Coin,1781483000.0,0.0,673620600.0,33138360.0,0.378124,0.5284013238983049,0.3760434390384362,https://etherscan.io/token/0xf629cbd94d3791c92...,2024-12-05,23:52:03.973000,False
5,solana,5,SOL,Solana,475338100.0,0.0,112642500000.0,3162999000.0,236.973393,3.369836422249804,235.7824047865852,https://explorer.solana.com/,2024-12-05,23:52:03.973000,False
6,litecoin,19,LTC,Litecoin,75272730.0,84000000.0,10270750000.0,1409693000.0,136.447198,2.27761005451026,135.96782264235569,http://explorer.litecoin.net/chain/Litecoin,2024-12-05,23:52:03.973000,False
7,uniswap,21,UNI,Uniswap,600483100.0,0.0,9056756000.0,364591300.0,15.08245,-3.7278648654266626,15.365831495640212,https://etherscan.io/token/0x1f9840a85d5af5bf1...,2024-12-05,23:52:03.973000,False
8,crypto-com-coin,28,CRO,Crypto.com Coin,25263010000.0,30263010000.0,5157904000.0,44347050.0,0.204168,-4.604369141158522,0.2114260564416005,https://etherscan.io/token/0xa0b73e1ff0b80914a...,2024-12-05,23:52:03.973000,False
9,monero,36,XMR,Monero,18446740.0,0.0,3557786000.0,108638400.0,192.867942,-1.7969938024738907,198.83747084670009,http://moneroblocks.info/,2024-12-05,23:52:03.973000,False


### Join

In [235]:
# Hacer un JOIN entre assets y markets basado en el símbolo base (base_asset)
merged_markets_assets = pd.merge(markets_for_modify, assets_for_modify, left_on='base_asset', right_on='symbol', how='inner')

merged_markets_assets.head(10)

Unnamed: 0,exchangeId,market_rank,base_asset,baseId,quote_asset,quoteId,priceQuote,priceUsd_x,volumeUsd24Hr_x,percentExchangeVolume,...,maxSupply,marketCapUsd,volumeUsd24Hr_y,priceUsd_y,changePercent24Hr,vwap24Hr,explorer,date_y,time_y,high_value
0,alterdice,1,BTC,bitcoin,USDT,tether,97105.52,97264.573002,63154120.0,100.0,...,21000000.0,1918910000000.0,46059580000.0,96960.838242,-1.9622262132228756,100983.512541216,https://blockchain.info/,2024-12-05,23:52:03.973000,True
1,bibox,1,ETH,ethereum,USDT,tether,3704.5,3710.567748,13997640.0,7.376952,...,0.0,456925100000.0,21247720000.0,3793.738863,-1.1855778996849213,3873.7655878925857,https://etherscan.io/,2024-12-05,23:52:03.973000,True
2,bibox,2,LINK,chainlink,USDT,tether,23.106,23.143846,13652920.0,7.195281,...,0.0,14549020000.0,713240300.0,23.209736,-3.4995118883174885,24.0225546383416,https://etherscan.io/token/0x514910771af9ca656...,2024-12-05,23:52:03.973000,False
3,bibox,3,BNB,binance-coin,USDT,tether,709.99,711.152921,5697261.0,3.002537,...,166801148.0,119959300000.0,1234687000.0,719.175467,-2.4567955885956647,720.2092740201016,https://etherscan.io/token/0xB8c77482e45F1F44d...,2024-12-05,23:52:03.973000,False
4,bibox,4,LPT,livepeer,USDT,tether,18.217797,18.247637,5358493.0,2.824001,...,0.0,710626200.0,66119520.0,19.558132,11.360992860376186,18.56752453719652,https://explorer.livepeer.org/,2024-12-05,23:52:03.973000,False
5,bibox,5,DOT,polkadot,USDT,tether,10.6369,10.654323,4768450.0,2.51304,...,0.0,15906750000.0,778172400.0,10.449381,-0.7088552262900197,10.676958394818302,https://polkascan.io/polkadot,2024-12-05,23:52:03.973000,False
6,bibox,6,LTC,litecoin,ETH,ethereum,0.037139,141.75536,4503391.0,2.37335,...,84000000.0,10270750000.0,1409693000.0,136.447198,2.27761005451026,135.96782264235569,http://explorer.litecoin.net/chain/Litecoin,2024-12-05,23:52:03.973000,False
7,bibox,7,GNO,gnosis-gno,USDT,tether,283.3785,283.842657,4471712.0,2.356655,...,3000000.0,733520000.0,6476101.0,283.257406,-0.6361249686569954,286.50451911648565,https://etherscan.io/token/Gnosis,2024-12-05,23:52:03.973000,False
8,bibox,8,BTC,bitcoin,USDT,tether,97248.0,97407.286376,4003275.0,2.109782,...,21000000.0,1918910000000.0,46059580000.0,96960.838242,-1.9622262132228756,100983.512541216,https://blockchain.info/,2024-12-05,23:52:03.973000,True
9,bibox,10,MANA,decentraland,USDT,tether,0.748143,0.749368,3737715.0,1.969828,...,0.0,1446645000.0,155383200.0,0.744828,-4.348179998522792,0.7697938592371478,https://etherscan.io/token/decentraland,2024-12-05,23:52:03.973000,False


### Aggregation

In [236]:
#Convertir tradesCount24Hr a valores numéricos
markets_for_modify['tradesCount24Hr'] = pd.to_numeric(markets_for_modify['tradesCount24Hr'], errors='coerce').fillna(0)
#Confirmar los tipos de datos
print(markets_for_modify.dtypes[['volumeUsd24Hr', 'tradesCount24Hr']])

volumeUsd24Hr      float64
tradesCount24Hr      int64
dtype: object


In [237]:
# Calcular el volumen total en dólares y el promedio de transacciones por mercado
market_volume_summary = markets_for_modify.groupby('exchangeId').agg({
    'volumeUsd24Hr': 'sum', # Sumar el volumen de intercambio en USD por mercado
    'tradesCount24Hr': 'mean' # Calcular el promedio de transacciones por mercado
}).reset_index()

market_volume_summary.head()

Unnamed: 0,exchangeId,volumeUsd24Hr,tradesCount24Hr
0,alterdice,63154120.0,7.0
1,bibox,188176800.0,0.0


### Guardado en Delta lake



In [238]:
#Ruta de guardado
silver_dir = "datalake/silver/api_coincap"
assets_for_modify_raw_dir = f"{silver_dir}/assets_for_modify"

markets_for_modify_raw_dir = f"{silver_dir}/markets_for_modify"

merged_markets_assets_raw_dir = f"{silver_dir}/merged_markets_assets"

market_volume_summary_raw_dir = f"{silver_dir}/market_volume_summary"

In [239]:
# Guardar datos de assets
try:
    upsert_data_as_delta(
        assets_for_modify,
        assets_for_modify_raw_dir,
        "target.asset_id = source.asset_id",
        
    )
except Exception as e:
    print(f"Ocurrió un error al guardar los datos (1): {e}")

# Guardar datos de markets
try:
    save_new_data_as_delta(
        markets_for_modify,
        markets_for_modify_raw_dir,
        "target.exchangeId = source.exchangeId",
        partition_cols=["high_volume"]  # Particionar por columna booleana
        )
except Exception as e:
    print(f"Ocurrió un error al guardar los datos (2): {e}")

# Guardar datos de la tabla combinada (assets y markets)
try:
    save_new_data_as_delta(
        merged_markets_assets,
        merged_markets_assets_raw_dir,
        "target.base_asset = source.base_asset AND target.asset_id = source.asset_id",
        partition_cols=["high_value", "high_volume"]  # Particionar por múltiples columnas
    )
except Exception as e:
    print(f"Ocurrió un error al guardar los datos (3): {e}")

# Guardar los datos del resumen del volumen de mercado
try:
    upsert_data_as_delta(
        market_volume_summary,
        market_volume_summary_raw_dir,
        "target.exchangeId = source.exchangeId",
    )
except Exception as e:
    print(f"Ocurrió un error al guardar los datos (4): {e}")

In [240]:
DeltaTable(assets_for_modify_raw_dir).to_pandas().head(10)


Unnamed: 0,asset_id,asset_rank,symbol,asset_name,supply,maxSupply,marketCapUsd,volumeUsd24Hr,priceUsd,changePercent24Hr,vwap24Hr,explorer,date,time,high_value
0,near-protocol,20,NEAR,NEAR Protocol,1217906000.0,0.0,9355431000.0,349647600.0,7.68157,3.0315887128672236,7.614363346310062,https://explorer.nearprotocol.com/,2024-12-05,23:52:03.973000,False
1,internet-computer,23,ICP,Internet Computer,475416700.0,0.0,6820134000.0,167905900.0,14.345594,-2.4727377831923154,14.558958832284,https://www.dfinityexplorer.org/#/,2024-12-05,23:52:03.973000,False
2,superfarm,81,SUPER,SuperVerse,487976100.0,1000000000.0,835772700.0,18155360.0,1.712733,3.8121019975227135,1.7154827858602,https://etherscan.io/token/0xe53ec727dbdeb9e2d...,2024-12-05,23:52:03.973000,False
3,tron,11,TRX,TRON,86278460000.0,0.0,27811520000.0,2310245000.0,0.322346,-2.1518534623264505,0.3303026160186558,https://tronscan.org/#/,2024-12-05,23:52:03.973000,False
4,unus-sed-leo,22,LEO,UNUS SED LEO,924602400.0,0.0,8744097000.0,1656894.0,9.457143,-1.339460432315403,9.292834646457417,https://eospark.com/account/bitfinexleo1,2024-12-05,23:52:03.973000,False
5,stacks,32,STX,Stacks,1503559000.0,1818000000.0,4154155000.0,173521100.0,2.762881,2.8258571471895118,2.8128687318208927,https://explorer.xinfin.network/,2024-12-05,23:52:03.973000,False
6,dash,82,DASH,Dash,12057600.0,18900000.0,785398500.0,159444100.0,65.137229,11.875440680082075,63.87130199213159,https://explorer.dash.org,2024-12-05,23:52:03.973000,False
7,zilliqa,90,ZIL,Zilliqa,19242160000.0,0.0,678901700.0,69408590.0,0.035282,4.297254998565316,0.0341712871983576,https://etherscan.io/token/0x05f4a42e251f2d52b...,2024-12-05,23:52:03.973000,False
8,enjin-coin,91,ENJ,Enjin Coin,1781483000.0,0.0,673620600.0,33138360.0,0.378124,0.5284013238983049,0.3760434390384362,https://etherscan.io/token/0xf629cbd94d3791c92...,2024-12-05,23:52:03.973000,False
9,the-sandbox,44,SAND,The Sandbox,2430232000.0,0.0,2175683000.0,497792900.0,0.895257,-3.6612011388036696,0.9162018073269071,https://etherscan.io/token/0x3845badAde8e6dFF0...,2024-12-05,23:52:03.973000,False


In [241]:
DeltaTable(markets_for_modify_raw_dir).to_pandas().head(10)


Unnamed: 0,exchangeId,market_rank,base_asset,baseId,quote_asset,quoteId,priceQuote,priceUsd,volumeUsd24Hr,percentExchangeVolume,tradesCount24Hr,updated,date,time,high_volume
0,bibox,52,BAT,basic-attention-token,USDT,tether,0.3295,0.33004,1261414.0,0.664783,0,1733440188206,2024-12-05,23:31:10.928000,True
1,bibox,41,SXP,swipe,USDT,tether,0.446635,0.447367,1509090.0,0.795312,0,1733439095508,2024-12-05,23:31:10.928000,True
2,bibox,42,TLM,alien-worlds,USDT,tether,0.018477,0.018507,1493577.0,0.787136,0,1733433359382,2024-12-05,23:31:10.928000,True
3,bibox,43,XRP,xrp,USDT,tether,2.24348,2.247155,1481701.0,0.780877,0,1733440152649,2024-12-05,23:31:10.928000,True
4,bibox,44,AVAX,avalanche,USDT,tether,51.126,51.209741,1464355.0,0.771736,0,1733430818091,2024-12-05,23:31:10.928000,True
5,bibox,45,UNI,uniswap,USDT,tether,15.3211,15.346195,1455243.0,0.766934,0,1733433699187,2024-12-05,23:31:10.928000,True
6,bibox,46,ZEN,horizen,USDT,tether,17.2777,17.306,1366143.0,0.719976,0,1733425475229,2024-12-05,23:31:10.928000,True
7,bibox,47,TRB,tellor,USDT,tether,89.122,89.267976,1361866.0,0.717723,0,1733438735429,2024-12-05,23:31:10.928000,True
8,bibox,48,BSV,bitcoin-sv,USDT,tether,82.24,82.374704,1341028.0,0.706741,0,1733429657302,2024-12-05,23:31:10.928000,True
9,bibox,49,ETC,ethereum-classic,ETH,ethereum,0.009722,37.108791,1315961.0,0.69353,0,1733441305214,2024-12-05,23:31:10.928000,True


In [242]:
DeltaTable(merged_markets_assets_raw_dir).to_pandas().head(10)


Unnamed: 0,exchangeId,market_rank,base_asset,baseId,quote_asset,quoteId,priceQuote,priceUsd_x,volumeUsd24Hr_x,percentExchangeVolume,...,maxSupply,marketCapUsd,volumeUsd24Hr_y,priceUsd_y,changePercent24Hr,vwap24Hr,explorer,date_y,time_y,high_value
0,bibox,81,ETH,ethereum,USDC,usd-coin,3818.37,3819.538238,591105.626403,0.311521,...,0.0,457355800000.0,21289410000.0,3797.315168,-1.298368570935078,3873.7655878925857,https://etherscan.io/,2024-12-05,23:39:07.074000,True
1,bibox,95,MKR,maker,ETH,ethereum,0.518569,1979.327214,302014.455338,0.159166,...,1005577.0,1876699000.0,129132500.0,2110.333273,-6.53997840831962,2188.8885510317405,https://etherscan.io/token/Maker,2024-12-05,23:39:07.074000,True
2,bibox,98,ALGO,algorand,USDC,usd-coin,0.4422,0.442335,216081.696903,0.113878,...,10000000000.0,3738076000.0,292781000.0,0.450462,-4.24734094707728,0.4734202166831125,https://algoexplorer.io/,2024-12-05,23:39:07.074000,False
3,bibox,62,ZIL,zilliqa,USDT,tether,0.0353,0.035358,954203.357296,0.502879,...,0.0,676898900.0,69180300.0,0.035177,4.762119860256036,0.0341712871983576,https://etherscan.io/token/0x05f4a42e251f2d52b...,2024-12-05,23:39:07.074000,False
4,bibox,64,KSM,kusama,USDT,tether,43.854156,43.925986,913285.144674,0.481314,...,0.0,713760800.0,50393470.0,45.15313,-0.0492598052100713,45.07057528818629,https://kusama.subscan.io/,2024-12-05,23:39:07.074000,False
5,bibox,67,DOGE,dogecoin,USDT,tether,0.422042,0.422733,854889.852969,0.450539,...,0.0,63027560000.0,6013964000.0,0.428529,0.7116515153194115,0.4401487017784374,http://dogechain.info/chain/Dogecoin,2024-12-05,23:39:07.074000,False
6,bibox,69,DOGE,dogecoin,ETH,ethereum,0.000112,0.42631,814190.492758,0.42909,...,0.0,63027560000.0,6013964000.0,0.428529,0.7116515153194115,0.4401487017784374,http://dogechain.info/chain/Dogecoin,2024-12-05,23:39:07.074000,False
7,bibox,70,TRX,tron,USDT,tether,0.319427,0.31995,786137.368579,0.414305,...,0.0,27632760000.0,2297901000.0,0.320274,-1.8651131232211369,0.3303026160186558,https://tronscan.org/#/,2024-12-05,23:39:07.074000,False
8,bibox,72,COMP,compound,ETH,ethereum,0.03052,116.491443,729500.450076,0.384457,...,0.0,1009715000.0,159406500.0,114.11949,-7.4839757904344655,117.53337551960418,https://etherscan.io/token/0xc00e94cb662c35202...,2024-12-05,23:39:07.074000,False
9,bibox,74,LINK,chainlink,ETH,ethereum,0.005868,22.396317,649895.325788,0.342504,...,0.0,14648840000.0,703783400.0,23.368967,-2.299514398469536,24.0225546383416,https://etherscan.io/token/0x514910771af9ca656...,2024-12-05,23:39:07.074000,False


In [243]:
DeltaTable(market_volume_summary_raw_dir).to_pandas()

Unnamed: 0,exchangeId,volumeUsd24Hr,tradesCount24Hr
0,alterdice,63154120.0,7.0
1,bibox,188176800.0,0.0
