In [2]:
import requests
import pandas as pd
from datetime import datetime, timedelta, timezone
from deltalake import DeltaTable, write_deltalake
from deltalake.exceptions import TableNotFoundError
import pyarrow as pa
from pprint import pprint

#requests.get? 
# #Signature: requests.get(url, params=None, **kwargs)
#requests.post?
# #Signature: requests.post(url, data=None, json=none, **kwargs)

In [3]:
def get_data(base_url, endpoint, data_field, params=None, headers=None):
        try:
            endpoint_url = f"{base_url}/{endpoint}"
            response = requests.get(endpoint_url, params=params, headers=headers)
            response.raise_for_status()  # Levanta una excepción si hay un error en la respuesta HTTP.

        # Verificar si los datos están en formato JSON.
            try:
                data = response.json()
                data = data[data_field]
            except:
                print("El formato de respuesta no es el esperado")
                return None
            return data

        except requests.exceptions.RequestException as e:
            # Capturar cualquier error de solicitud, como errores HTTP.
            print(f"La petición ha fallado. Código de error : {e}")
        return None

def build_table(json_data):
    try:
        df = pd.json_normalize(json_data)
        return df
    except:
        print("Los datos no están en el formato esperado")
        return None

In [4]:

# Usar la función para hacer la petición a la API de CoinCap
base_url = "https://api.coincap.io/v2"

In [5]:

endpoint = "assets"
endpoint2 = "markets"

json_data = get_data(base_url, endpoint, data_field="data")
json_data_times =get_data(base_url, endpoint, data_field="timestamp")

json_data2 = get_data(base_url, endpoint2, data_field="data")
json_data_times2 =get_data(base_url, endpoint2, data_field="timestamp")


In [6]:
pprint(json_data)

pprint(json_data2)

[{'changePercent24Hr': '4.2783813194041564',
  'explorer': 'https://blockchain.info/',
  'id': 'bitcoin',
  'marketCapUsd': '1979715808471.1145134388494256',
  'maxSupply': '21000000.0000000000000000',
  'name': 'Bitcoin',
  'priceUsd': '100033.2991186061215342',
  'rank': '1',
  'supply': '19790568.0000000000000000',
  'symbol': 'BTC',
  'volumeUsd24Hr': '27055581785.2751703938677858',
  'vwap24Hr': '97051.5062276099056651'},
 {'changePercent24Hr': '5.0276625574947599',
  'explorer': 'https://etherscan.io/',
  'id': 'ethereum',
  'marketCapUsd': '461769382749.4349832916737059',
  'maxSupply': None,
  'name': 'Ethereum',
  'priceUsd': '3833.9567645040828457',
  'rank': '2',
  'supply': '120441990.1196158200000000',
  'symbol': 'ETH',
  'volumeUsd24Hr': '19690161901.8066180021863565',
  'vwap24Hr': '3776.4260733775174657'},
 {'changePercent24Hr': '0.0060468274224712',
  'explorer': 'https://www.omniexplorer.info/asset/31',
  'id': 'tether',
  'marketCapUsd': '135746019561.97128128623706

In [7]:
df_assets = build_table(json_data)

df_markets = build_table(json_data2)

In [8]:
df_assets.head()

df_markets.head()


Unnamed: 0,exchangeId,rank,baseSymbol,baseId,quoteSymbol,quoteId,priceQuote,priceUsd,volumeUsd24Hr,percentExchangeVolume,tradesCount24Hr,updated
0,alterdice,1,BTC,bitcoin,USDT,tether,100915.98,100937.73999587148,48443840.05012937,100.0,7.0,1733366502860
1,bibox,1,ETH,ethereum,USDT,tether,3834.2,3835.0267489070648,16844719.530641988,12.856086333950977,,1733366280938
2,bibox,2,LINK,chainlink,USDT,tether,23.304,23.309024922155924,13572243.652856205,10.358494591087291,,1733361064151
3,bibox,3,BNB,binance-coin,USDT,tether,705.33,705.4820866951698,5568908.252020706,4.25025570438225,,1733363083954
4,bibox,4,LPT,livepeer,USDT,tether,18.145105,18.14901753605115,5375608.168800345,4.102726827233521,,1733341126193


In [9]:
# Si los datos de los activos y el timestamp son válidos
if df_assets is not None and json_data_times is not None:
    # Convertir el timestamp de milisegundos a segundos y luego a una fecha legible
    timestamp_datetime = datetime.fromtimestamp(json_data_times / 1000, tz=timezone.utc)

    # Dividir el timestamp en columnas de fecha y hora
    df_assets['date'] = timestamp_datetime.date()  
    df_assets['time'] = timestamp_datetime.time()

    # Convertir la columna 'date' a un tipo compatible con Delta Lake
    df_assets['date'] = pd.to_datetime(df_assets['date'], errors='coerce')

    # Convertir la columna 'time' a un tipo compatible (por ejemplo, string)
    df_assets['time'] = df_assets['time'].astype(str)
# Si los datos de los activos y el timestamp son válidos
if df_markets is not None and json_data_times2 is not None:
    # Convertir el timestamp de milisegundos a segundos y luego a una fecha legible
    timestamp_datetime2 = datetime.fromtimestamp(json_data_times2 / 1000, tz=timezone.utc)

# Asignar el timestamp al DataFrame
    # Dividir el timestamp en columnas de fecha y hora
    df_markets['date'] = timestamp_datetime2.date()  
    df_markets['time'] = timestamp_datetime2.time()
    
    # Convertir la columna 'date' a un tipo compatible con Delta Lake
    df_markets['date'] = pd.to_datetime(df_markets['date'], errors='coerce')

    # Convertir la columna 'time' a un tipo compatible (por ejemplo, string)
    df_markets['time'] = df_markets['time'].astype(str)



In [10]:
# Aquí debería aparecer la columna 'timestamp' en el DataFrame
# DF temporal
df_assets.head()

Unnamed: 0,id,rank,symbol,name,supply,maxSupply,marketCapUsd,volumeUsd24Hr,priceUsd,changePercent24Hr,vwap24Hr,explorer,date,time
0,bitcoin,1,BTC,Bitcoin,19790568.0,21000000.0,1979715808471.1145,27055581785.27517,100033.29911860613,4.278381319404157,97051.5062276099,https://blockchain.info/,2024-12-05,02:43:00.613000
1,ethereum,2,ETH,Ethereum,120441990.11961582,,461769382749.43494,19690161901.806618,3833.9567645040825,5.02766255749476,3776.426073377517,https://etherscan.io/,2024-12-05,02:43:00.613000
2,tether,3,USDT,Tether,135691101232.32043,,135746019561.97128,67212530575.97337,1.0004047305177133,0.0060468274224712,1.000820774595195,https://www.omniexplorer.info/asset/31,2024-12-05,02:43:00.613000
3,binance-coin,4,BNB,BNB,166801148.0,166801148.0,120620418671.59428,1925464184.0828705,723.1390198321316,-4.068737407707899,749.400544440909,https://etherscan.io/token/0xB8c77482e45F1F44d...,2024-12-05,02:43:00.613000
4,solana,5,SOL,Solana,475356454.2156859,,110067415722.11572,2053827815.3164763,231.547115319432,-2.7100850749053786,232.67966918220893,https://explorer.solana.com/,2024-12-05,02:43:00.613000


In [11]:
# DF estatico
df_markets.head()


Unnamed: 0,exchangeId,rank,baseSymbol,baseId,quoteSymbol,quoteId,priceQuote,priceUsd,volumeUsd24Hr,percentExchangeVolume,tradesCount24Hr,updated,date,time
0,alterdice,1,BTC,bitcoin,USDT,tether,100915.98,100937.73999587148,48443840.05012937,100.0,7.0,1733366502860,2024-12-05,02:43:03.304000
1,bibox,1,ETH,ethereum,USDT,tether,3834.2,3835.0267489070648,16844719.530641988,12.856086333950977,,1733366280938,2024-12-05,02:43:03.304000
2,bibox,2,LINK,chainlink,USDT,tether,23.304,23.309024922155924,13572243.652856205,10.358494591087291,,1733361064151,2024-12-05,02:43:03.304000
3,bibox,3,BNB,binance-coin,USDT,tether,705.33,705.4820866951698,5568908.252020706,4.25025570438225,,1733363083954,2024-12-05,02:43:03.304000
4,bibox,4,LPT,livepeer,USDT,tether,18.145105,18.14901753605115,5375608.168800345,4.102726827233521,,1733341126193,2024-12-05,02:43:03.304000


## Delta lake

In [12]:
def save_data_as_delta(df, path, mode="overwrite", partition_cols=None):
    #Guardar datos (opcionalmente particionados)|Escribe (write_deltalake)

    write_deltalake(
        path, df, mode=mode, partition_by=partition_cols
    )

def save_new_data_as_delta(new_data, data_path, predicate, partition_cols=None):
    #Guardar solo datos nuevos evitando duplicados	| MERGE (inserción condicional)
    try:
        dt = DeltaTable(data_path)
        # Convertir los nuevos datos a formato Arrow para trabajar con Delta
        new_data_pa = pa.Table.from_pandas(new_data)
        # Se insertan en target, datos de source que no existen en target
        dt.merge(
            source=new_data_pa,
            source_alias="source",
            target_alias="target",
            predicate=predicate
        ) \
        .when_not_matched_insert_all() \
        .execute()

    # sino se guarda como nueva
    except TableNotFoundError:
        save_data_as_delta(new_data, data_path, partition_cols=partition_cols)

def upsert_data_as_delta(data, data_path, predicate):
    #Actualizar o insertar registros dependiendo de coincidencias | MERGE (actualización/inserción)
    try:
        dt = DeltaTable(data_path)
        data_pa = pa.Table.from_pandas(data)
        dt.merge(
            source=data_pa,
            source_alias="source",
            target_alias="target",
            predicate=predicate
        ) \
        .when_matched_update_all() \
        .when_not_matched_insert_all() \
        .execute()
    except TableNotFoundError:
        save_data_as_delta(data, data_path)
    except Exception as e:
        # Capturar cualquier otro error
        print(f"Ocurrió un error al realizar el upsert: {e}")

In [13]:
#Ruta de guardado
bronze_dir = "datalake/bronze/api_coincap"
assets_raw_dir = f"{bronze_dir}/assets"

markets_raw_dir = f"{bronze_dir}/markets"

### Assets
https://api.coincap.io/v2/assets

In [14]:
# Guardar solo los nuevos datos (evitar duplicados) usando el predicado basado en el id
try:
    upsert_data_as_delta(
        df_assets,
        assets_raw_dir,
        "target.id = source.id"
    )
except Exception as e:
    print(f"Ocurrió un error al guardar los datos: {e}")

In [15]:
canRow = DeltaTable(assets_raw_dir)
print(f"Cant de filas: {canRow.to_pandas().shape[0]}")

Cant de filas: 105


In [16]:
#.sort_values("rank")
DeltaTable(assets_raw_dir).to_pandas()
# Cargar la tabla Delta como un DataFrame de Pandas
#df = DeltaTable(f"{bronze_dir}/assets").to_pandas()

# Filtrar las filas donde el id es "bitcoin"
#df_bitcoin = df[df['id'] == 'bitcoin']

# Mostrar las primeras 10 filas del DataFrame filtrado
#df_bitcoin.head(10)


Unnamed: 0,id,rank,symbol,name,supply,maxSupply,marketCapUsd,volumeUsd24Hr,priceUsd,changePercent24Hr,vwap24Hr,explorer,date,time
0,kusama,86,KSM,Kusama,15804124.9074820100000000,,705258550.3219158067133275,73921165.8189207982543236,44.6249668646969095,1.4013002527668877,46.5966792538972819,https://kusama.subscan.io/,2024-12-05,02:43:00.613000
1,enjin-coin,90,ENJ,Enjin Coin,1781382021.1205115000000000,,655046120.1442740384897777,40169331.2792776804707812,0.3677179360619357,6.0233954667309438,0.3688436857103774,https://etherscan.io/token/0xf629cbd94d3791c92...,2024-12-05,02:43:00.613000
2,algorand,33,ALGO,Algorand,8298319517.0083080000000000,10000000000.0000000000000000,3954903910.3248322047038693,327451306.3084226791951081,0.4765909413609378,-8.3264024690324576,0.5001748223200967,https://algoexplorer.io/,2024-12-05,02:43:00.613000
3,akash-network,74,AKT,Akash Network,248285794.6611410000000000,388539008.0000000000000000,1041069334.4653670968325097,8073501.8690067824162520,4.1930281830509572,-1.9255141623924219,4.2415516498791999,https://akash.bigdipper.live/,2024-12-05,02:43:00.613000
4,conflux-network,67,CFX,Conflux,4721800273.2400000000000000,,1142345533.5937651861557197,86679435.2975893224768192,0.2419300833344888,-5.6459915622422733,0.2500738003995627,http://www.confluxscan.io/,2024-12-05,02:43:00.613000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,dash,87,DASH,Dash,12056866.0061047500000000,18900000.0000000000000000,696345524.7897072719016233,107753500.9341237264161507,57.7551019010352121,1.1291753583408621,59.0471037280956610,https://explorer.dash.org,2024-12-05,02:43:00.613000
101,zilliqa,93,ZIL,Zilliqa,19238514800.4426200000000000,,642616214.4914120105409428,63029945.1965669521652060,0.0334025896051304,-2.8212111185123160,0.0342056980889209,https://etherscan.io/token/0x05f4a42e251f2d52b...,2024-12-05,02:43:00.613000
102,avalanche,12,AVAX,Avalanche,409353106.3767472000000000,715748719.0000000000000000,21084856462.1773733553440941,709826759.0292905378102378,51.5077475502823562,0.0216949785181647,53.7161168045991353,https://avascan.info/,2024-12-05,02:43:00.613000
103,curve-dao-token,62,CRV,Curve DAO Token,1245760052.0000000000000000,,1319596526.0404586612496712,306102999.0358695771070106,1.0592702213575706,21.9879468000310851,1.0910834682277222,https://etherscan.io/token/0xD533a949740bb3306...,2024-12-05,02:43:00.613000


### markets
https://api.coincap.io/v2/markets

In [17]:

save_new_data_as_delta(
    df_markets,
    markets_raw_dir,
    """target.date = source.date""",
    partition_cols=["date"]
    )

In [18]:
canRowTwo = DeltaTable(markets_raw_dir)
print(f"Cant de filas: {canRowTwo.to_pandas().shape[0]}")

Cant de filas: 400


In [19]:
DeltaTable(markets_raw_dir).to_pandas()


Unnamed: 0,exchangeId,rank,baseSymbol,baseId,quoteSymbol,quoteId,priceQuote,priceUsd,volumeUsd24Hr,percentExchangeVolume,tradesCount24Hr,updated,date,time
0,alterdice,1,BTC,bitcoin,USDT,tether,100915.9800000000000000,100937.7399958714716024,48443840.0501293730335927,100.0000000000000000,7,1733366502860,2024-12-05,02:43:03.304000
1,bibox,1,ETH,ethereum,USDT,tether,3834.2000000000000000,3835.0267489070650299,16844719.5306419892432481,12.8560863339509766,,1733366280938,2024-12-05,02:43:03.304000
2,bibox,2,LINK,chainlink,USDT,tether,23.3040000000000000,23.3090249221559239,13572243.6528562034929416,10.3584945910872924,,1733361064151,2024-12-05,02:43:03.304000
3,bibox,3,BNB,binance-coin,USDT,tether,705.3300000000000000,705.4820866951698340,5568908.2520207065753745,4.2502557043822509,,1733363083954,2024-12-05,02:43:03.304000
4,bibox,4,LPT,livepeer,USDT,tether,18.1451050000000000,18.1490175360511528,5375608.1688003445993516,4.1027268272335217,,1733341126193,2024-12-05,02:43:03.304000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,bibox,95,COMP,compound,USDT,tether,50.9800000000000000,50.9786590664539657,98973.3129116484884251,0.0801091686095969,,1731900793438,2024-11-18,03:43:38.954000
396,bibox,96,ALGO,algorand,USDC,usd-coin,0.1910000000000000,0.1909546324229206,95641.1571956254842941,0.0774121160786120,,1731901088017,2024-11-18,03:43:38.954000
397,bibox,97,FEI,fei-protocol,USDT,tether,0.9778000000000000,0.9777742807998958,92663.8837017479005621,0.0750023058247021,,1731893360925,2024-11-18,03:43:38.954000
398,bibox,98,MXC,mxc,USDT,tether,0.0053000000000000,0.0052998605934132,85479.0422475346855024,0.0691868828732385,,1731901070425,2024-11-18,03:43:38.954000


## TP 2 final


In [20]:
# Leer los datos en DataFrames
assets_for_modify = DeltaTable(assets_raw_dir).to_pandas()
markets_for_modify = DeltaTable(markets_raw_dir).to_pandas()

In [21]:
# Renombrar columnas para mayor claridad
assets_for_modify = assets_for_modify.rename(columns={
    'id': 'asset_id',
    'rank': 'asset_rank',
    'name': 'asset_name'
})
markets_for_modify = markets_for_modify.rename(columns={
    'rank': 'market_rank',
    'baseSymbol': 'base_asset',
    'quoteSymbol': 'quote_asset'
})

In [22]:
# Convertir columnas a sus tipos correctos
assets_for_modify['supply'] = assets_for_modify['supply'].astype(float)  
assets_for_modify['priceUsd'] = assets_for_modify['priceUsd'].astype(float)  

markets_for_modify['volumeUsd24Hr'] = markets_for_modify['volumeUsd24Hr'].astype(float)
markets_for_modify['percentExchangeVolume'] = markets_for_modify['percentExchangeVolume'].astype(float)

#modificar valores nulos
markets_for_modify['tradesCount24Hr'] = markets_for_modify['tradesCount24Hr'].fillna(0)
markets_for_modify['volumeUsd24Hr'] = markets_for_modify['volumeUsd24Hr'].fillna(0)
assets_for_modify['maxSupply'] = assets_for_modify['maxSupply'].fillna(0)
assets_for_modify['explorer'] = assets_for_modify['explorer'].fillna("Not Data")



In [23]:
markets_for_modify.head(10)

Unnamed: 0,exchangeId,market_rank,base_asset,baseId,quote_asset,quoteId,priceQuote,priceUsd,volumeUsd24Hr,percentExchangeVolume,tradesCount24Hr,updated,date,time
0,alterdice,1,BTC,bitcoin,USDT,tether,100915.98,100937.73999587148,48443840.0,100.0,7,1733366502860,2024-12-05,02:43:03.304000
1,bibox,1,ETH,ethereum,USDT,tether,3834.2,3835.0267489070648,16844720.0,12.856086,0,1733366280938,2024-12-05,02:43:03.304000
2,bibox,2,LINK,chainlink,USDT,tether,23.304,23.309024922155924,13572240.0,10.358495,0,1733361064151,2024-12-05,02:43:03.304000
3,bibox,3,BNB,binance-coin,USDT,tether,705.33,705.4820866951698,5568908.0,4.250256,0,1733363083954,2024-12-05,02:43:03.304000
4,bibox,4,LPT,livepeer,USDT,tether,18.145105,18.14901753605115,5375608.0,4.102727,0,1733341126193,2024-12-05,02:43:03.304000
5,bibox,5,WIN,wink,USDT,tether,0.00015555,0.0001555835404498,5046443.0,3.851504,0,1733366131432,2024-12-05,02:43:03.304000
6,bibox,6,GNO,gnosis-gno,USDT,tether,284.382,284.44331983404334,4183565.0,3.192946,0,1733357856300,2024-12-05,02:43:03.304000
7,bibox,7,SLP,small-love-potion,USDT,tether,0.005286,0.0052871397931049,3854250.0,2.941609,0,1733362023776,2024-12-05,02:43:03.304000
8,bibox,8,CHZ,chiliz,USDT,tether,0.127796,0.1278235559969035,2961764.0,2.260453,0,1733366135247,2024-12-05,02:43:03.304000
9,bibox,9,LTC,litecoin,ETH,ethereum,0.03374895,129.53441009805485,2895562.0,2.209926,0,1733366311977,2024-12-05,02:43:03.304000


In [24]:
# Crear una columna booleana que indique si el precio supera cierto límite
assets_for_modify['high_value'] = assets_for_modify['priceUsd'] > 1000

# Crear una columna que indique si el volumen del mercado es significativo
markets_for_modify['high_volume'] = markets_for_modify['volumeUsd24Hr'] > 1_000_000



In [25]:
assets_for_modify.head(10)

Unnamed: 0,asset_id,asset_rank,symbol,asset_name,supply,maxSupply,marketCapUsd,volumeUsd24Hr,priceUsd,changePercent24Hr,vwap24Hr,explorer,date,time,high_value
0,kusama,86,KSM,Kusama,15804120.0,0.0,705258550.3219159,73921165.8189208,44.624967,1.4013002527668876,46.59667925389728,https://kusama.subscan.io/,2024-12-05,02:43:00.613000,False
1,enjin-coin,90,ENJ,Enjin Coin,1781382000.0,0.0,655046120.144274,40169331.27927768,0.367718,6.023395466730944,0.3688436857103774,https://etherscan.io/token/0xf629cbd94d3791c92...,2024-12-05,02:43:00.613000,False
2,algorand,33,ALGO,Algorand,8298320000.0,10000000000.0,3954903910.324832,327451306.3084227,0.476591,-8.326402469032457,0.5001748223200967,https://algoexplorer.io/,2024-12-05,02:43:00.613000,False
3,akash-network,74,AKT,Akash Network,248285800.0,388539008.0,1041069334.465367,8073501.869006783,4.193028,-1.9255141623924217,4.2415516498792,https://akash.bigdipper.live/,2024-12-05,02:43:00.613000,False
4,conflux-network,67,CFX,Conflux,4721800000.0,0.0,1142345533.5937653,86679435.29758932,0.24193,-5.645991562242273,0.2500738003995627,http://www.confluxscan.io/,2024-12-05,02:43:00.613000,False
5,theta-fuel,96,TFUEL,Theta Fuel,6766672000.0,0.0,619010028.210194,7538323.210975675,0.091479,1.2357296750980222,0.0939208805295643,https://explorer.thetatoken.org/,2024-12-05,02:43:00.613000,False
6,wrapped-bitcoin,17,WBTC,Wrapped Bitcoin,137972.1,0.0,13730859020.5109,61025741.39896959,99519.0748,4.145931233597791,96604.77710438537,https://etherscan.io/token/0x2260fac5e5542a773...,2024-12-05,02:43:00.613000,True
7,sp8de,28,SPX,Sp8de,7783757000.0,0.0,4905117871.444105,5720435.142857428,0.630174,-0.3504096179925493,0.6573450021708661,https://etherscan.io/token/0x05aaaa829afa407d8...,2024-12-05,02:43:00.613000,False
8,theta,39,THETA,THETA,1000000000.0,1000000000.0,2914499801.257105,96742749.91310601,2.9145,-0.7506804989914074,2.986496761989486,https://explorer.thetatoken.org/,2024-12-05,02:43:00.613000,False
9,maker,46,MKR,Maker,889172.6,1005577.0,1994512456.8930216,171264004.5327778,2243.110492,-3.6399577885640655,2314.5162287797207,https://etherscan.io/token/Maker,2024-12-05,02:43:00.613000,True


### Join

In [26]:
# Hacer un JOIN entre assets y markets basado en el símbolo base (base_asset)
merged_markets_assets = pd.merge(markets_for_modify, assets_for_modify, left_on='base_asset', right_on='symbol', how='inner')

merged_markets_assets.head(10)

Unnamed: 0,exchangeId,market_rank,base_asset,baseId,quote_asset,quoteId,priceQuote,priceUsd_x,volumeUsd24Hr_x,percentExchangeVolume,...,maxSupply,marketCapUsd,volumeUsd24Hr_y,priceUsd_y,changePercent24Hr,vwap24Hr,explorer,date_y,time_y,high_value
0,alterdice,1,BTC,bitcoin,USDT,tether,100915.98,100937.73999587148,48443840.0,100.0,...,21000000.0,1979715808471.1145,27055581785.27517,100033.299119,4.278381319404157,97051.5062276099,https://blockchain.info/,2024-12-05,02:43:00.613000,True
1,bibox,1,ETH,ethereum,USDT,tether,3834.2,3835.0267489070648,16844720.0,12.856086,...,0.0,461769382749.43494,19690161901.806618,3833.956765,5.02766255749476,3776.426073377517,https://etherscan.io/,2024-12-05,02:43:00.613000,True
2,bibox,2,LINK,chainlink,USDT,tether,23.304,23.309024922155924,13572240.0,10.358495,...,0.0,14874802064.096567,936764121.2937992,23.729445,-0.7816364667146088,24.65831536168025,https://etherscan.io/token/0x514910771af9ca656...,2024-12-05,02:43:00.613000,False
3,bibox,3,BNB,binance-coin,USDT,tether,705.33,705.4820866951698,5568908.0,4.250256,...,166801148.0,120620418671.59428,1925464184.0828705,723.13902,-4.068737407707899,749.400544440909,https://etherscan.io/token/0xB8c77482e45F1F44d...,2024-12-05,02:43:00.613000,False
4,bibox,4,LPT,livepeer,USDT,tether,18.145105,18.14901753605115,5375608.0,4.102727,...,0.0,636310019.1713219,40571178.85595687,17.522849,5.9489030035011705,17.324581340248596,https://explorer.livepeer.org/,2024-12-05,02:43:00.613000,False
5,bibox,6,GNO,gnosis-gno,USDT,tether,284.382,284.44331983404334,4183565.0,3.192946,...,3000000.0,736263056.0396874,6209027.362660163,284.316677,2.33607678171212,281.47578044002285,https://etherscan.io/token/Gnosis,2024-12-05,02:43:00.613000,False
6,bibox,8,CHZ,chiliz,USDT,tether,0.127796,0.1278235559969035,2961764.0,2.260453,...,0.0,1179854339.1843696,182571710.8834059,0.12815,6.697319719665229,0.1312056098382506,https://etherscan.io/token/0x3506424f91fd33084...,2024-12-05,02:43:00.613000,False
7,bibox,9,LTC,litecoin,ETH,ethereum,0.03374895,129.53441009805485,2895562.0,2.209926,...,84000000.0,9788320465.021389,960588302.2843341,130.043909,0.8403479537374443,129.50684420753683,http://explorer.litecoin.net/chain/Litecoin,2024-12-05,02:43:00.613000,False
8,bibox,11,DOT,polkadot,USDT,tether,8.9499,8.951829821095233,2469783.0,1.884967,...,0.0,15705739800.583427,1113866189.6466355,10.317335,5.4087788648598725,10.774396764118691,https://polkascan.io/polkadot,2024-12-05,02:43:00.613000,False
9,bibox,13,BCH,bitcoin-cash,USDT,tether,597.94,598.068930739526,2185309.0,1.667853,...,0.0,11764477707.200932,294250106.939536,594.237194,5.010864515423916,569.2856248686519,https://blockchair.com/bitcoin-cash/blocks,2024-12-05,02:43:00.613000,False


### Aggregation

In [29]:
#Convertir tradesCount24Hr a valores numéricos
markets_for_modify['tradesCount24Hr'] = pd.to_numeric(markets_for_modify['tradesCount24Hr'], errors='coerce').fillna(0)
#Confirmar los tipos de datos
print(markets_for_modify.dtypes[['volumeUsd24Hr', 'tradesCount24Hr']])

volumeUsd24Hr      float64
tradesCount24Hr      int64
dtype: object


In [None]:
# Calcular el volumen total en dólares y el promedio de transacciones por mercado
market_volume_summary = markets_for_modify.groupby('exchangeId').agg({
    'volumeUsd24Hr': 'sum', # Sumar el volumen de intercambio en USD por mercado
    'tradesCount24Hr': 'mean' # Calcular el promedio de transacciones por mercado
}).reset_index()

market_volume_summary.head()

Unnamed: 0,exchangeId,volumeUsd24Hr,tradesCount24Hr
0,alterdice,159560500.0,7.0
1,bibox,548782200.0,0.0


### Guardado en Delta lake



In [None]:
#Ruta de guardado
silver_dir = "datalake/silver/api_coincap"
assets_raw_dir = f"{silver_dir}/assets"

markets_raw_dir = f"{silver_dir}/markets"