# Preprocessing exchange information

## Libraries

In [2]:
import pandas as pd
import json
import networkx as nx
import matplotlib.pyplot as plt
from sklearn.metrics import jaccard_score
import numpy as np

## Load exchange data from JSON and save it in a CSV

In [3]:
# Read the raw exchange listing: a JSON object keyed by crypto id.
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default (JSON interchange files are UTF-8).
with open('../../data/json/coin_exchanges_new.json', 'r', encoding='utf-8') as file:
    json_data = json.load(file)

# Parallel lists, one entry per (crypto, exchange) pair; they become the CSV columns
crypto_ids = []
exchange_ids = []
exchange_names = []
exchange_slugs = []

# Fields every exchange entry must carry to be kept
required_fields = ("id", "name", "slug")

# Process every crypto based on its id from the JSON
for crypto_id, crypto_info in json_data.items():
    # Skip entries without a "data" payload (in case of errors like request limit or something similar)
    if "data" not in crypto_info:
        continue

    for exchange in crypto_info["data"]:
        # Report and skip exchanges that lack any required field,
        # so the four lists always stay the same length
        if not all(field in exchange for field in required_fields):
            print(f"Missing fields in the exchange: {exchange}")
            continue

        crypto_ids.append(crypto_id)
        exchange_ids.append(exchange["id"])
        exchange_names.append(exchange["name"])
        exchange_slugs.append(exchange["slug"])

# Creating the dataframe (long format: one row per crypto/exchange pair)
df = pd.DataFrame({
    "crypto_id": crypto_ids,
    "exchange_id": exchange_ids,
    "exchange_name": exchange_names,
    "exchange_slug": exchange_slugs
})

# Show the first rows to verify
print(df.head())

# Save the dataframe in a csv file
df.to_csv("../../data/csv/cryptocurrencies_exchanges.csv", index=False)

  crypto_id  exchange_id exchange_name exchange_slug
0      6535          270       Binance       binance
1      6535         7680          DOEX          doex
2      6535           21          BTCC          btcc
3      6535          521         Bybit         bybit
4      6535          407     DigiFinex     digifinex


## Convert the redundant data into a list of exchanges

In [6]:
# Collapse the long table to one row per crypto: each exchange column becomes
# the list of that crypto's values, in their original row order.
list_df = (
    df.groupby('crypto_id')
    .agg({
        column: (lambda values: values.tolist())
        for column in ('exchange_id', 'exchange_name', 'exchange_slug')
    })
    .reset_index()
)

# Quick sanity check: number of cryptos with exchange data, then a preview
print(list_df.shape)
list_df.head()

(951, 4)


Unnamed: 0,crypto_id,exchange_id,exchange_name,exchange_slug
0,1,"[270, 7680, 21, 9200, 521, 10361, 407, 9867, 1...","[Binance, DOEX, BTCC, Zedcex Exchange, Bybit, ...","[binance, doex, btcc, zedcex-exchange, bybit, ..."
1,10040,"[302, 380, 6706, 1344, 1281, 1302]","[Gate.io, LATOKEN, PancakeSwap v3 (BSC), Panca...","[gate-io, latoken, pancakeswap-v3, pancakeswap..."
2,10046,"[102, 302, 544, 350, 1069]","[HTX, Gate.io, MEXC, CoinEx, Uniswap v2]","[htx, gate-io, mexc, coinex, uniswap-v2]"
3,10047,"[433, 102, 302, 311, 350, 6706, 1069, 1344, 6444]","[Bitrue, HTX, Gate.io, KuCoin, CoinEx, Pancake...","[bitrue, htx, gate-io, kucoin, coinex, pancake..."
4,10180,"[333, 513, 406, 1064, 102, 525, 302, 544, 955,...","[LBank, Bitget, BitMart, BingX, HTX, XT.COM, G...","[lbank, bitget, bitmart, bingx, htx, xt, gate-..."


The 49 missing coins have no exchange information; this will be regularized in **'preprocessing-coin.ipynb'**.

## Saving the exchanges list dataframe in a CSV

In [7]:
# Persist the per-crypto exchange lists without the positional index.
# Note: list-valued cells are written as text, so a reader of this CSV
# has to parse them back into lists.
list_df.to_csv(
    path_or_buf='../../data/csv/exchanges_list.csv',
    index=False,
)