Iterate through the Dataframes folder to clean and organize each CSV obtained from our bot on deepnote.com.
First, we create an empty dataframe named master_df, onto which we stack the CSVs collected on Deepnote.
Then we drop the rows we do not need and generate a dataframe for each crypto of interest.

In [159]:
#Importing Modules
from pathlib import Path
import pandas as pd

directory = r'C:\Users\Fluffy\Desktop\Projects Coding\CoinGlass_Derivatives_Data_Scraping\Dataframes'

# Column layout of the stacked dataframe (each CSV minus its saved index column)
MASTER_COLUMNS = ['Crypto', 'Funding Rate', 'Exchange', 'date_time']


def load_funding_rate_csvs(directory):
    """Read every file in `directory` as a CSV and stack them into one dataframe.

    Parameters
    ----------
    directory : str or Path
        Folder whose files are each read with `pd.read_csv`.

    Returns
    -------
    pandas.DataFrame
        The rows of all files stacked vertically with a fresh 0..n-1 index.
        When the folder holds no files, an empty dataframe with the
        MASTER_COLUMNS columns is returned.

    Raises
    ------
    KeyError
        If a file has no 'Unnamed: 0' column to drop.
    """
    frames = []
    # Sorted so the row order does not depend on the order the OS lists the files
    for file in sorted(Path(directory).glob('*')):
        if not file.is_file():
            # Sub-folders cannot be parsed as CSV
            continue
        # 'Unnamed: 0' is the index column that was written out with each CSV
        frames.append(pd.read_csv(file).drop(columns=['Unnamed: 0']))

    if not frames:
        return pd.DataFrame(columns=MASTER_COLUMNS)

    # Concatenate once at the end instead of re-copying the growing frame per file
    return pd.concat(frames, ignore_index=True, axis=0)


master_df = load_funding_rate_csvs(directory)
print(master_df)

      Crypto  Funding Rate       Exchange                   date_time
0     "BTC"      -0.008009  "Binance BTC   2022-05-23 21:48:12.979297
1     "BTC"      -0.011199     "dYdX BTC   2022-05-23 21:48:12.979297
2     "BTC"      -0.007200      "FTX BTC   2022-05-23 21:48:12.979297
3     "BTC"       0.004300   "BitMEX BTC   2022-05-23 21:48:12.979297
4     "ETH"       0.010000  "Binance ETH   2022-05-23 21:48:12.979297
...      ...           ...            ...                         ...
5564  "ZIL"      -0.027128  "Binance ZIL   2022-05-26 20:01:38.111193
5565  "ZIL"      -0.024800      "FTX ZIL   2022-05-26 20:01:38.111193
5566  "ZRX"      -0.023252  "Binance ZRX   2022-05-26 20:01:38.111193
5567  "ZRX"       0.010000     "dYdX ZRX   2022-05-26 20:01:38.111193
5568  "ZRX"      -0.014400      "FTX ZRX   2022-05-26 20:01:38.111193

[5569 rows x 4 columns]


The date_time column is stored as object (plain strings), which is memory-inefficient and awkward to use for date operations.
Let's convert this column to datetime64[ns].

In [160]:
# Show the dtype of every column before anything is converted
master_df_types = master_df.dtypes
print(master_df_types)

# Parse the date_time strings into timestamps, store them back, then display the new dtype
parsed_date_time = pd.to_datetime(master_df['date_time'])
master_df['date_time'] = parsed_date_time
master_df['date_time'].dtype


Crypto           object
Funding Rate    float64
Exchange         object
date_time        object
dtype: object


dtype('<M8[ns]')

In [161]:
# Remove rows whose Crypto value contains any of these fragments.
# NOTE(review): 'ETHUBUSD' is kept exactly as it was; it may be a typo for 'ETHBUSD',
# which the 'ETHBU' fragment already matches -- confirm before changing it.
excluded_symbols = ['BTCBU', 'BTCDO', 'BTCST', 'ETHBU', 'ETHUBUSD', 'ETHT']

# One pass over the column: the fragments are joined into a single regex alternation.
# na=False counts a missing Crypto value as "no match", so that row is kept and the
# boolean mask never contains NaN.
is_excluded = master_df['Crypto'].str.contains('|'.join(excluded_symbols), na=False)

# Row labels are left as they are (no index reset), so gaps mark the removed rows
master_df = master_df.drop(index=master_df[is_excluded].index)

# Taking a look at our df to ensure we are on the right path
master_df.head(25)

Unnamed: 0,Crypto,Funding Rate,Exchange,date_time
0,"""BTC""",-0.008009,"""Binance BTC",2022-05-23 21:48:12.979297
1,"""BTC""",-0.011199,"""dYdX BTC",2022-05-23 21:48:12.979297
2,"""BTC""",-0.0072,"""FTX BTC",2022-05-23 21:48:12.979297
3,"""BTC""",0.0043,"""BitMEX BTC",2022-05-23 21:48:12.979297
4,"""ETH""",0.01,"""Binance ETH",2022-05-23 21:48:12.979297
5,"""ETH""",0.019008,"""dYdX ETH",2022-05-23 21:48:12.979297
6,"""ETH""",-0.0016,"""FTX ETH",2022-05-23 21:48:12.979297
7,"""ETH""",0.0788,"""BitMEX ETH",2022-05-23 21:48:12.979297
8,"""XRP""",0.006868,"""Binance XRP",2022-05-23 21:48:12.979297
9,"""XRP""",-0.0144,"""FTX XRP",2022-05-23 21:48:12.979297


In [162]:
# Optional check: list every crypto symbol present in master_df (scraped from the Funding Rates page on coinglass.com)
# Uncomment the lines below if you want to see the list

# master_df
# uniques = []
# for name in master_df['Crypto']:
#     if name not in uniques:
#         uniques.append(name)
# print(uniques)
    

The main dataframe, master_df, is now clean and organized. Next, we extract rows based on a name condition and store them in a separate dataframe for each cryptocurrency of interest.
We are mainly interested in:

BTC, ETH, XMR, LINK, BCH, BNB, LTC, ETC, EOS, DOGE, 1INCH, AAVE, ADA, AKRO, ALGO, ALICE,ALPHA, ANC, ANKR, APE , APET, ATOM, AUDIO , AVAX, BAT, BEL, BZRX, C98, CAKE, CELR,COTI, CRV, CTK, CTSI, CVC, DASH, DENT, DGB, DODO, DUSK, DYDX, EGLD, ENJ, EOS, FIL, FLM, FTM, FTT, GAL, GALA, GMT, GRT, GST, HBAR, ICP, ICX, JASMY, KSM, LINA, LIT, LRC, MANA, MASK, MATIC, MINA, MKR, MTL, NEO, OCEAN, ONT,  QTUM, RAY, REEF, ROSE, SAND, SHIB, SKL, SNX, SOL, SRM, STORJ, STX, SUSHI, SXP, THETA, TLM, TOMO, TRX, UNFI,UNI,WAVES, XMR, YFI, ZEC, ZIL, ZRX   

In [163]:
# Symbols of the cryptocurrencies we want an individual dataframe for.
# 'MKR' and 'MTL' are two separate entries: a comma typed inside the quotes ('MKR,')
# used to fuse them into the single string 'MKR,MTL' through implicit concatenation.
# 'EOS' and 'XMR' each appear twice, exactly as in the original list.
crypto_list = [
    'BTC', 'ETH', 'XMR', 'LINK', 'BCH', 'BNB', 'LTC', 'ETC', 'EOS', 'DOGE', '1INCH', 'AAVE', 'ADA', 'AKRO', 'ALGO', 'ALICE',
    'ALPHA', 'ANC', 'ANKR', 'APE', 'APET', 'ATOM', 'AUDIO', 'AVAX', 'BAT', 'BEL', 'BZRX', 'C98', 'CAKE', 'CELR', 'COTI', 'CRV',
    'CTK', 'CTSI', 'CVC', 'DASH', 'DENT', 'DGB', 'DODO', 'DUSK', 'DYDX', 'EGLD', 'ENJ', 'EOS', 'FIL',
    'FLM', 'FTM', 'FTT', 'GAL', 'GALA', 'GMT', 'GRT', 'GST', 'HBAR', 'ICP', 'ICX', 'JASMY', 'KSM', 'LINA', 'LIT', 'LRC',
    'MANA', 'MASK', 'MATIC', 'MINA', 'MKR',
    'MTL', 'NEO', 'OCEAN', 'ONT', 'QTUM', 'RAY', 'REEF', 'ROSE', 'SAND', 'SHIB', 'SKL', 'SNX', 'SOL', 'SRM', 'STORJ', 'STX', 'SUSHI',
    'SXP', 'THETA', 'TLM', 'TOMO', 'TRX', 'UNFI', 'UNI', 'WAVES', 'XMR', 'YFI', 'ZEC', 'ZIL', 'ZRX',
]


# Keep only the rows whose Crypto value contains "BTC", renumbered from 0
is_btc = master_df['Crypto'].str.contains("BTC")
btc_df = master_df[is_btc].reset_index(drop=True)
btc_df




Unnamed: 0,Crypto,Funding Rate,Exchange,date_time
0,"""BTC""",-0.008009,"""Binance BTC",2022-05-23 21:48:12.979297
1,"""BTC""",-0.011199,"""dYdX BTC",2022-05-23 21:48:12.979297
2,"""BTC""",-0.0072,"""FTX BTC",2022-05-23 21:48:12.979297
3,"""BTC""",0.0043,"""BitMEX BTC",2022-05-23 21:48:12.979297
4,"""BTC""",-0.008009,"""Binance BTC",2022-05-23 22:17:12.899760
5,"""BTC""",-0.011199,"""dYdX BTC",2022-05-23 22:17:12.899760
6,"""BTC""",-0.0072,"""FTX BTC",2022-05-23 22:17:12.899760
7,"""BTC""",0.0043,"""BitMEX BTC",2022-05-23 22:17:12.899760
8,"""BTC""",-0.008009,"""Binance BTC",2022-05-23 22:23:49.165557
9,"""BTC""",-0.013805,"""dYdX BTC",2022-05-23 22:23:49.165557
