The goal is to scrape data related to derivatives of cryptocurrencies markets in order to find patterns, gather data over the course of time with the ultimate goal of training ML models for predictions purposes.

Cryptocurrencies Derivatives data are the following:
    Funding Rates
    Open Interest of Futures & Options

In [72]:
from requests_html import HTMLSession
import pandas as pd

url = 'https://defirate.com/funding/'
s = HTMLSession()
r = s.get(url)

# r.html.render(sleep = 3)
# print(r.status_code)

products = r.html.xpath('//*[@id="main-table"]', first = True)
print(products.text)

with open("funding_rates.txt", "w") as f:
    f.write(products.text)

Binance
Trade
dYdX
Trade
FTX
BitMEX
BTC
-0.008009%
{ "@context":"http://schema.org", "@type":"LoanOrCredit", "name":"Binance BTC Loan", "loanTerm":{ "@type":"QuantitativeValue", "value":"365", "unitCode": "DAY" }, "annualPercentageRate":[ { "@type":"QuantitativeValue", "name": "variable interest rates", "value":"-0.008009" } ], "amount":[ { "@type": "MonetaryAmount", "name":"BTC Loan", "value":"1000", "currency":"BTC" } ] }
-0.00191616%
{ "@context":"http://schema.org", "@type":"LoanOrCredit", "name":"dYdX BTC Loan", "loanTerm":{ "@type":"QuantitativeValue", "value":"365", "unitCode": "DAY" }, "annualPercentageRate":[ { "@type":"QuantitativeValue", "name": "variable interest rates", "value":"-0.00191616" } ], "amount":[ { "@type": "MonetaryAmount", "name":"BTC Loan", "value":"1000", "currency":"BTC" } ] }
0.008%
{ "@context":"http://schema.org", "@type":"LoanOrCredit", "name":"FTX BTC Loan", "loanTerm":{ "@type":"QuantitativeValue", "value":"365", "unitCode": "DAY" }, "annualPercentage

Converting the Data saved in .txt into a CSV for Optimal Data Cleaning.

In [73]:
# pip install xlwt

In [74]:
#Reading file & creating dataframe in a csv
import time
import xlwt
import csv

date_string = time.strftime('%Y-%m-%d_%H.%M')
with open('funding_rates.txt', 'r') as in_file:
    stripped = (line.strip() for line in in_file)
    lines = (line.split("\n") for line in stripped if line)

    #Adding Date + Time to our CSV filename

    with open("funding_rates_"+ date_string+ ".csv", "w") as out_file:
        writer = csv.writer(out_file)
        writer.writerows(('title', 'intro'))
        writer.writerows(lines)
        df =out_file

df = pd.read_csv("funding_rates_"+date_string+".csv", encoding = "ISO-8859-1")
df

Unnamed: 0,t,i,t.1,l,e
0,i,n,t,r,o
1,Binance,,,,
2,Trade,,,,
3,dYdX,,,,
4,Trade,,,,
5,FTX,,,,
6,BitMEX,,,,
7,BTC,,,,
8,-0.008009%,,,,
9,"{ ""@context"":""http://schema.org"", ""@type"":""Loa...",,,,


Data Cleaning & Manipulation for better presentability \n
NOTE: df is  the dataframe the bot is working with

In [75]:
# Setting Pandas as to show all rows here 
pd.set_option('display.max_rows', df.shape[0]+1)

# Dropping needless rows at the top and emtpy columns i, t.1, l ,e
df = df.drop(labels=[1,2,3,4,5,6,], axis = 0)
df = df.drop(columns = ["i", "t.1", 'l', "e"], axis =1)
df.reset_index(drop=True)

# Renaming a column for better readability
df = df.rename(columns = {'t': "messed up data 2 clean"})

#Taking a look at our Dataframe
print(df.head(20))

                               messed up data 2 clean
0                                                   i
7                                                 BTC
8                                          -0.008009%
9   { "@context":"http://schema.org", "@type":"Loa...
10                                       -0.00191616%
11  { "@context":"http://schema.org", "@type":"Loa...
12                                             0.008%
13  { "@context":"http://schema.org", "@type":"Loa...
14                                              0.01%
15  { "@context":"http://schema.org", "@type":"Loa...
16                                                ETH
17                                              0.01%
18  { "@context":"http://schema.org", "@type":"Loa...
19                                        0.01960928%
20  { "@context":"http://schema.org", "@type":"Loa...
21                                            0.0016%
22  { "@context":"http://schema.org", "@type":"Loa...
23                          

In [76]:
# As you can see in the data within the "messed up data 2 clean" column main separator is a comma
# We rely on str.split with comma to split this columns amongst many to better process for cleaning 

df = df['messed up data 2 clean'].str.split(',', expand=True)

# We want the Exchange's name located in column 2 
            # the funding rate located in column 8
            # the cryptocurrency located in column 12.


#Dropping useless columns 
cols = [0,1,3,4,5,6,7,9,10,11]
df.drop(df.columns[cols], axis = 1, inplace = True)


#Reordering columns for better readbility
#Renaming columns as aforementioned columns of interest 
df = df.reindex(columns=[12,8,2])
df.rename(columns = ({12: "Crypto", 8:'Funding Rate', 2:'Exchange' }))


Unnamed: 0,Crypto,Funding Rate,Exchange
0,,,
7,,,
8,,,
9,"""currency"":""BTC"" } ] }","""value"":""-0.008009"" } ]","""name"":""Binance BTC Loan"""
10,,,
11,"""currency"":""BTC"" } ] }","""value"":""-0.00191616"" } ]","""name"":""dYdX BTC Loan"""
12,,,
13,"""currency"":""BTC"" } ] }","""value"":""0.008"" } ]","""name"":""FTX BTC Loan"""
14,,,
15,"""currency"":""BTC"" } ] }","""value"":""0.01"" } ]","""name"":""BitMEX BTC Loan"""


In [77]:
df

Unnamed: 0,12,8,2
0,,,
7,,,
8,,,
9,"""currency"":""BTC"" } ] }","""value"":""-0.008009"" } ]","""name"":""Binance BTC Loan"""
10,,,
11,"""currency"":""BTC"" } ] }","""value"":""-0.00191616"" } ]","""name"":""dYdX BTC Loan"""
12,,,
13,"""currency"":""BTC"" } ] }","""value"":""0.008"" } ]","""name"":""FTX BTC Loan"""
14,,,
15,"""currency"":""BTC"" } ] }","""value"":""0.01"" } ]","""name"":""BitMEX BTC Loan"""
