<a href="https://colab.research.google.com/github/CristianCosci/BTC_dataset_Generator_glassnode/blob/main/datasetBuilder_glassnode.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
from google.colab import drive
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import json
import requests
import math

connected = False

if not connected:
  # Mount Google Drive; force_remount=True is passed so the mount is redone on every run of this cell.
  drive.mount('/content/drive', force_remount=True)
  path = '/content/drive/MyDrive/progettoBTC/'
  connected = True
  # The API key is read from a file on Drive instead of being hard-coded in the notebook.
  # The context manager closes the file even if reading fails; strip() also drops a
  # trailing carriage return or spaces that would otherwise end up inside the key.
  with open("{}secret.txt".format(path)) as f:
    API_KEY = f.read().replace("\n", "").strip()

Mounted at /content/drive


In [6]:
def getEMA(df, time):
  """Return the `time`-period exponential moving average (EMA) of `df.close`.

  The series is seeded with the simple moving average (SMA) of the first
  `time` closes and then updated with the recursion
  EMA[i] = close[i] * m + EMA[i-1] * (1 - m), where m = 2 / (time + 1).

  Side effect: adds the columns "SMA_<time>" and "EMA_<time>" to `df`.

  Args:
    df: DataFrame with a numeric "close" column.
    time: Window length of the seed SMA and smoothing period of the EMA.

  Returns:
    The "EMA_<time>" column of `df`; rows before the first full window are NaN.
  """
  multiplier = 2 / (time + 1)
  name = "SMA_" + str(time)
  name1 = "EMA_" + str(time)
  df[name] = df.close.rolling(window=time).mean()  # SMA of `time` rows, used to seed the first EMA value

  # Run the recursion on plain positional arrays. Writing through df[col][i]
  # is chained assignment (it may update a temporary copy) and integer keys on
  # a non-integer index are label lookups, not positions.
  close = df["close"].to_numpy(dtype=float)
  ema = df[name].to_numpy(dtype=float, copy=True)
  for i in range(1, len(ema)):
    # While the previous value is NaN the row keeps its SMA value (the seed).
    if not np.isnan(ema[i - 1]):
      ema[i] = (close[i] * multiplier) + (ema[i - 1] * (1 - multiplier))
  df[name1] = ema

  return df[name1]

def getMACD(df):
  """Store EMA_12 minus EMA_26 in the "MACD" column of `df` and return that column.

  `df` must already contain the "EMA_12" and "EMA_26" columns.
  """
  fast_ema = df["EMA_12"]
  slow_ema = df["EMA_26"]
  df["MACD"] = fast_ema - slow_ema
  return df["MACD"]

def getKD(df, time_k, time_d):
  """Compute the stochastic oscillator lines K and D and return their product.

  K[i] = 100 * (close[i] - lowest low) / (highest high - lowest low), where
  the extremes are taken over the `time_k` rows preceding row i (the current
  row is not part of the window). D is the mean of the last `time_d` K values.
  Rows without a full look-back window are NaN.

  Side effect: adds the columns "K_<time_k>" and "D_<time_d>" to `df`.

  Args:
    df: DataFrame with numeric "close", "high" and "low" columns.
    time_k: Look-back length (in rows) for K.
    time_d: Number of K values averaged into D.

  Returns:
    Series equal to K * D, aligned with `df`.
  """
  name_k = "K_" + str(time_k)
  name_d = "D_" + str(time_d)

  # Extremes of the previous `time_k` rows: a rolling window ending at row
  # i-1, moved to row i by shift(1). min_periods=1 ignores NaN inside the
  # window the same way Series.min()/max() does.
  prev_low = df["low"].rolling(window=time_k, min_periods=1).min().shift(1)
  prev_high = df["high"].rolling(window=time_k, min_periods=1).max().shift(1)

  k_line = (df["close"] - prev_low) / (prev_high - prev_low) * 100
  # The first `time_k` rows only have a partial look-back window.
  k_line.iloc[:time_k] = np.nan

  df[name_k] = k_line
  # One D value per row (the average of the last `time_d` K values).
  df[name_d] = k_line.rolling(window=time_d).mean()

  # NOTE(review): the result is the product K * D, kept for compatibility with callers.
  return df[name_k] * df[name_d]

def getRSI(df):
  """Compute a 14-row RSI of `df.close` from simple moving averages of gains and losses.

  Side effect: stores the result in the "RSI" column of `df`.

  Returns:
    The "RSI" column of `df`.
  """
  change = df['close'].diff()

  # Split the row-to-row change into its positive part and the magnitude of its negative part.
  gains = change.clip(lower=0)
  losses = -1 * change.clip(upper=0)

  avg_gain = gains.rolling(window=14).mean()
  avg_loss = losses.rolling(window=14).mean()

  relative_strength = avg_gain / avg_loss
  df['RSI'] = 100 - (100 / (1 + relative_strength))
  return df['RSI']

def getWR(df, time):
  """Compute the Williams %R style indicator used by this notebook.

  WR[i] = 100 * (highest high - close[i]) / (highest high - lowest low), where
  the extremes are taken over the `time` rows preceding row i (the current row
  is not part of the window). Rows without a full look-back window are NaN.

  Side effect: adds the column "WR_<time>" to `df`.

  Args:
    df: DataFrame with numeric "close", "high" and "low" columns.
    time: Look-back length in rows.

  Returns:
    The "WR_<time>" column of `df`.
  """
  name = "WR_" + str(time)

  # Extremes of the previous `time` rows: a rolling window ending at row i-1,
  # moved to row i by shift(1). min_periods=1 ignores NaN inside the window
  # the same way Series.max()/min() does.
  prev_high = df["high"].rolling(window=time, min_periods=1).max().shift(1)
  prev_low = df["low"].rolling(window=time, min_periods=1).min().shift(1)

  # NOTE(review): Williams %R is often quoted on a 0..-100 scale; the positive
  # scale used here is kept unchanged - confirm it is intended.
  wr = (prev_high - df["close"]) / (prev_high - prev_low) * 100
  # The first `time` rows only have a partial look-back window.
  wr.iloc[:time] = np.nan

  df[name] = wr
  return df[name]


def get_financial_data(df):
  """Add the technical-indicator columns to `df` (in place) and return it.

  Columns are added in this order: SMA_50, SMA_200, EMA_12, EMA_26, MACD, KD,
  RSI, WR_14. `df` must contain a "close" column, plus whatever columns the
  indicator helpers read.
  """
  print()
  print('CALCULATING FINANCIAL INDICATOR')

  # Simple moving averages of the close.
  for column, window in (('SMA_50', 50), ('SMA_200', 200)):
    df[column] = df.close.rolling(window=window).mean()

  # Every helper receives its own copy of the frame, so only the returned
  # series is written back into `df`.
  for column, span in (('EMA_12', 12), ('EMA_26', 26)):
    df[column] = getEMA(df.copy(), span)

  # MACD reads the EMA_12 / EMA_26 columns added just above.
  df['MACD'] = getMACD(df.copy())

  # Stochastic oscillator, RSI and Williams %R.
  df['KD'] = getKD(df.copy(), time_k=14, time_d=3)
  df['RSI'] = getRSI(df.copy())
  df['WR_14'] = getWR(df.copy(), time=14)

  return df


def _parse_query_specs(lines):
  """Parse the lines of a dataset description file into a list of query dicts.

  Every non-blank line has the form
  "<link> <column name> <response field> [<column name> <response field> ...]".
  Each dict holds the link under 'link' plus one entry per
  (column name in the new df -> field name returned by the query) pair.
  """
  specs = []
  for line in lines:
    words = line.split()
    if not words:  # tolerate blank lines, e.g. an empty line at the end of the file
      continue
    link, labels = words[0], words[1:]
    if len(labels) % 2:
      raise ValueError("Malformed dataset line (unpaired column name): {!r}".format(line))
    spec = {'link': link}
    spec.update(zip(labels[0::2], labels[1::2]))
    specs.append(spec)
  return specs


def download_dataset(df, interval, since, dataset_path, coin, timeout=120):
  """Download every metric listed in a description file and add it to `df`.

  The description file is read from the module-level `path` + `dataset_path`,
  and the module-level `API_KEY` is sent with every request.

  Args:
    df: DataFrame that receives one new column per (column name, field) pair.
    interval: Value sent as the 'i' query parameter.
    since: Value sent as the 's' query parameter.
    dataset_path: File name of the description file, relative to `path`.
    coin: Value sent as the 'a' query parameter.
    timeout: Seconds passed to `requests.get` as its timeout.

  Returns:
    `df` with the downloaded columns added.

  Raises:
    ValueError: If a line of the description file has an unpaired label.
    requests.HTTPError: If the API answers with an error status.
  """
  with open(path + dataset_path, 'r') as file:
    query_total = _parse_query_specs(file)

  num_richieste = 0
  for query in query_total:
    link = query.pop('link')
    res = requests.get(
      link,
      params={'a': coin, 's': since, 'i': interval, 'api_key': API_KEY},
      timeout=timeout,
    )
    # Fail here with the HTTP status instead of later with a KeyError on "t".
    res.raise_for_status()
    foo_df = pd.json_normalize(json.loads(res.text))
    foo_df["datetime"] = pd.to_datetime(foo_df["t"], unit="s")  # timestamp conversion to datetime
    foo_df = foo_df.drop("t", axis=1).set_index("datetime").sort_index()
    # Add one column per pair (name in new df, name returned by query).
    for column, field in query.items():
      num_richieste += 1  # counts the columns added (one request can yield several)
      print(column)
      df[column] = foo_df[field]

  print('Numero richieste per {} {}'.format(coin, num_richieste))
  return df


def get_data(interval, since, dataset_path, eth_data=False, coin='BTC', financial_data = False):
  """Build a dataset by downloading the metrics listed in `dataset_path`.

  Args:
    interval: '1h' or '24h'.
    since: Start of the requested range, forwarded to `download_dataset`.
    dataset_path: Description file for the `coin` metrics.
    eth_data: If True, also download the ETH metrics listed in
      'hourly_data_eth.txt' (for '1h') or 'daily_data_eth.txt' (for '24h').
    coin: Asset symbol used for the first download.
    financial_data: If True, add the technical indicators computed by
      `get_financial_data`.

  Returns:
    DataFrame with all downloaded (and, if requested, computed) columns.
  """
  assert interval == '1h' or interval == '24h', "interval must be '1h' or '24h'"
  df = pd.DataFrame()
  df = download_dataset(df, interval, since, dataset_path, coin)

  if eth_data: # If requested download also eth data
    print()
    print('ETH DATA')
    eth_dataset_path = 'hourly_data_eth.txt' if interval == '1h' else 'daily_data_eth.txt'
    df = download_dataset(df, interval, since, eth_dataset_path, 'ETH')

  # Independent of eth_data: the indicators must also be computed when only
  # the primary coin is downloaded.
  if financial_data:
    df = get_financial_data(df)
  return df

In [None]:
# Build the hourly and the daily dataset. Both start at Unix timestamp
# 1333238400 (Sunday 1 April 2012 00:00:00) and request the ETH metrics and
# the financial indicators as well.
df_hourly = get_data(
  interval='1h',
  since='1333238400',
  dataset_path='hourly_data.txt',
  eth_data=True,
  financial_data=True,
)
df_daily = get_data(
  interval='24h',
  since='1333238400',
  dataset_path='daily_data.txt',
  eth_data=True,
  financial_data=True,
)

In [8]:
# Save both datasets as CSV files in the folder referenced by `path`.
df_hourly.to_csv(path + "BTC_hourly_dataset.csv")
df_daily.to_csv(path + "BTC_daily_dataset.csv")

In [None]:
# FOR DEBUG
# Request a single metric directly and print the raw response frame.
res = requests.get(
  'https://api.glassnode.com/v1/metrics/supply/active_24h',
  params={'a': 'ETH', 's': '1356998400', 'i': '24h', 'api_key': API_KEY},
)
foo_df = pd.json_normalize(json.loads(res.text))
print(foo_df)

# Convert the "t" timestamps (seconds) to datetimes and use them as a sorted index.
foo_df = (
  foo_df
  .assign(datetime=pd.to_datetime(foo_df["t"], unit="s"))
  .drop("t", axis=1)
  .set_index("datetime")
  .sort_index()
)

In [None]:
# Display the debug frame (a bare last expression is rendered by the notebook).
foo_df