## This notebook builds a pandas DataFrame from crypto price data and a set of technical indicators

### Constants

### TA-Lib installation

TA-Lib is a package that computes technical indicators from price data.

In [16]:
import os
# Download the TA-Lib 0.4.0 source tarball from SourceForge and unpack it
!wget http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz
!tar xvzf ta-lib-0.4.0-src.tar.gz
# Enter the extracted source directory for the build steps below
os.chdir('ta-lib') # Can't use !cd in co-lab
# Configure, build and install the library (installation prefix: /usr)
!./configure --prefix=/usr
!make
!make install
# The build above takes roughly 2 minutes; afterwards return to the parent directory
os.chdir('../')
# Install the Python TA-Lib package (imported further down as `talib`)
!pip install TA-Lib

--2022-02-27 15:34:53--  http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz
Resolving prdownloads.sourceforge.net (prdownloads.sourceforge.net)... 204.68.111.105
Connecting to prdownloads.sourceforge.net (prdownloads.sourceforge.net)|204.68.111.105|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: http://downloads.sourceforge.net/project/ta-lib/ta-lib/0.4.0/ta-lib-0.4.0-src.tar.gz [following]
--2022-02-27 15:34:53--  http://downloads.sourceforge.net/project/ta-lib/ta-lib/0.4.0/ta-lib-0.4.0-src.tar.gz
Resolving downloads.sourceforge.net (downloads.sourceforge.net)... 204.68.111.105
Reusing existing connection to prdownloads.sourceforge.net:80.
HTTP request sent, awaiting response... 302 Found
Location: http://phoenixnap.dl.sourceforge.net/project/ta-lib/ta-lib/0.4.0/ta-lib-0.4.0-src.tar.gz [following]
--2022-02-27 15:34:53--  http://phoenixnap.dl.sourceforge.net/project/ta-lib/ta-lib/0.4.0/ta-lib-0.4.0-src.tar.gz
Resolving phoenixna

### Imports

In [17]:
import talib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

### API management

In [18]:
# Third-party packages used by the cells below (installed unpinned).
# python-kucoin provides `kucoin.client.Client`.
!pip install python-kucoin
# dateparser and pytz are imported in the date-handling cells.
!pip install dateparser
!pip install pytz
# NOTE(review): the captured output shows pip finds no distribution named `tvfeed`,
# so this line fails; neither tvfeed nor yfinance is imported in the visible cells.
!pip install tvfeed
!pip install yfinance

[31mERROR: Could not find a version that satisfies the requirement tvfeed (from versions: none)[0m
[31mERROR: No matching distribution found for tvfeed[0m


In [19]:

# Python Kucoin API documentation: https://python-kucoin.readthedocs.io/en/latest/


# IMPORTS
from kucoin.client import Client

def client_creation():
    """
    Build a KuCoin API client.

    Reads the module-level names `api_key`, `secret_key` and `api_passphrase`,
    which must already be defined when this function is called.
    """
    credentials = (api_key, secret_key, api_passphrase)
    return Client(*credentials)


In [20]:
# Instantiate the KuCoin client once for the notebook.
# NOTE(review): none of the cells visible here use `client` afterwards; the
# historical-data download below calls the public REST endpoint via `requests`.
client = client_creation()

In [21]:
# currencies = client.get_currencies()
# for c in currencies:
#   print(c["fullName"])

### Data generation function

Pandas to tensor

In [22]:
import torch
# determine the supported device
def get_device():
    """Return the torch device to compute on: 'cuda:0' when CUDA is available, otherwise 'cpu'."""
    if not torch.cuda.is_available():
        return torch.device('cpu')  # no GPU on this machine
    return torch.device('cuda:0')

# convert a df to tensor to be used in pytorch
def df_to_tensor(df):
    """
    Convert a DataFrame into a float32 torch tensor placed on the device
    chosen by `get_device()`.
    """
    target_device = get_device()
    as_array = df.values
    as_tensor = torch.from_numpy(as_array)
    return as_tensor.float().to(target_device)

In [23]:
import datetime as dt
import dateparser
import pytz

# https://es.stackoverflow.com/questions/93979/problema-al-definir-zona-horaria-mediante-tzinfo-en-datetime-usando-pytz-timezon



In [24]:
# Timezone object for Spain (created here, but not attached to `date` below).
spanish_timezone = pytz.timezone("Europe/Madrid")
# Naive datetime: no tzinfo is set on it.
date = dt.datetime(year=2022, month=2, day=19)
print(date)
# POSIX timestamp of that datetime, in seconds.
print(date.timestamp())

2022-02-19 00:00:00
1645228800.0


Reference: https://sammchardy.github.io/historical-data-download-kucoin/

Kucoin request: https://docs.kucoin.com/#get-klines


Ta-lib documentation: https://mrjbq7.github.io/ta-lib/funcs.html

In [34]:
from datetime import date, datetime, timedelta

def perdelta(start, end, delta):
    """
    Split the range from `start` to `end` into steps of size `delta`.

    Parameters
    ----------
    start : first point of the range (e.g. a datetime).
    end : last point of the range; always the final element of the result.
    delta : step added repeatedly to `start` (e.g. a timedelta); must be positive.

    Returns
    -------
    list
        start, start + delta, start + 2*delta, ... (every value strictly below
        `end`), followed by `end` itself. When `start >= end` the result is
        just `[end]`.

    Raises
    ------
    ValueError
        If `delta` does not move `start` forward (zero or negative step).
    """
    # A step that does not advance would make the loop below run forever.
    if not start + delta > start:
        raise ValueError("delta must be positive")

    dates = []
    curr = start
    while curr < end:
        dates.append(curr)
        curr += delta
    # The end point is always included, even when it does not fall on a step.
    dates.append(end)
    return dates

# Quick check of perdelta: a 5-day range with a 60-day step yields only the two end points.
for result in perdelta(
    start=dateparser.parse("Jul 15, 2021"),
    end=dateparser.parse("Jul 20, 2021"),
    delta=timedelta(days=60),
):
    print(result)

2021-07-15 00:00:00
2021-07-20 00:00:00


In [39]:
import requests
import json

def get_historical_data_kucoin(symbol, interval, fromStr, toStr = None):
  """
  Download historical candlesticks (klines) from the public KuCoin REST API.

  The requested period is split into consecutive windows with `perdelta` and
  one GET request to /api/v1/market/candles is sent per window.

  Parameters
  ----------
  symbol : str
      Trading pair, e.g. "BTC-USDT".
  interval : str
      Type of candlestick patterns: 1min, 3min, 5min, 15min, 30min, 1hour,
      2hour, 4hour, 6hour, 8hour, 12hour, 1day, 1week.
  fromStr : str
      Start of the period, in any format `dateparser.parse` understands.
  toStr : str, optional
      End of the period; defaults to 'now'.

  Returns
  -------
  pandas.DataFrame
      Float columns Open, Close, High, Low, Volume, Turnover, indexed by
      "Time". The index values are kept as returned by the API (they are set
      as index before the float conversion). Each response is reversed before
      being appended. An empty DataFrame is returned when no candle was
      received.

  Raises
  ------
  ValueError
      If `fromStr` or `toStr` cannot be parsed into a date.
  requests.HTTPError
      If the API answers with an HTTP error status.
  RuntimeError
      If the JSON answer carries no "data" field (API-level error).
  """
  base_url = "https://api.kucoin.com"
  path = "/api/v1/market/candles"
  columns = ["Time", "Open", "Close", "High", "Low", "Volume", "Turnover"]

  # Candle length in seconds for each interval type accepted by the endpoint.
  interval_seconds = {"1min": 60, "3min": 180, "5min": 300, "15min": 900,
                      "30min": 1800, "1hour": 3600, "2hour": 7200,
                      "4hour": 14400, "6hour": 21600, "8hour": 28800,
                      "12hour": 43200, "1day": 86400, "1week": 604800}
  # KuCoin documents a cap of 1500 candles per request.
  max_candles = 1500

  start = dateparser.parse(fromStr)
  if start is None:
    raise ValueError("Could not parse start date: %r" % (fromStr,))
  print(start)

  if toStr is None:
    toStr = 'now'
  # Parse the end date once so every window is derived from the same instant.
  end = dateparser.parse(toStr)
  if end is None:
    raise ValueError("Could not parse end date: %r" % (toStr,))

  # 60-day windows, shortened for small intervals so that a single window
  # never holds more candles than one request can return (one candle of
  # margin in case both window bounds are inclusive).
  window = timedelta(days=60)
  if interval in interval_seconds:
    window = min(window, timedelta(seconds=(max_candles - 1) * interval_seconds[interval]))

  dates = perdelta(start, end, window)

  chunks = []
  for chunk_start, chunk_end in zip(dates[:-1], dates[1:]):
    # startAt / endAt are expressed in seconds.
    parameters = {"symbol": symbol,
                  "startAt": int(chunk_start.timestamp()),
                  "endAt": int(chunk_end.timestamp()),
                  "type": interval}

    r = requests.get(base_url + path, params = parameters, timeout = 30)
    r.raise_for_status()
    payload = r.json()
    rows = payload.get("data")
    if rows is None:
      raise RuntimeError("KuCoin API error (code=%s): %s"
                         % (payload.get("code"), payload.get("msg")))
    if not rows:
      # No candle in this window: nothing to add.
      continue

    df_t = pd.DataFrame(rows, columns = columns)
    df_t = df_t.set_index("Time")
    df_t = df_t.astype(float)
    # Reverse the rows of the response before appending it to the result.
    df_t = df_t.iloc[::-1]
    chunks.append(df_t)

  if not chunks:
    return pd.DataFrame()

  # Concatenate once instead of growing the frame inside the loop.
  df = pd.concat(chunks)
  # A candle sitting exactly on a window boundary may be returned by two
  # consecutive requests; keep a single copy.
  df = df[~df.index.duplicated(keep = "first")]

  return df

In [40]:
def get_indicators(df, use_ema=False):
  """
  Add technical-indicator columns, computed with TA-Lib, to a price DataFrame.

  Parameters
  ----------
  df : pandas.DataFrame
      Must contain the columns Close, High and Low. It is modified in place.
  use_ema : bool, optional
      Selects the function used for the EMA10 ... EMA200 columns.
      False (default) keeps the historical behaviour: despite their names,
      these columns hold simple moving averages (talib.SMA).
      True fills them with exponential moving averages (talib.EMA).

  Returns
  -------
  pandas.DataFrame
      The same `df` object, with these columns added or overwritten:
      EMA10, EMA15, EMA20, EMA25, EMA50, EMA75, EMA100, EMA200, RSI14,
      boll_upperband, boll_middleband, boll_lowerband, ADXR, macd,
      macdsignal, macdhist.
  """
  # TA-Lib only accepts float64 ("double") arrays; convert the inputs once.
  close = df.Close.values.astype(float)
  high = df.High.values.astype(float)
  low = df.Low.values.astype(float)

  # Moving averages of the close price.
  # NOTE: by default these "EMA*" columns are simple moving averages, as in
  # previously generated data; pass use_ema=True for true EMAs.
  moving_average = talib.EMA if use_ema else talib.SMA
  for period in (10, 15, 20, 25, 50, 75, 100, 200):
    df["EMA%d" % period] = moving_average(close, timeperiod=period)

  # RSI
  df["RSI14"] = talib.RSI(close, timeperiod=14)

  # Bollinger bands (5 periods, 2 standard deviations on each side)
  df["boll_upperband"], df["boll_middleband"], df["boll_lowerband"] = talib.BBANDS(
      close, timeperiod=5, nbdevup=2, nbdevdn=2, matype=0)

  # Average Directional Movement Index Rating
  df["ADXR"] = talib.ADXR(high, low, close, timeperiod=14)

  # MACD
  df["macd"], df["macdsignal"], df["macdhist"] = talib.MACD(
      close, fastperiod=12, slowperiod=26, signalperiod=9)

  return df
  

In [50]:
# Market and candle size to download.
symbol = "BTC-USDT"
interval = "1hour"
# Training period.
start_time_train = "Jul 1, 2021"
end_time_train = "Sep 30, 2021"
# Validation period. The month must be spelled "Jan": the misspelling "Jen"
# is not a month name and only works if the date parser happens to guess it.
start_time_val = "Jan 7, 2022"
end_time_val = "Feb 26, 2022"

In [51]:
# Training set: download the candles, then add the indicator columns.
X_train = get_historical_data_kucoin(symbol, interval, start_time_train, end_time_train)
X_train = get_indicators(X_train)
display(X_train)
# Validation set: same pipeline over the validation period.
X_val = get_historical_data_kucoin(symbol, interval, start_time_val, end_time_val)
X_val = get_indicators(X_val)
display(X_val)

2021-07-01 00:00:00


Unnamed: 0_level_0,Open,Close,High,Low,Volume,Turnover,EMA10,EMA15,EMA20,EMA25,EMA50,EMA75,EMA100,EMA200,RSI14,boll_upperband,boll_middleband,boll_lowerband,ADXR,macd,macdsignal,macdhist
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1625097600,35045.4,34732.0,35056.5,34712.9,249.725058,8.697831e+06,,,,,,,,,,,,,,,,
1625101200,34729.6,34939.5,35024.8,34709.9,104.785289,3.654937e+06,,,,,,,,,,,,,,,,
1625104800,34939.6,34304.4,34967.5,34216.6,347.102823,1.197044e+07,,,,,,,,,,,,,,,,
1625108400,34304.3,34120.1,34401.4,34113.7,159.266478,5.456363e+06,,,,,,,,,,,,,,,,
1625112000,34120.0,34324.4,34331.6,34058.4,130.843135,4.475068e+06,,,,,,,,,,35090.139897,34484.08,33878.020103,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1632942000,41309.6,41195.5,41531.6,41145.2,214.541763,8.859476e+06,41816.14,41999.726667,41965.730,41885.416,42034.434,42546.805333,42561.532,42771.9405,35.684942,41764.308365,41384.62,41004.931635,20.413391,-174.671556,-67.607332,-107.064223
1632945600,41201.6,41130.0,41266.5,40909.7,243.906709,1.001369e+07,41686.22,41921.953333,41942.235,41866.600,41996.522,42519.929333,42546.506,42761.9750,34.748558,41526.082105,41270.82,41015.557895,20.663143,-207.781101,-95.642086,-112.139015
1632949200,41129.9,41246.9,41284.8,40963.6,160.399450,6.602599e+06,41571.23,41841.746667,41926.670,41845.668,41961.344,42494.001333,42533.086,42752.5685,37.881468,41341.362956,41220.74,41100.117044,21.084274,-222.028395,-120.919348,-101.109047
1632952800,41246.9,41362.5,41460.0,41043.4,151.395896,6.248102e+06,41502.19,41768.993333,41909.505,41825.992,41935.152,42468.910667,42521.000,42747.1955,40.903160,41414.819642,41249.70,41084.580358,21.131443,-221.438915,-141.023261,-80.415654


2022-01-07 00:00:00


Unnamed: 0_level_0,Open,Close,High,Low,Volume,Turnover,EMA10,EMA15,EMA20,EMA25,EMA50,EMA75,EMA100,EMA200,RSI14,boll_upperband,boll_middleband,boll_lowerband,ADXR,macd,macdsignal,macdhist
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1641513600,43086.1,42781.0,43149.3,42730.8,346.122587,1.484418e+07,,,,,,,,,,,,,,,,
1641517200,42781.0,43007.6,43029.6,42781.0,218.263203,9.369814e+06,,,,,,,,,,,,,,,,
1641520800,43007.6,42795.9,43025.0,42773.2,699.659983,3.002038e+07,,,,,,,,,,,,,,,,
1641524400,42796.0,41799.1,42816.8,41544.5,1444.188855,6.080318e+07,,,,,,,,,,,,,,,,
1641528000,41799.2,41694.1,41904.4,41000.0,1127.332738,4.676717e+07,,,,,,,,,,43521.611872,42415.54,41309.468128,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1645815600,38654.6,38678.5,38959.9,38544.0,369.559543,1.432172e+07,39015.68,38854.826667,38804.050,38654.996,37372.068,37663.606667,37614.696,38695.3140,53.942621,39676.982814,39021.74,38366.497186,34.364315,424.440450,511.292329,-86.851880
1645819200,38678.5,39126.9,39233.9,38608.0,349.651924,1.360981e+07,39070.33,38899.953333,38848.755,38728.024,37398.914,37683.497333,37619.094,38674.8845,59.182707,39512.066585,38963.92,38415.773415,33.685979,414.697143,491.973292,-77.276149
1645822800,39126.8,38961.6,39227.7,38931.2,239.797278,9.367931e+06,39092.40,38916.126667,38868.620,38750.164,37425.802,37700.132000,37623.439,38658.4270,56.625064,39245.341509,38880.10,38514.858491,32.767562,389.151281,471.408890,-82.257609
1645826400,38961.6,39431.7,39492.7,38857.9,238.025236,9.323738e+06,39099.37,38980.226667,38900.195,38791.512,37460.846,37718.269333,37640.211,38645.9630,61.695002,39551.153568,38971.30,38391.446432,32.346021,402.202799,457.567672,-55.364873


### Data save on drive

In [48]:
from google.colab import drive

def save_to_drive(df,filename):
  """
  Save a DataFrame as a CSV file in the "BOT" folder of the mounted Google Drive.

  Parameters
  ----------
  df : pandas.DataFrame
      Data to save.
  filename : str
      File name without extension; ".csv" is appended.

  The drive is mounted at /content/drive on every call and the file is
  written with the 'utf-8-sig' encoding. The target folder is created when
  it does not exist yet.
  """
  import os  # local import: keeps the function independent of other cells' imports

  drive.mount('/content/drive')
  folder = '/content/drive/My Drive/BOT/'
  # Writing into a missing folder would raise FileNotFoundError.
  os.makedirs(folder, exist_ok=True)
  path = folder+filename+'.csv'

  # Let pandas open the file itself so that newline handling is correct.
  df.to_csv(path, encoding = 'utf-8-sig')


In [49]:
#save_to_drive(X_train,"train_BTCUSDT_1h_Jul-1-2021_Sep-30-2021")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [52]:
#save_to_drive(X_val,"val_BTCUSDT_1h_Jen-7-2022_Feb-26-2022")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
