## This notebook builds a pandas DataFrame from crypto price data and technical indicators

### Constants

### TA-Lib installation

TA-Lib is a package for computing technical indicators from price data.

In [None]:
import os
# Download the TA-Lib 0.4.0 source archive and unpack it
!wget http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz
!tar xvzf ta-lib-0.4.0-src.tar.gz
os.chdir('ta-lib') # "!cd" cannot be used in Colab, so change directory from Python instead
# Configure, build and install the library from the unpacked sources
!./configure --prefix=/usr
!make
!make install
# The build above takes roughly 2 minutes
os.chdir('../')
# Install the TA-Lib Python package (run after the build above)
!pip install TA-Lib

### Imports

In [1]:
import talib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

### API management

In [None]:
# Packages used by the data-download cells below (versions are not pinned)
!pip install python-kucoin
!pip install dateparser
!pip install pytz
# NOTE(review): tvfeed and yfinance are not imported anywhere in the visible notebook — confirm they are still needed
!pip install tvfeed
!pip install yfinance

In [3]:

# Python Kucoin API documentation: https://python-kucoin.readthedocs.io/en/latest/


# IMPORTS
from kucoin.client import Client

def client_creation():
    """Build a KuCoin ``Client`` from the notebook-level credentials.

    Reads the globals ``api_key``, ``secret_key`` and ``api_passphrase`` at
    call time and passes them positionally, in that order, to ``Client``.

    NOTE(review): none of these three names is defined in the visible part of
    the notebook — they must be set (ideally from environment variables or a
    secrets store, never hard-coded) before this function is called.
    """
    credentials = (api_key, secret_key, api_passphrase)
    return Client(*credentials)


In [4]:
# currencies = client.get_currencies()
# for c in currencies:
#   print(c["fullName"])

### Data generation function

Pandas to tensor

In [5]:
import datetime as dt
import dateparser
import pytz

# https://es.stackoverflow.com/questions/93979/problema-al-definir-zona-horaria-mediante-tzinfo-en-datetime-usando-pytz-timezon



Reference: https://sammchardy.github.io/historical-data-download-kucoin/

Kucoin request: https://docs.kucoin.com/#get-klines

Ta-lib documentation: https://mrjbq7.github.io/ta-lib/funcs.html

In [7]:
from datetime import date, datetime, timedelta

def perdelta(start, end, delta):
    """Split the range ``start``..``end`` into consecutive boundaries.

    Parameters
    ----------
    start, end : comparable values supporting ``+ delta`` (e.g. ``date`` or
        ``datetime``).
    delta : step added to move from one boundary to the next (e.g. a
        ``timedelta``).

    Returns
    -------
    list
        ``[start, start + delta, ...]`` for every value strictly below
        ``end``, followed by ``end`` itself. If ``start >= end`` the result is
        just ``[end]``.

    Raises
    ------
    ValueError
        If ``start < end`` but ``delta`` does not move forward (zero or
        negative step), which would otherwise loop forever.
    """
    # Only reject a bad step when the loop would actually run.
    if start < end and not (start + delta > start):
        raise ValueError("delta must be positive when start < end")

    curr = start
    dates = []
    while curr < end:
        dates.append(curr)
        curr += delta
    # The final boundary is always the exact end, even if the last step is shorter.
    dates.append(end)
    return dates

In [8]:
import requests
import json

# Length in seconds of every candle type accepted by the KuCoin klines endpoint.
_KUCOIN_INTERVAL_SECONDS = {
    "1min": 60, "3min": 180, "5min": 300, "15min": 900, "30min": 1800,
    "1hour": 3600, "2hour": 7200, "4hour": 14400, "6hour": 21600,
    "8hour": 28800, "12hour": 43200, "1day": 86400, "1week": 604800,
}

# Candles requested per call. 1440 reproduces the original hand-written
# windows: 60 days @ 1hour, 30 days @ 30min, 15 days @ 15min.
_KUCOIN_CANDLES_PER_REQUEST = 1440

_KUCOIN_CANDLE_COLUMNS = ["Time", "Open", "Close", "High", "Low", "Volume", "Turnover"]


def get_historical_data_kucoin(symbol, interval, fromStr, toStr = None):
    """Download historical candles from KuCoin into a DataFrame.

    The requested range is split into windows of a fixed number of candles and
    one GET request to ``/api/v1/market/candles`` is issued per window.

    Parameters
    ----------
    symbol : str
        Trading pair, sent as the ``symbol`` query parameter (e.g. "BTC-USDT").
    interval : str
        Candle type, sent as ``type``. One of: 1min, 3min, 5min, 15min, 30min,
        1hour, 2hour, 4hour, 6hour, 8hour, 12hour, 1day, 1week.
    fromStr : str
        Start of the range, parsed with ``dateparser.parse``.
    toStr : str, optional
        End of the range, parsed with ``dateparser.parse``; defaults to 'now'.

    Returns
    -------
    pandas.DataFrame
        Float columns Open, Close, High, Low, Volume, Turnover indexed by
        "Time" (the index is left as returned by the API, it is not cast).
        An empty DataFrame is returned when no candles were received.

    Raises
    ------
    ValueError
        If ``interval`` is not supported or a date string cannot be parsed.
    RuntimeError
        If a response carries no "data" field.
    requests.HTTPError
        If the HTTP request itself fails.
    """
    if interval not in _KUCOIN_INTERVAL_SECONDS:
        raise ValueError(
            f"Unsupported interval {interval!r}; expected one of "
            f"{sorted(_KUCOIN_INTERVAL_SECONDS)}"
        )

    if toStr is None:
        toStr = 'now'

    # Parse each bound exactly once so every window is derived from the same instants.
    start = dateparser.parse(fromStr)
    end = dateparser.parse(toStr)
    if start is None:
        raise ValueError(f"Could not parse start date {fromStr!r}")
    if end is None:
        raise ValueError(f"Could not parse end date {toStr!r}")

    print(start)

    # NOTE(review): naive datetimes are converted with the machine's local
    # timezone by .timestamp() — confirm this matches the intended range.
    window = timedelta(seconds=_KUCOIN_INTERVAL_SECONDS[interval] * _KUCOIN_CANDLES_PER_REQUEST)
    dates = perdelta(start, end, window)

    base_url = "https://api.kucoin.com"
    path = "/api/v1/market/candles"

    frames = []
    for window_start, window_end in zip(dates, dates[1:]):
        parameters = {"symbol": symbol,
                      "startAt": int(window_start.timestamp()),
                      "endAt": int(window_end.timestamp()),
                      "type": interval}

        r = requests.get(base_url + path, params = parameters, timeout = 30)
        r.raise_for_status()
        payload = r.json()
        if "data" not in payload:
            raise RuntimeError(f"KuCoin request failed for {symbol} ({interval}): {payload}")

        rows = payload["data"]
        if not rows:
            # No candles in this window (e.g. before the pair was listed): skip it.
            continue

        df_t = pd.DataFrame(rows, columns = _KUCOIN_CANDLE_COLUMNS)
        df_t = df_t.set_index("Time")
        df_t = df_t.astype(float)
        # Reverse each chunk; presumably the API returns newest first — TODO confirm.
        df_t = df_t.iloc[::-1]
        frames.append(df_t)

    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of growing the frame inside the loop.
    df = pd.concat(frames)
    # Adjacent windows share a boundary timestamp; keep the first copy of any
    # candle that was returned by both.
    return df[~df.index.duplicated(keep = "first")]

In [9]:
def get_indicators(df, percent = False, norm = False):
  """Add TA-Lib indicator columns to ``df`` in place and return the same frame.

  Columns added, in order: EMA10..EMA200, RSI14, the three Bollinger bands,
  ADXR, and the three MACD series.

  Parameters
  ----------
  df : DataFrame with Open, High, Low and Close columns.
  percent : when true, Open/High/Low, the moving averages and the Bollinger
      bands are replaced by ``(value - Close) / value * 100``.
  norm : currently unused.

  NOTE(review): the "EMA*" columns are computed with ``talib.SMA`` (simple
  moving average). Either the column names or the function is probably not
  what was intended — confirm before relying on them as exponential averages.
  """
  close = df.Close.values

  # Moving averages (see the NOTE in the docstring about SMA vs. EMA naming)
  for period in (10, 15, 20, 25, 50, 75, 100, 200):
    df[f"EMA{period}"] = talib.SMA(close, timeperiod=period)

  # Relative Strength Index
  df["RSI14"] = talib.RSI(close, timeperiod=14)

  # Bollinger bands
  upper, middle, lower = talib.BBANDS(close, timeperiod=5, nbdevup=2, nbdevdn=2, matype=0)
  df["boll_upperband"] = upper
  df["boll_middleband"] = middle
  df["boll_lowerband"] = lower

  # Average Directional Movement Index Rating
  df["ADXR"] = talib.ADXR(df.High.values, df.Low.values, close, timeperiod=14)

  # MACD line, signal line and histogram
  macd, macd_signal, macd_hist = talib.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)
  df["macd"] = macd
  df["macdsignal"] = macd_signal
  df["macdhist"] = macd_hist

  if not percent:
    return df

  # Price-like columns expressed relative to Close, as a percentage of their own value
  percent_col = ['Open', 'High', 'Low', 'EMA10', 'EMA15',
                 'EMA20', 'EMA25', 'EMA50', 'EMA75', 'EMA100', 'EMA200',
                 'boll_upperband', 'boll_middleband', 'boll_lowerband']
  for name in percent_col:
    values = df[name]
    df[name] = (values - df["Close"]) / values * 100

  return df
  

In [10]:
# Market and candle size requested from KuCoin
symbol = "BTC-USDT"
interval = "1hour"
# Date ranges as free-form strings; get_historical_data_kucoin parses them with dateparser.parse
# NOTE(review): "Gen" looks like a non-English abbreviation for January — confirm dateparser resolves it as intended
start_time_train = "Gen 1, 2018"
end_time_train = "Dec 31, 2021"
# NOTE(review): "Jen" may be a typo for "Gen"/"Jan" — verify how dateparser parses it before trusting the validation range
start_time_val = "Jen 7, 2022"
end_time_val = "Feb 26, 2022"

In [None]:
# Training set: download the candles for the training range, then add the indicators with percent=True
X_train = get_indicators(get_historical_data_kucoin(symbol,interval,start_time_train,end_time_train), percent=True)
display(X_train)
# Validation set: same pipeline over the validation range
X_val = get_indicators(get_historical_data_kucoin(symbol,interval,start_time_val,end_time_val), percent=True)
display(X_val)

## Normalize

In [13]:
from IPython.core.display import display_pdf
from sklearn.preprocessing import MinMaxScaler

def mean_normalization_train(df, cols):
    """Standardise ``cols`` of ``df`` in place and return the statistics used.

    Each listed column is replaced by ``(column - mean) / std`` using that
    column's own mean and standard deviation.

    Returns
    -------
    tuple
        ``(df, means, stds)`` where ``df`` is the same (mutated) frame and
        ``means`` / ``stds`` are lists aligned with ``cols``.
    """
    means, stds = [], []
    for name in cols:
        series = df[name]
        center, spread = series.mean(), series.std()
        df[name] = (series - center) / spread
        means.append(center)
        stds.append(spread)
    return df, means, stds

def mean_normalization_test(df, cols, means, stds):
    """Standardise ``cols`` of ``df`` in place with precomputed statistics.

    Columns, means and stds are paired positionally; pairing stops at the
    shortest of the three sequences. Returns the same (mutated) frame.
    """
    for name, (center, spread) in zip(cols, zip(means, stds)):
        shifted = df[name] - center
        df[name] = shifted / spread
    return df

from sklearn.preprocessing import MinMaxScaler

def minmax_normalization(df, cols, min, max):
    """Rescale ``cols`` of ``df`` in place to the range ``(min, max)``.

    A new ``MinMaxScaler`` is fitted on ``df[cols]`` on every call, so the
    scaling is derived only from the frame passed in. Returns the same
    (mutated) frame.

    The parameter names shadow the built-ins ``min`` and ``max`` inside this
    function; they are kept so existing callers keep working.
    """
    scaled = MinMaxScaler(feature_range = (min, max)).fit_transform(df[cols])
    df[cols] = scaled
    return df


In [None]:
# norm_col = ['Close', 'Volume', 'Turnover']

# X_train_norm, means, stds = mean_normalization_train(X_train, norm_col)
# X_val_norm = mean_normalization_test(X_val,norm_col, means, stds)

# X_val_norm

In [14]:
norm_col = ['Close', 'Volume', 'Turnover']

# Fit the 0-100 scaling on the training data only and reuse it for validation,
# so the same raw value maps to the same scaled value in both sets.
# Fitting a second scaler on the validation frame would put the two sets on
# different scales. Validation values outside the training range will fall
# outside 0-100; that is expected.
scaler = MinMaxScaler(feature_range = (0, 100))
X_train[norm_col] = scaler.fit_transform(X_train[norm_col])
X_val[norm_col] = scaler.transform(X_val[norm_col])

# The frames are updated in place, so the *_norm names refer to the same
# objects as X_train / X_val (as they did before).
X_train_norm = X_train
X_val_norm = X_val



In [15]:
X_train_norm

Unnamed: 0_level_0,Open,Close,High,Low,Volume,Turnover,EMA10,EMA15,EMA20,EMA25,...,EMA100,EMA200,RSI14,boll_upperband,boll_middleband,boll_lowerband,ADXR,macd,macdsignal,macdhist
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1514764800,-1.859310,15.284072,2.418781,-5.303474,0.071309,0.019235,,,,,...,,,,,,,,,,
1514768400,-1.726466,15.631612,0.000000,-7.121503,0.054366,0.014679,,,,,...,,,,,,,,,,
1514772000,-2.889103,16.377470,0.000000,-8.867599,0.036007,0.009929,,,,,...,,,,,,,,,,
1514775600,5.790275,15.019004,5.790275,0.000000,0.080211,0.021719,,,,,...,,,,,,,,,,
1514779200,2.175871,14.586765,3.969247,-0.929657,0.082325,0.022018,,,,,...,,,,9.339869,3.920819,-2.187243,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1640890800,0.649459,67.829107,0.724739,-0.057224,20.907965,20.617136,-0.221517,-0.654612,-0.984006,-0.983118,...,2.643711,3.862572,53.883939,0.982465,0.024679,-0.951817,35.147811,59.214129,-59.314796,118.528925
1640894400,0.740135,67.290000,0.996808,-0.243990,21.896467,21.397268,0.574310,0.154456,-0.181606,-0.229024,...,3.305785,4.562524,47.584147,1.673294,0.784070,-0.121386,33.735934,39.018344,-39.648168,78.666512
1640898000,-0.194868,67.437460,0.086811,-0.861848,12.732546,12.394836,0.322842,0.007215,-0.308453,-0.439115,...,3.049754,4.355299,49.329198,1.456674,0.457651,-0.561834,32.544106,30.448787,-25.628777,56.077564
1640901600,0.474385,67.094558,0.513503,-0.790197,10.667614,10.351116,0.702647,0.495336,0.212160,0.024056,...,3.445039,4.792167,45.532975,1.851604,0.685429,-0.508793,31.728943,5.486898,-19.405642,24.892540


### Data save on drive

In [16]:
from google.colab import drive

def save_to_drive(df,filename):
  """Write ``df`` as ``<filename>.csv`` into the ``BOT`` folder on Google Drive.

  Mounts Drive at ``/content/drive``, creates the ``BOT`` folder if it does
  not exist yet, and writes the CSV encoded as ``utf-8-sig``.

  Parameters
  ----------
  df : DataFrame to save (the index is written too).
  filename : file name without the ``.csv`` extension.
  """
  import os  # local import: the cell that imports os at the top may not have been run

  drive.mount('/content/drive')
  folder = '/content/drive/My Drive/BOT/'
  # Without this, the write fails when the folder is missing.
  os.makedirs(folder, exist_ok=True)
  path = folder+filename+'.csv'

  # Let pandas open the file itself so newline handling is correct on every platform.
  df.to_csv(path, encoding = 'utf-8-sig')


In [17]:
# X_train was normalised in place above, so this saves the normalised training frame
save_to_drive(X_train,"train_BTCUSDT_1h_Gen-1-2018_Dec-31-2021_percent+minmaxnorm")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [18]:
# X_val was normalised in place above, so this saves the normalised validation frame
save_to_drive(X_val,"val_BTCUSDT_1h_Jen-7-2022_Feb-26-2022_percent+minmaxnorm")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
