In [2]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import talib
from scipy import stats
import math

# Load data


In [4]:
PATH = "../Data/DAX/yahoo_fin/"

def load(filename="1d"):
    """Read ``PATH + "<filename>.csv"`` with pandas and return the DataFrame."""
    csv_location = PATH + "{name}.csv".format(name=filename)
    return pd.read_csv(csv_location)
def save(filename, data):
    """Write ``data`` as CSV to ``PATH + filename + ".csv"``.

    ``to_csv`` is called with its defaults, so the frame's index is written
    as the first column of the file.
    """
    csv_location = "".join([PATH, filename, ".csv"])
    data.to_csv(csv_location)

# Load the daily series and keep rows 200..399 for a quick visual check.
data = load("1d")
period = data[200:400]

# Candlestick chart; the first CSV column supplies the x-axis values.
fig = go.Figure(
    data=[
        go.Candlestick(
            x=period[period.columns[0]],
            open=period["open"],
            high=period["high"],
            low=period["low"],
            close=period["close"],
        )
    ]
)

# Hide the range slider that plotly adds under candlestick charts.
fig.update_layout(xaxis_rangeslider_visible=False)
fig.show()

# Clean the data

Remove rows with missing (`None`/NaN) values, for example:

@1030
@2564 (volume)


In [5]:
def clean(data):
    """Trim leading unusable rows, drop unused columns and NaN closes.

    Steps, in order:
      1. Find the first row whose "close" is not NaN and differs from its
         "open"; every row before it is dropped. Nothing is dropped when that
         row is row 0 or when no such row exists.
      2. Drop the first column together with "ticker" and "adjclose".
      3. Renumber the index from 0, then drop the rows whose "close" is NaN.
         The index is not renumbered again afterwards, so it can have gaps.

    The input frame is not modified; the cleaned frame is returned.
    """
    closes = data["close"]
    opens = data["open"]

    # Label-based lookup, as in a freshly loaded frame with a 0..N-1 index.
    first_usable = next(
        (
            row
            for row in range(len(closes))
            if not np.isnan(closes[row]) and closes[row] != opens[row]
        ),
        0,
    )

    trimmed = data.drop(np.arange(first_usable)) if first_usable > 0 else data

    unwanted = [trimmed.columns[0], "ticker", "adjclose"]
    trimmed = trimmed.drop(columns=unwanted).reset_index(drop=True)

    # After the reset, positions and index labels coincide.
    nan_rows = np.where(np.isnan(trimmed["close"]))[0]
    return trimmed.drop(nan_rows)

# Read the daily, weekly and monthly CSV files, then run each through clean().
dataD, dataW, dataM = [load(interval) for interval in ("1d", "1wk", "1mo")]
dataD, dataW, dataM = [clean(raw) for raw in (dataD, dataW, dataM)]






# Compute difference(%), derivative, direction (theta), reliability (phi)


In [7]:
def compute_diff(data, intra=False):
    """Percentage price change per row.

    intra=False: change of "close" relative to the previous row's "close".
        The first row has no predecessor and stays 0. The result has shape
        (N, 1).
    intra=True: change from "open" to "close" within the same row. The
        result has shape (N,).

    Note that the two modes return arrays of different rank.
    """
    if intra:
        opens = data["open"].values
        closes = data["close"].values
        pct = np.zeros(len(closes))
        pct[:] = (closes - opens) / opens * 100
        return pct

    closes = data["close"].values
    pct = np.zeros((len(closes), 1))
    previous = closes[:-1]
    pct[1:, 0] = (closes[1:] - previous) / previous * 100
    return pct

def compute_derivative(data, intra=False, h=1):
    """Numerical derivative of the prices via ``np.gradient``.

    intra=False: gradient of the "close" column along the rows.
    intra=True: for each row, the average of the gradient taken across the
        row's first four columns (so the result depends on column order).

    ``h`` is accepted but not used by either mode.
    Returns a 1-D array with one value per row.
    """
    if intra:
        rows = data.values
        averaged = np.zeros(len(rows))
        for position, row in enumerate(rows):
            averaged[position] = np.average(np.gradient(row[0:4]))
        return averaged

    return np.gradient(data["close"].values)

def compute_direction(data, period=1):
    """Angle and reliability of a straight-line fit over each row window.

    For every row ``i`` the first four columns of rows ``i .. i+period-1``
    are flattened row by row into one sequence, which is regressed against
    ``0, 1, 2, ...`` with ``scipy.stats.linregress``. Windows that would run
    past the last row are simply shorter.

    Parameters
    ----------
    data : DataFrame
        Only ``data.values[:, 0:4]`` is read, so the result depends on the
        column order of the frame.
    period : int
        Number of consecutive rows per window; must be at least 1.

    Returns
    -------
    (rad, errors) : tuple of 1-D float arrays, one entry per row
        ``rad[i]`` is ``atan(slope)`` and ``errors[i]`` is
        ``atan(standard error of the slope)`` for the window starting at i.

    Raises
    ------
    ValueError
        If ``period`` is smaller than 1.

    NOTE(review): the window starts at row ``i`` and extends to later rows,
    so each value depends on rows after ``i`` when period > 1 - confirm that
    this is intended wherever the result is used as an input feature.
    """
    if period < 1:
        # An empty window cannot be regressed; fail early with a clear message.
        raise ValueError("period must be >= 1, got {}".format(period))

    values = data.values
    n_rows = len(values)
    rad = np.zeros(n_rows)
    errors = np.zeros(n_rows)

    for start in range(n_rows):
        window = values[start:start + period, 0:4].ravel()
        fit = stats.linregress(np.arange(len(window)), window)
        rad[start] = math.atan(fit.slope)
        errors[start] = math.atan(fit.stderr)
    return rad, errors





In [None]:
def _add_price_features(frame, periods):
    """Add the diff, derivative and direction columns to ``frame`` in place.

    Columns are appended in this order: "intra-diff", "inter-diff",
    "intra-derivative", "inter-derivative", then "<p>-rad" and "<p>-err" for
    each ``p`` in ``periods``.
    """
    # Compute everything before adding any column: compute_derivative and
    # compute_direction read the frame's first four columns by position.
    features = {
        "intra-diff": compute_diff(frame, intra=True),
        "inter-diff": compute_diff(frame, intra=False),
        "intra-derivative": compute_derivative(frame, intra=True),
        "inter-derivative": compute_derivative(frame, intra=False),
    }
    for window in periods:
        rad, err = compute_direction(frame, period=window)
        features["{}-rad".format(window)] = rad
        features["{}-err".format(window)] = err

    for column, values in features.items():
        frame[column] = values


# The three timeframes differ only in the direction windows they use.
for _frame, _periods in (
    (dataD, (1, 3, 5, 20)),
    (dataW, (1, 4)),
    (dataM, (1, 12)),
):
    _add_price_features(_frame, _periods)


In [68]:

# Write each timeframe to its own CSV file.
# NOTE(review): this cell sits before the indicator cells below although its
# execution counter is higher than theirs; run top to bottom, the files are
# written without the indicator columns - confirm the intended cell order.
for _name, _frame in (("data_D", dataD), ("data_W", dataW), ("data_M", dataM)):
    save(_name, _frame)

In [58]:


# Bollinger Bands on the close of every timeframe (TA-Lib default settings).
for _frame in (dataD, dataW, dataM):
    upper, middle, lower = talib.BBANDS(_frame["close"])
    _frame["BB-upper"] = upper
    _frame["BB-middle"] = middle
    _frame["BB-lower"] = lower



In [59]:
# 5- and 20-period exponential moving averages of the close.
for _frame in (dataD, dataW, dataM):
    ema5 = talib.EMA(_frame["close"], 5)
    ema20 = talib.EMA(_frame["close"], 20)
    _frame["ema5"] = ema5
    _frame["ema20"] = ema20


In [60]:
# 50- and 200-period simple moving averages of the close.
for _frame in (dataD, dataW, dataM):
    sma50 = talib.SMA(_frame["close"], 50)
    sma200 = talib.SMA(_frame["close"], 200)
    _frame["sma50"] = sma50
    _frame["sma200"] = sma200

In [61]:
# Parabolic SAR from high/low, called with 0.02 and 0.2 as its two parameters.
for _frame in (dataD, dataW, dataM):
    sar = talib.SAR(_frame["high"], _frame["low"], 0.02, 0.2)
    _frame["sar"] = sar

In [62]:
# ADX from high/low/close, called with 14 as its period argument.
for _frame in (dataD, dataW, dataM):
    adx = talib.ADX(_frame["high"], _frame["low"], _frame["close"], 14)
    _frame["adx"] = adx



In [63]:
# MACD (12/26 fast/slow, 9-period signal) of the close: line, signal, histogram.
for _frame in (dataD, dataW, dataM):
    macd, macdsignal, macdhist = talib.MACD(
        _frame["close"], fastperiod=12, slowperiod=26, signalperiod=9
    )
    _frame["macd"] = macd
    _frame["macdsignal"] = macdsignal
    _frame["macdhist"] = macdhist


In [64]:
# 10-period momentum of the close.
for _frame in (dataD, dataW, dataM):
    mom = talib.MOM(_frame["close"], timeperiod=10)
    _frame["mom"] = mom


In [65]:
# 14-period RSI of the close.
for _frame in (dataD, dataW, dataM):
    rsi = talib.RSI(_frame["close"], timeperiod=14)
    _frame["rsi"] = rsi



In [66]:
# Stochastic oscillator (%K / %D) from high/low/close.
for _frame in (dataD, dataW, dataM):
    stoch_k, stoch_d = talib.STOCH(
        _frame["high"],
        _frame["low"],
        _frame["close"],
        fastk_period=5,
        slowk_period=3,
        slowk_matype=0,
        slowd_period=3,
        slowd_matype=0,
    )
    _frame["stoch_k"] = stoch_k
    _frame["stoch_d"] = stoch_d


In [67]:
# 14-period Williams %R from high/low/close.
for _frame in (dataD, dataW, dataM):
    willr = talib.WILLR(
        _frame["high"], _frame["low"], _frame["close"], timeperiod=14
    )
    _frame["willr"] = willr


In [35]:
def prepare(data, interval="1d"):
    """Add all derived features and indicators to ``data`` in place, then save.

    Columns added, in order: the diff/derivative features, the 1-row
    direction ("1-rad", "1-err"), the TA-Lib indicators (Bollinger Bands,
    EMA 5/20, SMA 50/200, SAR, ADX, MACD, MOM, RSI, STOCH, WILLR) and finally
    the distance/cross columns derived from them.

    The frame is written with ``save("data_<interval>", data)``.
    Nothing is returned.
    """
    # Price-derived features are all computed before any column is added:
    # compute_derivative / compute_direction read the first four columns by
    # position.
    price_features = (
        ("intra-diff", compute_diff(data, intra=True)),
        ("inter-diff", compute_diff(data, intra=False)),
        ("intra-derivative", compute_derivative(data, intra=True)),
        ("inter-derivative", compute_derivative(data, intra=False)),
    )
    direction, reliability = compute_direction(data, period=1)
    for column, values in price_features:
        data[column] = values
    data["1-rad"] = direction
    data["1-err"] = reliability

    close, high, low = data["close"], data["high"], data["low"]

    # Overlap indicators.
    data["BB-upper"], data["BB-middle"], data["BB-lower"] = talib.BBANDS(close)
    fast_ema, slow_ema = talib.EMA(close, 5), talib.EMA(close, 20)
    data["ema5"] = fast_ema
    data["ema20"] = slow_ema
    fast_sma, slow_sma = talib.SMA(close, 50), talib.SMA(close, 200)
    data["sma50"] = fast_sma
    data["sma200"] = slow_sma
    data["sar"] = talib.SAR(high, low, 0.02, 0.2)

    # Trend / momentum indicators.
    data["adx"] = talib.ADX(high, low, close, 14)
    data["macd"], data["macdsignal"], data["macdhist"] = talib.MACD(
        close, fastperiod=12, slowperiod=26, signalperiod=9
    )
    data["mom"] = talib.MOM(close, timeperiod=10)
    data["rsi"] = talib.RSI(close, timeperiod=14)
    data["stoch_k"], data["stoch_d"] = talib.STOCH(
        high,
        low,
        close,
        fastk_period=5,
        slowk_period=3,
        slowk_matype=0,
        slowd_period=3,
        slowd_matype=0,
    )
    data["willr"] = talib.WILLR(high, low, close, timeperiod=14)

    # Distances of the close to the bands / SAR and moving-average spreads.
    data["bb-upper"] = data["BB-upper"] - close
    data["bb-lower"] = close - data["BB-lower"]
    data["bb-middle"] = close - data["BB-middle"]
    data["ema-cross"] = data["ema5"] - data["ema20"]
    data["sma-cross"] = data["sma50"] - data["sma200"]
    data["stoch-diff"] = data["stoch_k"] - data["stoch_d"]
    data["sar-diff"] = close - data["sar"]

    save("data_{}".format(interval), data)

In [41]:
# Merge newly downloaded rows into the stored history and rebuild all features.
newData = clean(load("new"))
oldData = load("day")

# DataFrame.append was removed in pandas 2.0; pd.concat with ignore_index=True
# produces the same stacked frame with a fresh 0..N-1 index.
data = pd.concat([oldData, newData], ignore_index=True)

# Drop the "Unnamed: 0" column if present (presumably the index column that
# save() wrote into the stored CSV).
if "Unnamed: 0" in data.columns:
    data = data.drop(columns=["Unnamed: 0"])
prepare(data)