In [1]:
import numpy as np
import pandas as pd
from scipy import stats
import plotly.graph_objects as go
from matplotlib import pyplot as plt
import time
# Base directory of the CSV data sets. It is a relative path, so it is
# resolved against the notebook's working directory; load() prepends it to
# the per-interval file names.
PATH = "../Data/"

In [2]:
def load(ticker, interval='D'):
    """Read one of the DAX price CSV files into a DataFrame.

    Parameters
    ----------
    ticker : str
        Instrument to load. Only "DAX" is recognised.
    interval : str, optional
        Bar size: 'D' (daily, the default), 'W' (weekly) or 'M' (monthly).

    Returns
    -------
    pandas.DataFrame or None
        The parsed CSV file, or None when either the ticker or the interval
        is not recognised.
    """
    if ticker != "DAX":
        return None

    # Interval code -> CSV path below PATH.
    sources = (
        ('D', "DAX/[Daily]1999_01_01-2020_04_28.csv"),
        ('W', "DAX/[Weekly]1999_01_01-2020_04_28.csv"),
        ('M', "DAX/[Monthly]1999_01_01-2020_04_28.csv"),
    )
    for code, relative_path in sources:
        if interval == code:
            return pd.read_csv(PATH + relative_path)

    # Unknown interval: nothing to load.
    return None

def parser(ts, arg=0):
    """Split a dash-separated string and optionally pick a single field.

    Parameters
    ----------
    ts : str
        Text to split on "-".
    arg : int, optional
        Zero-based position of the field to return. Only 1, 2 and 3 select a
        single field; every other value (including the default 0) returns
        the full list of fields.

    Returns
    -------
    str or list of str
        The selected field, or all fields.

    Raises
    ------
    IndexError
        If arg is 1, 2 or 3 but ts has no field at that position, e.g.
        arg=3 on a three-part string such as "1999-01-04".
    """
    wants_single_field = 0 < arg < 4
    fields = ts.split("-")
    return fields[arg] if wants_single_field else fields
    
def extract(data, sData, eData):
    """Find the row positions that delimit a window of `data`.

    The boundary values are searched in the first column of `data`
    (presumably the date column; confirm against the CSV layout).

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose first column holds the values to search.
    sData : str
        Value marking the start of the window; its first occurrence is used.
    eData : str or int
        If a str, the value marking the end of the window (first occurrence).
        If an int, the number of rows to add to the start position; the
        result is clamped to the last valid row position.

    Returns
    -------
    tuple
        (startIndex, endIndex) as positional row indices. endIndex is 0 when
        eData is neither a str nor an int.

    Raises
    ------
    IndexError
        If sData, or a str eData, does not occur in the first column.
    """
    date = data[data.columns[0]]

    def _first_position(value):
        # Position of the first row whose first-column entry equals `value`.
        matches = np.where(date == value)[0]
        if matches.size == 0:
            raise IndexError(
                "{!r} not found in column {!r}".format(value, data.columns[0])
            )
        return matches[0]

    startIndex = _first_position(sData)
    endIndex = 0
    if isinstance(eData, str):
        endIndex = _first_position(eData)
    elif isinstance(eData, int):
        # Clamp to the last valid position. The previous `> len(date)` test
        # let endIndex == len(date) through, one past the last row.
        endIndex = min(startIndex + eData, len(date) - 1)
    return startIndex, endIndex
        

# Test plot #1
Load some samples and plot a candlestick chart using Plotly.

In [3]:
# Load the DAX quotes; load() defaults to the daily ('D') interval.
data = load("DAX")

# The first chart only shows the first 200 rows.
period = data.iloc[:200]

# The first CSV column supplies the x axis; the open/high/low/close columns
# describe each candle.
candles = go.Candlestick(
    x=period.iloc[:, 0],
    open=period['open'],
    high=period['high'],
    low=period['low'],
    close=period['close'],
)
fig = go.Figure(data=[candles])

# Turn off the x-axis range slider.
fig.update_layout(xaxis_rangeslider_visible=False)
fig.show()

# Test updating the plot
Update the plot every 2 seconds, adding the next 10 rows as a new candlestick trace each time.

In [4]:
# Rows 200-299 are fed to the existing figure in ten chunks of 10 rows.
newPeriod = data.iloc[200:300]

for i in range(10):
    s = 10 * i
    nP = newPeriod.iloc[s:s + 10]

    # Each chunk becomes its own candlestick trace on the shared figure.
    chunk_trace = go.Candlestick(
        x=nP.iloc[:, 0],
        open=nP['open'],
        high=nP['high'],
        low=nP['low'],
        close=nP['close'],
    )
    fig.add_trace(chunk_trace)

    # Re-render, then pause 2 seconds before adding the next chunk.
    fig.show()
    time.sleep(2)


# Test polynomial regression
Use NumPy's `polyfit()` to fit a polynomial of degree 8 to the data and add it to the graph. The fitted polynomial is also extrapolated to predict the future trend (the next 30 days).

In [5]:
# The polynomial is fitted on the first 300 rows.
period = data.iloc[:300]

x_axis = np.arange(300, dtype=np.float64)
y_axis = period['close'].values

# Keep only the samples where both coordinates are finite (no NaN/inf).
idx = np.isfinite(x_axis) & np.isfinite(y_axis)

# Least-squares fit of a degree-8 polynomial; poly1d makes it callable.
k = np.polyfit(x_axis[idx], y_axis[idx], deg=8)
p = np.poly1d(k)

# Evaluate on 330 positions: the 300 fitted rows plus 30 extrapolated ones.
x_axis = np.arange(330)
y_axis = np.asarray(p(x_axis), dtype=np.float64)

# Overlay the fitted curve as a dashed line on the existing figure.
trend_style = dict(color='royalblue', width=4, dash='dash')
trend_x = data.iloc[:330, 0]
fig.add_trace(go.Scatter(x=trend_x, y=y_axis, name='P', line=trend_style))
fig.show()

# Check how accurate the prediction is
Compare the real data for the next 30 days with the predictions.
The error is the absolute deviation relative to the true values.
The accuracy is simply the percentage of predictions that fall within 10% of the true values.


In [6]:
# Held-out window: the 30 rows following the 300 rows used for the fit.
newD = data[300:330]

fig.add_trace(go.Candlestick(x=newD[newD.columns[0]],
                open=newD['open'], high=newD['high'],
                low=newD['low'], close=newD['close']))

fig.show()

# Pair every real close with the prediction at the same position. y_axis is
# the polynomial evaluated in the regression cell; positions 300 onwards are
# the extrapolated part.
actual_close = newD['close'].values
predicted_close = y_axis[300:300 + len(actual_close)]

# Drop non-finite closes from BOTH arrays. Masking only the real values (as
# before) leaves the arrays with different lengths as soon as one close is
# NaN, and the subtraction then fails with a shape mismatch.
idx = np.isfinite(newD['close'])
keep = idx.values
actual_close = actual_close[keep]
predicted_close = predicted_close[keep]

# Relative absolute error per day, and the share of days within 10%.
errors = np.abs((predicted_close - actual_close) / actual_close)
accuracy = np.count_nonzero(errors <= 0.1) / errors.shape[0]
print("Min error (%) : {}".format(np.min(errors)*100))
print("Max error (%) : {}".format(np.max(errors)*100))
print("Avg error (%) : {}".format(np.average(errors)*100))
print("Accuracy (%) : {}".format(accuracy*100))

Min error (%) : 0.15191245962437053
Max error (%) : 34.94031178870462
Avg error (%) : 12.215341989012469
Accuracy (%) : 46.666666666666664
