In [1]:
import pandas as pd
from pytiingo import RESTClient
from dotenv import load_dotenv
from datetime import datetime, timedelta
import os
from math import nan
import time
load_dotenv()
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor



In [2]:
# Statistical indicators

def calc_parabolic_sar(df: pd.DataFrame, af=0.2, steps=10):
    up = True
    sars = [nan] * len(df)
    sar = ep_lo = df.Low.iloc[0]
    ep = ep_hi = df.High.iloc[0]
    aaf = af
    aaf_step = aaf / steps
    af = 0
    for i, (hi, lo) in enumerate(zip(df.high, df.low)):
        # parabolic sar formula:
        sar = sar + af * (ep - sar)
        # handle new extreme points
        if hi > ep_hi:
            ep_hi = hi
            if up:
                ep = ep_hi
                af = min(aaf, af + aaf_step)
        elif lo < ep_lo:
            ep_lo = lo
            if not up:
                ep = ep_lo
                af = min(aaf, af + aaf_step)
        # handle switch
        if up:
            if lo < sar:
                up = not up
                sar = ep_hi
                ep = ep_lo = lo
                af = 0
        else:
            if hi > sar:
                up = not up
                sar = ep_lo
                ep = ep_hi = hi
                af = 0
        sars[i] = sar
    df['sar'] = sars
    return sar


def calc_macd(df: pd.DataFrame, fm_span=12, sm_span=26, span=9):
    # plot macd with standard colors first
    macd = df.close.ewm(fm_span).mean() - df.close.ewm(sm_span).mean()
    df['macd_signal'] = macd.ewm(span).mean()
    df['macd_diff'] = macd - df['macd_signal']
    macd_diff = df['macd_diff']
    macd_signal = df['macd_signal']
    return macd, macd_signal, macd_diff  # pd.DataFrame(df, columns=['macd', 'macd_signal', 'macd_diff'])


def calc_rsi(price, n=14, ax=None):
    diff = price.diff().values
    gains = diff
    losses = -diff
    gains[~(gains > 0)] = 0.0
    losses[~(losses > 0)] = 1e-10  # we don't want divide by zero/NaN
    m = (n - 1) / n
    ni = 1 / n
    g = gains[n] = gains[:n].mean()
    l = losses[n] = losses[:n].mean()
    gains[:n] = losses[:n] = nan
    for i, v in enumerate(gains[n:], n):
        g = gains[i] = ni * v + m * g
    for i, v in enumerate(losses[n:], n):
        l = losses[i] = ni * v + m * l
    rs = gains / losses
    rsi = 100 - (100 / (1 + rs))
    return rsi  # pd.DataFrame(df, columns=['rsi'])


def calc_stochastic_oscillator(df: pd.DataFrame, n=14, m=3, smooth=3):
    lo = df.low.rolling(n).min()
    hi = df.high.rolling(n).max()
    k = 100 * (df.close - lo) / (hi - lo)
    d = k.rolling(m).mean()
    return k,d  # pd.DataFrame(df, columns=['k', 'd'])

In [3]:
# Params

# Store API key in `.env` next to this file in the format API_TOKEN=ac8cb618...
my_token = os.environ.get("MY_TOKEN")
symbol = os.environ.get('SYMBOL')
symbols = os.environ.get('SYMBOLS')
freq = os.environ.get('FREQUENCY')
now = datetime.utcnow()
start_date = datetime.utcnow() - timedelta(days=100)
start_date = str(start_date.strftime('%Y-%m-%d'))

In [4]:
client = RESTClient(token=f'{my_token}', output_format='pandas')
# for symbol in symbols:
df = client.iex.get_prices(ticker=symbol.lower(), startDate=start_date, resampleFreq=freq)
df['date'] = pd.to_datetime(df['date'], utc=True)
df['date'] = df['date'].astype('datetime64[ns]')
date = df['date']
#df['date'] = df['date'].strftime('%Y-%m-%d %H:%M:%S.%fZ')  # Req. format
# date = date.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
df['symbol'] = symbol
df.set_index("date", inplace=False)
# df = df.rename(columns={
#     "date": "timestamp"
# })

low = df['low']
high = df['high']
close = df['close']
price = df['open close high low'.split()]
volume = df['open close volume'.split()]
ma50 = ma200 = vema24 = sar = rsi = stoch = stoch_s = macd = macd_signal = macd_diff = None

  df['date'] = df['date'].astype('datetime64[ns]')


In [5]:

ma50 = df['ma50']= df.close.rolling(50).mean()
ma200 = df['ma200'] = df.close.rolling(200).mean()
vema24 = df['vema24'] = df.volume.ewm(span=24).mean()

macd, macd_signal, macd_diff = calc_macd(df)
k, d = calc_stochastic_oscillator(df)
rsi = calc_rsi(df.close)
df['macd'] = macd
df['macd_signal'] = macd_signal
df['macd_diff'] = macd_diff
df['k'] = k
df['d'] = d
df['rsi'] =rsi


In [6]:
df.tail()

Unnamed: 0,date,close,high,low,open,volume,symbol,ma50,ma200,vema24,macd_signal,macd_diff,macd,k,d,rsi
1911,2022-12-05 19:45:00,10.085,10.115,10.06,10.09,10286.0,SNAP,10.4126,10.20785,12192.257387,-0.051496,-0.046792,-0.098288,12.0,10.982392,26.743292
1912,2022-12-05 20:00:00,10.05,10.14,10.035,10.085,20971.0,SNAP,10.396,10.206475,12894.556796,-0.056578,-0.045741,-0.102319,5.555556,9.13054,24.397075
1913,2022-12-05 20:15:00,10.055,10.07,10.035,10.05,27479.0,SNAP,10.3861,10.205125,14061.312252,-0.061461,-0.04395,-0.105412,8.163265,8.57294,25.403905
1914,2022-12-05 20:30:00,10.085,10.115,10.055,10.055,24172.0,SNAP,10.3781,10.203925,14870.167272,-0.065982,-0.040682,-0.106664,20.408163,11.375661,31.314339
1915,2022-12-05 20:45:00,10.11,10.115,10.06,10.085,69066.0,SNAP,10.3662,10.20285,19205.83389,-0.070031,-0.036445,-0.106476,34.883721,21.151717,35.874082


In [15]:
idx = pd.period_range(start=min(df.date), end=max(df.date))
df.set_index('date').reindex(idx).fillna(0.0).rename_axis('date').reset_index()
df.fillna(method = 'ffill', inplace = True)



In [16]:
df.head()

Unnamed: 0_level_0,date,close,high,low,open,volume,symbol,ma50,ma200,vema24,macd_signal,macd_diff,macd,k,d,rsi
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,2022-08-29 13:30:00,10.675,10.83,10.475,10.475,28573.0,SNAP,,,28573.0,0.0,0.0,0.0,,,
1,2022-08-29 13:45:00,10.65,10.765,10.57,10.675,27295.0,SNAP,,,27907.375,-0.000139,-0.000125,-0.000264,,,
2,2022-08-29 14:00:00,10.605,10.695,10.57,10.65,50528.0,SNAP,,,36084.292655,-0.000453,-0.000537,-0.000991,,,
3,2022-08-29 14:15:00,10.6,10.695,10.59,10.605,29323.0,SNAP,,,34177.064215,-0.000732,-0.000679,-0.001411,,,
4,2022-08-29 14:30:00,10.515,10.68,10.475,10.6,21444.0,SNAP,,,31189.120743,-0.001324,-0.001833,-0.003156,,,


In [9]:
df.tail()

Unnamed: 0,date,close,high,low,open,volume,symbol,ma50,ma200,vema24,macd_signal,macd_diff,macd,k,d,rsi
1911,2022-12-05 19:45:00,10.085,10.115,10.06,10.09,10286.0,SNAP,10.4126,10.20785,12192.257387,-0.051496,-0.046792,-0.098288,12.0,10.982392,26.743292
1912,2022-12-05 20:00:00,10.05,10.14,10.035,10.085,20971.0,SNAP,10.396,10.206475,12894.556796,-0.056578,-0.045741,-0.102319,5.555556,9.13054,24.397075
1913,2022-12-05 20:15:00,10.055,10.07,10.035,10.05,27479.0,SNAP,10.3861,10.205125,14061.312252,-0.061461,-0.04395,-0.105412,8.163265,8.57294,25.403905
1914,2022-12-05 20:30:00,10.085,10.115,10.055,10.055,24172.0,SNAP,10.3781,10.203925,14870.167272,-0.065982,-0.040682,-0.106664,20.408163,11.375661,31.314339
1915,2022-12-05 20:45:00,10.11,10.115,10.06,10.085,69066.0,SNAP,10.3662,10.20285,19205.83389,-0.070031,-0.036445,-0.106476,34.883721,21.151717,35.874082


In [10]:
ts_dataframe = TimeSeriesDataFrame.from_data_frame(
    df,
    id_column="symbol",
    timestamp_column='date',
)
ts_dataframe

Unnamed: 0_level_0,Unnamed: 1_level_0,close,high,low,open,volume,ma50,ma200,vema24,macd_signal,macd_diff,macd,k,d,rsi
item_id,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
SNAP,2022-08-29 13:30:00,10.675,10.830,10.475,10.475,28573.0,,,28573.000000,0.000000,0.000000,0.000000,,,
SNAP,2022-08-29 13:45:00,10.650,10.765,10.570,10.675,27295.0,,,27907.375000,-0.000139,-0.000125,-0.000264,,,
SNAP,2022-08-29 14:00:00,10.605,10.695,10.570,10.650,50528.0,,,36084.292655,-0.000453,-0.000537,-0.000991,,,
SNAP,2022-08-29 14:15:00,10.600,10.695,10.590,10.605,29323.0,,,34177.064215,-0.000732,-0.000679,-0.001411,,,
SNAP,2022-08-29 14:30:00,10.515,10.680,10.475,10.600,21444.0,,,31189.120743,-0.001324,-0.001833,-0.003156,,,
SNAP,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
SNAP,2022-12-05 19:45:00,10.085,10.115,10.060,10.090,10286.0,10.4126,10.207850,12192.257387,-0.051496,-0.046792,-0.098288,12.000000,10.982392,26.743292
SNAP,2022-12-05 20:00:00,10.050,10.140,10.035,10.085,20971.0,10.3960,10.206475,12894.556796,-0.056578,-0.045741,-0.102319,5.555556,9.130540,24.397075
SNAP,2022-12-05 20:15:00,10.055,10.070,10.035,10.050,27479.0,10.3861,10.205125,14061.312252,-0.061461,-0.043950,-0.105412,8.163265,8.572940,25.403905
SNAP,2022-12-05 20:30:00,10.085,10.115,10.055,10.055,24172.0,10.3781,10.203925,14870.167272,-0.065982,-0.040682,-0.106664,20.408163,11.375661,31.314339


In [11]:
static_features = df
static_features.index.rename("item_id", inplace=True)
static_features

Unnamed: 0_level_0,date,close,high,low,open,volume,symbol,ma50,ma200,vema24,macd_signal,macd_diff,macd,k,d,rsi
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,2022-08-29 13:30:00,10.675,10.830,10.475,10.475,28573.0,SNAP,,,28573.000000,0.000000,0.000000,0.000000,,,
1,2022-08-29 13:45:00,10.650,10.765,10.570,10.675,27295.0,SNAP,,,27907.375000,-0.000139,-0.000125,-0.000264,,,
2,2022-08-29 14:00:00,10.605,10.695,10.570,10.650,50528.0,SNAP,,,36084.292655,-0.000453,-0.000537,-0.000991,,,
3,2022-08-29 14:15:00,10.600,10.695,10.590,10.605,29323.0,SNAP,,,34177.064215,-0.000732,-0.000679,-0.001411,,,
4,2022-08-29 14:30:00,10.515,10.680,10.475,10.600,21444.0,SNAP,,,31189.120743,-0.001324,-0.001833,-0.003156,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1911,2022-12-05 19:45:00,10.085,10.115,10.060,10.090,10286.0,SNAP,10.4126,10.207850,12192.257387,-0.051496,-0.046792,-0.098288,12.000000,10.982392,26.743292
1912,2022-12-05 20:00:00,10.050,10.140,10.035,10.085,20971.0,SNAP,10.3960,10.206475,12894.556796,-0.056578,-0.045741,-0.102319,5.555556,9.130540,24.397075
1913,2022-12-05 20:15:00,10.055,10.070,10.035,10.050,27479.0,SNAP,10.3861,10.205125,14061.312252,-0.061461,-0.043950,-0.105412,8.163265,8.572940,25.403905
1914,2022-12-05 20:30:00,10.085,10.115,10.055,10.055,24172.0,SNAP,10.3781,10.203925,14870.167272,-0.065982,-0.040682,-0.106664,20.408163,11.375661,31.314339


In [13]:
predictor = TimeSeriesPredictor(target="close", ignore_time_index=True).fit(ts_dataframe)

TimeSeriesPredictor.fit() called
Fitting with arguments:
{'enable_ensemble': True,
 'evaluation_metric': None,
 'hyperparameter_tune_kwargs': None,
 'hyperparameters': 'default',
 'prediction_length': 1,
 'random_seed': None,
 'target': 'close',
 'time_limit': None}
Provided training data set with 1916 rows, 1 items (item = single time series). Average time series length is 1916.0.
Training artifacts will be saved to: /Users/erahkee/PycharmProjects/BayesianLSTM-master/AutogluonModels/ag-20221205_212033
AutoGluon will save models to AutogluonModels/ag-20221205_212033/
AutoGluon will gauge predictive performance using evaluation metric: 'mean_wQuantileLoss'
	This metric's sign has been flipped to adhere to being 'higher is better'. The reported score can be multiplied by -1 to get the metric value.
Provided columns ['rsi', 'open', 'low', 'high', 'macd_diff', 'ma50', 'vema24', 'ma200', 'macd', 'volume', 'd', 'macd_signal', 'k'] in train_data will be ignored.
tuning_data is None. Will use 

In [None]:
def calc_plot_data(df, indicators):
    '''Returns data for all plots and for the price line.'''
    price = df['Open Close High Low'.split()]
    volume = df['Open Close Volume'.split()]
    ma50 = ma200 = vema24 = sar = rsi = stoch = stoch_s = macd = macd_signal = macd_diff = None
    if 'few' in indicators or 'moar' in indicators:
        ma50 = price.Close.rolling(50).mean()
        ma200 = price.Close.rolling(200).mean()
        vema24 = volume.Volume.ewm(span=24).mean()
    if 'moar' in indicators:
        sar = calc_parabolic_sar(df)
        rsi = calc_rsi(df.Close)
        stoch, stoch_s = calc_stochastic_oscillator(df)
        macd, macd_signal, macd_diff = calc_macd(df)
    plot_data = dict(price=price, volume=volume, ma50=ma50, ma200=ma200, vema24=vema24, sar=sar, rsi=rsi,
                     stoch=stoch, stoch_s=stoch_s, macd=macd, macd_signal=macd_signal, macd_diff=macd_diff)
    # for price line
    last_close = price.iloc[-1].Close
    last_col = fplt.candle_bull_color if last_close > price.iloc[-2].Close else fplt.candle_bear_color
    price_data = dict(last_close=last_close, last_col=last_col)
    return plot_data, price_data