In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
import yfinance as yf
import ta 
from ta import add_all_ta_features
from ta.utils import dropna
from sklearn.preprocessing import MinMaxScaler

print('Tensorflow version: {}'.format(tf.__version__))

import matplotlib.pyplot as plt
plt.style.use('seaborn')

import warnings
warnings.filterwarnings('ignore')


# ichimoku, macd, rsi, obv

Tensorflow version: 2.11.0


## Hyperparameters

In [3]:
seq_len = 128
symbol = "IBM"
period = "Max"
interval = "1d"

### Load price and volume data

In [4]:
def downloadStock(symbol, period, interval):
    df = yf.download(symbol, period= period, interval=interval).reset_index().drop(columns="Adj Close")
    # Replace 0 to avoid dividing by 0 later on
    df['Volume'].replace(to_replace=0, method='ffill', inplace=True) 
    df.sort_values('Date', inplace=True)
    return df

df = downloadStock(symbol, period, interval)
df

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Date,Open,High,Low,Close,Volume
0,1962-01-02,7.374124,7.374124,7.291268,7.291268,407940
1,1962-01-03,7.291268,7.355003,7.291268,7.355003,305955
2,1962-01-04,7.355003,7.355003,7.278521,7.281708,274575
3,1962-01-05,7.272148,7.272148,7.125558,7.138305,384405
4,1962-01-08,7.131931,7.131931,6.947100,7.004461,572685
...,...,...,...,...,...,...
15354,2022-12-29,140.580002,142.259995,140.449997,141.059998,2337200
15355,2022-12-30,140.539993,140.899994,139.449997,140.889999,2858000
15356,2023-01-03,141.100006,141.899994,140.479996,141.550003,3338600
15357,2023-01-04,142.070007,143.619995,141.369995,142.600006,3869200


### Plot daily closing prices and volume

In [5]:
def plotClosingPrices():
    plt.figure(figsize=(15, 5))
    plt.plot(df['Close'])
    plt.xticks(range(0, df.shape[0], 1464), df['Date'].loc[::1464], rotation=0)
    plt.ylabel('Price', fontsize=18)
    plt.title("Close Price", fontsize=20)
    plt.legend(['Close Price'], fontsize='x-large', loc='best')
    plt.show()

def plotVolume():
    plt.figure(figsize=(15, 5))
    plt.plot(df['Volume'])
    plt.xticks(range(0, df.shape[0], 1464), df['Date'].loc[::1464], rotation=0)
    plt.ylabel('Volume', fontsize=18)
    plt.title("Volume", fontsize=20)
    plt.legend(['Volume'], fontsize='x-large', loc='best')
    plt.show()
#plotClosingPrices()
#plotVolume()

## Technical Analysis Indicators

### Momentum Indicators

In [6]:
def defineMomentumIndicators():
    df["stochOsc"] = ta.momentum.stoch(
        high = df["High"],
        low = df["Low"],
        close = df["Close"]
    )

    df["RSI"] = ta.momentum.rsi(
        close = df["Close"]
    )

    df["sRSI"] = ta.momentum.stochrsi(
        close = df["Close"]
    )

defineMomentumIndicators()
df[["stochOsc", "RSI", "sRSI"]].tail()

Unnamed: 0,stochOsc,RSI,sRSI
15354,24.10992,44.959734,0.24316
15355,23.048096,44.573677,0.230858
15356,27.17054,46.494611,0.29207
15357,36.709767,49.493581,0.429025
15358,33.191885,45.588651,0.678992


### Volatility Indicators

In [7]:
def defineVolatilityIndicators():
    df["bollingerH"] = ta.volatility.bollinger_hband(
        close = df["Close"]
    )

    df["bollingerL"] = ta.volatility.bollinger_lband(
        close = df["Close"]
    )

    df["bollingerAvg"] = ta.volatility.bollinger_mavg(
        close = df["Close"]
    )

defineVolatilityIndicators()    
df[["bollingerH", "bollingerL", "bollingerAvg"]].tail()

Unnamed: 0,bollingerH,bollingerL,bollingerAvg
15354,152.4648,137.067202,144.766001
15355,151.95058,136.754422,144.352501
15356,151.417182,136.575821,143.996501
15357,151.028881,136.483122,143.756001
15358,150.583754,136.289249,143.436501


### Trend Indicators

In [8]:
def defineTrendIndicators():
    df["MACD"] = ta.trend.macd(
        close = df["Close"]
    )

    df["ichimokuA"] = ta.trend.ichimoku_a(
        high = df["High"],
        low = df["Close"]
    )

    df["ichimokuB"] = ta.trend.ichimoku_b(
        high = df["High"],
        low = df["Close"]
    )

    df["ichimokuCL"] = ta.trend.ichimoku_conversion_line(
        high = df["High"],
        low = df["Close"]
    )

    df["ichimokuBL"] = ta.trend.ichimoku_base_line(
        high = df["High"],
        low = df["Close"]
    )

    df["ADX"] = ta.trend.adx(
        high = df["High"],
        low = df["Low"],
        close = df["Close"]
    )

    df["movingAverage"] = ta.trend.sma_indicator(
        close = df["Close"]
    )

defineTrendIndicators()
df[["MACD", "ichimokuA", "ichimokuB", "ichimokuCL", "ADX", "movingAverage"]].tail()

Unnamed: 0,MACD,ichimokuA,ichimokuB,ichimokuCL,ADX,movingAverage
15354,-0.634238,143.509998,137.365002,140.979996,27.93974,142.605834
15355,-0.711981,143.509998,137.860004,140.979996,27.224229,141.799166
15356,-0.712127,143.797501,137.860004,141.555,26.119655,141.106667
15357,-0.620366,143.93,140.755005,141.82,24.389451,141.126667
15358,-0.660264,143.93,141.555,141.82,23.269279,141.205833


### Volume Indicators

In [10]:
def defineVolumeIndicators():
    df["OBV"] = ta.volume.on_balance_volume(
        close = df["Close"],
        volume = df["Volume"]
    )

    df["NVI"] = ta.volume.negative_volume_index(
        close = df["Close"],
        volume = df["Volume"]
    )

    df["VPT"] = ta.volume.volume_price_trend(
        close = df["Close"],
        volume = df["Volume"]
    )

    df["VWAP"] = ta.volume.volume_weighted_average_price(
        high = df["High"],
        low = df["Low"],
        close = df["Close"],
        volume = df["Volume"]
    )

defineVolumeIndicators()
df[["OBV", "NVI", "VPT", "VWAP"]].tail()

Unnamed: 0,OBV,NVI,VPT,VWAP
15354,2396547623,10280.76328,-25434.978439,143.886079
15355,2393689623,10280.76328,13915.151724,143.559659
15356,2397028223,10280.76328,12195.464503,143.127302
15357,2400897423,10280.76328,44341.096497,141.85609
15358,2398597571,10173.341168,4670.519906,141.214304


In [11]:
df

Unnamed: 0,Date,Open,High,Low,Close,Volume,stochOsc,RSI,sRSI,bollingerH,...,ichimokuA,ichimokuB,ichimokuCL,ichimokuBL,ADX,movingAverage,OBV,NVI,VPT,VWAP
0,1962-01-02,7.374124,7.374124,7.291268,7.291268,407940,,,,,...,,7.332696,,,0.000000,,407940,1000.000000,-358100.291804,
1,1962-01-03,7.291268,7.355003,7.291268,7.355003,305955,,,,,...,,7.332696,,,0.000000,,713895,1008.741279,-356231.062331,
2,1962-01-04,7.355003,7.355003,7.278521,7.281708,274575,,,,,...,,7.327916,,,0.000000,,439320,998.688828,-61.795600,
3,1962-01-05,7.272148,7.272148,7.125558,7.138305,384405,,,,,...,,7.256215,,,0.000000,,54915,998.688828,-10306.527515,
4,1962-01-08,7.131931,7.131931,6.947100,7.004461,572685,,,,,...,,7.189292,,,0.000000,,-517770,998.688828,-18308.230116,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15354,2022-12-29,140.580002,142.259995,140.449997,141.059998,2337200,24.109920,44.959734,0.243160,152.464800,...,143.509998,137.365002,140.979996,146.040001,27.939740,142.605834,2396547623,10280.763280,-25434.978439,143.886079
15355,2022-12-30,140.539993,140.899994,139.449997,140.889999,2858000,23.048096,44.573677,0.230858,151.950580,...,143.509998,137.860004,140.979996,146.040001,27.224229,141.799166,2393689623,10280.763280,13915.151724,143.559659
15356,2023-01-03,141.100006,141.899994,140.479996,141.550003,3338600,27.170540,46.494611,0.292070,151.417182,...,143.797501,137.860004,141.555000,146.040001,26.119655,141.106667,2397028223,10280.763280,12195.464503,143.127302
15357,2023-01-04,142.070007,143.619995,141.369995,142.600006,3869200,36.709767,49.493581,0.429025,151.028881,...,143.930000,140.755005,141.820000,146.040001,24.389451,141.126667,2400897423,10280.763280,44341.096497,141.856090


### Calculate normalized percentage change for all columns

In [12]:
'''Calculate percentage change'''
"""
df['Open'] = df['Open'].pct_change() # Create arithmetic returns column
df['High'] = df['High'].pct_change() # Create arithmetic returns column
df['Low'] = df['Low'].pct_change() # Create arithmetic returns column
df['Close'] = df['Close'].pct_change() # Create arithmetic returns column
df['Volume'] = df['Volume'].pct_change()
"""

df.dropna(how='any', axis=0, inplace=True) # Drop all rows with NaN valu
df[["Open", "High", "Low", "Close", "Volume"]].head()

Unnamed: 0,Open,High,Low,Close,Volume
26,7.036329,7.07457,7.036329,7.036329,266730
27,7.036329,7.07457,7.036329,7.042702,23535
28,7.042702,7.09369,7.010835,7.09369,455010
29,7.09369,7.151052,7.087317,7.100064,298110
30,7.100064,7.106437,6.985341,6.985341,274575


### Normalize Columns

In [24]:
sc = MinMaxScaler()
def normalizeColumns():
    for column in ['Open', 'High', 'Low', 'Close', 'Volume', 
        'stochOsc', 'RSI', 'sRSI', 'bollingerH', 'bollingerL', 
        'bollingerAvg', 'MACD', 'ichimokuA', 'ichimokuB', 
        'ichimokuCL', 'ADX', 'movingAverage', 'OBV', 'NVI', 
        'VPT', 'VWAP']:
        try:
            df[column] = sc.fit_transform(df[[column]])
        except:
            print(f'Warning: {column} column not in current Data Frame')

normalizeColumns()
df = df[["Date", "Open", "High", "Low", "Close", "Volume", "RSI", "ichimokuA", "ichimokuB", "ichimokuCL", "ichimokuBL", "MACD", "OBV"]]
df.head()



Unnamed: 0,Date,Open,High,Low,Close,Volume,RSI,ichimokuA,ichimokuB,ichimokuCL,ichimokuBL,MACD,OBV
26,1962-02-07,0.015523,0.014712,0.015977,0.015492,0.003349,0.526793,0.013073,0.013339,0.01386,7.025175,0.628203,0.006854
27,1962-02-08,0.015523,0.014712,0.015977,0.015524,0.0,0.532078,0.013073,0.013339,0.01386,7.025175,0.628573,0.006863
28,1962-02-09,0.015555,0.014807,0.01585,0.015776,0.005942,0.574238,0.012969,0.013339,0.01386,6.983747,0.629115,0.007037
29,1962-02-12,0.015807,0.01509,0.016231,0.015807,0.003781,0.579491,0.013246,0.013339,0.01449,6.967813,0.629588,0.007151
30,1962-02-13,0.015838,0.01487,0.015724,0.01524,0.003457,0.482401,0.013342,0.013339,0.014681,6.967813,0.629465,0.007046


In [25]:
## Keep columns

## Splitting data

In [None]:
'''Create training, validation and test split'''
def splitMethod1():
    times = sorted(df.index.values)
    last_10pct = sorted(df.index.values)[-int(0.1*len(times))] # Last 10% of series
    last_20pct = sorted(df.index.values)[-int(0.2*len(times))] # Last 20% of series

    df_train = df[(df.index < last_20pct)]  # Training data are 80% of total data
    df_val = df[(df.index >= last_20pct) & (df.index < last_10pct)]
    df_test = df[(df.index >= last_10pct)]

    # Remove date column
    df_train.drop(columns=['Date'], inplace=True)
    df_val.drop(columns=['Date'], inplace=True)
    df_test.drop(columns=['Date'], inplace=True)

    # Convert pandas columns into arrays
    train_data = df_train.values
    val_data = df_val.values
    test_data = df_test.values
    print('Training data shape: {}'.format(train_data.shape))
    print('Validation data shape: {}'.format(val_data.shape))
    print('Test data shape: {}'.format(test_data.shape))

splitMethod1()
df_train.head()

### Plot daily changes of close prices and volume

In [None]:
def plotDailyPctChanges():
    fig = plt.figure(figsize=(15,10))
    st = fig.suptitle("Data Separation", fontsize=20)
    st.set_y(0.92)

    ###############################################################################

    ax1 = fig.add_subplot(211)
    ax1.plot(np.arange(train_data.shape[0]), df_train['Close'], label='Training data')

    ax1.plot(np.arange(train_data.shape[0], 
                       train_data.shape[0]+val_data.shape[0]), df_val['Close'], label='Validation data')

    ax1.plot(np.arange(train_data.shape[0]+val_data.shape[0], 
                       train_data.shape[0]+val_data.shape[0]+test_data.shape[0]), df_test['Close'], label='Test data')
    ax1.set_xlabel('Date')
    ax1.set_ylabel('Normalized Closing Returns')

    ###############################################################################

    ax2 = fig.add_subplot(212)
    ax2.plot(np.arange(train_data.shape[0]), df_train['Volume'], label='Training data')

    ax2.plot(np.arange(train_data.shape[0], 
                       train_data.shape[0]+val_data.shape[0]), df_val['Volume'], label='Validation data')

    ax2.plot(np.arange(train_data.shape[0]+val_data.shape[0], 
                       train_data.shape[0]+val_data.shape[0]+test_data.shape[0]), df_test['Volume'], label='Test data')
    ax2.set_xlabel('Date')
    ax2.set_ylabel('Normalized Volume Changes')

    plt.legend(loc='best')
#plotDailyPctChanges()

### Create chunks of training, validation and test data

In [None]:
# Training data
X_train, y_train = [], []
for i in range(seq_len, len(train_data)):
    X_train.append(train_data[i-seq_len:i]) # Chunks of training data with a length of 128 df-rows
    y_train.append(train_data[:, 3][i]) #Value of 4th column (Close Price) of df-row 128+1
X_train, y_train = np.array(X_train), np.array(y_train)

###############################################################################

# Validation data
X_val, y_val = [], []
for i in range(seq_len, len(val_data)):
    X_val.append(val_data[i-seq_len:i])
    y_val.append(val_data[:, 3][i])
X_val, y_val = np.array(X_val), np.array(y_val)

###############################################################################

# Test data
X_test, y_test = [], []
for i in range(seq_len, len(test_data)):
    X_test.append(test_data[i-seq_len:i])
    y_test.append(test_data[:, 3][i])    
X_test, y_test = np.array(X_test), np.array(y_test)

print(X_train.shape, y_train.shape)
print(X_val.shape, y_val.shape)

## 1D Convolution layers for a CNN + LSTM model

## Testing Models

In [None]:
def create_model():
    in_seq = Input(shape = (seq_len, len(df_train.columns)))

    x = Bidirectional(LSTM(128, return_sequences=True))(in_seq)
    x = Bidirectional(LSTM(128, return_sequences=True))(x)
    x = Bidirectional(LSTM(64, return_sequences=True))(x) 

    avg_pool = GlobalAveragePooling1D()(x)
    max_pool = GlobalMaxPooling1D()(x)
    conc = concatenate([avg_pool, max_pool])
    conc = Dense(64, activation="relu")(conc)
    out = Dense(1, activation="linear")(conc)      

    model = Model(inputs=in_seq, outputs=out)
    model.compile(loss="mse", optimizer="adam", metrics=['mae', 'mape'])    
    return model

model = create_model()

#model.summary()
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=1)
callback = tf.keras.callbacks.ModelCheckpoint('Bi-LSTM.hdf5', monitor='val_loss', save_best_only=True, verbose=1)

model.fit(X_train, y_train,
              batch_size=2048,
              verbose=1,
              callbacks=[callback],
              epochs=50,
              #shuffle=True,
              validation_data=(X_val, y_val),)


model = tf.keras.models.load_model('Bi-LSTM.hdf5')

###############################################################################
'''Calculate predictions and metrics'''

#Calculate predication for training, validation and test data
train_pred = model.predict(X_train)
val_pred = model.predict(X_val)
test_pred = model.predict(X_test)

#Print evaluation metrics for all datasets
train_eval = model.evaluate(X_train, y_train, verbose=0)
val_eval = model.evaluate(X_val, y_val, verbose=0)
test_eval = model.evaluate(X_test, y_test, verbose=0)
print(' ')
print('Evaluation metrics')
print('Training Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}'.format(train_eval[0], train_eval[1], train_eval[2]))
print('Validation Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}'.format(val_eval[0], val_eval[1], val_eval[2]))
print('Test Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}'.format(test_eval[0], test_eval[1], test_eval[2]))

###############################################################################
'''Display results'''

fig = plt.figure(figsize=(15,15))
st = fig.suptitle("Bi-LSTM Model", fontsize=22)
st.set_y(1.02)

#Plot training data results
ax11 = fig.add_subplot(311)
ax11.plot(train_data[:, 3], label='IBM Closing Returns')
ax11.plot(train_pred, color='yellow', linewidth=3, label='Predicted IBM Closing Returns')
ax11.set_title("Training Data", fontsize=18)
ax11.set_xlabel('Date')
ax11.set_ylabel('IBM Closing Returns')

#Plot validation data results
ax21 = fig.add_subplot(312)
ax21.plot(val_data[:, 3], label='IBM Closing Returns')
ax21.plot(val_pred, color='yellow', linewidth=3, label='Predicted IBM Closing Returns')
ax21.set_title("Validation Data", fontsize=18)
ax21.set_xlabel('Date')
ax21.set_ylabel('IBM Closing Returns')

#Plot test data results
ax31 = fig.add_subplot(313)
ax31.plot(test_data[:, 3], label='IBM Closing Returns')
ax31.plot(test_pred, color='yellow', linewidth=3, label='Predicted IBM Closing Returns')
ax31.set_title("Test Data", fontsize=18)
ax31.set_xlabel('Date')
ax31.set_ylabel('IBM Closing Returns')

plt.tight_layout()
plt.legend(loc='best')