In [None]:
url = 'https://anaconda.org/conda-forge/libta-lib/0.4.0/download/linux-64/libta-lib-0.4.0-h166bdaf_1.tar.bz2'
!curl -L $url | tar xj -C /usr/lib/x86_64-linux-gnu/ lib --strip-components=1
url = 'https://anaconda.org/conda-forge/ta-lib/0.4.19/download/linux-64/ta-lib-0.4.19-py39hd257fcd_4.tar.bz2'
!curl -L $url | tar xj -C /usr/local/lib/python3.9/dist-packages/ lib/python3.9/site-packages/talib --strip-components=3

In [4]:
import pandas as pd
import numpy as np
import talib
stock_data = "CBQK_INTRADAY.csv"

In [9]:
def calculate_RSI(data, period=14):
    delta = data['Close'].diff()
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)
    avg_gain = gain.rolling(window=period).mean()
    avg_loss = loss.rolling(window=period).mean()
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    data['RSI'] = rsi

def calculate_OBV(data):
    obv = pd.Series(index=data.index)
    obv.iloc[0] = 0
    for i in range(1, len(data)):
        if data.loc[i, 'Close'] > data.loc[i - 1, 'Close']:
            obv.iloc[i] = obv.iloc[i - 1] + data.loc[i, 'Volume']
        elif data.loc[i, 'Close'] < data.loc[i - 1, 'Close']:
            obv.iloc[i] = obv.iloc[i - 1] - data.loc[i, 'Volume']
        else:
            obv.iloc[i] = obv.iloc[i - 1]
    data['OBV'] = obv

def calculate_MACD(data, slow=26, fast=12, signal=9):

    ema_slow = data['Close'].ewm(span=slow, min_periods=slow).mean()
    ema_fast = data['Close'].ewm(span=fast, min_periods=fast).mean()

    macd_line = ema_fast - ema_slow

    signal_line = macd_line.ewm(span=signal, min_periods=signal).mean()

    macd_hist = macd_line - signal_line
    
    data['MACD Line'] = macd_line
    data['Signal Line'] = signal_line
    data['MACD Histogram'] = macd_hist
    

def calculate_ADX(data, period=14):
    high = data['High']
    low = data['Low']
    close = data['Close']

    tr1 = high - low
    tr2 = abs(high - close.shift())
    tr3 = abs(low - close.shift())
    tr = pd.concat([tr1, tr2, tr3], axis=1).max(axis=1)
    
    atr = tr.ewm(span=period, min_periods=period).mean()
    atr_diff = atr.diff()
    
    up_move = high - high.shift()
    down_move = low.shift() - low
    pos_dm = np.where((up_move > down_move) & (up_move > 0), up_move, 0)
    neg_dm = np.where((down_move > up_move) & (down_move > 0), down_move, 0)
    
    pos_di = pd.Series(pos_dm).ewm(span=period, min_periods=period).mean() / atr
    neg_di = pd.Series(neg_dm).ewm(span=period, min_periods=period).mean() / atr
    
    dx = np.abs(pos_di - neg_di) / (pos_di + neg_di) * 100
    adx = dx.ewm(span=period, min_periods=period).mean()
    
    data['ADX'] = adx
    data['+DI'] = pos_di
    data['-DI'] = neg_di

def candlestick_analysis(data):
  # closing - y_closing
  data["close-open"] = data["Close"] - data["Open"]
  data["high-low"] = data["High"] - data["Low"]
  data["close-high"] = data["Close"] - data["High"]
  data["close-low"] = data["Close"] - data["Low"]
  data["open-high"] = data["Open"] - data["High"]
  data["open-low"] = data["Open"] - data["Low"]

def add_date_features(data):        
    data['Day of Week'] = data.index.dayofweek
    data['Day of Month'] = data.index.day
    data['Month'] = data.index.month
    data['Week of Year'] = data.index.weekofyear

In [13]:
df = pd.read_csv(stock_data)
# calculate_RSI(df)
# calculate_ADX(df)
# calculate_MACD(df)
# calculate_OBV(df)
candlestick_analysis(df)
df.index = pd.to_datetime(df.Date+ ' ' + df['Time'])
df = df.drop(columns = ["Ticker","Date","Time"])
# add_date_features(df)
df = df.dropna()
df['target'] = np.where(df['Close'].shift(-1) > df['Close'], 1, np.where(df['Close'].shift(-1) < df['Close'], -1, 0))

In [11]:
def candlestick_analysis(df):
    # Add columns for candlestick patterns
    df['CDL2CROWS'] = talib.CDL2CROWS(df['Open'], df['High'], df['Low'], df['Close'])
    df['CDL3BLACKCROWS'] = talib.CDL3BLACKCROWS(df['Open'], df['High'], df['Low'], df['Close'])
    df['CDL3INSIDE'] = talib.CDL3INSIDE(df['Open'], df['High'], df['Low'], df['Close'])
    df['CDL3LINESTRIKE'] = talib.CDL3LINESTRIKE(df['Open'], df['High'], df['Low'], df['Close'])
    df['CDL3OUTSIDE'] = talib.CDL3OUTSIDE(df['Open'], df['High'], df['Low'], df['Close'])
    df['CDL3STARSINSOUTH'] = talib.CDL3STARSINSOUTH(df['Open'], df['High'], df['Low'], df['Close'])
    df['CDL3WHITESOLDIERS'] = talib.CDL3WHITESOLDIERS(df['Open'], df['High'], df['Low'], df['Close'])
    df['CDLABANDONEDBABY'] = talib.CDLABANDONEDBABY(df['Open'], df['High'], df['Low'], df['Close'])
    df['CDLADVANCEBLOCK'] = talib.CDLADVANCEBLOCK(df['Open'], df['High'], df['Low'], df['Close'])
    df['CDLBELTHOLD'] = talib.CDLBELTHOLD(df['Open'], df['High'], df['Low'], df['Close'])
    df['CDLBREAKAWAY'] = talib.CDLBREAKAWAY(df['Open'], df['High'], df['Low'], df['Close'])
    df['CDLCLOSINGMARUBOZU'] = talib.CDLCLOSINGMARUBOZU(df['Open'], df['High'], df['Low'], df['Close'])
    df['CDLCONCEALBABYSWALL'] = talib.CDLCONCEALBABYSWALL(df['Open'], df['High'], df['Low'], df['Close'])
    df['CDLCOUNTERATTACK'] = talib.CDLCOUNTERATTACK(df['Open'], df['High'], df['Low'], df['Close'])
    df['CDLDARKCLOUDCOVER'] = talib.CDLDARKCLOUDCOVER(df['Open'], df['High'], df['Low'], df['Close'])
    df['CDLDOJI'] = talib.CDLDOJI(df['Open'], df['High'], df['Low'], df['Close'])
    df['CDLDOJISTAR'] = talib.CDLDOJISTAR(df['Open'], df['High'], df['Low'], df['Close'])
    df['CDLDRAGONFLYDOJI'] = talib.CDLDRAGONFLYDOJI(df['Open'], df['High'], df['Low'], df['Close'])
    df['CDLENGULFING'] = talib.CDLENGULFING(df['Open'], df['High'], df['Low'], df['Close'])
df = pd.read_csv(stock_data)
df = df.iloc[600:]
df.index = pd.to_datetime(df.Date+ ' ' + df['Time'])
df = df.drop(columns = ["Ticker","Date","Time"])
candlestick_analysis(df)
df['target'] = np.where(df['Close'].shift(-1) > df['Close'], 1, np.where(df['Close'].shift(-1) < df['Close'], -1, 0))

In [16]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split


X = df.drop(columns = ["target","Open","Close","High","Low"])
y = df["target"]

X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.33, random_state=42)

clf = RandomForestClassifier()
clf.fit(X_train, y_train)
clf.score(X_test,y_test)

0.4222027972027972

In [19]:
from keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from keras.optimizers import Adam
from keras.utils import to_categorical

label_mapping = { 1: 0, -1: 1, 0: 2}
target_mapped = df['target'].map(label_mapping)
target = to_categorical(target_mapped)
# features = df.drop(columns = ["target"])
features = df.loc[:,["Open","High","Low","Close","Volume"]]

# Assuming your original features are stored in a Pandas DataFrame called 'features'
num_timesteps = 60
num_samples, num_features = features.shape
target = target[num_timesteps-1:]
# Convert the DataFrame to a NumPy array
features_array = features.values

# Create a new array to hold the reshaped features
reshaped_features = np.zeros((num_samples - num_timesteps + 1, num_timesteps, num_features))

# Loop over each sample in the original feature array and create a sliding window of 20 days
for i in range(num_timesteps, num_samples + 1):
    window = features_array[i - num_timesteps:i]
    reshaped_features[i - num_timesteps] = window



KeyboardInterrupt: ignored

In [None]:
# Define the LSTM model
model = Sequential()
model.add(LSTM(64, input_shape=(num_timesteps, num_features)))
model.add(Dense(20, activation='relu'))
model.add(Dense(3, activation='softmax'))

optimizer = Adam(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

model.fit(reshaped_features, target, epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7ff23822c040>