In [1]:
import numpy as np
import os
from pathlib import Path
import pandas as pd
from typing import Dict, List

from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import pad_sequences

import tensorflow as tf
from keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Masking, Conv1D, Flatten, MaxPooling1D
from tensorflow.keras import regularizers

from synthetic_data import gen_x_y
from get_real_data import data_augmentation

2023-09-05 15:39:00.047372: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-09-05 15:39:00.048774: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-09-05 15:39:00.074994: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-09-05 15:39:00.076220: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Load Data

In [2]:
def load_data(pattern_name: str) -> List[pd.DataFrame]:
    """
    Load data for a pattern and return them as a list of DataFrames.
    
    Args:
        pattern_names (List[str]): List of pattern names to load.
        
    Returns:
        List[pd.DataFrame]: A list of DataFrames containing data for the specified patterns.
    """
    dataframes = []
    DATA_DIR = 'data/patterns'
    
    dir_path = os.path.join(DATA_DIR, pattern_name)

    files = os.listdir(dir_path)
    
    csv_files = [file for file in files if file.endswith(".csv")]
    
    for csv_file in csv_files:
        file_path = os.path.join(dir_path, csv_file)
        df = pd.read_csv(file_path)
        dataframes.append(df)
    
    return dataframes

In [3]:
dfs = load_data('rising_wedge')
dfs[0]

Unnamed: 0,Date,Open,High,Low,Close,Start Date,End Date,Pattern,Pattern Present
0,2022-07-18,29.8,31.6,29.8,30.5,79,106,1,1
1,2022-07-19,30.8,30.9,29.9,30.5,79,106,1,1
2,2022-07-20,30.9,30.9,30.6,30.7,79,106,1,1
3,2022-07-21,31.0,31.6,30.7,30.7,79,106,1,1
4,2022-07-22,30.7,31.3,30.7,31.3,79,106,1,1
...,...,...,...,...,...,...,...,...,...
179,2023-03-31,30.5,30.6,30.0,30.0,79,106,1,1
180,2023-04-03,30.0,30.4,30.0,30.3,79,106,1,1
181,2023-04-04,30.4,30.8,30.2,30.3,79,106,1,1
182,2023-04-05,30.8,30.8,30.2,30.6,79,106,1,1


# One Hot Encode Data

In [4]:
def one_hot_encode(dataframes: list) -> List[pd.DataFrame]:
    """
    One Hot Encode data for multiple patterns and return them as a list of DataFrames.
    
    Args:
        dataframes (List[str]): List of pattern names to load.
        
    Returns:
        List[pd.DataFrame]: A list of DataFrames with One Hot Encoding for the 'Pattern' column.
    """
    pattern_mapping = {
        0: 'No Pattern',
        1: 'Rising Wedge',
        2: 'Falling Wedge',
        3: 'Double Top',
        4: 'Double Bottom'
    }
    dataframes_encoded = []
    
    for df in dataframes:
        for pattern_name in pattern_mapping.values():
            df[pattern_name] = 0
        
        df[pattern_mapping[df['Pattern'].iloc[0]]] = 1
        dataframes_encoded.append(df)
    
    return dataframes_encoded

In [5]:
dfs = one_hot_encode(dfs)
dfs[0]

Unnamed: 0,Date,Open,High,Low,Close,Start Date,End Date,Pattern,Pattern Present,No Pattern,Rising Wedge,Falling Wedge,Double Top,Double Bottom
0,2022-07-18,29.8,31.6,29.8,30.5,79,106,1,1,0,1,0,0,0
1,2022-07-19,30.8,30.9,29.9,30.5,79,106,1,1,0,1,0,0,0
2,2022-07-20,30.9,30.9,30.6,30.7,79,106,1,1,0,1,0,0,0
3,2022-07-21,31.0,31.6,30.7,30.7,79,106,1,1,0,1,0,0,0
4,2022-07-22,30.7,31.3,30.7,31.3,79,106,1,1,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
179,2023-03-31,30.5,30.6,30.0,30.0,79,106,1,1,0,1,0,0,0
180,2023-04-03,30.0,30.4,30.0,30.3,79,106,1,1,0,1,0,0,0
181,2023-04-04,30.4,30.8,30.2,30.3,79,106,1,1,0,1,0,0,0
182,2023-04-05,30.8,30.8,30.2,30.6,79,106,1,1,0,1,0,0,0


# Select X and Y columns

In [6]:
def real_data_with_pattern(pattern: str, model_type) -> List[np.ndarray]:
    """
    Take a pattern name and model type and returns a list of arrays.

    Args: pattern name: The required pattern name
          model_type[Str]: A string to determine the y-output

    Returns:
        List[Arrays]: A list of arrays with features and target.
        
    """
    DATA_PATH_ = Path(f"data/patterns/{pattern}")
    DATA_PATH_.exists()

    data = {filepath.stem: pd.read_csv(filepath) for filepath in DATA_PATH_.glob("*.csv")}

    X_real = []
    y_real = []
    
    if model_type == "full":
        for key, df in data.items():
            X_real.append(df[['Open', 'High', 'Low', 'Close']].values)
            y_real.append(df.loc[0, ['Start Date', 'End Date', 'Pattern']].values)
            
    elif model_type == 'categorise':
        for key, df in data.items():
            X_real.append(df[['Open', 'High', 'Low', 'Close']].values)
            y_real.append(df.loc[0, ['Pattern Present']].values)

    return X_real, y_real

In [7]:
def synthetic_data(X_real, pattern, model_type, noise=True):
    """Takes the real X data and pattern and returns the required amount of synthetic data"""
    amount_40 = int(len(X_real) * 0.4)

    X_synthetic, y_synthetic = gen_x_y(l=amount_40, pattern=pattern, noise=noise, general=False, model_type=model_type)

    return X_synthetic, y_synthetic

In [8]:
def join_data(*args):
    """Takes data arrays as arguments and returns a combined list of arrays"""
    combined_data = []
    for data in args:
        if data is not None:
            combined_data.extend(data)
    return combined_data

In [9]:
def pad_arrays(X, dtype='float32', padding='post', value=-100):
    """Takes a list of arrays and pads to the minimum required length"""
    return pad_sequences(X, dtype=dtype, padding=padding, value=value)

# Testing Models to Find Dates

In [10]:
results_dict = {}

In [11]:
def initialize_model_CNN(input_shape):
    model = Sequential()

    model.add(Masking(mask_value=-100, input_shape=input_shape))
    model.add(Conv1D(32, activation='relu', kernel_size=3, kernel_regularizer=regularizers.L1L2(l1=1e-3, l2=1e-3)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(32, activation='relu', kernel_size=3, kernel_regularizer=regularizers.L1L2(l1=1e-3, l2=1e-3)))
    
    model.add(Flatten())
    model.add(Dense(units=200, activation='relu', kernel_regularizer=regularizers.L1L2(l1=1e-3, l2=1e-3)))
    model.add(Dense(units=100, activation='relu', kernel_regularizer=regularizers.L1L2(l1=1e-3, l2=1e-3)))
    model.add(Dense(units=16, activation='relu', kernel_regularizer=regularizers.L1L2(l1=1e-3, l2=1e-3)))
    model.add(Dense(units=16, activation='relu', kernel_regularizer=regularizers.L1L2(l1=1e-3, l2=1e-3)))
    model.add(Dense(units=2, activation='linear'))

    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    
    return model

## Rising Wedge

In [135]:
X_real, y_real = real_data_with_pattern(pattern='rising_wedge', model_type='full')
X_synthetic, y_synthetic = synthetic_data(X_real, 'rising_wedge', model_type='full', noise=False)
X_joined = join_data(X_real, X_synthetic)
y_joined = join_data(y_real, y_synthetic)
X_pad = pad_arrays(X_joined)
y_all = np.array(y_joined)
X_train, X_test, y_train, y_test = train_test_split(X_pad, y_all, test_size=0.30)

# X_train, y_train = data_augmentation(X_train, y_train)

y_train = [entry[:2] for entry in y_train]

X_train = tf.convert_to_tensor(X_train, np.float32)
y_train = tf.convert_to_tensor(y_train, np.int16)

In [136]:
es = EarlyStopping(patience = 20, restore_best_weights=True)

input_shape = X_train.shape[1:]
model = initialize_model_CNN(input_shape)

model.fit(
    X_train,
    y_train,
    validation_split = 0.2,
    shuffle = True,
    batch_size=64,
    epochs = 1000,
    callbacks = [es],
    verbose = 1
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.src.callbacks.History at 0x7f538f92da50>

In [137]:
y_test = [entry[:2] for entry in y_test]
X_test = tf.convert_to_tensor(X_test, np.float32)
y_test = tf.convert_to_tensor(y_test, np.int16)
res = model.evaluate(X_test, y_test)

results_dict['Rising Wedge'] = res[1]



## Falling Wedge

In [138]:
X_real, y_real = real_data_with_pattern(pattern='falling_wedge', model_type='full')
X_synthetic, y_synthetic = synthetic_data(X_real, 'falling_wedge', model_type='full', noise=False)
X_joined = join_data(X_real, X_synthetic)
y_joined = join_data(y_real, y_synthetic)

# X_joined, y_joined = data_augmentation(X_joined, y_joined)

X_pad = pad_arrays(X_joined)
y_all = np.array(y_joined)
X_train, X_test, y_train, y_test = train_test_split(X_pad, y_all, test_size=0.30)
y_train = [entry[:2] for entry in y_train]
X_train = tf.convert_to_tensor(X_train, np.float32)
y_train = tf.convert_to_tensor(y_train, np.int16)

In [139]:
es = EarlyStopping(patience = 20, restore_best_weights=True)

input_shape = X_train.shape[1:]
model = initialize_model_CNN(input_shape)

model.fit(
    X_train,
    y_train,
    validation_split = 0.2,
    shuffle = True,
    batch_size=64,
    epochs = 1000,
    callbacks = [es],
    verbose = 1
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.src.callbacks.History at 0x7f538faaee30>

In [140]:
y_test = [entry[:2] for entry in y_test]
X_test = tf.convert_to_tensor(X_test, np.float32)
y_test = tf.convert_to_tensor(y_test, np.int16)
res = model.evaluate(X_test, y_test)

results_dict['Falling Wedge'] = res[1]



## Double Top

In [141]:
X_real, y_real = real_data_with_pattern(pattern='double_top', model_type='full')
X_synthetic, y_synthetic = synthetic_data(X_real, 'double_top', model_type='full', noise=False)
X_joined = join_data(X_real, X_synthetic)
y_joined = join_data(y_real, y_synthetic)

# X_joined, y_joined = data_augmentation(X_joined, y_joined)

X_pad = pad_arrays(X_joined)
y_all = np.array(y_joined)
X_train, X_test, y_train, y_test = train_test_split(X_pad, y_all, test_size=0.30)
y_train = [entry[:2] for entry in y_train]
X_train = tf.convert_to_tensor(X_train, np.float32)
y_train = tf.convert_to_tensor(y_train, np.int16)

In [142]:
es = EarlyStopping(patience = 20, restore_best_weights=True)

input_shape = X_train.shape[1:]
model = initialize_model_CNN(input_shape)

model.fit(
    X_train,
    y_train,
    validation_split = 0.2,
    shuffle = True,
    batch_size=64,
    epochs = 1000,
    callbacks = [es],
    verbose = 1
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.src.callbacks.History at 0x7f538fc7fb50>

In [143]:
y_test = [entry[:2] for entry in y_test]
X_test = tf.convert_to_tensor(X_test, np.float32)
y_test = tf.convert_to_tensor(y_test, np.int16)
res = model.evaluate(X_test, y_test)

results_dict['Double Top'] = res[1]



## Double Bottom

In [144]:
X_real, y_real = real_data_with_pattern(pattern='double_bottom', model_type='full')
X_synthetic, y_synthetic = synthetic_data(X_real, 'double_bottom', model_type='full', noise=False)
X_joined = join_data(X_real, X_synthetic)
y_joined = join_data(y_real, y_synthetic)

# X_joined, y_joined = data_augmentation(X_joined, y_joined)

X_pad = pad_arrays(X_joined)
y_all = np.array(y_joined)
X_train, X_test, y_train, y_test = train_test_split(X_pad, y_all, test_size=0.30)
y_train = [entry[:2] for entry in y_train]
y_test = [entry[:2] for entry in y_test]
X_train = tf.convert_to_tensor(X_train, np.float32)
y_train = tf.convert_to_tensor(y_train, np.int16)

In [145]:
es = EarlyStopping(patience = 20, restore_best_weights=True)

input_shape = X_train.shape[1:]
model = initialize_model_CNN(input_shape)

model.fit(
    X_train,
    y_train,
    validation_split = 0.2,
    shuffle = True,
    batch_size=64,
    epochs = 1000,
    callbacks = [es],
    verbose = 1
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.src.callbacks.History at 0x7f538f0f3640>

In [146]:
y_pred = model.predict(X_test)
y_test[:5], y_pred[:5]




([array([16, 22], dtype=object),
  array([285, 439], dtype=object),
  array([29, 38], dtype=object),
  array([50, 64], dtype=object),
  array([56, 75], dtype=object)],
 array([[ 21.693808,  31.219471],
        [350.7031  , 510.3231  ],
        [127.51409 ,  59.4153  ],
        [ 50.592693,  72.68108 ],
        [ 78.906456, 103.257126]], dtype=float32))

In [None]:
import plotly.graph_objects as go

import pandas as pd
from datetime import datetime
import random



i = random.randint(0,100)
print(i)

df = pd.DataFrame(X_test[i], columns=['open', 'high', 'low', 'close'])
df = df[(df != -100).all(axis=1)]

fig = go.Figure(data=[go.Candlestick(x=df.index,
                open=df.open,
                high=df.high,
                low=df.low,
                close=df.close)])
    
y_min, y_max = y_test[i]
fig.add_vline(x=y_min, line_width=3, line_dash="dash", line_color="green")
fig.add_vline(x=y_max, line_width=3, line_dash="dash", line_color="green")

y_min, y_max = y_pred[i]
fig.add_vline(x=y_min, line_width=3, line_dash="dash", line_color="blue")
fig.add_vline(x=y_max, line_width=3, line_dash="dash", line_color="blue")

fig.show()

y_pred[i]


import matplotlib.pyplot as plt
plt.scatter(y_train[:, 0], y_train[:,1])

In [127]:
y_test[i]

array([27, 36], dtype=object)

In [128]:
y_test[i].numpy()

AttributeError: 'numpy.ndarray' object has no attribute 'numpy'

In [148]:
y_test = [entry[:2] for entry in y_test]
X_test = tf.convert_to_tensor(X_test, np.float32)
y_test = tf.convert_to_tensor(y_test, np.int16)
res = model.evaluate(X_test, y_test)

results_dict['Double Bottom'] = res[1]



## Results

In [149]:
results_df = pd.DataFrame(results_dict, index=[1])
display('Results DF')
display(results_df)

'Results DF'

Unnamed: 0,Rising Wedge,Falling Wedge,Double Top,Double Bottom
1,25.821136,41.822731,23.219362,15.324409
