In [78]:
import pandas as pd
import numpy as np
import tensorflow as tf
import talib
from utils import utils
import os

In [58]:
# Read the raw USD/JPY price history from disk, then preview five randomly
# chosen rows as a quick sanity check of the parsed columns.
raw_csv_path = '../data/USDJPY.csv'
df = pd.read_csv(raw_csv_path)
df.sample(n=5)

Unnamed: 0,Gmt time,Open,High,Low,Close,Volume
71580,13.10.2014 13:00:00.000,107.286,107.401,107.201,107.305,8520.7998
38478,25.06.2009 20:00:00.000,95.865,96.014,95.823,95.947,15952.0
93084,27.03.2018 07:00:00.000,105.676,105.687,105.466,105.495,8117.9302
6351,09.05.2004 23:00:00.000,112.235,112.952,112.22,112.952,14878.5
57570,13.07.2012 08:00:00.000,79.298,79.316,79.25,79.258,3287.04


In [59]:
# Run the two project helpers in sequence: first the column rename/format
# step, then the indicator step.
# NOTE(review): judging by the names, the second helper appends technical
# indicator columns (ema, bbands, rsi, macd, ...) — confirm in utils.
df = utils.rename_columns_and_format(df)
df = utils.get_all_indicators(df)

# Discard every row that contains at least one NaN. Reassigning (instead of
# `inplace=True`) avoids mutating whatever object the helper returned and
# sidesteps SettingWithCopyWarning if that object is a slice.
df = df.dropna()

## Ventana bidimensional

In [73]:
def get_model(x_input, y_input):
    """Build and compile a small 2-D convolutional regression network.

    The network expects inputs of shape ``(x_input, y_input, 1)``. It applies
    two ``Conv2D(32)`` + ``MaxPooling2D`` stages whose kernel and pooling
    heights both equal ``x_input``, flattens the result, and feeds it through
    Dense layers of 32, 16 and 8 units (each followed by 20% dropout) before a
    single linear output unit.

    Args:
        x_input: Size of the first spatial axis of each sample; also used as
            the height of every convolution kernel and pooling window.
        y_input: Size of the second spatial axis of each sample.

    Returns:
        A ``tf.keras`` Sequential model compiled with MSE loss, the Adam
        optimizer and a ``RootMeanSquaredError`` metric.
    """
    keras_layers = tf.keras.layers
    kernel_shape = (x_input, 3)
    pool_shape = (x_input, 2)

    net = tf.keras.models.Sequential()

    # Ideas to try: kernel = (2, 1) or (1, 2), and the same for max pooling;
    # also a larger kernel size (days).
    # NOTE(review): with Keras' default 'valid' padding the first convolution
    # already spans the full input height, so the x_input-high pooling and
    # second convolution look valid only when x_input == 1 — confirm the
    # window shape produced by the caller.
    net.add(keras_layers.Conv2D(32, kernel_shape,
                                input_shape=(x_input, y_input, 1),
                                activation='relu'))
    net.add(keras_layers.MaxPooling2D(pool_shape))

    net.add(keras_layers.Conv2D(32, kernel_shape, activation='relu'))
    net.add(keras_layers.MaxPooling2D(pool_shape))

    net.add(keras_layers.Flatten())

    # Fully connected head: progressively narrower layers, each regularised
    # with the same dropout rate.
    for units in (32, 16, 8):
        net.add(keras_layers.Dense(units, activation='relu'))
        net.add(keras_layers.Dropout(0.2))

    # Single unbounded output for the regression target.
    net.add(keras_layers.Dense(1, activation='linear'))

    net.compile(loss='mse',
                optimizer='adam',
                metrics=[tf.keras.metrics.RootMeanSquaredError()])

    # Uncomment to inspect layer shapes while experimenting:
    # net.summary()

    return net

In [96]:
# --- Experiment configuration -------------------------------------------
window = 30           # window length passed to utils.create_windowed_dataset
p_train = 0.8         # split proportion passed to utils.train_test_split
batch_size = 32
epochs = 300
metrics_dir = '../metrics'

# Restrict the experiment to the first 500 rows.
# NOTE(review): presumably to keep the eleven training runs short — this
# overwrites `df`, so later cells only see the truncated frame.
df = df.head(500)

# Indicator columns evaluated one at a time as the sole model input.
columns = ['ema_12', 'ema_26', 'upper_bband',
           'middle_bband', 'lower_bband', 'rsi',
           'macd', 'macd_signal', 'macd_hist', 'k', 'd']

# Create the output directory up front so that a missing folder cannot make
# `to_csv` fail only after a full training run has already been spent.
os.makedirs(metrics_dir, exist_ok=True)

target_column = df['close'].to_numpy()
for column in columns:
    # A fresh model is built on every iteration; clear the global Keras state
    # left by the previous one so memory use does not grow across the loop.
    tf.keras.backend.clear_session()

    df_min = df[column]
    x, y = utils.create_windowed_dataset(df_min, target_column, window)
    x_train, x_test, y_train, y_test = utils.train_test_split(p_train, x, y)

    # x is unpacked as a 3-D array: (samples, x_input, y_input).
    _, x_input, y_input = x.shape
    model = get_model(x_input, y_input)

    # Append the trailing channel axis required by the Conv2D input layer.
    x_train = np.expand_dims(x_train, axis=3)
    x_test = np.expand_dims(x_test, axis=3)

    # NOTE: the held-out split is used as validation data, so the `val_*`
    # curves written below are computed on the test set.
    history = model.fit(x_train,
                        y_train,
                        validation_data=(x_test, y_test),
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=0)

    # Persist the per-epoch training curves, one CSV per indicator.
    df_history = pd.DataFrame(history.history)
    df_history.to_csv(f'{metrics_dir}/{column}_performance.csv', index=False)

In [101]:
def load_final_metrics(metrics_dir='../metrics'):
    """Collect the last-epoch metrics from every ``*.csv`` file in a folder.

    Each CSV is expected to hold one row per epoch; only its final row is
    kept.

    Args:
        metrics_dir: Directory containing the per-indicator metric CSVs.

    Returns:
        A DataFrame indexed by file name (sorted alphabetically) with the
        columns ``loss``, ``val_loss``, ``root_mean_squared_error`` and
        ``val_root_mean_squared_error`` first, followed by any additional
        columns found in the files. If the directory is missing or contains
        no CSV files, an empty DataFrame with those four columns is returned.
    """
    metric_columns = ['loss', 'val_loss',
                      'root_mean_squared_error', 'val_root_mean_squared_error']

    if not os.path.isdir(metrics_dir):
        return pd.DataFrame(columns=metric_columns)

    # os.listdir returns entries in arbitrary order and includes non-CSV
    # entries (e.g. `.ipynb_checkpoints`); filter and sort for a stable result.
    files = sorted(name for name in os.listdir(metrics_dir)
                   if name.endswith('.csv'))

    # Gather all rows first and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    final_rows = [
        pd.read_csv(os.path.join(metrics_dir, name)).iloc[-1].to_dict()
        for name in files
    ]
    if not final_rows:
        return pd.DataFrame(columns=metric_columns)

    table = pd.DataFrame(final_rows, index=files)
    extra_columns = [col for col in table.columns if col not in metric_columns]
    return table.reindex(columns=metric_columns + extra_columns)


# Rank the indicators by their final validation RMSE (best first).
df_metrics = load_final_metrics('../metrics')
df_metrics.sort_values(by=['val_root_mean_squared_error'])


Unnamed: 0,loss,val_loss,root_mean_squared_error,val_root_mean_squared_error
macd_hist_performance.csv,741.427433,96.988541,27.229164,9.848276
rsi_performance.csv,1370.603669,178.270781,37.021664,13.351809
macd_signal_performance.csv,1316.304911,211.265315,36.280914,14.534969
macd_performance.csv,776.427255,243.148167,27.864445,15.593209
upper_bband_performance.csv,1060.467475,361.89777,32.564819,19.023611
ema_26_performance.csv,1155.982666,434.343574,33.999744,20.840912
k_performance.csv,1513.710686,484.772879,38.906433,22.017557
middle_bband_performance.csv,934.410641,1034.875629,30.56813,32.169483
d_performance.csv,876.738268,1062.121569,29.609766,32.590206
ema_12_performance.csv,1103.612474,1299.515739,33.220665,36.048798
