In [78]:
import pandas as pd
import numpy as np
import tensorflow as tf
import talib
from utils import utils
import os

In [58]:
# Read the USD/JPY price history from disk and preview five random rows.
df = pd.read_csv(filepath_or_buffer='../data/USDJPY.csv')
df.sample(n=5)

Unnamed: 0,Gmt time,Open,High,Low,Close,Volume
71580,13.10.2014 13:00:00.000,107.286,107.401,107.201,107.305,8520.7998
38478,25.06.2009 20:00:00.000,95.865,96.014,95.823,95.947,15952.0
93084,27.03.2018 07:00:00.000,105.676,105.687,105.466,105.495,8117.9302
6351,09.05.2004 23:00:00.000,112.235,112.952,112.22,112.952,14878.5
57570,13.07.2012 08:00:00.000,79.298,79.316,79.25,79.258,3287.04


In [59]:
# Pipe the raw frame through the two project helpers (column renaming /
# formatting, then indicator computation -- going by their names, see utils)
# and keep only the rows without missing values.
df = (
    utils.get_all_indicators(utils.rename_columns_and_format(df))
    .dropna()
)

## Función para obtener el modelo

In [130]:
def get_model(x_input, y_input):
    """Build and compile the convolutional regression network.

    The network stacks three Conv2D + MaxPooling2D stages, flattens the
    result and passes it through three Dense + Dropout stages before a
    single linear output unit.

    Args:
        x_input: Size of the first input axis. The first convolution kernel
            spans this axis completely (kernel shape ``(x_input, 3)``).
        y_input: Size of the second input axis.

    Returns:
        A compiled ``tf.keras`` Sequential model that takes inputs of shape
        ``(x_input, y_input, 1)`` and is configured with MSE loss, the Adam
        optimizer and a root-mean-squared-error metric.
    """
    layers = tf.keras.layers

    # Ideas still to try: (2, 1) or (1, 2) kernels (max-pooling as well),
    # and a larger kernel size (days).
    stack = [
        layers.Conv2D(32, (x_input, 3), input_shape=(x_input, y_input, 1), activation='relu'),
        layers.MaxPooling2D((1, 2)),
    ]

    # Two further convolution/pooling stages that only slide along axis 2.
    for _ in range(2):
        stack.append(layers.Conv2D(32, (1, 3), activation='relu'))
        stack.append(layers.MaxPooling2D((1, 2)))

    stack.append(layers.Flatten())

    # Fully connected head: progressively narrower layers, each followed by dropout.
    for units in (32, 16, 8):
        stack.append(layers.Dense(units, activation='relu'))
        stack.append(layers.Dropout(0.2))

    # Single linear unit: the network predicts one continuous value.
    stack.append(layers.Dense(1, activation='linear'))

    network = tf.keras.models.Sequential()
    for layer in stack:
        network.add(layer)

    network.compile(
        loss='mse',
        optimizer='adam',
        metrics=[tf.keras.metrics.RootMeanSquaredError()],
    )

    return network

## Prueba utilizando indicadores individuales como inputs

In [138]:
# Experiment configuration.
WINDOW = 30          # forwarded to utils.create_windowed_dataset
P_TRAIN = 0.8        # forwarded to utils.train_test_split
EPOCHS = 100
BATCH_SIZE = 32
METRICS_DIR = '../metrics'  # one training-history CSV per indicator is written here
df = df.head(500)    # only the first 500 rows are used in this experiment

columns = ['ema_12', 'ema_26', 'upper_bband', 
         'middle_bband', 'lower_bband', 'rsi', 
         'macd', 'macd_signal', 'macd_hist', 'k', 'd']

# Create the output folder up front: otherwise the first to_csv() call would
# fail only after a complete training run has already finished.
os.makedirs(METRICS_DIR, exist_ok=True)

# Train one independent model per indicator, always predicting the close price.
target_column = df['close'].to_numpy()
for column in columns:
    # A fresh model is built on every iteration; drop the Keras global state
    # left over from the previous one so it does not pile up across the loop.
    tf.keras.backend.clear_session()

    df_min = df[column]
    x, y = utils.create_windowed_dataset(df_min, target_column, WINDOW)
    x_train, x_test, y_train, y_test = utils.train_test_split(P_TRAIN, x, y)
    
    # The two trailing dimensions of x define the model's input shape.
    _, x_input, y_input = x.shape
    model = get_model(x_input, y_input)
    
    # Append the single channel axis declared in the Conv2D input_shape.
    x_train = np.expand_dims(x_train, axis=3)
    x_test = np.expand_dims(x_test, axis=3)
    
    history = model.fit(x_train, 
                    y_train, 
                    validation_data=(x_test,y_test), 
                    batch_size=BATCH_SIZE, 
                    epochs=EPOCHS,
                    verbose=0)
    
    # Persist the per-epoch metrics so the runs can be compared afterwards.
    df_history = pd.DataFrame(history.history)
    df_history.to_csv(os.path.join(METRICS_DIR, f'{column}_performance.csv'), index=False)

In [134]:
# Gather the final-epoch metrics of every saved training history into one
# table, one row per history file, ordered by validation RMSE.
METRIC_COLUMNS = ['loss', 'val_loss', 'root_mean_squared_error', 'val_root_mean_squared_error']
metrics_dir = '../metrics'

# Keep only CSV files (the folder may also contain e.g. checkpoint or OS
# metadata entries) and sort them so the result does not depend on the
# arbitrary order returned by os.listdir().
csv_files = sorted(f for f in os.listdir(metrics_dir) if f.endswith('.csv'))

# Last row of each history file == metrics after the final epoch.
last_rows = [
    pd.read_csv(os.path.join(metrics_dir, csv_file)).iloc[-1]
    for csv_file in csv_files
]

# Build the frame in a single step: DataFrame.append() was removed in
# pandas 2.0 and copied the whole frame on every call.
df_metrics = pd.DataFrame(last_rows, columns=METRIC_COLUMNS)

# Label each row with its file name minus the extension.
files = [os.path.splitext(f)[0] for f in csv_files]
df_metrics.index = files
df_metrics.sort_values(by=['val_root_mean_squared_error'])


Unnamed: 0,loss,val_loss,root_mean_squared_error,val_root_mean_squared_error
macd_performance,1365.581598,101.277877,36.953777,10.063691
macd_signal_performance,1283.728022,125.51348,35.829147,11.203279
macd_hist_performance,1304.552247,265.231765,36.118587,16.285936
middle_bband_performance,1568.428488,402.534807,39.60339,20.063271
lower_bband_performance,1401.959678,426.2295,37.442749,20.645327
rsi_performance,1289.459514,484.338536,35.909046,22.007692
k_performance,1146.817463,580.410933,33.864693,24.091719
ema_12_performance,1699.991889,717.158242,41.230957,26.77981
ema_26_performance,868.935446,1155.913964,29.477711,33.998734
upper_bband_performance,1309.53208,1209.742307,36.187458,34.781349


## Prueba utilizando todos los indicadores como inputs

In [137]:
# Train a single model that receives every indicator column at once.
df_min = df.loc[:, columns]

x, y = utils.create_windowed_dataset(df_min, df['close'].to_numpy(), WINDOW)
x_train, x_test, y_train, y_test = utils.train_test_split(P_TRAIN, x, y)

# x is three-dimensional; its two trailing dimensions define the model input.
x_input, y_input = x.shape[1:]
model = get_model(x_input, y_input)

# Append the single channel axis declared in the Conv2D input_shape.
x_train = np.expand_dims(x_train, axis=-1)
x_test = np.expand_dims(x_test, axis=-1)

history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(x_test, y_test),
)

Train on 376 samples, validate on 94 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100


Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100


Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
