In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LayerNormalization, MultiHeadAttention, Dropout, Dense, Reshape, Flatten
from sklearn.preprocessing import MinMaxScaler
from ib_insync import *
import requests
import datetime
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

In [2]:
util.startLoop()

In [3]:
ib = IB()
ib.connect('127.0.0.1', 7497, clientId=1)  # Ajusta según tu configuración


<IB connected to 127.0.0.1:7497 clientId=1>

In [4]:
# Define el par de divisas
symbol = Forex('GBPUSD')

# Obtén los datos de los últimos 30 días en velas de 5 minutos
end_date = datetime.datetime.now()
start_date = end_date - datetime.timedelta(days=30)

bars = ib.reqHistoricalData(
    symbol,
    endDateTime=end_date,
    durationStr="30 D",
    barSizeSetting="5 mins",
    whatToShow="MIDPOINT",
    useRTH=True
)

# Convertir los datos a DataFrame
df = pd.DataFrame(bars)
df['date'] = pd.to_datetime(df['date'])
df.set_index('date', inplace=True)
print(df)

                               open      high       low     close  volume  \
date                                                                        
2024-10-09 16:15:00-05:00  1.307050  1.307260  1.306930  1.307150    -1.0   
2024-10-09 16:20:00-05:00  1.307150  1.307185  1.307030  1.307030    -1.0   
2024-10-09 16:25:00-05:00  1.307030  1.307480  1.307020  1.307215    -1.0   
2024-10-09 16:30:00-05:00  1.307215  1.307215  1.307025  1.307080    -1.0   
2024-10-09 16:35:00-05:00  1.307080  1.307100  1.306860  1.306895    -1.0   
...                             ...       ...       ...       ...     ...   
2024-11-19 16:45:00-05:00  1.268515  1.268520  1.268385  1.268390    -1.0   
2024-11-19 16:50:00-05:00  1.268390  1.268405  1.268135  1.268230    -1.0   
2024-11-19 16:55:00-05:00  1.268230  1.268280  1.268200  1.268235    -1.0   
2024-11-19 17:15:00-05:00  1.268025  1.268130  1.268015  1.268050    -1.0   
2024-11-19 17:20:00-05:00  1.268050  1.268050  1.268050  1.268050    -1.0   

Cálculo de indicadores técnicos

In [6]:
def calculate_technical_indicators(df):
    df['SMA_10'] = df['close'].rolling(window=10).mean()
    df['SMA_50'] = df['close'].rolling(window=50).mean()
    df['RSI'] = 100 - (100 / (1 + df['close'].pct_change().add(1).rolling(window=14).mean()))
    df['MACD'] = df['close'].ewm(span=12, adjust=False).mean() - df['close'].ewm(span=26, adjust=False).mean()
    df['Volatility'] = df['close'].rolling(window=10).std()
    return df

df = calculate_technical_indicators(df)
df.dropna(inplace=True)


Obtención de datos de noticias con News API

In [7]:
NEWS_API_KEY = 'b35c56d955ee45178c703f7f79c1dfca'

def fetch_news(query, from_date, to_date):
    url = f"https://newsapi.org/v2/everything?q={query}&from={from_date}&to={to_date}&sortBy=popularity&apiKey={NEWS_API_KEY}"
    response = requests.get(url)
    data = response.json()
    return pd.DataFrame(data['articles'])

news_df = fetch_news('GBP USD forex', start_date.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d'))



Procesa las noticias para extraer su polaridad:

In [9]:
# Vectorizar y transformar los títulos de las noticias
vectorizer = CountVectorizer(stop_words='english')
tfidf = TfidfTransformer()

# Ajustar la matriz TF-IDF
news_tfidf = tfidf.fit_transform(vectorizer.fit_transform(news_df['title']))
news_scores = news_tfidf.sum(axis=1).A.flatten()

# Crear una columna 'news_sentiment' sincronizada con el índice de df
# Asegúrate de unir las noticias con las fechas en df para que los índices coincidan
news_df['date'] = pd.to_datetime(news_df['publishedAt']).dt.date  # Ajusta según el formato real de las fechas
news_sentiment = pd.DataFrame({
    'date': news_df['date'],
    'sentiment': news_scores
})

# Promediar el sentimiento diario para unirlo con los datos históricos
news_sentiment = news_sentiment.groupby('date').mean()

# Convertir el índice de news_sentiment a pandas DateTimeIndex
news_sentiment.index = pd.to_datetime(news_sentiment.index)

# Asegurar que df tiene un índice de tipo DateTimeIndex para alinear
df.index = pd.to_datetime(df.index)

# Agregar el sentimiento promedio a df usando un join por fecha
df['news_sentiment'] = df.index.map(lambda x: news_sentiment['sentiment'].get(x.date(), 0))


6. Simulación Monte Carlo

In [10]:
def monte_carlo_simulation(data, num_simulations=10):
    synthetic_data = []
    for _ in range(num_simulations):
        noise = np.random.normal(0, 0.02, size=len(data))
        synthetic_data.append(data + noise)
    return np.vstack(synthetic_data)

synthetic_prices = monte_carlo_simulation(df['close'].values, num_simulations=10)
synthetic_df = pd.DataFrame(synthetic_prices.T, columns=[f'sim_{i+1}' for i in range(10)], index=df.index)
df = pd.concat([df, synthetic_df], axis=1)


Modelo Transformer

In [11]:
# Define el modelo Transformer corregido
def create_transformer(input_shape, output_steps):
    inputs = Input(shape=input_shape)  # (sequence_length, features)
    x = LayerNormalization(epsilon=1e-6)(inputs)
    x = MultiHeadAttention(num_heads=4, key_dim=64)(x, x)
    x = Dropout(0.1)(x)
    x = LayerNormalization(epsilon=1e-6)(x)
    x = Flatten()(x)  # Aplana la salida
    x = Dense(64, activation='relu')(x)
    x = Dense(output_steps * 5, activation='linear')(x)  # Salida lineal para 24 pasos * 5 características
    outputs = Reshape((output_steps, 5))(x)  # Redimensiona a (24, 5)
    model = Model(inputs, outputs)
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

# Simulación de datos de ejemplo
# df debería ser tu DataFrame original con datos financieros
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df.values)

# Crear secuencias de entrada (X) y salida (y)
sequence_length = 60  # Últimas 60 velas como entrada
output_steps = 24     # Predicción para 24 pasos (2 horas de datos de 5 minutos)
X, y = [], []

for i in range(sequence_length, len(scaled_data) - output_steps):
    X.append(scaled_data[i - sequence_length:i])  # Ventana de entrada
    y.append(scaled_data[i:i + output_steps, :5])  # Ventana de salida (5 características)

X, y = np.array(X), np.array(y)

In [14]:
# Crear y entrenar el modelo
model = create_transformer(X.shape[1:], output_steps)

history = model.fit(
    X, y,
    epochs=25,
    batch_size=32,
    validation_split=0.1
)

Epoch 1/25
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/step - loss: 0.2002 - mae: 0.3080 - val_loss: 0.0026 - val_mae: 0.0392
Epoch 2/25
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - loss: 0.0057 - mae: 0.0580 - val_loss: 0.0019 - val_mae: 0.0333
Epoch 3/25
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - loss: 0.0025 - mae: 0.0375 - val_loss: 0.0016 - val_mae: 0.0301
Epoch 4/25
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - loss: 0.0017 - mae: 0.0309 - val_loss: 0.0012 - val_mae: 0.0253
Epoch 5/25
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - loss: 0.0014 - mae: 0.0275 - val_loss: 0.0013 - val_mae: 0.0248
Epoch 6/25
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - loss: 0.0012 - mae: 0.0259 - val_loss: 0.0015 - val_mae: 0.0252
Epoch 7/25
[1m228/228[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/

In [None]:
# Crear un DataFrame con las predicciones
# Asegúrate de que las columnas coincidan con las 5 características (open, high, low, close, sentiment)
result_df = pd.DataFrame( columns=['open', 'high', 'low', 'close', 'news_sentiment'])

# Generar las fechas para las predicciones (24 predicciones cada 5 minutos)
predicted_dates = pd.date_range(start=end_date.replace(hour=11, minute=0), periods=24, freq='5T')

# Agregar las fechas al DataFrame
result_df['date'] = predicted_dates

# Mostrar las predicciones
print(result_df)


   open high  low close news_sentiment                       date
0   NaN  NaN  NaN   NaN            NaN 2024-11-19 18:00:07.515273
1   NaN  NaN  NaN   NaN            NaN 2024-11-19 18:05:07.515273
2   NaN  NaN  NaN   NaN            NaN 2024-11-19 18:10:07.515273
3   NaN  NaN  NaN   NaN            NaN 2024-11-19 18:15:07.515273
4   NaN  NaN  NaN   NaN            NaN 2024-11-19 18:20:07.515273
5   NaN  NaN  NaN   NaN            NaN 2024-11-19 18:25:07.515273
6   NaN  NaN  NaN   NaN            NaN 2024-11-19 18:30:07.515273
7   NaN  NaN  NaN   NaN            NaN 2024-11-19 18:35:07.515273
8   NaN  NaN  NaN   NaN            NaN 2024-11-19 18:40:07.515273
9   NaN  NaN  NaN   NaN            NaN 2024-11-19 18:45:07.515273
10  NaN  NaN  NaN   NaN            NaN 2024-11-19 18:50:07.515273
11  NaN  NaN  NaN   NaN            NaN 2024-11-19 18:55:07.515273
12  NaN  NaN  NaN   NaN            NaN 2024-11-19 19:00:07.515273
13  NaN  NaN  NaN   NaN            NaN 2024-11-19 19:05:07.515273
14  NaN  N

  predicted_dates = pd.date_range(start=end_date.replace(hour=18, minute=00), periods=24, freq='5T')


In [18]:
ib.disconnect()