Libreries

In [1]:
import datetime as dt
from datetime import timedelta
import numpy as np
import pandas as pd
from ib_insync import IB, Forex, util
from sentence_transformers import SentenceTransformer
from transformers import TFAutoModel, AutoTokenizer
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, LayerNormalization, Add, MultiHeadAttention
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import requests
import tensorflow as tf




In [2]:
util.startLoop()

In [3]:
# Paso 1: Conexión a Interactive Brokers
ib = IB()
ib.connect('127.0.0.1', 7497, clientId=1)

<IB connected to 127.0.0.1:7497 clientId=1>

In [4]:
# Función para obtener datos históricos
def fetch_forex_data(symbol, duration='30 D', bar_size='5 mins'):
    contract = Forex(symbol)
    ib.qualifyContracts(contract)
    bars = ib.reqHistoricalData(
        contract,
        endDateTime='',
        durationStr=duration,
        barSizeSetting=bar_size,
        whatToShow='MIDPOINT',
        useRTH=True
    )
    df = util.df(bars)
    df['date'] = pd.to_datetime(df['date'])
    return df

forex_data = fetch_forex_data('GBPUSD')



In [5]:
# Paso 2: Obtener noticias relevantes y análisis de sentimiento con embeddings
def fetch_news_and_embeddings(api_key, query='GBP USD'):
    url = f'https://newsapi.org/v2/everything?q={query}&from={dt.datetime.now() - dt.timedelta(hours=1)}&sortBy=publishedAt&apiKey={api_key}'
    response = requests.get(url)
    news_data = response.json()
    articles = [article['title'] for article in news_data.get('articles', [])]
    
    # Modelo de embeddings para noticias
    model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
    embeddings = model.encode(articles)
    return np.mean(embeddings, axis=0) if embeddings.size else np.zeros(384)

news_api_key = 'b35c56d955ee45178c703f7f79c1dfca'
news_embeddings = fetch_news_and_embeddings(news_api_key)

In [6]:
# Paso 3: Calcular indicadores técnicos avanzados
def calculate_indicators(data):
    data['SMA'] = data['close'].rolling(window=14).mean()
    data['RSI'] = 100 - (100 / (1 + data['close'].pct_change().rolling(window=14).mean()))
    data['MACD'] = data['close'].ewm(span=12, adjust=False).mean() - data['close'].ewm(span=26, adjust=False).mean()
    data['Ichimoku'] = (data['high'].rolling(9).max() + data['low'].rolling(9).min()) / 2
    data['Momentum'] = data['close'].diff(10)
    return data

forex_data = calculate_indicators(forex_data)

In [7]:
# Paso 4: Simulación Monte Carlo Mejorada
def monte_carlo_simulation(data, num_simulations=100):
    returns = data['close'].pct_change().dropna()
    results = []
    for _ in range(num_simulations):
        simulated_prices = [data['close'].iloc[-1]]
        for _ in range(len(data)):
            simulated_prices.append(simulated_prices[-1] * (1 + np.random.choice(returns)))
        results.append(simulated_prices)
    return np.array(results)

simulated_data = monte_carlo_simulation(forex_data)

In [8]:
# Paso 5: K-means clustering con manejo de desalineación
def apply_kmeans(data, n_clusters=4):
    scaler = StandardScaler()

    # Escalar características seleccionadas
    filtered_data = data[['close', 'SMA', 'RSI', 'MACD', 'Ichimoku', 'Momentum']].dropna()
    features = scaler.fit_transform(filtered_data)

    # Aplicar K-means clustering
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    clusters = kmeans.fit_predict(features)

    # Asegurar alineación con el índice original
    data = data.copy()
    data['Cluster'] = np.nan
    data.loc[filtered_data.index, 'Cluster'] = clusters

    return data

# Aplicar K-means clustering al DataFrame
forex_data = apply_kmeans(forex_data)


In [11]:
from tensorflow.keras.layers import Reshape

In [12]:
# Paso 6: Construcción del modelo Transformer corregido y mejorado
def build_advanced_transformer(input_shape_tabular, embedding_dim):
    # Entrada de datos tabulares
    tabular_input = Input(shape=input_shape_tabular, name="tabular_input")
    tabular_projected = Dense(embedding_dim, activation='linear', name="project_tabular")(tabular_input)
    tabular_projected = Reshape((1, embedding_dim))(tabular_projected)  # Asegurar forma 3D

    # Entrada de embeddings de texto
    text_embedding = Input(shape=(1, embedding_dim), name="text_embedding")  # Directamente 3D

    # Atención cruzada
    attention_layer = MultiHeadAttention(num_heads=4, key_dim=embedding_dim, name="cross_attention")
    cross_attention = attention_layer(query=tabular_projected, value=text_embedding, key=text_embedding)

    # Residual y normalización
    x = Add()([tabular_projected, cross_attention])
    x = LayerNormalization()(x)

    # Transformaciones adicionales
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.3)(x)
    x = LayerNormalization()(x)

    # Salida final
    x = Dense(4, activation='linear', name="output")(x)  # open, high, low, close
    output_layer = Reshape((4,))(x)  # Convertir a 2D para la salida final

    # Construcción del modelo
    return Model(inputs=[tabular_input, text_embedding], outputs=output_layer)

# Simulación de datos tabulares y embeddings
num_samples = 500  # Ajusta según tu dataset real
tabular_features = ['SMA', 'RSI', 'MACD', 'Ichimoku', 'Momentum', 'Cluster']
embedding_dim = 384

# Simulación de datos tabulares
forex_data = np.random.rand(num_samples, len(tabular_features))
scaler = MinMaxScaler()
tabular_data = scaler.fit_transform(forex_data)

# Simulación de embeddings de texto
news_embeddings = np.random.rand(num_samples, embedding_dim)
news_embeddings = np.expand_dims(news_embeddings, axis=1)  # Ajustar a (None, 1, embedding_dim)

# Verificar formas
print(f"Forma de tabular_data: {tabular_data.shape}")
print(f"Forma de news_embeddings: {news_embeddings.shape}")

# Crear modelo
model = build_advanced_transformer(input_shape_tabular=(tabular_data.shape[1],), embedding_dim=embedding_dim)

# Compilar modelo
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4), loss='mse')

# Generar datos ficticios para la salida esperada `y`
y = np.random.rand(len(tabular_data), 4)  # Datos de ejemplo para entrenamiento

# Ajustar el tamaño del lote para evitar desajustes
batch_size = 32
num_samples = (len(tabular_data) // batch_size) * batch_size  # Asegurar múltiplo del batch size
tabular_data = tabular_data[:num_samples]
news_embeddings = news_embeddings[:num_samples]
y = y[:num_samples]

# Entrenar el modelo
history = model.fit(
    [tabular_data, news_embeddings], 
    y, 
    epochs=20,  # Cambia según tus necesidades
    batch_size=batch_size, 
    validation_split=0.2
)

# Verificar la arquitectura del modelo
model.summary()

Forma de tabular_data: (500, 6)
Forma de news_embeddings: (500, 1, 384)
Epoch 1/20




[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 90ms/step - loss: 2.0601 - val_loss: 0.2701
Epoch 2/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 80ms/step - loss: 0.7496 - val_loss: 0.1256
Epoch 3/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 87ms/step - loss: 0.5814 - val_loss: 0.1378
Epoch 4/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 83ms/step - loss: 0.4364 - val_loss: 0.1067
Epoch 5/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 78ms/step - loss: 0.3909 - val_loss: 0.1013
Epoch 6/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 78ms/step - loss: 0.3310 - val_loss: 0.0873
Epoch 7/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 78ms/step - loss: 0.2799 - val_loss: 0.1020
Epoch 8/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 80ms/step - loss: 0.2537 - val_loss: 0.0963
Epoch 9/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [13]:
from datetime import datetime, timedelta

In [16]:
# Paso 7: Generación de predicciones y ajuste lógico

# Asegurar que las últimas 12 muestras de `news_embeddings` coincidan con las de `tabular_data`
# Seleccionamos las últimas 12 muestras de ambos
tabular_data_sample = tabular_data[-12:]
news_embeddings_sample = news_embeddings[-12:]

# Verificar formas
print(f"Forma de tabular_data_sample: {tabular_data_sample.shape}")
print(f"Forma de news_embeddings_sample: {news_embeddings_sample.shape}")

# Generar predicciones
predictions = model.predict([tabular_data_sample, news_embeddings_sample])

# Ajuste lógico: el `open` de cada vela es el `close` de la vela anterior
for i in range(1, len(predictions)):
    predictions[i][0] = predictions[i - 1][3]

# Generar DataFrame con las predicciones
start_time = datetime.now().replace(hour=9, minute=0, second=0, microsecond=0)
prediction_times = [start_time + timedelta(minutes=5 * i) for i in range(len(predictions))]
predicted_df = pd.DataFrame(predictions, columns=['open', 'high', 'low', 'close'], index=prediction_times)
predicted_df.index.name = 'date'

# Mostrar las predicciones
print("\n--- Predicciones de Precios (DataFrame) ---")
print(predicted_df)

Forma de tabular_data_sample: (12, 6)
Forma de news_embeddings_sample: (12, 1, 384)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step

--- Predicciones de Precios (DataFrame) ---
                         open      high       low     close
date                                                       
2024-12-06 09:00:00  0.477173  0.396058  0.605836  0.561162
2024-12-06 09:05:00  0.561162  0.512960  0.631344  0.613525
2024-12-06 09:10:00  0.613525  0.508869  0.610589  0.605866
2024-12-06 09:15:00  0.605866  0.439185  0.642806  0.580281
2024-12-06 09:20:00  0.580281  0.507694  0.634457  0.593082
2024-12-06 09:25:00  0.593082  0.454068  0.594844  0.612303
2024-12-06 09:30:00  0.612303  0.487758  0.645957  0.574656
2024-12-06 09:35:00  0.574656  0.488366  0.615879  0.569754
2024-12-06 09:40:00  0.569754  0.412815  0.592222  0.587232
2024-12-06 09:45:00  0.587232  0.372919  0.596673  0.589783
2024-12-06 09:50:00  0.589783  0.437669  0.613370  0.578840
2024-12-06 09:55:00

In [17]:
ib.disconnect()