In [1]:
import pandas as pd
import os
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Attention, Conv1D, Flatten, Dropout

In [2]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        tf.config.set_visible_devices(gpus[0], 'GPU')  # Utilisez uniquement le premier GPU
        print("GPU is being used")
    except RuntimeError as e:
        print(e)
else:
    print("No GPU available")

No GPU available


In [3]:
F_LIST_TO_NORM = ['volume', 'rsi', 'macd', 'macd_signal', 'macd_hist', 'sma_50', 'bbl', 'bbm', 'bbu', 'bbb', 'bbp']
F_LIST = ['open','high','low','close','volume', 'rsi', 'macd', 'macd_signal', 'macd_hist', 'sma_50', 'bbl', 'bbm', 'bbu', 'bbb', 'bbp']
NB_DATA = 1_000_000
WIN_LEN = 100
PRED_LEN = 10
LEARNING_RATE = 0.001

In [54]:
def preprocess_data():
    print('-- Start preprocess_data')
    data = pd.read_csv(os.path.join('data', 'ohlc_and_indicators_1s.csv')).tail(NB_DATA)
    data.fillna(method='ffill', inplace=True)
    scaler = MinMaxScaler()
    features = F_LIST_TO_NORM
    data[features] = scaler.fit_transform(data[features])
    data.drop(columns=['date'], inplace=True)
    print('-- End preprocess_data')
    return data

In [55]:
data = preprocess_data()
print(data)

-- Start preprocess_data


  data.fillna(method='ffill', inplace=True)


-- End preprocess_data
               open       high        low      close    volume       rsi  \
48999950  106100.56  106100.56  106100.56  106100.56  0.000490  0.560402   
48999951  106100.56  106121.10  106100.55  106121.10  0.005376  0.779712   
48999952  106121.10  106139.99  106121.10  106139.99  0.009149  0.852574   
48999953  106139.99  106149.99  106139.99  106149.98  0.003071  0.875948   
48999954  106149.99  106165.24  106149.98  106165.24  0.003148  0.901614   
...             ...        ...        ...        ...       ...       ...   
49999945   94311.76   94311.76   94311.76   94311.76  0.000001  0.265609   
49999946   94311.76   94311.76   94311.76   94311.76  0.000003  0.265609   
49999947   94311.75   94311.76   94311.75   94311.76  0.000009  0.265609   
49999948   94311.76   94311.76   94311.75   94311.75  0.000037  0.265537   
49999949   94311.76   94311.76   94311.76   94311.76  0.000002  0.265751   

              macd  macd_signal  macd_hist    sma_50       bbl  

In [58]:
def create_sequences(data, input_length=100, output_length=10):
    sequences = []
    labels = []
    for i in range(len(data) - input_length - output_length):
        seq = data.iloc[i:i+input_length].values
        label = data.iloc[i+input_length:i+input_length+output_length]['close'].values
        sequences.append(seq)
        labels.append(label)
    return np.array(sequences), np.array(labels)

In [59]:
X, y  = create_sequences(data, WIN_LEN, PRED_LEN)

ValueError: cannot reshape array of size 1499835000 into shape (999890,100,1)

In [60]:
print(f"X shape: {X.shape}, y shape: {y.shape}")
print(f"First sequence shape: {X[0].shape}, First label shape: {y[0].shape}")

X shape: (999890, 100, 15), y shape: (999890, 10)
First sequence shape: (100, 15), First label shape: (10,)


In [61]:
model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(WIN_LEN, len(F_LIST))),
    GRU(256, return_sequences=True),
    GRU(128),
    Dense(256, activation='relu'),
    Dropout(0.2),
    Dense(512, activation='relu'),
    Dropout(0.2),
    Dense(256, activation='relu'),
    Dropout(0.2),
    Dense(PRED_LEN)
])

optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [62]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
lr_callback = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.1, patience=3, verbose=1
    )

history =  model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    callbacks=[lr_callback, early_stopping],
    batch_size=256)

Epoch 1/10
[1m  88/3125[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m47:15[0m 934ms/step - loss: 8478554112.0000 - mae: 90256.9766

KeyboardInterrupt: 