In [6]:
import tech_lib as tech
%load_ext autoreload
%autoreload 2
import itertools
import pandas as pd
import winsound
import os
from datetime import datetime
import json
import torch
# Report the CUDA setup seen by PyTorch.
print(f'Cuda available: {torch.cuda.is_available()}')
print(f'Cuda version: {torch.version.cuda}')
# get_device_name() raises when no CUDA device is usable, so only query it
# when CUDA is reported as available; otherwise the whole cell would fail.
if torch.cuda.is_available():
    print(f'Cuda device name: {torch.cuda.get_device_name(0)}')
else:
    print('Cuda device name: none (running on CPU)')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Cuda available: True
Cuda version: 12.6
Cuda device name: NVIDIA GeForce RTX 4060


In [7]:
# Load the dataset for the selected ticker / frequency / date range
ticker = 'BTCUSDT'
freq = '1m'
start_date_str = datetime(2024, 1, 15).strftime('%Y%m%d')
end_date_str = datetime(2025, 1, 15).strftime('%Y%m%d')

# Build the path with os.path.join: the former '\\dataset\\\{ticker}...' literal
# relied on the invalid escape sequence '\{' and on hard-coded Windows separators.
dataset_path = os.path.join(
    os.getcwd(),
    'dataset',
    f'{ticker}-{freq}-{start_date_str}-{end_date_str}.csv',
)
dataset = pd.read_csv(dataset_path, index_col='Open Time')

# Targets
# Close_target: close of the next row.
dataset['Close_target'] = dataset['Close'].shift(-1)
# DirVar_target: 1 when the next close is strictly above the current close, else 0.
# The last row has no successor, so both targets are NaN there and that row is
# removed by the dropna() at the end of this cell.
dataset['DirVar_target'] = (dataset['Close'].diff(1) > 0).astype(int).shift(-1)

# Technical indicators computed by the project-local `tech_lib` helpers
# (presumably Chaikin Money Flow, volume-weighted moving average and
# on-balance volume -- TODO confirm against tech_lib).
window = 60
dataset[f'cmf_{window}'] = tech.cmf(
    high=dataset.High,
    low=dataset.Low,
    close=dataset.Close,
    volume=dataset.Volume,
    window=window
)

# vwma is computed for several window lengths
for window in [60, 120]:
    dataset[f'vwma_{window}'] = tech.vwma(
        close=dataset.Close,
        volume=dataset.Volume,
        window=window
    )

dataset['obv'] = tech.obv(dataset.Close, dataset.Volume)

# Drop every row that still contains a NaN in any column
dataset = dataset.dropna()

In [8]:
# Hyper-parameter grid: each key maps to the list of values to try. The
# cartesian product of all the lists is what gets trained.
param_grid = {
    "period_size"   : [10000],
    "hidden_size"   : [8],
    "num_layers"    : [2],
    "dropout"       : [0.15],
    "lr"            : [0.001],
    "batch_size"    : [128],
    "epochs"        : [3],
    "num_workers"   : [4],
    "delta"         : [1],
    "weight_decay"  : [1e-4],
    "train_size"    : [0.7],
    "val_size"      : [0.15],
    "test_size"     : [0.15],
    "seq_size"      : [60]
}

# Candidate input columns of `dataset`
inputs = [
    'High', 'Low', 'Open', 'Close',
    'Volume', 'obv', 'cmf_60',
    'vwma_60', 'vwma_120'
]

# Smallest feature-subset size to evaluate (name kept as-is for compatibility).
# With len(inputs) only the full feature set is trained, i.e. the subset search
# is disabled; lower it to also try every subset of at least that many features.
# A value above len(inputs) would silently produce no combination at all, hence
# the explicit range check.
min_imputs = len(inputs)
if not 1 <= min_imputs <= len(inputs):
    raise ValueError(
        f'min_imputs must be between 1 and {len(inputs)}, got {min_imputs}'
    )

# Name of the target column
outputs = 'Close_target'

# One tuple per hyper-parameter combination, in the key order of param_grid
param_combinations = list(itertools.product(*param_grid.values()))

# One tuple per feature subset, from size min_imputs up to the full set
input_combinations = list(
    itertools.chain.from_iterable(
        itertools.combinations(inputs, r)
        for r in range(min_imputs, len(inputs) + 1)
    )
)

In [11]:
# Grid search: train and evaluate one GRU model per (hyper-parameters, feature
# subset) pair, then store its metadata, losses, predictions and model on disk.
#
# `%load_ext autoreload` / `%autoreload 2` are not repeated here: the first cell
# already enables them, and loading the extension a second time only prints an
# "already loaded" notice.
from gru_lib import GRUModel, Pipeline

total_runs = len(param_combinations) * len(input_combinations)
counter = 0  # number of completed runs
for param_comb in param_combinations:
    # Map this combination back to named hyper-parameters (same for every
    # feature subset, so it is built once per outer iteration)
    params = dict(zip(param_grid.keys(), param_comb))

    for input_comb in input_combinations:
        launch_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(f'------------- Launch Time: {launch_time:<10} ------------- Progress: {counter:,}/{total_runs:,} -------------')

        # Use a loop-local name so the notebook-level `inputs` list (the source
        # of `input_combinations`) is not overwritten by the loop
        selected_inputs = list(input_comb)

        # Keep only the most recent `period_size` rows
        dataset_subset = dataset.iloc[-params["period_size"]:]
        dataset_subset = dataset_subset.dropna()

        # Build the model
        model = GRUModel(
            input_size=len(selected_inputs),
            output_size=1,
            hidden_size=params["hidden_size"],
            num_layers=params["num_layers"],
            dropout=params["dropout"]
        )

        # Build the pipeline
        pipeline = Pipeline(
            model=model,
            dataset=dataset_subset,
            inputs=selected_inputs,
            outputs=outputs,
            ticker=ticker,
            freq=freq
        )

        # Set the training hyper-parameters
        pipeline.hyper_param(
            lr=params["lr"],
            batch_size=params["batch_size"],
            epochs=params["epochs"],
            num_workers=params["num_workers"],
            delta=params["delta"],
            weight_decay=params["weight_decay"]
        )

        # Preprocess the data
        pipeline.preprocess(
            train_size=params["train_size"],
            val_size=params["val_size"],
            test_size=params["test_size"],
            seq_size=params["seq_size"]
        )

        # Train the model and evaluate the results
        pipeline.train()
        print(f"\nID: {str(pipeline.id)}")
        pipeline.eval()

        # Create the per-run output folder: <cwd>/model/<pipeline id>
        folder_path = os.path.join(os.getcwd(), 'model', str(pipeline.id))
        os.makedirs(folder_path, exist_ok=True)

        # NOTE(review): 'metada.json' looks like a typo for 'metadata.json'; the
        # name is kept because existing runs / readers may rely on it -- confirm
        # before renaming.
        metadata = pipeline.metadata()
        with open(os.path.join(folder_path, 'metada.json'), 'w', encoding='utf-8') as f:
            json.dump(metadata, f, ensure_ascii=False, indent=4)

        loss = pipeline.loss()
        loss.to_csv(os.path.join(folder_path, 'loss.csv'), index=False)

        pred = pipeline.pred()
        pred.to_csv(os.path.join(folder_path, 'pred.csv'), index=False)

        # TODO: revisit -- save the weights and the model definition separately
        # instead of serialising the whole model object
        torch.save(pipeline.model, os.path.join(folder_path, 'model.pth'))

        print("\n")
        counter += 1

# Audible notification once every run has finished
winsound.PlaySound("SystemExit", winsound.SND_ALIAS)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
------------- Launch Time: 2025-01-26 23:18:42 ------------- Progress: 0/1 -------------
Epoch 1/3, Train Loss: 90,193.467652, Validation Loss: 89,482.664866, Time: 0:00:01.577580
Epoch 2/3, Train Loss: 89,481.785584, Validation Loss: 89,470.215303, Time: 0:00:01.582819
Epoch 3/3, Train Loss: 89,471.027632, Validation Loss: 89,460.639213, Time: 0:00:01.606287

ID: d0af0365-d30c-49ce-8dde-ae200baa7717
Time elapsed: 4.7687
Train Loss: 89,471.027632
Val Loss: 89,460.639213
Test Loss: 89,460.863042
R^2 : 0.5503
R^2 ajusté : 0.5476
MAPE : 0.0064
MSE : 827.954482
RMSE : 28.774198




In [10]:
# Stand-alone re-save of the most recently trained model. It repeats the save
# done inside the training loop and needs the `pipeline` and `folder_path`
# variables left behind by that loop, so it only works after the training cell
# has run. The path is built with os.path.join instead of a hard-coded '\\'.
torch.save(pipeline.model, os.path.join(folder_path, 'model.pth'))