In [2]:
# Setup cell: path configuration, dependency check, and library imports.
# NOTE(review): subprocess and pkg_resources are not referenced in the visible
# cells — confirm whether they are still needed.
import sys
import subprocess
import pkg_resources
import os

# Make the sibling ../scripts directory importable; this must run before the
# myFunctions import below.
sys.path.append(os.path.abspath('../scripts'))

# Project helpers. install_packages() is called before the third-party imports;
# its printed output lists each required package and reports it as installed.
from myFunctions import install_packages, save_table 
install_packages()

import pandas as pd
import numpy as np
from tabulate import tabulate
import warnings
# Suppress FutureWarning messages and display every column of wide DataFrames.
warnings.simplefilter(action='ignore', category=FutureWarning)
pd.set_option('display.max_columns', None)


Installing required packages: ['numpy', 'pandas', 'scikit-learn', 'joblib', 'pyarrow', 'fastparquet', 'plotly', 'matplotlib', 'MetaTrader5', 'tabulate', 'optuna', 'torch']
numpy is already installed.
pandas is already installed.
scikit-learn is already installed.
joblib is already installed.
pyarrow is already installed.
fastparquet is already installed.
plotly is already installed.
matplotlib is already installed.
MetaTrader5 is already installed.
tabulate is already installed.
optuna is already installed.
torch is already installed.
All packages are verified.


In [3]:

# Data folders, both located under ../data: 'features' is read from and
# 'target' is written to by the cells below.
input_dir, output_dir = (
    os.path.join('..', 'data', leaf) for leaf in ('features', 'target')
)


In [5]:
# Load the feature table from input_dir and report its (rows, columns) shape.
df = pd.read_parquet(f'{input_dir}/features.parquet')
print('df shape', df.shape)


df shape (22356, 169)


In [6]:
# List the column names to confirm the per-asset naming scheme
# (open_<asset>, close_<asset>, ...) that generate_targets looks up.
df.columns

Index(['time', 'open_AGFS', 'high_AGFS', 'low_AGFS', 'close_AGFS',
       'tick_volume_AGFS', 'spread_AGFS', 'real_volume_AGFS', 'open_BGI$',
       'high_BGI$',
       ...
       'EMA55_DI1$', 'EMA9_CCM$', 'EMA21_CCM$', 'EMA55_CCM$', 'EMA9_AGFS',
       'EMA21_AGFS', 'EMA55_AGFS', 'EMA9_ICF$', 'EMA21_ICF$', 'EMA55_ICF$'],
      dtype='object', length=169)

In [7]:
def generate_targets(df, asset, timeframe=None):
    """
    Build next-period prediction targets from an asset's open/close prices.

    Every row of the result is paired with the values of the row that follows
    it: ``close_price_target`` and ``open_price_target`` are the next period's
    close and open, and ``behavior_target`` is 1 when the next close is
    strictly greater than the current close, otherwise 0.

    Parameters:
    df (pd.DataFrame): Source data. Must contain a 'time' column plus
                       f'open_{asset}' and f'close_{asset}'. The frame is
                       never modified by this function.
    asset (str): Asset suffix used to locate the open/close columns.
    timeframe (str): If 'day', rows are aggregated to one row per calendar
                     day (first open and last close of the day, taken in
                     chronological order). Any other value keeps one row per
                     input row, in the input's order.

    Returns:
    pd.DataFrame: With ``timeframe='day'`` the index is named 'day' and the
                  columns are [open, close, close_price_target,
                  open_price_target, behavior_target]. Otherwise the original
                  index is kept and a leading 'time' column is included.

    Raises:
    KeyError: If the open/close columns for ``asset`` (or, implicitly, the
              'time' column) are missing from ``df``.

    Notes:
    The last row has no following period, so both price targets are NaN.
    Because a comparison against NaN is False, ``behavior_target`` is 0 on
    that row; drop it before training if that label would be misleading.

    Example input:
    | time                | open_BGI$ | close_BGI$ |
    |---------------------|-----------|------------|
    | 2024-12-01 09:00:00 | 323.57    | 322.07     |
    | 2024-12-01 17:45:00 | 313.94    | 288.87     |
    | 2024-12-02 09:00:00 | 318.39    | 287.36     |

    Result with `timeframe='day'`:
    | day        | open_BGI$ | close_BGI$ | close_price_target | open_price_target | behavior_target |
    |------------|-----------|------------|--------------------|-------------------|-----------------|
    | 2024-12-01 | 323.57    | 288.87     | 287.36             | 318.39            | 0               |
    | 2024-12-02 | 318.39    | 287.36     | NaN                | NaN               | 0               |

    Result with `timeframe=None`:
    | time                | open_BGI$ | close_BGI$ | close_price_target | open_price_target | behavior_target |
    |---------------------|-----------|------------|--------------------|-------------------|-----------------|
    | 2024-12-01 09:00:00 | 323.57    | 322.07     | 288.87             | 313.94            | 0               |
    | 2024-12-01 17:45:00 | 313.94    | 288.87     | 287.36             | 318.39            | 0               |
    | 2024-12-02 09:00:00 | 318.39    | 287.36     | NaN                | NaN               | 0               |
    """
    close_col = f'close_{asset}'
    open_col = f'open_{asset}'

    if close_col not in df.columns or open_col not in df.columns:
        raise KeyError(f'Columns for {asset} ({open_col}, {close_col}) not found in the DataFrame')

    if timeframe == 'day':
        timestamps = pd.to_datetime(df['time'])
        # Work on a derived frame so the caller's DataFrame does not gain a
        # 'day' column, and order it chronologically so that first()/last()
        # really are the day's opening and closing bars. The stable sort keeps
        # the input order for rows sharing the same timestamp.
        prices = (
            df[[open_col, close_col]]
            .assign(_time=timestamps, day=timestamps.dt.date)
            .sort_values('_time', kind='stable')
        )
        by_day = prices.groupby('day')
        target_df = pd.concat(
            [by_day[[open_col]].first(), by_day[[close_col]].last()],
            axis=1,
        )
    else:
        target_df = df[['time', open_col, close_col]].copy()

    # Targets are the next row's values; the final row therefore gets NaN
    # prices and, since NaN > x is False, a behavior_target of 0.
    target_df['close_price_target'] = target_df[close_col].shift(-1)
    target_df['open_price_target'] = target_df[open_col].shift(-1)
    target_df['behavior_target'] = (target_df['close_price_target'] > target_df[close_col]).astype(int)

    return target_df

In [8]:
# Daily targets for the BGI$ asset: one row per calendar day.
target_day_df = generate_targets(df, asset='BGI$', timeframe='day')

In [9]:
# Preview the first daily rows to sanity-check the shifted targets.
target_day_df.head()

Unnamed: 0_level_0,open_BGI$,close_BGI$,close_price_target,open_price_target,behavior_target
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-06-02,320.52,325.46,332.23,325.07,1
2022-06-03,325.07,332.23,331.88,331.88,0
2022-06-06,331.88,331.88,330.89,331.49,0
2022-06-07,331.49,330.89,330.8,331.88,0
2022-06-08,331.88,330.8,332.82,331.34,1


In [18]:
# Per-timestamp targets for the BGI$ asset (timeframe left at its default of None).
target_df = generate_targets(df, asset='BGI$')

In [19]:
# Preview the first per-timestamp rows to sanity-check the shifted targets.
target_df.head()

Unnamed: 0,time,open_BGI$,close_BGI$,close_price_target,open_price_target,behavior_target
96,2022-06-02 09:00:00,313.94,313.75,312.73,313.17,0
97,2022-06-02 09:15:00,313.17,312.73,312.73,313.17,0
98,2022-06-02 09:30:00,313.17,312.73,312.78,313.41,1
99,2022-06-02 09:45:00,313.41,312.78,313.56,312.83,1
100,2022-06-02 10:00:00,312.83,313.56,313.94,313.89,1


In [20]:
# Create the output folder if needed, then persist both target tables as parquet.
os.makedirs(output_dir, exist_ok=True)
target_df.to_parquet(f'{output_dir}/timestamp_target.parquet')
target_day_df.to_parquet(f'{output_dir}/daily_target.parquet')

In [21]:
# Export the first six rows of each target table through the project's
# save_table helper; per the cell output it writes numbered CSV files under
# ../results/tables/csv. The Portuguese titles are runtime strings (roughly
# "Example of the daily / timestamp target for close, open and market behavior").
save_table(target_day_df.head(6), title = 'Exemplo do Target diário para o fechamento, abertura e comportamento do mercado')
save_table(target_df.head(6), title = 'Exemplo do Target timestamp para o fechamento, abertura e comportamento do mercado')

Tabela saved as CSV: ../results/tables/csv\Tabela_2_Exemplo do Target diário para o fechamento, abertura e comportamento do mercado.csv
Tabela saved as CSV: ../results/tables/csv\Tabela_3_Exemplo do Target timestamp para o fechamento, abertura e comportamento do mercado.csv
