In [2]:
# --- Environment setup ---
import sys
import subprocess
import pkg_resources
import os

# Put the sibling ../scripts folder on the import path (presumably where
# myFunctions lives -- confirm against the repository layout).
sys.path.append(os.path.abspath('../scripts'))

from myFunctions import install_packages, save_table 
# Project helper; per the recorded output it checks a fixed package list and
# pip-installs whatever is missing.
# NOTE(review): the recorded output shows it trying to pip-install 're', which
# is a standard-library module, so that step fails on every run. The package
# list is defined inside myFunctions (not visible here) and should drop 're'.
install_packages()

# Third-party imports come after install_packages() so that missing packages
# are installed before they are imported.
import pandas as pd
import numpy as np
from tabulate import tabulate
import warnings
# Hide FutureWarning messages in cell outputs.
warnings.simplefilter(action='ignore', category=FutureWarning)
# Do not truncate columns when displaying wide DataFrames.
pd.set_option('display.max_columns', None)


Installing required packages: ['numpy', 'pandas', 'scikit-learn', 'joblib', 'pyarrow', 'fastparquet', 'plotly', 'matplotlib', 'seaborn', 'MetaTrader5', 'tabulate', 'optuna', 'torch', 'tqdm', 'shap', 'kaleido', 'statsmodels', 're']
numpy is already installed.
pandas is already installed.
scikit-learn is already installed.
joblib is already installed.
pyarrow is already installed.
fastparquet is already installed.
plotly is already installed.
matplotlib is already installed.
Installing seaborn...
MetaTrader5 is already installed.
tabulate is already installed.
optuna is already installed.
torch is already installed.
tqdm is already installed.
shap is already installed.
kaleido is already installed.
Installing statsmodels...
Installing re...
Error installing re: Command '['c:\\Users\\guitz\\anaconda3\\envs\\pytorch_env\\python.exe', '-m', 'pip', 'install', 're']' returned non-zero exit status 1.
All packages are verified.


In [3]:

# Data folders, relative to the notebook's working directory:
# features are read from input_dir, targets are written to output_dir.
input_dir, output_dir = (
    os.path.join('..', 'data', leaf) for leaf in ('features', 'target')
)


In [4]:
# Load the feature table from input_dir. The path is built with os.path.join,
# like input_dir itself, so separators stay consistent on every platform.
df = pd.read_parquet(os.path.join(input_dir, 'features.parquet'))
print('df shape', df.shape)


df shape (22356, 169)


In [6]:
def generate_targets(df, asset, timeframe=None):
    """
    Build next-period price and direction targets for one asset.

    For every row of the result (one per timestamp, or one per calendar day
    when ``timeframe='day'``) the targets are the open and close of the
    *following* row, plus a binary flag telling whether the next close is
    strictly higher than the current close.

    The input DataFrame is never modified.

    Parameters:
    df (pd.DataFrame): Data containing a ``time`` column and the columns
                       ``open_<asset>`` and ``close_<asset>``. Rows are used in
                       their existing order, so they are expected to be sorted
                       chronologically (this is not checked here).
    asset (str): Asset name used to build the column names.
    timeframe (str): If 'day', rows are grouped by the calendar date of
                     ``time``: the day's open is the first non-null open and
                     the day's close is the last non-null close. Any other
                     value (including None) keeps one row per timestamp.

    Returns:
    pd.DataFrame: With ``timeframe='day'`` the index is named ``day`` and the
                  columns are ``open_<asset>``, ``close_<asset>``,
                  ``close_price_target``, ``open_price_target``,
                  ``behavior_target``. Otherwise the original index is kept and
                  a leading ``time`` column is included as well.

    Raises:
    KeyError: If ``open_<asset>`` or ``close_<asset>`` is missing from ``df``.

    Notes:
    The last row has no following period, so its two price targets are NaN.
    Because a comparison against NaN is False, its ``behavior_target`` is 0;
    drop that row before training if the label must be trustworthy.

    Example:
    With `timeframe='day'`:
    | time                | open_BGI$ | close_BGI$ |
    |---------------------|-----------|------------|
    | 2024-12-01 09:00:00 | 323.57    | 322.07     |
    | 2024-12-01 17:45:00 | 313.94    | 288.87     |
    | 2024-12-02 09:00:00 | 318.39    | 287.36     |

    Returns:
    | day        | open_BGI$ | close_BGI$ | close_price_target | open_price_target | behavior_target |
    |------------|-----------|------------|--------------------|-------------------|-----------------|
    | 2024-12-01 | 323.57    | 288.87     | 287.36             | 318.39            | 0               |
    | 2024-12-02 | 318.39    | 287.36     | NaN                | NaN               | 0               |

    With `timeframe=None`:
    | time                | open_BGI$ | close_BGI$ |
    |---------------------|-----------|------------|
    | 2024-12-01 09:00:00 | 323.57    | 322.07     |
    | 2024-12-01 17:45:00 | 313.94    | 288.87     |
    | 2024-12-02 09:00:00 | 318.39    | 287.36     |

    Returns:
    | time                | open_BGI$ | close_BGI$ | close_price_target | open_price_target | behavior_target |
    |---------------------|-----------|------------|--------------------|-------------------|-----------------|
    | 2024-12-01 09:00:00 | 323.57    | 322.07     | 288.87             | 313.94            | 0               |
    | 2024-12-01 17:45:00 | 313.94    | 288.87     | 287.36             | 318.39            | 0               |
    | 2024-12-02 09:00:00 | 318.39    | 287.36     | NaN                | NaN               | 0               |
    """
    close_col = f'close_{asset}'
    open_col = f'open_{asset}'

    if close_col not in df.columns or open_col not in df.columns:
        raise KeyError(f'Columns for {asset} ({open_col}, {close_col}) not found in the DataFrame')

    if timeframe == 'day':
        # Group by an external key instead of writing a 'day' column into the
        # caller's frame: the input stays untouched and re-running is safe.
        day_key = pd.to_datetime(df['time']).dt.date.rename('day')
        by_day = df.groupby(day_key)
        # Aggregate only the two columns that are needed, not the whole frame.
        target_df = pd.concat(
            [by_day[open_col].first(), by_day[close_col].last()],
            axis=1,
        )
    else:
        target_df = df[['time', open_col, close_col]].copy()

    # Targets are the next row's values; shift(-1) leaves NaN on the last row.
    target_df['close_price_target'] = target_df[close_col].shift(-1)
    target_df['open_price_target'] = target_df[open_col].shift(-1)
    # NaN > x evaluates to False, hence the last row is labelled 0.
    target_df['behavior_target'] = (target_df['close_price_target'] > target_df[close_col]).astype(int)

    return target_df

In [7]:
# Daily targets for BGI$: one row per calendar day (first open, last close).
target_day_df = generate_targets(df, asset='BGI$', timeframe='day')

In [8]:
# Share of each class in the daily direction label (1 = next close is higher).
# The final day has no following close and is labelled 0 by generate_targets.
target_day_df.behavior_target.value_counts(normalize=True)

behavior_target
0    0.52496
1    0.47504
Name: proportion, dtype: float64

In [11]:
# Per-timestamp targets for BGI$: each row is paired with the next row's prices.
target_df = generate_targets(df, asset='BGI$')

In [12]:
# Raw time/open/close columns for BGI$ -- presumably kept to check the
# generated targets by eye against the source prices (inferred from the
# variable name; confirm).
target_validation = df[['time','open_BGI$', 'close_BGI$']]


In [15]:
# Preview the first rows of the raw price columns.
target_validation.head()


Unnamed: 0,time,open_BGI$,close_BGI$
96,2022-06-02 09:00:00,320.52,320.33
97,2022-06-02 09:15:00,319.73,319.29
98,2022-06-02 09:30:00,319.73,319.29
99,2022-06-02 09:45:00,319.98,319.34
100,2022-06-02 10:00:00,319.39,320.13


In [16]:
# Preview the first rows of the daily targets.
target_day_df.head()

Unnamed: 0_level_0,open_BGI$,close_BGI$,close_price_target,open_price_target,behavior_target
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-06-02,320.52,325.46,332.23,325.07,1
2022-06-03,325.07,332.23,331.88,331.88,0
2022-06-06,331.88,331.88,330.89,331.49,0
2022-06-07,331.49,330.89,330.8,331.88,0
2022-06-08,331.88,330.8,332.82,331.34,1


In [17]:
# Persist both target tables, creating the output folder if it does not exist.
# Paths are built with os.path.join, like output_dir itself, so separators
# stay consistent on every platform.
os.makedirs(output_dir, exist_ok=True)
target_df.to_parquet(os.path.join(output_dir, 'timestamp_target.parquet'))
target_day_df.to_parquet(os.path.join(output_dir, 'daily_target.parquet'))

In [18]:
# Export the first six rows of each target table through the project helper
# save_table (imported from myFunctions); the recorded output shows it writing
# CSV files. The titles are in Portuguese and roughly mean "Example of the
# daily / timestamp target for the close, open and market behavior".
# NOTE(review): file numbering (Tabela_N) is decided inside save_table, which
# is not visible here.
save_table(target_day_df.head(6), title = 'Exemplo do Target diário para o fechamento, abertura e comportamento do mercado')
save_table(target_df.head(6), title = 'Exemplo do Target timestamp para o fechamento, abertura e comportamento do mercado')

Table saved as CSV: ..\results\tables\csv\Tabela_3_Exemplo do Target diário para o fechamento, abertura e comportamento do mercado.csv
Table saved as CSV: ..\results\tables\csv\Tabela_2_Exemplo do Target timestamp para o fechamento, abertura e comportamento do mercado.csv
