# Imports

In [2]:
import numpy as np
import pandas as pd
from data.features.feature_generator import FeatureGenerator

# Creating dummy/test dataset

In [8]:
# Calendar axis: 250 consecutive daily timestamps beginning 2023-01-01.
dates = pd.date_range(start='2023-01-01', periods=250, freq='1D')

# Seed the legacy global NumPy RNG so every run draws the same numbers.
np.random.seed(42)

# Each price column is the running total of 250 normal steps (std 5) centred
# on the listed mean. The columns are drawn in this exact sequence, and before
# 'Volume', so the seeded random stream is consumed in a fixed order.
price_walks = {
    column_name: np.random.normal(mean_step, 5, len(dates)).cumsum()
    for column_name, mean_step in (
        ('open', 100),
        ('High', 102),
        ('Low', 98),
        ('Close', 101),
    )
}

# Assemble the frame; 'Volume' holds random integers in [1000, 5000).
sample_data = pd.DataFrame({
    'date': dates,
    **price_walks,
    'Volume': np.random.randint(1000, 5000, len(dates)),
})

print(sample_data)

          date          open          High           Low         Close  Volume
0   2023-01-01    102.483571     95.695580    102.630888     96.383834    1886
1   2023-01-02    201.792249    202.284890    210.177971    190.625411    1427
2   2023-01-03    305.030692    314.895671    301.185133    286.746044    2328
3   2023-01-04    412.645841    422.057997    401.999979    393.014253    2895
4   2023-01-05    511.475074    516.461147    496.746766    489.267259    2994
..         ...           ...           ...           ...           ...     ...
245 2023-09-03  24591.760563  25129.553631  24036.762411  24997.537191    2184
246 2023-09-04  24689.385836  25226.367400  24135.696245  25107.525623    2008
247 2023-09-05  24786.119190  25327.415707  24229.919331  25211.729838    3738
248 2023-09-06  24894.946461  25425.037615  24324.861742  25309.873943    2229
249 2023-09-07  24996.971370  25520.123617  24415.828436  25413.736857    2035

[250 rows x 6 columns]


In [10]:
def _fit_and_show(preserve_case):
    """Run FeatureGenerator over sample_data with the given case flag.

    Builds a generator with ``preserve_original_case=preserve_case``, calls
    its ``transform`` on ``sample_data``, prints the ``columns`` of the
    result, and returns ``(generator, result)``.
    """
    generator = FeatureGenerator(preserve_original_case=preserve_case)
    transformed = generator.transform(sample_data)
    print(transformed.columns)
    return generator, transformed


# Case-preserving run: read the printed index to see whether the input
# column names come back unchanged.
generator_preserve, result_preserve = _fit_and_show(True)

# Non-preserving run: read the printed index to see whether the column
# names come back lowercased.
# NOTE(review): the output recorded for this run still shows 'High', 'Low',
# 'Close' and 'Volume' capitalised - confirm this is the intended behaviour.
generator_no_preserve, result_no_preserve = _fit_and_show(False)

Index(['date', 'open', 'High', 'Low', 'Close', 'Volume', 'sma_5', 'ema_5',
       'sma_10', 'ema_10', 'sma_20', 'ema_20', 'sma_50', 'ema_50', 'sma_200',
       'ema_200', 'rsi_14', 'macd', 'macd_signal', 'macd_hist',
       'bollinger_upper', 'bollinger_lower', 'bollinger_middle', 'roc_1',
       'roc_5', 'roc_10', 'roc_20', 'stoch_k', 'stoch_d', 'atr_14',
       'volatility_5', 'volatility_10', 'volatility_20', 'volatility_30',
       'gk_volatility', 'normalized_atr', 'doji', 'hammer',
       'bullish_engulfing', 'bearish_engulfing', 'resistance_10', 'support_10',
       'dist_to_resistance_10', 'dist_to_support_10', 'resistance_20',
       'support_20', 'dist_to_resistance_20', 'dist_to_support_20',
       'price_accel', 'day_of_week', 'hour_of_day', 'month', 'quarter',
       'asian_session', 'european_session', 'us_session', 'market_overlap',
       'day_0', 'day_1', 'day_2', 'day_3', 'day_4'],
      dtype='object')
Index(['date', 'open', 'High', 'Low', 'Close', 'Volume', 'day_of_