# Imports

In [23]:
import numpy as np
import pandas as pd
from data.features.feature_generator import FeatureGenerator

# Creating dummy/test dataset

In [24]:
# Create hourly data with different column names
hourly_dates = pd.date_range(start='2023-01-01', periods=500, freq='1h')
hourly_data = pd.DataFrame({
    'timestamp': hourly_dates,
    'price_open': np.random.normal(100, 2, 500).cumsum(),
    'price_high': np.random.normal(101, 2, 500).cumsum(),
    'price_low': np.random.normal(99, 2, 500).cumsum(),
    'price_close': np.random.normal(100.5, 2, 500).cumsum(),
    'volume': np.random.randint(100, 500, 500)
})
print(hourly_data)

              timestamp    price_open    price_high     price_low  \
0   2023-01-01 00:00:00    100.042821    100.917904     95.492034   
1   2023-01-01 01:00:00    202.410944    202.428671    194.954177   
2   2023-01-01 02:00:00    297.890707    304.937299    295.927559   
3   2023-01-01 03:00:00    397.570245    408.536300    396.753059   
4   2023-01-01 04:00:00    500.973093    509.633398    494.090963   
..                  ...           ...           ...           ...   
495 2023-01-21 15:00:00  49593.093729  50076.411103  49117.332036   
496 2023-01-21 16:00:00  49694.818441  50178.273639  49213.794050   
497 2023-01-21 17:00:00  49794.048929  50280.814493  49313.695481   
498 2023-01-21 18:00:00  49893.876014  50380.065132  49414.614940   
499 2023-01-21 19:00:00  49992.765277  50477.912313  49512.747029   

      price_close  volume  
0      103.004401     492  
1      203.383247     399  
2      301.631292     450  
3      399.588174     286  
4      501.657793     446  
.. 

In [25]:
# Create generator with custom column names
custom_generator = FeatureGenerator(
    price_cols=['price_open', 'price_high', 'price_low', 'price_close'],
    volume_col='volume',
    timestamp_col='timestamp',
    preserve_original_case=True
)

# Generate features for hourly data
result = custom_generator.transform(hourly_data)

# Check that features were created despite different column names
expected_features = [
    'sma_5', 'ema_10', 'rsi_14', 'macd',
    'atr_14', 'volatility_10',
    'doji', 'hammer',
    'day_of_week', 'hour_of_day'
]

print(result.columns)

Index(['timestamp', 'price_open', 'price_high', 'price_low', 'price_close',
       'volume', 'day_of_week', 'hour_of_day', 'month', 'quarter',
       'asian_session', 'european_session', 'us_session', 'market_overlap',
       'day_0', 'day_1', 'day_2', 'day_3', 'day_4'],
      dtype='object')
