# In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import joblib

def create_and_save_time_scaler(data_path, scaler_path, year):
    """Fit a StandardScaler on one year's calendar features and save it.

    Reads the CSV at ``data_path``, keeps the rows whose ``time`` value falls
    in ``year``, derives three features from those timestamps (day of week,
    hour, minute), fits a ``StandardScaler`` on them and serializes the
    fitted scaler to ``scaler_path`` with ``joblib.dump``.

    Args:
        data_path: Path of a CSV file with a ``time`` column that pandas can
            parse as datetimes.
        scaler_path: Destination file for the serialized scaler.
        year: Calendar year used to select the rows to fit on.

    Returns:
        The fitted ``StandardScaler``.

    Raises:
        ValueError: If no row of the CSV falls in ``year``. Nothing is
            written to ``scaler_path`` in that case.
    """
    # Index by timestamp so the datetime accessors (.year, .hour, ...) are
    # available directly on the index.
    ohlcv_data = pd.read_csv(data_path, parse_dates=["time"]).set_index("time")

    # Only the timestamps are needed from here on; the price columns are unused.
    timestamps = ohlcv_data.index[ohlcv_data.index.year == year]

    # Fail early with an actionable message instead of letting the scaler
    # reject an empty frame with a generic "0 sample(s)" error.
    if timestamps.empty:
        raise ValueError(
            f"No rows found for year {year} in {data_path!r}; "
            "cannot fit the time scaler."
        )

    # Column names and order are recorded by the scaler at fit time, so they
    # must match what callers pass to transform() later.
    time_features = pd.DataFrame({
        'day_of_week': timestamps.dayofweek,
        'hour': timestamps.hour,
        'minute': timestamps.minute,
    })

    ts_scaler = StandardScaler()
    ts_scaler.fit(time_features)

    print(ts_scaler)

    # Persist only after a successful fit so a failed run leaves no file behind.
    joblib.dump(ts_scaler, scaler_path)

    return ts_scaler

# Usage: fit the time-feature scaler on one year of GBP/USD M15 candles and
# save it in the current working directory.
data_path = "/projects/genomic-ml/da2343/ml_project_2/data/gen_oanda_data/GBP_USD_M15_raw_data.csv"
year = 2018  # Specify the year you want to process
# Derive the file name from `year` so the saved scaler is always labelled
# with the year it was actually fitted on.
scaler_path = f"time_standard_scaler_{year}.joblib"

ts_scaler = create_and_save_time_scaler(data_path, scaler_path, year)

# The fitted scaler is now available as `ts_scaler` and on disk at
# `scaler_path`; a main script can reload it with joblib.load(scaler_path).