In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Ensure 'date' column is in datetime format
df['date'] = pd.to_datetime(df['date'])

# Extract calendar and time-of-day features from the timestamp
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day
df['hour'] = df['date'].dt.hour
df['minute'] = df['date'].dt.minute

# Standardize 'currency_name' to lowercase so that differently-cased spellings
# of the same currency collapse into a single label
df['currency_name'] = df['currency_name'].str.lower()

# Apply label encoding to the 'currency_name' column
label_encoder = LabelEncoder()
df['currency_name_encoded'] = label_encoder.fit_transform(df['currency_name'])

# Every history-based feature below (lags, rolling statistics, percentage
# changes) is computed per currency. Computing them over the whole frame
# would let one currency's prices leak into another currency's features
# wherever rows of two currencies sit next to each other.
# NOTE(review): shift/rolling/pct_change count rows, not calendar days, so
# "7" means "7 rows earlier for the same currency". This also assumes each
# currency's rows are already in chronological order -- confirm against the
# code that loads `df`.
currency_key = df['currency_name']
price_cols = ['low', 'high', 'close', 'open']
lags = [7, 14, 30, 60]
windows = [7, 14, 30, 60]

# Create lag features for 'low', 'high', 'close', and 'open' prices
# (value 7, 14, 30, 60 rows earlier within the same currency)
for lag in lags:
    for col in price_cols:
        df[f'{col}_lag_{lag}'] = (
            df[col].groupby(currency_key, sort=False).shift(lag)
        )

# Create rolling mean features for 'low', 'high', 'close', and 'open' prices
# (trailing windows of 7, 14, 30, 60 rows within the same currency).
# transform() returns values aligned to the original rows, so the frame's
# row order and index are left untouched.
for window in windows:
    for col in price_cols:
        df[f'{col}_rollmean_{window}'] = (
            df[col]
            .groupby(currency_key, sort=False)
            .transform(lambda prices: prices.rolling(window=window).mean())
        )

# Create price difference features (e.g., 'high' - 'low', 'close' - 'open').
# These are row-wise, so no grouping is needed.
df['high_low_diff'] = df['high'] - df['low']
df['close_open_diff'] = df['close'] - df['open']
df['close_high_diff'] = df['close'] - df['high']
df['close_low_diff'] = df['close'] - df['low']

# Create percentage change features for 'high', 'low', 'close', and 'open'
# prices: change relative to the previous row of the same currency, in percent
for col in ['high', 'low', 'close', 'open']:
    df[f'{col}_pct_change'] = (
        df[col].groupby(currency_key, sort=False).pct_change() * 100
    )

# Create rolling standard deviation features for 'low', 'high', 'close', and
# 'open' prices over the same trailing windows, again within each currency
for window in windows:
    for col in price_cols:
        df[f'{col}_rollstd_{window}'] = (
            df[col]
            .groupby(currency_key, sort=False)
            .transform(lambda prices: prices.rolling(window=window).std())
        )

# Drop rows with missing values in any column. Because the features are
# per-currency, the first max(lags)/max(windows) rows of *each* currency
# have incomplete history and are removed here.
df.dropna(inplace=True)

# Show the first few rows to verify
print(df.head())
