In [13]:
import pandas as pd

# Locations of the cleaned input and the engineered-feature output.
INPUT_PATH = "../data/processed/cleaned_crypto_price.csv"
OUTPUT_PATH = "../data/processed/features.csv"

# Columns that must be present (non-null) for a row to be usable.
REQUIRED_COLUMNS = ['price', '1h', '24h', '7d', '24h_volume', 'mkt_cap']

# Lower-cased ticker symbols flagged as stablecoins.
STABLECOIN_SYMBOLS = ['usdt', 'usdc', 'busd', 'dai', 'tusd']

# Columns moved to the front of the output, in this order.
FRONT_COLUMNS = ['date', 'coin', 'symbol', 'price', 'price_change_1h',
                 'price_change_24h', 'price_change_7d', 'volatility_24h',
                 'market_cap_to_volume_ratio', 'is_stablecoin']


def build_features(df):
    """Return a new DataFrame of engineered features derived from *df*.

    The input frame is not modified. Rows with a missing value in any of
    ``REQUIRED_COLUMNS`` are dropped, the derived columns are added, the
    ``symbol`` column is one-hot encoded (the original column is kept), the
    raw ``1h``/``24h``/``7d`` columns are removed, and the columns listed in
    ``FRONT_COLUMNS`` are moved to the front.

    Args:
        df: Frame containing at least ``date``, ``coin``, ``symbol`` and
            every column in ``REQUIRED_COLUMNS``.

    Returns:
        A new DataFrame with the engineered feature columns.

    Raises:
        KeyError: If a required column is missing from *df*.
    """
    # Work on a copy so the caller's frame is never mutated.
    features = df.dropna(subset=REQUIRED_COLUMNS).copy()

    # Percentage columns scaled by the current price.
    # NOTE(review): this treats the percentage as relative to the *current*
    # price; if '1h'/'24h'/'7d' are relative to the earlier price, this is
    # only an approximation of the absolute change -- confirm intent.
    for window in ('1h', '24h', '7d'):
        features[f'price_change_{window}'] = (
            features['price'] * features[window] / 100
        )

    # Sample standard deviation across the two columns '1h' and '24h'.
    # NOTE(review): this is a spread between two readings, not a time-series
    # volatility -- confirm the name matches what downstream code expects.
    features['volatility_24h'] = features[['1h', '24h']].std(axis=1)

    # Mask zero volume so the ratio is NaN instead of +/-inf, which would
    # otherwise be written to the CSV and poison downstream numeric code.
    volume = features['24h_volume']
    features['market_cap_to_volume_ratio'] = (
        features['mkt_cap'] / volume.where(volume != 0)
    )

    features['is_stablecoin'] = (
        features['symbol'].str.lower().isin(STABLECOIN_SYMBOLS).astype(int)
    )

    # One-hot encode 'symbol' alongside the existing columns.
    symbol_dummies = pd.get_dummies(features['symbol'], prefix='symbol')
    features = pd.concat([features, symbol_dummies], axis=1)

    # The raw % change columns are dropped once the derived columns exist.
    features = features.drop(columns=['1h', '24h', '7d'])

    # Reorder for clarity: key columns first, everything else after.
    remaining_cols = [col for col in features.columns
                      if col not in FRONT_COLUMNS]
    return features[FRONT_COLUMNS + remaining_cols]


if __name__ == "__main__":
    # Load the cleaned data, engineer features, and save to CSV.
    df = build_features(pd.read_csv(INPUT_PATH))

    output_path = OUTPUT_PATH
    df.to_csv(output_path, index=False)

    print(f"✅ Features saved to {output_path}")


✅ Features saved to ../data/processed/features.csv
