# Feature Engineering
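I added new columns that the model can use as input features:
- daily (1-day) and weekly (7-day) returns of the close price
- 7-day rolling volatility (standard deviation of the close price)
- technical indicators: RSI, simple moving averages (20, 50 and 200 days), and MACD

These features will help the model decide between Buy, Hold, and Sell.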


In [1]:
from pathlib import Path

import pandas as pd
import ta


In [3]:
# Read the raw BTCUSDT CSV into the working DataFrame `df`.
# NOTE(review): this is an absolute, machine-specific path, so the notebook
# only runs on a machine where this exact location exists.
raw_csv_path = "C:\\Users\\Convenience\\Downloads\\CRYPTO\\crypto-classifier\\data\\raw\\BTCUSDT.csv"
df = pd.read_csv(raw_csv_path)


In [4]:
# Percentage change of the close price over 1-row and 7-row horizons.
# The leading rows of each column are NaN because there is no earlier close
# to compare against.
for return_col, periods in (("return_1d", 1), ("return_7d", 7)):
    df[return_col] = df["close"].pct_change(periods=periods)

In [6]:

# Rolling dispersion of the close price: standard deviation over a 7-row
# window, expressed in price units (it is computed on `close`, not on returns).
# NOTE(review): if return volatility was intended, the window should run over
# `return_1d` instead; confirm against the modeling goal.
close_window_7 = df["close"].rolling(window=7)
df["volatility_7d"] = close_window_7.std()


In [7]:
# Momentum / trend indicators, all derived from the close price.
close_px = df["close"]

# RSI from the `ta` library, using the library's default parameters.
df["rsi"] = ta.momentum.RSIIndicator(close_px).rsi()

# Simple moving averages; each column stays NaN until its window is full.
sma_windows = {"sma_20": 20, "sma_50": 50, "sma_200": 200}
for sma_col, window in sma_windows.items():
    df[sma_col] = close_px.rolling(window).mean()

# MACD line from the `ta` library, using the library's default parameters.
df["macd"] = ta.trend.MACD(close_px).macd()


In [9]:
# Keep only rows in which every column has a value. The warm-up rows of the
# rolling/indicator columns contain NaN and are discarded here; the original
# index labels of the surviving rows are kept (the index is not reset).
df = df.dropna(axis=0, how="any")

In [10]:
# Summarize the cleaned feature frame: index range, column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 801 entries, 199 to 999
Data columns (total 20 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   open_time           801 non-null    object 
 1   open                801 non-null    float64
 2   high                801 non-null    float64
 3   low                 801 non-null    float64
 4   close               801 non-null    float64
 5   volume              801 non-null    float64
 6   close_time          801 non-null    int64  
 7   quote_asset_volume  801 non-null    float64
 8   num_trades          801 non-null    int64  
 9   taker_base_volume   801 non-null    float64
 10  taker_quote_volume  801 non-null    float64
 11  ignore              801 non-null    int64  
 12  return_1d           801 non-null    float64
 13  return_7d           801 non-null    float64
 14  volatility_7d       801 non-null    float64
 15  rsi                 801 non-null    float64
 16  sma_20     

In [13]:
# Save feature CSV
# NOTE(review): this is an absolute, machine-specific output path.
features_csv_path = Path("C:\\Users\\Convenience\\Downloads\\CRYPTO\\crypto-classifier\\data\\processed\\BTCUSDT_features.csv")

# `to_csv` does not create missing directories, so make sure the output
# folder exists first; otherwise the write fails when `processed` is absent.
features_csv_path.parent.mkdir(parents=True, exist_ok=True)

# index=False: the row labels left over after dropping NaN rows are not written.
df.to_csv(features_csv_path, index=False)
print("Feature-engineered data saved!")

Feature-engineered data saved!


In [14]:
# Preview the first rows of the engineered feature frame as a sanity check.
df.head()


Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volume,num_trades,taker_base_volume,taker_quote_volume,ignore,return_1d,return_7d,volatility_7d,rsi,sma_20,sma_50,sma_200,macd
199,2023-09-15,26522.73,26888.0,26224.0,26600.0,26227.29369,1694822399999,695203300.0,654094,12600.39115,334045900.0,0,0.002913,0.026611,490.419472,51.80965,26122.9395,27407.6106,27676.0648,-297.767044
200,2023-09-16,26599.99,26777.0,26445.0,26559.67,13960.93351,1694908799999,370889900.0,526416,6508.91352,172926800.0,0,-0.001516,0.025406,527.213662,51.29701,26145.8345,27352.5212,27693.1553,-241.089637
201,2023-09-17,26559.67,26623.25,26399.0,26527.51,12998.10277,1694995199999,344703700.0,476775,6359.89459,168675600.0,0,-0.001211,0.026542,533.383288,50.864804,26166.21,27296.0134,27707.648,-196.502272
202,2023-09-18,26527.5,27409.0,26377.35,26762.51,43000.43256,1695081599999,1158780000.0,938705,21551.71605,580912500.0,0,0.008859,0.063586,307.013676,53.920059,26118.5185,27245.6418,27724.13395,-140.58337
203,2023-09-19,26762.5,27483.57,26667.79,27210.26,36190.52187,1695167999999,981539300.0,898076,17851.15558,484061300.0,0,0.01673,0.053025,302.373358,59.134034,26114.032,27205.202,27748.41355,-59.452226
