## Calculate Technical Features

In [1]:
import pandas as pd
import numpy as np

df = pd.read_csv("Cleaned_Crypto.csv", parse_dates=["datetime"])
grouped = df.groupby('symbol')

### High-low

In [2]:
# High–Low Spread 
df['high_low_spread'] = df['high'] - df['low']

### Returns (r_{t-1}, ..., r{t-5})

In [3]:
# Returns (r_{t−1},...,r_{t−5})
for lag in range(1, 6):
    df[f'return_lag_{lag}'] = grouped['close'].pct_change(periods=lag)

### Moving Average

In [4]:
# Moving Average (5-hour)
df['ma_5'] = grouped['close'].transform(lambda x: x.rolling(window=5).mean())

### Σₖ = rₜ₋₁ + ... + rₜ₋ₖ (k=3,5) and Σ₅ − Σ₃

In [5]:
# Σₖ = rₜ₋₁ + ... + rₜ₋ₖ (k=3,5) and Σ₅ − Σ₃
df['sum_ret_3'] = df[[f'return_lag_{i}' for i in range(1, 4)]].sum(axis=1)
df['sum_ret_5'] = df[[f'return_lag_{i}' for i in range(1, 6)]].sum(axis=1)
df['sum_diff_5_3'] = df['sum_ret_5'] - df['sum_ret_3']

### Relative Strength Index

In [6]:
# Relative Strength Index (RSI)
def compute_rsi(series, window):
    delta = series.diff()
    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)
    avg_gain = gain.rolling(window).mean()
    avg_loss = loss.rolling(window).mean()
    rs = avg_gain / avg_loss
    return 100 - (100 / (1 + rs))

df['rsi_6'] = grouped['close'].transform(lambda x: compute_rsi(x, 6))
df['rsi_14'] = grouped['close'].transform(lambda x: compute_rsi(x, 14))

### Moving Average Convergence Divergence

In [7]:
# MACD
df['ema_12'] = grouped['close'].transform(lambda x: x.ewm(span=12, adjust=False).mean())
df['ema_26'] = grouped['close'].transform(lambda x: x.ewm(span=26, adjust=False).mean())
df['macd'] = df['ema_12'] - df['ema_26']
df['macd_signal'] = grouped['macd'].transform(lambda x: x.ewm(span=9, adjust=False).mean())
df['macd_hist'] = df['macd'] - df['macd_signal']

### Rate of Change

In [8]:
# Rate of Change (ROC)
df['roc_9'] = grouped['close'].transform(lambda x: x.pct_change(periods=9))
df['roc_14'] = grouped['close'].transform(lambda x: x.pct_change(periods=14))

### Exponential Weighted Moving Average

In [9]:
# Exponential Weighted Moving Average (EWMA)
df['ewma_0.9'] = grouped['close'].transform(lambda x: x.ewm(alpha=0.9, adjust=False).mean())

### Momentum Indicator

In [10]:
# Momentum Indicator
df['momentum_5'] = grouped['close'].transform(lambda x: x - x.shift(5))

### Average True Range

In [12]:
# Average True Range (ATR)
df['prev_close'] = grouped['close'].shift(1)
tr1 = df['high'] - df['low']
tr2 = (df['high'] - df['prev_close']).abs()
tr3 = (df['low'] - df['prev_close']).abs()
df['tr'] = np.maximum.reduce([tr1, tr2, tr3])
df['atr_5'] = grouped['tr'].transform(lambda x: x.rolling(window=5).mean())

### Williams’ %R

In [13]:
# Williams’ %R
high_14 = grouped['high'].transform(lambda x: x.rolling(14).max())
low_14 = grouped['low'].transform(lambda x: x.rolling(14).min())
df['williams_r'] = -100 * (high_14 - df['close']) / (high_14 - low_14)

### Aroon Oscillator

In [14]:
# Aroon Oscillator
def aroon_oscillator(highs, lows, window=14):
    def aroon_up(series):
        return series.rolling(window).apply(lambda x: 100 * (window - np.argmax(x[::-1])) / window, raw=True)
    def aroon_down(series):
        return series.rolling(window).apply(lambda x: 100 * (window - np.argmin(x[::-1])) / window, raw=True)
    return aroon_up(highs) - aroon_down(lows)

df['aroon_oscillator_high'] = grouped['high'].transform(lambda x: aroon_oscillator(x, x))
df['aroon_oscillator_low'] = grouped['low'].transform(lambda x: aroon_oscillator(x, x))

### Commodity Channel Index

In [15]:
# Commodity Channel Index (CCI)
df['tp'] = (df['high'] + df['low'] + df['close']) / 3
df['tp_sma_14'] = grouped['tp'].transform(lambda x: x.rolling(14).mean())
df['tp_std_14'] = grouped['tp'].transform(lambda x: x.rolling(14).std())
df['cci'] = (df['tp'] - df['tp_sma_14']) / (0.015 * df['tp_std_14'])

### Double EMA

In [16]:
# Double EMA (DEMA)
ema = grouped['close'].transform(lambda x: x.ewm(span=10, adjust=False).mean())
ema_of_ema = ema.ewm(span=10, adjust=False).mean()
df['dema_10'] = 2 * ema - ema_of_ema

### Final Cleanup

In [17]:
# Final cleanup
df.drop(columns=['ema_12', 'ema_26', 'tp', 'tp_sma_14', 'tp_std_14'], inplace=True)
df.dropna(inplace=True)
df.reset_index(drop=True, inplace=True)

df.to_csv("crypto_hourly_technical_features.csv", index=False)

df.head()

Unnamed: 0,id,open,high,low,close,symbol,name,datetime,hourly_return,log_return,...,ewma_0.9,momentum_5,prev_close,tr,atr_5,williams_r,aroon_oscillator_high,aroon_oscillator_low,cci,dema_10
0,1,366.4,370.54,366.12,369.83,AAVE,Aave,2021-04-09 10:00:00,0.007876,0.007845,...,369.555505,2.29,366.94,4.42,3.71,-25.294525,35.714286,28.571429,23.518502,369.018318
1,1,371.12,374.01,370.85,372.67,AAVE,Aave,2021-04-09 11:00:00,0.007679,0.00765,...,372.35855,4.66,369.83,4.18,4.06,-8.957219,92.857143,92.857143,96.225832,370.33613
2,1,371.38,373.25,371.03,372.8,AAVE,Aave,2021-04-09 12:00:00,0.000349,0.000349,...,372.755855,4.86,372.67,2.22,4.042,-11.040146,85.714286,92.857143,98.569018,371.33843
3,1,370.57,371.17,367.28,367.54,AAVE,Aave,2021-04-09 13:00:00,-0.014109,-0.01421,...,368.061586,-0.9,372.8,5.52,4.376,-71.650055,42.857143,21.428571,-31.414732,370.318865
4,1,367.92,367.95,366.45,367.21,AAVE,Aave,2021-04-09 14:00:00,-0.000898,-0.000898,...,367.295159,0.27,367.54,1.5,3.568,-75.30454,-21.428571,21.428571,-76.209441,369.435391
