# Directional Forecasting in Cryptocurrencies
Author: Sai Nithish

This code predicts the next-minute price direction of a cryptocurrency using historical OHLCV data. 
It calculates various technical indicators (like RSI, Moving Averages, Bollinger Bands, MACD, and Pivot Points) 
and lagged features (previous open, high, low, close, and volume) to capture market trends and momentum. 
These enriched features are then used to train and test a model for price movement classification.


## Importing necessary libraries

In [9]:
import pandas as pd
import numpy as np
import ta  
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
from sklearn.impute import SimpleImputer

## Loading data

In [14]:

# Locations of the raw CSV files, relative to the notebook's working directory.
TRAIN_CSV_PATH = 'data/train.csv'
TEST_CSV_PATH = 'data/test.csv'

# Read both files into DataFrames; later cells copy them before adding columns.
train_data = pd.read_csv(TRAIN_CSV_PATH)
test_data = pd.read_csv(TEST_CSV_PATH)

## Feature engineering and data preparation

In [None]:
# Build the training feature matrix on a copy so that train_data stays untouched.
data = train_data.copy()
data['timestamp'] = pd.to_datetime(data['timestamp'], unit='s')

# NOTE: every rolling / lagged feature below assumes the rows are already in
# chronological order (oldest first).

# Calculate Technical Indicators
# RSI (Relative Strength Index)
data['rsi'] = ta.momentum.RSIIndicator(close=data['close'], window=14).rsi()

# Simple Moving Averages
data['sma_10'] = data['close'].rolling(window=10).mean()
data['sma_30'] = data['close'].rolling(window=30).mean()

# Exponential Moving Averages (EMA)
data['ema_10'] = data['close'].ewm(span=10, adjust=False).mean()
data['ema_30'] = data['close'].ewm(span=30, adjust=False).mean()

# Bollinger Bands: 20-bar mean +/- 2 rolling standard deviations
data['bollinger_mavg'] = data['close'].rolling(window=20).mean()
data['bollinger_std'] = data['close'].rolling(window=20).std()
data['bollinger_upper'] = data['bollinger_mavg'] + (data['bollinger_std'] * 2)
data['bollinger_lower'] = data['bollinger_mavg'] - (data['bollinger_std'] * 2)

# MACD (Moving Average Convergence Divergence).
# Build the indicator once and read its three series from the same object
# instead of constructing it three times.
macd_indicator = ta.trend.MACD(data['close'])
data['macd'] = macd_indicator.macd()
data['macd_signal'] = macd_indicator.macd_signal()
data['macd_diff'] = macd_indicator.macd_diff()

# Pivot Points
data['pivot'] = (data['high'] + data['low'] + data['close']) / 3

# Lagged Features (values of the previous bar)
data['lag_close'] = data['close'].shift(1)
data['lag_open'] = data['open'].shift(1)
data['lag_high'] = data['high'].shift(1)
data['lag_low'] = data['low'].shift(1)
data['lag_volume'] = data['volume'].shift(1)

# Additional Indicators
data['roc'] = ta.momentum.ROCIndicator(close=data['close'], window=10).roc()
data['obv'] = ta.volume.OnBalanceVolumeIndicator(close=data['close'], volume=data['volume']).on_balance_volume()
data['stoch_k'] = ta.momentum.StochasticOscillator(high=data['high'], low=data['low'], close=data['close'], window=14).stoch()
data['atr'] = ta.volatility.AverageTrueRange(high=data['high'], low=data['low'], close=data['close'], window=14).average_true_range()
data['williams_r'] = ta.momentum.WilliamsRIndicator(high=data['high'], low=data['low'], close=data['close'], lbp=14).williams_r()
data['adi'] = ta.volume.AccDistIndexIndicator(high=data['high'], low=data['low'], close=data['close'], volume=data['volume']).acc_dist_index()
data['cci'] = ta.trend.CCIIndicator(high=data['high'], low=data['low'], close=data['close'], window=20).cci()
data['vol_sma_10'] = data['volume'].rolling(window=10).mean()
data['vol_sma_30'] = data['volume'].rolling(window=30).mean()
# Relative volume: current volume against its 30-bar average (reuses vol_sma_30).
data['rel_vol'] = data['volume'] / data['vol_sma_30']

# Drop the warm-up rows where the rolling windows / lags are still undefined.
data.dropna(inplace=True)

# Define features and target
features = ['open', 'high', 'low', 'close', 'volume', 'rsi', 'sma_10', 'sma_30', 'ema_10', 'ema_30',
            'bollinger_upper', 'bollinger_lower', 'macd', 'macd_signal', 'macd_diff', 'pivot',
            'lag_close', 'lag_open', 'lag_high', 'lag_low', 'lag_volume', 'roc', 'obv',
            'stoch_k', 'atr', 'williams_r', 'adi', 'cci', 'vol_sma_10', 'vol_sma_30', 'rel_vol']
target = 'target'

# Split data chronologically: the first 80% of rows train the models and the
# last 20% are held out. A shuffled split would scatter neighbouring bars
# across both sets; because rolling and lagged features of adjacent rows
# overlap heavily, that leaks hold-out information into training and inflates
# the reported scores.
X = data[features]
y = data[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)


## Logistic Regression

In [None]:

# Imported here so this cell runs on its own.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# The features live on very different scales (raw prices, cumulative volume
# indicators such as OBV/ADI, bounded oscillators such as RSI). Standardising
# them first helps the solver converge within max_iter and makes the default
# L2 penalty act evenly on all coefficients. The scaler statistics are learnt
# from the training rows only.
logistic_model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
logistic_model.fit(X_train, y_train)

# The pipeline applies the fitted scaler before predicting, so callers keep
# using logistic_model.predict(...) exactly as with a bare estimator.
y_pred_logistic = logistic_model.predict(X_test)

print("Logistic Regression Results:")
print(confusion_matrix(y_test, y_pred_logistic))
print(classification_report(y_test, y_pred_logistic))



## Random Forest Classifier

In [None]:

# Baseline random forest: library defaults plus a fixed seed for repeatable runs.
# fit() returns the fitted estimator, so construction and training are chained.
rf_model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

# Report hold-out performance: header, confusion matrix, per-class metrics.
print(
    "Random Forest Results:",
    confusion_matrix(y_test, y_pred_rf),
    classification_report(y_test, y_pred_rf),
    sep="\n",
)


## Gradient Boosting Classifier

In [None]:

# Baseline gradient boosting: library defaults plus a fixed seed for repeatable runs.
# fit() returns the fitted estimator, so construction and training are chained.
gb_model = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)
y_pred_gb = gb_model.predict(X_test)

# Report hold-out performance: header, confusion matrix, per-class metrics.
print(
    "Gradient Boosting Results:",
    confusion_matrix(y_test, y_pred_gb),
    classification_report(y_test, y_pred_gb),
    sep="\n",
)

## Hyperparameter Tuning for Random Forest

In [None]:

# Imported here so this cell runs on its own.
from sklearn.model_selection import TimeSeriesSplit

# Candidate hyperparameters: 3 x 3 x 3 = 27 combinations.
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
}

# Put the training rows back into their original (chronological) row order so
# that every validation fold lies strictly after the rows it is trained on.
X_train_ordered = X_train.sort_index()
y_train_ordered = y_train.loc[X_train_ordered.index]

# TimeSeriesSplit scores each candidate on bars that follow its training
# window. Plain k-fold (cv=3) would also validate on bars that precede the
# training data, which flatters the scores for time-ordered data.
grid_search = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid,
    cv=TimeSeriesSplit(n_splits=3),
)
grid_search.fit(X_train_ordered, y_train_ordered)

# best_estimator_ is the winning configuration refit on all training rows.
best_rf_model = grid_search.best_estimator_
y_pred_best_rf = best_rf_model.predict(X_test)

print("Best Random Forest Results after Hyperparameter Tuning:")
print(confusion_matrix(y_test, y_pred_best_rf))
print(classification_report(y_test, y_pred_best_rf))



## Testing

In [None]:

# Work on a copy so the frame loaded from disk is not modified in place.
test_data = test_data.copy()
test_data['timestamp'] = pd.to_datetime(test_data['timestamp'], unit='s')

# The features below must mirror the training features exactly.

# Calculate Technical Indicators
test_data['rsi'] = ta.momentum.RSIIndicator(close=test_data['close'], window=14).rsi()
test_data['sma_10'] = test_data['close'].rolling(window=10).mean()
test_data['sma_30'] = test_data['close'].rolling(window=30).mean()
test_data['ema_10'] = test_data['close'].ewm(span=10, adjust=False).mean()
test_data['ema_30'] = test_data['close'].ewm(span=30, adjust=False).mean()

# Bollinger Bands
test_data['bollinger_mavg'] = test_data['close'].rolling(window=20).mean()
test_data['bollinger_std'] = test_data['close'].rolling(window=20).std()
test_data['bollinger_upper'] = test_data['bollinger_mavg'] + (test_data['bollinger_std'] * 2)
test_data['bollinger_lower'] = test_data['bollinger_mavg'] - (test_data['bollinger_std'] * 2)

# MACD: build the indicator once and read its three series from it.
test_macd_indicator = ta.trend.MACD(test_data['close'])
test_data['macd'] = test_macd_indicator.macd()
test_data['macd_signal'] = test_macd_indicator.macd_signal()
test_data['macd_diff'] = test_macd_indicator.macd_diff()

# Pivot Points
test_data['pivot'] = (test_data['high'] + test_data['low'] + test_data['close']) / 3

# Lagged Features
test_data['lag_close'] = test_data['close'].shift(1)
test_data['lag_open'] = test_data['open'].shift(1)
test_data['lag_high'] = test_data['high'].shift(1)
test_data['lag_low'] = test_data['low'].shift(1)
test_data['lag_volume'] = test_data['volume'].shift(1)

# Additional Indicators
test_data['roc'] = ta.momentum.ROCIndicator(close=test_data['close'], window=10).roc()
test_data['obv'] = ta.volume.OnBalanceVolumeIndicator(close=test_data['close'], volume=test_data['volume']).on_balance_volume()
test_data['stoch_k'] = ta.momentum.StochasticOscillator(high=test_data['high'], low=test_data['low'], close=test_data['close'], window=14).stoch()
test_data['atr'] = ta.volatility.AverageTrueRange(high=test_data['high'], low=test_data['low'], close=test_data['close'], window=14).average_true_range()
test_data['williams_r'] = ta.momentum.WilliamsRIndicator(high=test_data['high'], low=test_data['low'], close=test_data['close'], lbp=14).williams_r()
test_data['adi'] = ta.volume.AccDistIndexIndicator(high=test_data['high'], low=test_data['low'], close=test_data['close'], volume=test_data['volume']).acc_dist_index()
test_data['cci'] = ta.trend.CCIIndicator(high=test_data['high'], low=test_data['low'], close=test_data['close'], window=20).cci()
test_data['vol_sma_10'] = test_data['volume'].rolling(window=10).mean()
test_data['vol_sma_30'] = test_data['volume'].rolling(window=30).mean()
test_data['rel_vol'] = test_data['volume'] / test_data['vol_sma_30']

# Do NOT drop the warm-up rows here: every row_id needs a prediction in the
# output file. Their undefined indicator values are imputed instead.
# Infinite values (e.g. a zero 30-bar volume average in rel_vol) are treated
# as missing as well, because the estimator rejects non-finite input.
X_test_final = test_data[features].replace([np.inf, -np.inf], np.nan)

# Learn the fill values from the training features, not from the test set,
# so that no test-set statistics enter the preprocessing.
imputer = SimpleImputer(strategy='mean')
imputer.fit(X_train)

# Keep a DataFrame with the original column names so the model receives the
# same feature names it was fitted with.
X_test_final_imputed = pd.DataFrame(
    imputer.transform(X_test_final),
    columns=features,
    index=X_test_final.index,
)

# NOTE(review): predictions come from the logistic model, not the tuned random
# forest -- confirm this is the intended submission model.
y_pred_test = logistic_model.predict(X_test_final_imputed)

test_data['target'] = y_pred_test

test_data[['row_id', 'target']].to_csv('predictions.csv', index=False)

print("Predictions saved to predictions.csv.")
