In [None]:
# 03_model_training.ipynb

# Install necessary libraries
!pip install xgboost lightgbm tensorflow optuna scikit-learn joblib

# Import libraries
import os
from datetime import datetime

import joblib
import lightgbm as lgb
import numpy as np
import optuna
import pandas as pd
import tensorflow as tf
import xgboost as xgb
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential

# Load the processed feature table from disk into a DataFrame.
# The path is relative, so it is resolved against the current working directory.
data = pd.read_csv("data/processed/combined_features.csv")

# Define target: Here we assume we want to predict Buy/Sell/Hold signals
# For example, let's use 'rsi' to define whether the market is overbought (Sell) or oversold (Buy)
def create_target(df, overbought=70, oversold=30):
    """Add a Buy/Sell/Hold ``target`` column derived from the ``rsi`` column.

    Rows with ``rsi`` strictly above ``overbought`` are labelled ``'Sell'``,
    rows strictly below ``oversold`` are labelled ``'Buy'``, and everything
    else is labelled ``'Hold'``. NaN RSI values compare False against both
    thresholds and therefore also end up as ``'Hold'``.

    Args:
        df: DataFrame containing an ``rsi`` column. It is modified in place.
        overbought: RSI level above which the row is labelled ``'Sell'``.
        oversold: RSI level below which the row is labelled ``'Buy'``.

    Returns:
        The same DataFrame object, with the ``target`` column added or
        overwritten.

    Raises:
        ValueError: If ``oversold`` is greater than ``overbought``.
        KeyError: If ``df`` has no ``rsi`` column.
    """
    if oversold > overbought:
        raise ValueError(
            f"oversold ({oversold}) must not exceed overbought ({overbought})"
        )

    rsi = df['rsi']
    # The overbought test is evaluated first, so it takes precedence.
    buy_or_hold = np.where(rsi < oversold, 'Buy', 'Hold')
    df['target'] = np.where(rsi > overbought, 'Sell', buy_or_hold)
    return df

# Label every row with a Buy/Sell/Hold signal
data = create_target(data)

# Feature matrix fed to every model.
# NOTE(review): 'rsi' is both a feature here and the only column the target is
# derived from, so the models can recover the label directly from it — confirm
# this is intended.
X = data[[
    'close', 'rsi', 'macd', 'macd_signal', 'ema', 'sma',
    'sentiment', 'bitcoin_trend', 'eth_trend', 'bnb_trend',
]]

# Integer-encode the labels: Sell -> 0, Hold -> 1, Buy -> 2
y = data['target'].map({'Sell': 0, 'Hold': 1, 'Buy': 2})

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# 1. Train XGBoost Model
def train_xgb_model(X_train, y_train, X_test, y_test):
    """Fit an XGBoost classifier and print its classification report.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_test: Held-out features used for the printed evaluation.
        y_test: Held-out labels used for the printed evaluation.

    Returns:
        The fitted ``xgb.XGBClassifier``.
    """
    # `use_label_encoder` is deliberately not passed: it is deprecated, and
    # recent XGBoost releases only emit an "unused parameter" warning for it.
    model = xgb.XGBClassifier(eval_metric='mlogloss')
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print("XGBoost Model Evaluation:")
    print(classification_report(y_test, y_pred))
    return model

# 2. Train LightGBM Model
def train_lgbm_model(X_train, y_train, X_test, y_test):
    """Fit a default LightGBM classifier and print its classification report.

    Args:
        X_train: Training features.
        y_train: Training labels.
        X_test: Held-out features used for the printed evaluation.
        y_test: Held-out labels used for the printed evaluation.

    Returns:
        The fitted ``lgb.LGBMClassifier``.
    """
    classifier = lgb.LGBMClassifier()
    classifier.fit(X_train, y_train)

    # Score the held-out rows and report per-class metrics.
    predictions = classifier.predict(X_test)
    print("LightGBM Model Evaluation:")
    report = classification_report(y_test, predictions)
    print(report)
    return classifier

# 3. Train LSTM Model (Deep Learning)
def train_lstm_model(X_train, y_train, X_test, y_test, epochs=10, batch_size=32):
    """Train a two-layer LSTM classifier and print its classification report.

    Every row is treated as a sequence of length one, so the network input has
    shape ``(samples, 1, features)``. The output layer has three softmax units
    and the loss is sparse categorical cross-entropy, so labels must be the
    integer class ids 0, 1 and 2.

    Args:
        X_train: 2D training features (DataFrame or array-like), one row per sample.
        y_train: Integer training labels.
        X_test: 2D held-out features; also passed to Keras as validation data.
        y_test: Integer held-out labels; also passed to Keras as validation data.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used during training.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    # LSTM layers expect 3D input: (samples, timesteps, features).
    train_features = np.asarray(X_train, dtype="float32")
    test_features = np.asarray(X_test, dtype="float32")
    train_seq = train_features.reshape((train_features.shape[0], 1, train_features.shape[1]))
    test_seq = test_features.reshape((test_features.shape[0], 1, test_features.shape[1]))

    # Plain arrays avoid carrying the shuffled pandas index into Keras.
    train_labels = np.asarray(y_train)
    test_labels = np.asarray(y_test)

    model = Sequential()
    # Declare the input shape with an explicit Input layer rather than the
    # layer-level `input_shape=` argument, which newer Keras versions warn about.
    model.add(tf.keras.Input(shape=(train_seq.shape[1], train_seq.shape[2])))
    model.add(LSTM(units=50, return_sequences=True))
    model.add(LSTM(units=50))
    model.add(Dense(units=3, activation='softmax'))  # 3 classes (Buy, Sell, Hold)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    model.fit(
        train_seq,
        train_labels,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(test_seq, test_labels),
    )

    # Softmax probabilities -> predicted class id per row.
    y_pred = model.predict(test_seq)
    y_pred_class = np.argmax(y_pred, axis=1)

    print("LSTM Model Evaluation:")
    print(classification_report(test_labels, y_pred_class))
    return model

# Train and evaluate the models
print("Training XGBoost model...")
xgb_model = train_xgb_model(X_train, y_train, X_test, y_test)

print("Training LightGBM model...")
lgbm_model = train_lgbm_model(X_train, y_train, X_test, y_test)

print("Training LSTM model...")
lstm_model = train_lstm_model(X_train, y_train, X_test, y_test)

# Persist the XGBoost model. The three models are not compared automatically
# here; XGBoost is simply the one that gets saved.
# Create the output directory first: joblib.dump does not create missing
# parent directories and would otherwise fail with FileNotFoundError.
os.makedirs('models', exist_ok=True)
joblib.dump(xgb_model, 'models/xgb_model.pkl')

# Output summary
print(f"Model Training Complete at {datetime.now()}")
