In [1]:
import numpy as np
import pandas as pd
import mysql.connector
from datetime import datetime
import logging
import joblib
import os
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.linear_model import Lasso, Ridge
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, accuracy_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
import xgboost as xgb
from statsmodels.tsa.arima.model import ARIMA
import warnings
warnings.filterwarnings('ignore')

class EnhancedModelTrainer:
    def __init__(self, db_config, model_save_dir='enhanced_models'):
        self.db_config = db_config
        self.model_save_dir = model_save_dir
        self.setup_logging()
        self.create_model_directory()
        
    def setup_logging(self):
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler('enhanced_model_training.log'),
                logging.StreamHandler()
            ]
        )
        self.logger = logging.getLogger(__name__)
    
    def create_model_directory(self):
        if not os.path.exists(self.model_save_dir):
            os.makedirs(self.model_save_dir)
            self.logger.info(f"Created model directory: {self.model_save_dir}")
    
    def connect_db(self):
        """Open a MySQL connection using ``self.db_config``.

        Returns:
            The connection object, or ``None`` when ``mysql.connector``
            raises one of its errors (the error is logged).
        """
        try:
            connection = mysql.connector.connect(**self.db_config)
        except mysql.connector.Error as exc:
            self.logger.error(f"Database connection error: {exc}")
            return None
        return connection
    
    def calculate_directional_accuracy(self, y_true, y_pred):
        """Calculate percentage of correct directional predictions"""
        # For % change predictions, direction is simply the sign
        true_direction = y_true > 0
        pred_direction = y_pred > 0
        
        correct = np.sum(true_direction == pred_direction)
        accuracy = (correct / len(true_direction)) * 100
        
        return accuracy
    
    def calculate_metrics(self, y_true, y_pred, is_percentage=True):
        """Compute RMSE, MAE, R² and directional accuracy for a prediction.

        Args:
            y_true: Actual target values.
            y_pred: Predicted target values.
            is_percentage: Accepted for interface compatibility; this method
                does not read it.

        Returns:
            Dict with the keys ``rmse``, ``mae``, ``r2_score`` and
            ``directional_accuracy``. ``r2_score`` is ``-999`` when R² could
            not be computed.
        """
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        mae = mean_absolute_error(y_true, y_pred)

        # R² can be negative, so it is tracked but not used as the primary metric.
        try:
            r2 = r2_score(y_true, y_pred)
        except Exception:
            # Keep the best-effort sentinel, but no longer swallow
            # KeyboardInterrupt/SystemExit as the former bare ``except`` did.
            r2 = -999

        dir_acc = self.calculate_directional_accuracy(y_true, y_pred)

        return {
            'rmse': rmse,
            'mae': mae,
            'r2_score': r2,
            'directional_accuracy': dir_acc
        }
    
    def train_random_forest_tuned(self, X_train, y_train, X_test, y_test, quick_tune=False):
        """Tune a Random Forest regressor with a randomized search and score it.

        Args:
            X_train, y_train: Data the 3-fold cross-validated search is fit on.
            X_test, y_test: Data used for the returned predictions and metrics.
            quick_tune: When true, search the smaller parameter space.

        Returns:
            Tuple ``(best_estimator, y_pred, metrics)`` where ``metrics`` is
            the dict produced by ``calculate_metrics``.
        """
        self.logger.info("Training Random Forest with tuning...")

        # Small space for fast iteration.
        quick_space = {
            'n_estimators': [100, 200],
            'max_depth': [15, 25],
            'min_samples_split': [3, 5],
            'min_samples_leaf': [1, 2],
        }
        # Comprehensive space.
        full_space = {
            'n_estimators': [100, 200, 300],
            'max_depth': [10, 20, 30, None],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4],
            'max_features': ['sqrt', 'log2'],
        }
        search_space = quick_space if quick_tune else full_space

        # Randomized rather than exhaustive search, for speed.
        search = RandomizedSearchCV(
            RandomForestRegressor(random_state=42, n_jobs=-1),
            search_space,
            n_iter=10,
            cv=3,
            scoring='neg_mean_squared_error',
            n_jobs=-1,
            random_state=42,
        )
        search.fit(X_train, y_train)

        tuned_forest = search.best_estimator_
        predictions = tuned_forest.predict(X_test)
        scores = self.calculate_metrics(y_test, predictions)

        self.logger.info(f"Best RF params: {search.best_params_}")

        return tuned_forest, predictions, scores
    
    def train_xgboost_tuned(self, X_train, y_train, X_test, y_test, quick_tune=False):
        """Tune an XGBoost regressor with a randomized search and score it.

        Args:
            X_train, y_train: Data the 3-fold cross-validated search is fit on.
            X_test, y_test: Data used for the returned predictions and metrics.
            quick_tune: When true, search the smaller parameter grid.

        Returns:
            Tuple ``(best_estimator, y_pred, metrics)`` where ``metrics`` is
            the dict produced by ``calculate_metrics``.
        """
        self.logger.info("Training XGBoost with tuning...")

        if quick_tune:
            param_grid = {
                'n_estimators': [100, 200],
                'max_depth': [6, 10],
                'learning_rate': [0.05, 0.1],
                'subsample': [0.8]
            }
        else:
            param_grid = {
                'n_estimators': [100, 200, 300],
                'max_depth': [5, 7, 10, 15],
                'learning_rate': [0.01, 0.05, 0.1],
                'subsample': [0.7, 0.8, 0.9],
                'colsample_bytree': [0.7, 0.8, 0.9]
            }

        # The quick grid holds only 8 combinations, fewer than the 10 draws
        # requested; never ask the search for more candidates than exist.
        grid_size = 1
        for candidates in param_grid.values():
            grid_size *= len(candidates)
        n_iter = min(10, grid_size)

        xgb_model = xgb.XGBRegressor(random_state=42, n_jobs=-1)

        grid_search = RandomizedSearchCV(
            xgb_model, param_grid, n_iter=n_iter, cv=3,
            scoring='neg_mean_squared_error', n_jobs=-1, random_state=42
        )

        grid_search.fit(X_train, y_train)

        best_model = grid_search.best_estimator_
        y_pred = best_model.predict(X_test)
        metrics = self.calculate_metrics(y_test, y_pred)

        self.logger.info(f"Best XGB params: {grid_search.best_params_}")

        return best_model, y_pred, metrics
    
    def train_mlp_tuned(self, X_train, y_train, X_test, y_test, quick_tune=False):
        """Tune an MLP regressor with a randomized search and score it.

        Args:
            X_train, y_train: Data the 3-fold cross-validated search is fit on.
            X_test, y_test: Data used for the returned predictions and metrics.
            quick_tune: When true, search the smaller parameter grid.

        Returns:
            Tuple ``(best_estimator, y_pred, metrics)`` where ``metrics`` is
            the dict produced by ``calculate_metrics``.
        """
        self.logger.info("Training MLP with tuning...")

        if quick_tune:
            param_grid = {
                'hidden_layer_sizes': [(100,), (100, 50)],
                'alpha': [0.0001, 0.001],
                'learning_rate_init': [0.001]
            }
        else:
            param_grid = {
                'hidden_layer_sizes': [(50,), (100,), (100, 50), (100, 100), (150, 75)],
                'alpha': [0.0001, 0.001, 0.01],
                'learning_rate_init': [0.001, 0.01]
            }

        # The quick grid holds only 4 combinations, fewer than the 8 draws
        # requested; never ask the search for more candidates than exist.
        grid_size = 1
        for candidates in param_grid.values():
            grid_size *= len(candidates)
        n_iter = min(8, grid_size)

        mlp = MLPRegressor(
            max_iter=1000, early_stopping=True,
            validation_fraction=0.1, random_state=42
        )

        grid_search = RandomizedSearchCV(
            mlp, param_grid, n_iter=n_iter, cv=3,
            scoring='neg_mean_squared_error', n_jobs=-1, random_state=42
        )

        grid_search.fit(X_train, y_train)

        best_model = grid_search.best_estimator_
        y_pred = best_model.predict(X_test)
        metrics = self.calculate_metrics(y_test, y_pred)

        self.logger.info(f"Best MLP params: {grid_search.best_params_}")

        return best_model, y_pred, metrics
    
    def train_lasso_tuned(self, X_train, y_train, X_test, y_test):
        """Grid-search the Lasso ``alpha`` and score the best model.

        Args:
            X_train, y_train: Data the 3-fold cross-validated search is fit on.
            X_test, y_test: Data used for the returned predictions and metrics.

        Returns:
            Tuple ``(best_estimator, y_pred, metrics)`` where ``metrics`` is
            the dict produced by ``calculate_metrics``.
        """
        self.logger.info("Training Lasso with tuning...")

        alpha_candidates = [0.001, 0.01, 0.1, 1.0, 10.0]
        search = GridSearchCV(
            Lasso(max_iter=5000, random_state=42),
            {'alpha': alpha_candidates},
            cv=3,
            scoring='neg_mean_squared_error',
            n_jobs=-1,
        )
        search.fit(X_train, y_train)

        tuned_lasso = search.best_estimator_
        predictions = tuned_lasso.predict(X_test)
        scores = self.calculate_metrics(y_test, predictions)

        self.logger.info(f"Best Lasso alpha: {search.best_params_['alpha']}")

        return tuned_lasso, predictions, scores
    
    def train_ridge_tuned(self, X_train, y_train, X_test, y_test):
        """Grid-search the Ridge ``alpha`` and score the best model.

        Args:
            X_train, y_train: Data the 3-fold cross-validated search is fit on.
            X_test, y_test: Data used for the returned predictions and metrics.

        Returns:
            Tuple ``(best_estimator, y_pred, metrics)`` where ``metrics`` is
            the dict produced by ``calculate_metrics``.
        """
        self.logger.info("Training Ridge with tuning...")

        alpha_candidates = [0.001, 0.01, 0.1, 1.0, 10.0, 100.0]
        search = GridSearchCV(
            Ridge(max_iter=5000, random_state=42),
            {'alpha': alpha_candidates},
            cv=3,
            scoring='neg_mean_squared_error',
            n_jobs=-1,
        )
        search.fit(X_train, y_train)

        tuned_ridge = search.best_estimator_
        predictions = tuned_ridge.predict(X_test)
        scores = self.calculate_metrics(y_test, predictions)

        self.logger.info(f"Best Ridge alpha: {search.best_params_['alpha']}")

        return tuned_ridge, predictions, scores
    
    def train_arima(self, y_train, y_test):
        """Train ARIMA model (no hyperparameter tuning for now)"""
        self.logger.info("Training ARIMA...")
        
        try:
            model = ARIMA(y_train, order=(2, 1, 1))
            fitted_model = model.fit()
            
            forecast = fitted_model.forecast(steps=len(y_test))
            y_pred = np.array(forecast)
            
            metrics = self.calculate_metrics(y_test, y_pred)
            
            return fitted_model, y_pred, metrics
        except Exception as e:
            self.logger.error(f"ARIMA training failed: {e}")
            return None, None, None
    
    def save_model(self, model, symbol, model_type):
        """Write *model* to the model directory with joblib and return its path.

        The file is named ``{symbol}_{model_type}_enhanced.pkl``.
        """
        target = os.path.join(
            self.model_save_dir,
            f"{symbol}_{model_type}_enhanced.pkl",
        )
        joblib.dump(model, target)
        return target
    
    def save_performance_to_db(self, symbol, model_type, metrics, train_samples, 
                               test_samples, feature_count, model_path, has_sentiment):
        """Save model performance metrics to database"""
        conn = self.connect_db()
        if not conn:
            return False
        
        cursor = conn.cursor()
        
        # Use existing model_performance table
        query = """
        INSERT INTO model_performance 
        (symbol, model_type, rmse, mae, r2_score, directional_accuracy,
         train_samples, test_samples, feature_count, model_path)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """
        
        model_type_label = f"{model_type}_Enhanced_{'Sentiment' if has_sentiment else 'NoSentiment'}"
        
        values = (
            symbol,
            model_type_label,
            metrics['rmse'],
            metrics['mae'],
            metrics['r2_score'],
            metrics['directional_accuracy'],
            train_samples,
            test_samples,
            feature_count,
            model_path
        )
        
        try:
            cursor.execute(query, values)
            conn.commit()
            return True
        except mysql.connector.Error as e:
            self.logger.error(f"Error saving performance: {e}")
            return False
        finally:
            cursor.close()
            conn.close()
    
    def train_all_models(self, stock_data, quick_tune=True):
        """
        Train all models with hyperparameter tuning
        quick_tune: If True, use faster parameter grids
        """
        symbol = stock_data['symbol']
        X_train = stock_data['X_train']
        X_test = stock_data['X_test']
        y_train = stock_data['y_train']
        y_test = stock_data['y_test']
        has_sentiment = stock_data.get('has_sentiment', False)
        
        results = {}
        
        # 1. Random Forest (Tuned)
        try:
            model, y_pred, metrics = self.train_random_forest_tuned(
                X_train, y_train, X_test, y_test, quick_tune
            )
            if model:
                model_path = self.save_model(model, symbol, 'RandomForest')
                self.save_performance_to_db(
                    symbol, 'RandomForest', metrics,
                    len(X_train), len(X_test), X_train.shape[1], model_path, has_sentiment
                )
                results['RandomForest'] = {'model': model, 'metrics': metrics, 'predictions': y_pred}
        except Exception as e:
            self.logger.error(f"Random Forest failed for {symbol}: {e}")
        
        # 2. XGBoost (Tuned)
        try:
            model, y_pred, metrics = self.train_xgboost_tuned(
                X_train, y_train, X_test, y_test, quick_tune
            )
            if model:
                model_path = self.save_model(model, symbol, 'XGBoost')
                self.save_performance_to_db(
                    symbol, 'XGBoost', metrics,
                    len(X_train), len(X_test), X_train.shape[1], model_path, has_sentiment
                )
                results['XGBoost'] = {'model': model, 'metrics': metrics, 'predictions': y_pred}
        except Exception as e:
            self.logger.error(f"XGBoost failed for {symbol}: {e}")
        
        # 3. MLP (Tuned)
        try:
            model, y_pred, metrics = self.train_mlp_tuned(
                X_train, y_train, X_test, y_test, quick_tune
            )
            if model:
                model_path = self.save_model(model, symbol, 'MLP')
                self.save_performance_to_db(
                    symbol, 'MLP', metrics,
                    len(X_train), len(X_test), X_train.shape[1], model_path, has_sentiment
                )
                results['MLP'] = {'model': model, 'metrics': metrics, 'predictions': y_pred}
        except Exception as e:
            self.logger.error(f"MLP failed for {symbol}: {e}")
        
        # 4. Lasso (Tuned)
        try:
            model, y_pred, metrics = self.train_lasso_tuned(X_train, y_train, X_test, y_test)
            if model:
                model_path = self.save_model(model, symbol, 'Lasso')
                self.save_performance_to_db(
                    symbol, 'Lasso', metrics,
                    len(X_train), len(X_test), X_train.shape[1], model_path, has_sentiment
                )
                results['Lasso'] = {'model': model, 'metrics': metrics, 'predictions': y_pred}
        except Exception as e:
            self.logger.error(f"Lasso failed for {symbol}: {e}")
        
        # 5. Ridge (Tuned) - NEW
        try:
            model, y_pred, metrics = self.train_ridge_tuned(X_train, y_train, X_test, y_test)
            if model:
                model_path = self.save_model(model, symbol, 'Ridge')
                self.save_performance_to_db(
                    symbol, 'Ridge', metrics,
                    len(X_train), len(X_test), X_train.shape[1], model_path, has_sentiment
                )
                results['Ridge'] = {'model': model, 'metrics': metrics, 'predictions': y_pred}
        except Exception as e:
            self.logger.error(f"Ridge failed for {symbol}: {e}")
        
        # 6. ARIMA
        try:
            model, y_pred, metrics = self.train_arima(y_train, y_test)
            if model:
                model_path = self.save_model(model, symbol, 'ARIMA')
                self.save_performance_to_db(
                    symbol, 'ARIMA', metrics,
                    len(y_train), len(y_test), 0, model_path, has_sentiment
                )
                results['ARIMA'] = {'model': model, 'metrics': metrics, 'predictions': y_pred}
        except Exception as e:
            self.logger.error(f"ARIMA failed for {symbol}: {e}")
        
        return results
    
    def select_best_model(self, symbol, results):
        """Select best model based on directional accuracy (primary) and RMSE (secondary)"""
        if not results:
            self.logger.warning(f"No valid models for {symbol}")
            return None
        
        # Sort by directional accuracy first, then RMSE
        sorted_models = sorted(
            results.items(),
            key=lambda x: (-x[1]['metrics']['directional_accuracy'], x[1]['metrics']['rmse'])
        )
        
        best_model_type = sorted_models[0][0]
        best_metrics = sorted_models[0][1]['metrics']
        best_model_path = f"{self.model_save_dir}/{symbol}_{best_model_type}_enhanced.pkl"
        
        # Save selection to database
        conn = self.connect_db()
        if not conn:
            return None
        
        cursor = conn.cursor()
        
        query = """
        INSERT INTO model_selection (symbol, selected_model_type, model_path, rmse, notes)
        VALUES (%s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
            selected_model_type = VALUES(selected_model_type),
            model_path = VALUES(model_path),
            rmse = VALUES(rmse),
            selection_date = CURRENT_TIMESTAMP,
            notes = VALUES(notes)
        """
        
        notes = f"Enhanced model. Dir Acc: {best_metrics['directional_accuracy']:.1f}%, RMSE: {best_metrics['rmse']:.2f}%"
        
        try:
            cursor.execute(query, (symbol, best_model_type + '_Enhanced', best_model_path, 
                                  best_metrics['rmse'], notes))
            conn.commit()
            self.logger.info(f"{symbol}: Selected {best_model_type} "
                           f"(Dir Acc={best_metrics['directional_accuracy']:.1f}%, "
                           f"RMSE={best_metrics['rmse']:.2f}%)")
        except mysql.connector.Error as e:
            self.logger.error(f"Error saving model selection: {e}")
        finally:
            cursor.close()
            conn.close()
        
        return best_model_type

In [5]:
# Connection settings can be overridden through environment variables so that
# real credentials do not have to be stored in the notebook. The defaults are
# the values that were previously hard-coded.
db_config = {
    'host': os.environ.get('TRADING_DB_HOST', '127.0.0.1'),
    'user': os.environ.get('TRADING_DB_USER', 'root'),
    'password': os.environ.get('TRADING_DB_PASSWORD', ''),
    'database': os.environ.get('TRADING_DB_NAME', 'trading_system')
}

enhanced_trainer = EnhancedModelTrainer(db_config)
print("✓ Enhanced Model Trainer initialized")
print("✓ Will train with hyperparameter tuning")
print("✓ Target: Price Change % (better for directional accuracy)")


✓ Enhanced Model Trainer initialized
✓ Will train with hyperparameter tuning
✓ Target: Price Change % (better for directional accuracy)


In [6]:
# Load the prepared per-stock data set
import pickle

# NOTE: unpickling can execute arbitrary code; only load files you produced.
data_path = 'enhanced_stock_data.pkl'
with open(data_path, 'rb') as data_file:
    enhanced_stock_data = pickle.load(data_file)

stock_count = len(enhanced_stock_data)
print(f"✓ Loaded data for {stock_count} stocks")


✓ Loaded data for 24 stocks


In [7]:
# Smoke test: train and rank all model types for a single symbol.
print("\nTesting enhanced model training on AAPL...")
print("="*60)

test_symbol = 'AAPL'
if test_symbol in enhanced_stock_data:
    print(f"Training 6 tuned models for {test_symbol}...")
    print("This will take 5-10 minutes due to hyperparameter tuning...\n")
    
    results = enhanced_trainer.train_all_models(enhanced_stock_data[test_symbol], quick_tune=True)
    
    print("\nEnhanced Model Performance:")
    print("-"*70)
    print(f"{'Model':<15} {'RMSE (%)':<12} {'MAE (%)':<12} {'Dir Acc':<12}")
    print("-"*70)
    
    for model_type, data in results.items():
        metrics = data['metrics']
        print(f"{model_type:<15} {metrics['rmse']:>8.3f}%    "
              f"{metrics['mae']:>8.3f}%    "
              f"{metrics['directional_accuracy']:>8.1f}%")
    
    best = enhanced_trainer.select_best_model(test_symbol, results)
    # select_best_model can return None; indexing results with None would
    # raise KeyError, so report that case instead.
    if best is not None:
        print(f"\n✓ Best model for {test_symbol}: {best}")
        print(f"✓ Directional Accuracy: {results[best]['metrics']['directional_accuracy']:.1f}%")
    else:
        print(f"\n✗ No model could be selected for {test_symbol}")
else:
    print(f"No data available for {test_symbol}")


2025-10-09 03:07:47,785 - INFO - Training Random Forest with tuning...



Testing enhanced model training on AAPL...
Training 6 tuned models for AAPL...
This will take 5-10 minutes due to hyperparameter tuning...



2025-10-09 03:14:23,052 - INFO - Best RF params: {'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': 25}
2025-10-09 03:14:23,568 - INFO - package: mysql.connector.plugins
2025-10-09 03:14:23,571 - INFO - plugin_name: caching_sha2_password
2025-10-09 03:14:23,573 - INFO - AUTHENTICATION_PLUGIN_CLASS: MySQLCachingSHA2PasswordAuthPlugin
2025-10-09 03:14:23,655 - INFO - Training XGBoost with tuning...
2025-10-09 03:21:32,225 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 10, 'learning_rate': 0.05}
2025-10-09 03:21:32,592 - INFO - Training MLP with tuning...
2025-10-09 03:21:37,344 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.001}
2025-10-09 03:21:37,532 - INFO - Training Lasso with tuning...
2025-10-09 03:21:46,541 - INFO - Best Lasso alpha: 1.0
2025-10-09 03:21:46,727 - INFO - Training Ridge with tuning...
2025-10-09 03:21:47,209 - INFO - Best Ridge alpha: 100.0
2025-10-09 03:


Enhanced Model Performance:
----------------------------------------------------------------------
Model           RMSE (%)     MAE (%)      Dir Acc     
----------------------------------------------------------------------
RandomForest       1.516%       1.047%        65.8%
XGBoost            1.593%       1.158%        46.8%
MLP                1.816%       1.385%        55.7%
Lasso              1.480%       1.035%        57.0%
Ridge              2.885%       2.255%        41.8%
ARIMA              1.480%       1.032%        58.2%


2025-10-09 03:21:48,638 - INFO - AAPL: Selected RandomForest (Dir Acc=65.8%, RMSE=1.52%)



✓ Best model for AAPL: RandomForest
✓ Directional Accuracy: 65.8%


In [8]:
# Train, report and select the best model for every stock in the data set.
print("\n" + "="*70)
print("Training enhanced models for all stocks with hyperparameter tuning...")
print("This will take 30-60 minutes depending on your CPU")
print("="*70 + "\n")

enhanced_results = {}
enhanced_summary = []

for idx, (symbol, stock_data) in enumerate(enhanced_stock_data.items(), 1):
    print(f"\n[{idx}/{len(enhanced_stock_data)}] Training models for {symbol}...")
    print("-"*70)
    
    try:
        results = enhanced_trainer.train_all_models(stock_data, quick_tune=True)
        
        if results:
            enhanced_results[symbol] = results
            
            # Print performance
            for model_type, data in results.items():
                metrics = data['metrics']
                print(f"  {model_type:12s} | RMSE: {metrics['rmse']:6.2f}% | "
                      f"Dir Acc: {metrics['directional_accuracy']:5.1f}%")
                
                enhanced_summary.append({
                    'symbol': symbol,
                    'model': model_type,
                    'rmse': metrics['rmse'],
                    'dir_acc': metrics['directional_accuracy'],
                    'has_sentiment': stock_data.get('has_sentiment', False)
                })
            
            # Select best model
            best = enhanced_trainer.select_best_model(symbol, results)
            # select_best_model can return None; without this guard the
            # lookup below raised KeyError, which was then reported as a
            # training failure even though training had succeeded.
            if best is not None:
                best_metrics = results[best]['metrics']
                print(f"  → Best: {best} (Dir Acc: {best_metrics['directional_accuracy']:.1f}%)")
            else:
                print(f"  ✗ No model could be selected for {symbol}")
        else:
            print(f"  ✗ No models succeeded for {symbol}")
            
    except Exception as e:
        print(f"  ✗ Training failed: {str(e)[:50]}")

print("\n" + "="*70)
print("ENHANCED TRAINING COMPLETE!")
print("="*70)


2025-10-09 03:33:33,676 - INFO - Training Random Forest with tuning...



Training enhanced models for all stocks with hyperparameter tuning...
This will take 30-60 minutes depending on your CPU


[1/24] Training models for AAPL...
----------------------------------------------------------------------


2025-10-09 03:40:00,883 - INFO - Best RF params: {'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': 25}
2025-10-09 03:40:01,285 - INFO - Training XGBoost with tuning...
2025-10-09 03:46:24,759 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 10, 'learning_rate': 0.05}
2025-10-09 03:46:24,891 - INFO - Training MLP with tuning...
2025-10-09 03:46:28,366 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.001}
2025-10-09 03:46:28,453 - INFO - Training Lasso with tuning...
2025-10-09 03:46:31,999 - INFO - Best Lasso alpha: 1.0
2025-10-09 03:46:32,106 - INFO - Training Ridge with tuning...
2025-10-09 03:46:32,500 - INFO - Best Ridge alpha: 100.0
2025-10-09 03:46:32,601 - INFO - Training ARIMA...
2025-10-09 03:46:32,993 - INFO - AAPL: Selected RandomForest (Dir Acc=65.8%, RMSE=1.52%)
2025-10-09 03:46:33,006 - INFO - Training Random Forest with tuning...


  RandomForest | RMSE:   1.52% | Dir Acc:  65.8%
  XGBoost      | RMSE:   1.59% | Dir Acc:  46.8%
  MLP          | RMSE:   1.82% | Dir Acc:  55.7%
  Lasso        | RMSE:   1.48% | Dir Acc:  57.0%
  Ridge        | RMSE:   2.89% | Dir Acc:  41.8%
  ARIMA        | RMSE:   1.48% | Dir Acc:  58.2%
  → Best: RandomForest (Dir Acc: 65.8%)

[2/24] Training models for AMD...
----------------------------------------------------------------------


2025-10-09 03:51:50,149 - INFO - Best RF params: {'n_estimators': 200, 'min_samples_split': 3, 'min_samples_leaf': 2, 'max_depth': 25}
2025-10-09 03:51:50,294 - INFO - Training XGBoost with tuning...
2025-10-09 03:58:53,934 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.05}
2025-10-09 03:58:54,123 - INFO - Training MLP with tuning...
2025-10-09 03:58:57,296 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.001}
2025-10-09 03:58:57,467 - INFO - Training Lasso with tuning...
2025-10-09 03:59:07,266 - INFO - Best Lasso alpha: 1.0
2025-10-09 03:59:07,519 - INFO - Training Ridge with tuning...
2025-10-09 03:59:07,961 - INFO - Best Ridge alpha: 100.0
2025-10-09 03:59:08,161 - INFO - Training ARIMA...
2025-10-09 03:59:09,053 - INFO - AMD: Selected Ridge (Dir Acc=46.8%, RMSE=4.17%)
2025-10-09 03:59:09,063 - INFO - Training Random Forest with tuning...


  RandomForest | RMSE:   2.99% | Dir Acc:  41.8%
  XGBoost      | RMSE:   3.02% | Dir Acc:  44.3%
  MLP          | RMSE:   3.24% | Dir Acc:  44.3%
  Lasso        | RMSE:   2.76% | Dir Acc:  44.3%
  Ridge        | RMSE:   4.17% | Dir Acc:  46.8%
  ARIMA        | RMSE:   2.76% | Dir Acc:  43.0%
  → Best: Ridge (Dir Acc: 46.8%)

[3/24] Training models for AMZN...
----------------------------------------------------------------------


2025-10-09 04:04:55,159 - INFO - Best RF params: {'n_estimators': 200, 'min_samples_split': 3, 'min_samples_leaf': 2, 'max_depth': 15}
2025-10-09 04:04:55,448 - INFO - Training XGBoost with tuning...
2025-10-09 11:29:49,406 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.05}
2025-10-09 11:29:49,830 - INFO - Training MLP with tuning...
2025-10-09 11:29:52,335 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.001}
2025-10-09 11:29:52,504 - INFO - Training Lasso with tuning...
2025-10-09 11:29:56,527 - INFO - Best Lasso alpha: 1.0
2025-10-09 11:29:56,620 - INFO - Training Ridge with tuning...
2025-10-09 11:29:57,022 - INFO - Best Ridge alpha: 100.0
2025-10-09 11:29:57,114 - INFO - Training ARIMA...
2025-10-09 11:29:57,689 - INFO - AMZN: Selected ARIMA (Dir Acc=53.2%, RMSE=1.71%)
2025-10-09 11:29:57,692 - INFO - Training Random Forest with tuning...


  RandomForest | RMSE:   1.69% | Dir Acc:  50.6%
  XGBoost      | RMSE:   1.65% | Dir Acc:  50.6%
  MLP          | RMSE:   1.91% | Dir Acc:  49.4%
  Lasso        | RMSE:   1.71% | Dir Acc:  53.2%
  Ridge        | RMSE:   2.63% | Dir Acc:  49.4%
  ARIMA        | RMSE:   1.71% | Dir Acc:  53.2%
  → Best: ARIMA (Dir Acc: 53.2%)

[4/24] Training models for BLK...
----------------------------------------------------------------------


2025-10-09 11:35:40,464 - INFO - Best RF params: {'n_estimators': 200, 'min_samples_split': 3, 'min_samples_leaf': 2, 'max_depth': 25}
2025-10-09 11:35:40,692 - INFO - Training XGBoost with tuning...
2025-10-09 11:41:05,165 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.05}
2025-10-09 11:41:05,297 - INFO - Training MLP with tuning...
2025-10-09 11:41:08,565 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.001}
2025-10-09 11:41:08,674 - INFO - Training Lasso with tuning...
2025-10-09 11:41:11,602 - INFO - Best Lasso alpha: 1.0
2025-10-09 11:41:11,706 - INFO - Training Ridge with tuning...
2025-10-09 11:41:12,051 - INFO - Best Ridge alpha: 100.0
2025-10-09 11:41:12,131 - INFO - Training ARIMA...
2025-10-09 11:41:12,709 - INFO - BLK: Selected Ridge (Dir Acc=59.5%, RMSE=1.60%)
2025-10-09 11:41:12,710 - INFO - Training Random Forest with tuning...


  RandomForest | RMSE:   1.82% | Dir Acc:  45.6%
  XGBoost      | RMSE:   1.68% | Dir Acc:  50.6%
  MLP          | RMSE:   1.67% | Dir Acc:  57.0%
  Lasso        | RMSE:   1.34% | Dir Acc:  54.4%
  Ridge        | RMSE:   1.60% | Dir Acc:  59.5%
  ARIMA        | RMSE:   1.34% | Dir Acc:  54.4%
  → Best: Ridge (Dir Acc: 59.5%)

[5/24] Training models for CHN...
----------------------------------------------------------------------


2025-10-09 11:46:34,071 - INFO - Best RF params: {'n_estimators': 100, 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_depth': 25}
2025-10-09 11:46:34,179 - INFO - Training XGBoost with tuning...
2025-10-09 11:52:09,737 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.05}
2025-10-09 11:52:09,924 - INFO - Training MLP with tuning...
2025-10-09 11:52:14,594 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.0001}
2025-10-09 11:52:14,779 - INFO - Training Lasso with tuning...
2025-10-09 11:52:23,983 - INFO - Best Lasso alpha: 1.0
2025-10-09 11:52:24,168 - INFO - Training Ridge with tuning...
2025-10-09 11:52:24,647 - INFO - Best Ridge alpha: 100.0
2025-10-09 11:52:24,806 - INFO - Training ARIMA...
2025-10-09 11:52:25,733 - INFO - CHN: Selected Lasso (Dir Acc=62.0%, RMSE=1.57%)
2025-10-09 11:52:25,737 - INFO - Training Random Forest with tuning...


  RandomForest | RMSE:   4.07% | Dir Acc:  34.2%
  XGBoost      | RMSE:   2.82% | Dir Acc:  34.2%
  MLP          | RMSE:   3.33% | Dir Acc:  41.8%
  Lasso        | RMSE:   1.57% | Dir Acc:  62.0%
  Ridge        | RMSE:   6.38% | Dir Acc:  32.9%
  ARIMA        | RMSE:   1.57% | Dir Acc:  62.0%
  → Best: Lasso (Dir Acc: 62.0%)

[6/24] Training models for ERO...
----------------------------------------------------------------------


2025-10-09 11:59:07,498 - INFO - Best RF params: {'n_estimators': 100, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': 15}
2025-10-09 11:59:07,768 - INFO - Training XGBoost with tuning...
2025-10-09 12:06:00,577 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.05}
2025-10-09 12:06:00,889 - INFO - Training MLP with tuning...
2025-10-09 12:06:03,994 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.0001}
2025-10-09 12:06:04,222 - INFO - Training Lasso with tuning...
2025-10-09 12:06:15,400 - INFO - Best Lasso alpha: 1.0
2025-10-09 12:06:15,592 - INFO - Training Ridge with tuning...
2025-10-09 12:06:16,126 - INFO - Best Ridge alpha: 100.0
2025-10-09 12:06:16,296 - INFO - Training ARIMA...
2025-10-09 12:06:17,414 - INFO - ERO: Selected RandomForest (Dir Acc=62.0%, RMSE=2.86%)


  RandomForest | RMSE:   2.86% | Dir Acc:  62.0%
  XGBoost      | RMSE:   2.86% | Dir Acc:  60.8%
  MLP          | RMSE:   3.32% | Dir Acc:  43.0%
  Lasso        | RMSE:   2.94% | Dir Acc:  57.0%
  Ridge        | RMSE:   6.55% | Dir Acc:  45.6%
  ARIMA        | RMSE:   2.94% | Dir Acc:  57.0%


2025-10-09 12:06:17,419 - INFO - Training Random Forest with tuning...


  → Best: RandomForest (Dir Acc: 62.0%)

[7/24] Training models for FXP...
----------------------------------------------------------------------


2025-10-09 12:14:01,421 - INFO - Best RF params: {'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': 25}
2025-10-09 12:14:01,722 - INFO - Training XGBoost with tuning...
2025-10-09 14:22:32,022 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.05}
2025-10-09 14:22:32,192 - INFO - Training MLP with tuning...
2025-10-09 14:22:42,592 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100, 50), 'alpha': 0.001}
2025-10-09 14:22:42,710 - INFO - Training Lasso with tuning...
2025-10-09 14:23:02,100 - INFO - Best Lasso alpha: 10.0
2025-10-09 14:23:02,236 - INFO - Training Ridge with tuning...
2025-10-09 14:23:02,801 - INFO - Best Ridge alpha: 100.0
2025-10-09 14:23:02,922 - INFO - Training ARIMA...
2025-10-09 14:23:03,514 - INFO - FXP: Selected MLP (Dir Acc=60.8%, RMSE=3.53%)
2025-10-09 14:23:03,516 - INFO - Training Random Forest with tuning...


  RandomForest | RMSE:   6.24% | Dir Acc:  41.8%
  XGBoost      | RMSE:   4.72% | Dir Acc:  43.0%
  MLP          | RMSE:   3.53% | Dir Acc:  60.8%
  Lasso        | RMSE:   2.22% | Dir Acc:  57.0%
  Ridge        | RMSE:   4.99% | Dir Acc:  54.4%
  ARIMA        | RMSE:   2.22% | Dir Acc:  57.0%
  → Best: MLP (Dir Acc: 60.8%)

[8/24] Training models for GOOGL...
----------------------------------------------------------------------


2025-10-09 14:28:38,525 - INFO - Best RF params: {'n_estimators': 100, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 25}
2025-10-09 14:28:38,694 - INFO - Training XGBoost with tuning...
2025-10-09 14:33:40,566 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.05}
2025-10-09 14:33:40,715 - INFO - Training MLP with tuning...
2025-10-09 14:33:43,056 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.0001}
2025-10-09 14:33:43,151 - INFO - Training Lasso with tuning...
2025-10-09 14:33:49,124 - INFO - Best Lasso alpha: 1.0
2025-10-09 14:33:49,231 - INFO - Training Ridge with tuning...
2025-10-09 14:33:49,677 - INFO - Best Ridge alpha: 100.0
2025-10-09 14:33:49,773 - INFO - Training ARIMA...
2025-10-09 14:33:50,363 - INFO - GOOGL: Selected Ridge (Dir Acc=59.5%, RMSE=3.37%)
2025-10-09 14:33:50,363 - INFO - Training Random Forest with tuning...


  RandomForest | RMSE:   2.13% | Dir Acc:  48.1%
  XGBoost      | RMSE:   1.94% | Dir Acc:  48.1%
  MLP          | RMSE:   2.25% | Dir Acc:  41.8%
  Lasso        | RMSE:   1.69% | Dir Acc:  58.2%
  Ridge        | RMSE:   3.37% | Dir Acc:  59.5%
  ARIMA        | RMSE:   1.69% | Dir Acc:  58.2%
  → Best: Ridge (Dir Acc: 59.5%)

[9/24] Training models for GXC...
----------------------------------------------------------------------


2025-10-09 14:39:45,644 - INFO - Best RF params: {'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': 25}
2025-10-09 14:39:45,769 - INFO - Training XGBoost with tuning...
2025-10-09 14:44:17,347 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.05}
2025-10-09 14:44:17,440 - INFO - Training MLP with tuning...
2025-10-09 14:44:20,320 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.001}
2025-10-09 14:44:20,377 - INFO - Training Lasso with tuning...
2025-10-09 14:44:23,512 - INFO - Best Lasso alpha: 1.0
2025-10-09 14:44:23,578 - INFO - Training Ridge with tuning...
2025-10-09 14:44:23,828 - INFO - Best Ridge alpha: 100.0
2025-10-09 14:44:23,896 - INFO - Training ARIMA...
2025-10-09 14:44:24,129 - INFO - GXC: Selected Lasso (Dir Acc=59.5%, RMSE=1.07%)
2025-10-09 14:44:24,129 - INFO - Training Random Forest with tuning...


  RandomForest | RMSE:   3.44% | Dir Acc:  41.8%
  XGBoost      | RMSE:   2.21% | Dir Acc:  43.0%
  MLP          | RMSE:   1.30% | Dir Acc:  48.1%
  Lasso        | RMSE:   1.07% | Dir Acc:  59.5%
  Ridge        | RMSE:   2.17% | Dir Acc:  46.8%
  ARIMA        | RMSE:   1.08% | Dir Acc:  59.5%
  → Best: Lasso (Dir Acc: 59.5%)

[10/24] Training models for JPM...
----------------------------------------------------------------------


2025-10-09 14:50:02,283 - INFO - Best RF params: {'n_estimators': 200, 'min_samples_split': 3, 'min_samples_leaf': 2, 'max_depth': 25}
2025-10-09 14:50:02,376 - INFO - Training XGBoost with tuning...
2025-10-09 14:54:31,666 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.05}
2025-10-09 14:54:31,780 - INFO - Training MLP with tuning...
2025-10-09 14:54:35,768 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.001}
2025-10-09 14:54:35,815 - INFO - Training Lasso with tuning...
2025-10-09 14:54:39,216 - INFO - Best Lasso alpha: 1.0
2025-10-09 14:54:39,308 - INFO - Training Ridge with tuning...
2025-10-09 14:54:39,587 - INFO - Best Ridge alpha: 100.0
2025-10-09 14:54:39,637 - INFO - Training ARIMA...
2025-10-09 14:54:39,886 - INFO - JPM: Selected MLP (Dir Acc=64.6%, RMSE=1.28%)
2025-10-09 14:54:39,888 - INFO - Training Random Forest with tuning...


  RandomForest | RMSE:   1.94% | Dir Acc:  40.5%
  XGBoost      | RMSE:   1.31% | Dir Acc:  48.1%
  MLP          | RMSE:   1.28% | Dir Acc:  64.6%
  Lasso        | RMSE:   1.07% | Dir Acc:  60.8%
  Ridge        | RMSE:   2.57% | Dir Acc:  43.0%
  ARIMA        | RMSE:   1.07% | Dir Acc:  60.8%
  → Best: MLP (Dir Acc: 64.6%)

[11/24] Training models for KR...
----------------------------------------------------------------------


2025-10-09 14:59:45,386 - INFO - Best RF params: {'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 25}
2025-10-09 14:59:45,552 - INFO - Training XGBoost with tuning...
2025-10-09 15:05:48,918 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.05}
2025-10-09 15:05:49,160 - INFO - Training MLP with tuning...
2025-10-09 15:05:54,282 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.0001}
2025-10-09 15:05:54,469 - INFO - Training Lasso with tuning...
2025-10-09 15:06:02,730 - INFO - Best Lasso alpha: 1.0
2025-10-09 15:06:02,872 - INFO - Training Ridge with tuning...
2025-10-09 15:06:03,318 - INFO - Best Ridge alpha: 100.0
2025-10-09 15:06:03,459 - INFO - Training ARIMA...
2025-10-09 15:06:04,123 - INFO - KR: Selected Ridge (Dir Acc=62.0%, RMSE=2.42%)
2025-10-09 15:06:04,128 - INFO - Training Random Forest with tuning...


  RandomForest | RMSE:   1.69% | Dir Acc:  60.8%
  XGBoost      | RMSE:   1.70% | Dir Acc:  54.4%
  MLP          | RMSE:   2.09% | Dir Acc:  51.9%
  Lasso        | RMSE:   1.68% | Dir Acc:  49.4%
  Ridge        | RMSE:   2.42% | Dir Acc:  62.0%
  ARIMA        | RMSE:   1.68% | Dir Acc:  49.4%
  → Best: Ridge (Dir Acc: 62.0%)

[12/24] Training models for MDT...
----------------------------------------------------------------------


2025-10-09 15:11:36,491 - INFO - Best RF params: {'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': 25}
2025-10-09 15:11:36,691 - INFO - Training XGBoost with tuning...
2025-10-09 15:16:29,162 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.05}
2025-10-09 15:16:29,263 - INFO - Training MLP with tuning...
2025-10-09 15:16:32,181 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.0001}
2025-10-09 15:16:32,227 - INFO - Training Lasso with tuning...
2025-10-09 15:16:35,035 - INFO - Best Lasso alpha: 1.0
2025-10-09 15:16:35,098 - INFO - Training Ridge with tuning...
2025-10-09 15:16:35,349 - INFO - Best Ridge alpha: 100.0
2025-10-09 15:16:35,409 - INFO - Training ARIMA...
2025-10-09 15:16:35,712 - INFO - MDT: Selected Lasso (Dir Acc=57.0%, RMSE=1.11%)
2025-10-09 15:16:35,712 - INFO - Training Random Forest with tuning...


  RandomForest | RMSE:   1.19% | Dir Acc:  49.4%
  XGBoost      | RMSE:   1.63% | Dir Acc:  49.4%
  MLP          | RMSE:   1.46% | Dir Acc:  49.4%
  Lasso        | RMSE:   1.11% | Dir Acc:  57.0%
  Ridge        | RMSE:   1.69% | Dir Acc:  46.8%
  ARIMA        | RMSE:   1.11% | Dir Acc:  57.0%
  → Best: Lasso (Dir Acc: 57.0%)

[13/24] Training models for META...
----------------------------------------------------------------------


2025-10-09 15:21:59,054 - INFO - Best RF params: {'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': 25}
2025-10-09 15:21:59,232 - INFO - Training XGBoost with tuning...
2025-10-09 15:27:37,342 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 10, 'learning_rate': 0.05}
2025-10-09 15:27:37,558 - INFO - Training MLP with tuning...
2025-10-09 15:27:40,671 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.0001}
2025-10-09 15:27:40,758 - INFO - Training Lasso with tuning...
2025-10-09 15:27:45,084 - INFO - Best Lasso alpha: 1.0
2025-10-09 15:27:45,151 - INFO - Training Ridge with tuning...
2025-10-09 15:27:45,407 - INFO - Best Ridge alpha: 100.0
2025-10-09 15:27:45,457 - INFO - Training ARIMA...
2025-10-09 15:27:45,700 - INFO - META: Selected MLP (Dir Acc=54.4%, RMSE=2.99%)
2025-10-09 15:27:45,707 - INFO - Training Random Forest with tuning...


  RandomForest | RMSE:   2.22% | Dir Acc:  46.8%
  XGBoost      | RMSE:   3.19% | Dir Acc:  50.6%
  MLP          | RMSE:   2.99% | Dir Acc:  54.4%
  Lasso        | RMSE:   1.90% | Dir Acc:  44.3%
  Ridge        | RMSE:   3.99% | Dir Acc:  53.2%
  ARIMA        | RMSE:   1.89% | Dir Acc:  44.3%
  → Best: MLP (Dir Acc: 54.4%)

[14/24] Training models for MSFT...
----------------------------------------------------------------------


2025-10-09 15:34:24,598 - INFO - Best RF params: {'n_estimators': 200, 'min_samples_split': 3, 'min_samples_leaf': 2, 'max_depth': 25}
2025-10-09 15:34:24,770 - INFO - Training XGBoost with tuning...
2025-10-09 15:41:03,055 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 10, 'learning_rate': 0.05}
2025-10-09 15:41:03,276 - INFO - Training MLP with tuning...
2025-10-09 15:41:06,775 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.001}
2025-10-09 15:41:06,880 - INFO - Training Lasso with tuning...
2025-10-09 15:41:10,157 - INFO - Best Lasso alpha: 1.0
2025-10-09 15:41:10,244 - INFO - Training Ridge with tuning...
2025-10-09 15:41:10,630 - INFO - Best Ridge alpha: 100.0
2025-10-09 15:41:10,695 - INFO - Training ARIMA...
2025-10-09 15:41:11,085 - INFO - MSFT: Selected Lasso (Dir Acc=54.4%, RMSE=0.99%)
2025-10-09 15:41:11,087 - INFO - Training Random Forest with tuning...


  RandomForest | RMSE:   1.12% | Dir Acc:  45.6%
  XGBoost      | RMSE:   1.30% | Dir Acc:  43.0%
  MLP          | RMSE:   4.29% | Dir Acc:  43.0%
  Lasso        | RMSE:   0.99% | Dir Acc:  54.4%
  Ridge        | RMSE:   8.61% | Dir Acc:  45.6%
  ARIMA        | RMSE:   0.99% | Dir Acc:  54.4%
  → Best: Lasso (Dir Acc: 54.4%)

[15/24] Training models for NFLX...
----------------------------------------------------------------------


2025-10-09 15:46:05,846 - INFO - Best RF params: {'n_estimators': 100, 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_depth': 25}
2025-10-09 15:46:05,988 - INFO - Training XGBoost with tuning...
2025-10-09 15:51:06,688 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.05}
2025-10-09 15:51:06,841 - INFO - Training MLP with tuning...
2025-10-09 15:51:10,418 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.001}
2025-10-09 15:51:10,557 - INFO - Training Lasso with tuning...
2025-10-09 15:51:15,203 - INFO - Best Lasso alpha: 1.0
2025-10-09 15:51:15,300 - INFO - Training Ridge with tuning...
2025-10-09 15:51:15,598 - INFO - Best Ridge alpha: 100.0
2025-10-09 15:51:15,690 - INFO - Training ARIMA...
2025-10-09 15:51:16,243 - INFO - NFLX: Selected Ridge (Dir Acc=50.6%, RMSE=2.62%)
2025-10-09 15:51:16,245 - INFO - Training Random Forest with tuning...


  RandomForest | RMSE:   2.21% | Dir Acc:  40.5%
  XGBoost      | RMSE:   1.81% | Dir Acc:  49.4%
  MLP          | RMSE:   1.82% | Dir Acc:  43.0%
  Lasso        | RMSE:   1.57% | Dir Acc:  48.1%
  Ridge        | RMSE:   2.62% | Dir Acc:  50.6%
  ARIMA        | RMSE:   1.57% | Dir Acc:  48.1%
  → Best: Ridge (Dir Acc: 50.6%)

[16/24] Training models for NVDA...
----------------------------------------------------------------------


2025-10-09 15:58:28,284 - INFO - Best RF params: {'n_estimators': 200, 'min_samples_split': 3, 'min_samples_leaf': 2, 'max_depth': 15}
2025-10-09 15:58:28,625 - INFO - Training XGBoost with tuning...
2025-10-09 16:05:12,109 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.05}
2025-10-09 16:05:12,677 - INFO - Training MLP with tuning...
2025-10-09 16:05:17,765 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.001}
2025-10-09 16:05:17,951 - INFO - Training Lasso with tuning...
2025-10-09 16:05:26,412 - INFO - Best Lasso alpha: 1.0
2025-10-09 16:05:26,604 - INFO - Training Ridge with tuning...
2025-10-09 16:05:27,140 - INFO - Best Ridge alpha: 100.0
2025-10-09 16:05:27,303 - INFO - Training ARIMA...
2025-10-09 16:05:28,571 - INFO - NVDA: Selected ARIMA (Dir Acc=58.2%, RMSE=1.73%)
2025-10-09 16:05:28,587 - INFO - Training Random Forest with tuning...


  RandomForest | RMSE:   3.35% | Dir Acc:  44.3%
  XGBoost      | RMSE:   5.76% | Dir Acc:  41.8%
  MLP          | RMSE:   5.05% | Dir Acc:  49.4%
  Lasso        | RMSE:   1.74% | Dir Acc:  58.2%
  Ridge        | RMSE:  11.70% | Dir Acc:  45.6%
  ARIMA        | RMSE:   1.73% | Dir Acc:  58.2%
  → Best: ARIMA (Dir Acc: 58.2%)

[17/24] Training models for OXY...
----------------------------------------------------------------------


2025-10-09 16:11:55,212 - INFO - Best RF params: {'n_estimators': 100, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': 15}
2025-10-09 16:11:55,418 - INFO - Training XGBoost with tuning...
2025-10-09 16:17:59,264 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 200, 'max_depth': 10, 'learning_rate': 0.1}
2025-10-09 16:17:59,430 - INFO - Training MLP with tuning...
2025-10-09 16:18:02,600 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.001}
2025-10-09 16:18:02,687 - INFO - Training Lasso with tuning...
2025-10-09 16:18:05,730 - INFO - Best Lasso alpha: 1.0
2025-10-09 16:18:05,781 - INFO - Training Ridge with tuning...
2025-10-09 16:18:06,046 - INFO - Best Ridge alpha: 100.0
2025-10-09 16:18:06,099 - INFO - Training ARIMA...
2025-10-09 16:18:06,409 - INFO - OXY: Selected XGBoost (Dir Acc=54.4%, RMSE=2.16%)
2025-10-09 16:18:06,413 - INFO - Training Random Forest with tuning...


  RandomForest | RMSE:   2.25% | Dir Acc:  54.4%
  XGBoost      | RMSE:   2.16% | Dir Acc:  54.4%
  MLP          | RMSE:   2.69% | Dir Acc:  49.4%
  Lasso        | RMSE:   2.02% | Dir Acc:  48.1%
  Ridge        | RMSE:   3.58% | Dir Acc:  48.1%
  ARIMA        | RMSE:   2.02% | Dir Acc:  48.1%
  → Best: XGBoost (Dir Acc: 54.4%)

[18/24] Training models for PGJ...
----------------------------------------------------------------------


2025-10-09 16:23:41,472 - INFO - Best RF params: {'n_estimators': 200, 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_depth': 15}
2025-10-09 16:23:41,576 - INFO - Training XGBoost with tuning...
2025-10-09 16:28:56,687 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.05}
2025-10-09 16:28:56,826 - INFO - Training MLP with tuning...
2025-10-09 16:29:00,302 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.0001}
2025-10-09 16:29:00,455 - INFO - Training Lasso with tuning...
2025-10-09 16:29:06,979 - INFO - Best Lasso alpha: 1.0
2025-10-09 16:29:07,165 - INFO - Training Ridge with tuning...
2025-10-09 16:29:07,637 - INFO - Best Ridge alpha: 100.0
2025-10-09 16:29:07,776 - INFO - Training ARIMA...
2025-10-09 16:29:08,320 - INFO - PGJ: Selected Lasso (Dir Acc=55.7%, RMSE=1.41%)
2025-10-09 16:29:08,323 - INFO - Training Random Forest with tuning...


  RandomForest | RMSE:   2.11% | Dir Acc:  41.8%
  XGBoost      | RMSE:   2.02% | Dir Acc:  53.2%
  MLP          | RMSE:   2.40% | Dir Acc:  44.3%
  Lasso        | RMSE:   1.41% | Dir Acc:  55.7%
  Ridge        | RMSE:   2.28% | Dir Acc:  49.4%
  ARIMA        | RMSE:   1.41% | Dir Acc:  55.7%
  → Best: Lasso (Dir Acc: 55.7%)

[19/24] Training models for RSP...
----------------------------------------------------------------------


2025-10-09 16:34:35,834 - INFO - Best RF params: {'n_estimators': 200, 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_depth': 15}
2025-10-09 16:34:36,052 - INFO - Training XGBoost with tuning...
2025-10-09 16:39:19,714 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.05}
2025-10-09 16:39:19,827 - INFO - Training MLP with tuning...
2025-10-09 16:39:24,633 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.001}
2025-10-09 16:39:24,730 - INFO - Training Lasso with tuning...
2025-10-09 16:39:29,292 - INFO - Best Lasso alpha: 1.0
2025-10-09 16:39:29,407 - INFO - Training Ridge with tuning...
2025-10-09 16:39:29,757 - INFO - Best Ridge alpha: 100.0
2025-10-09 16:39:29,874 - INFO - Training ARIMA...
2025-10-09 16:39:30,154 - INFO - RSP: Selected MLP (Dir Acc=64.6%, RMSE=1.35%)
2025-10-09 16:39:30,154 - INFO - Training Random Forest with tuning...


  RandomForest | RMSE:   1.50% | Dir Acc:  48.1%
  XGBoost      | RMSE:   0.90% | Dir Acc:  45.6%
  MLP          | RMSE:   1.35% | Dir Acc:  64.6%
  Lasso        | RMSE:   0.67% | Dir Acc:  53.2%
  Ridge        | RMSE:   1.61% | Dir Acc:  54.4%
  ARIMA        | RMSE:   0.67% | Dir Acc:  53.2%
  → Best: MLP (Dir Acc: 64.6%)

[20/24] Training models for SPY...
----------------------------------------------------------------------


2025-10-09 16:45:04,801 - INFO - Best RF params: {'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': 25}
2025-10-09 16:45:05,061 - INFO - Training XGBoost with tuning...
2025-10-09 16:49:35,362 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.05}
2025-10-09 16:49:35,475 - INFO - Training MLP with tuning...
2025-10-09 16:49:40,167 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.001}
2025-10-09 16:49:40,238 - INFO - Training Lasso with tuning...
2025-10-09 16:49:45,193 - INFO - Best Lasso alpha: 0.1
2025-10-09 16:49:45,299 - INFO - Training Ridge with tuning...
2025-10-09 16:49:45,593 - INFO - Best Ridge alpha: 100.0
2025-10-09 16:49:45,716 - INFO - Training ARIMA...
2025-10-09 16:49:46,115 - INFO - SPY: Selected ARIMA (Dir Acc=57.0%, RMSE=0.57%)
2025-10-09 16:49:46,115 - INFO - Training Random Forest with tuning...


  RandomForest | RMSE:   1.36% | Dir Acc:  51.9%
  XGBoost      | RMSE:   2.66% | Dir Acc:  45.6%
  MLP          | RMSE:   2.26% | Dir Acc:  44.3%
  Lasso        | RMSE:   0.58% | Dir Acc:  54.4%
  Ridge        | RMSE:   2.85% | Dir Acc:  41.8%
  ARIMA        | RMSE:   0.57% | Dir Acc:  57.0%
  → Best: ARIMA (Dir Acc: 57.0%)

[21/24] Training models for TSLA...
----------------------------------------------------------------------


2025-10-09 16:54:45,113 - INFO - Best RF params: {'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 25}
2025-10-09 16:54:45,236 - INFO - Training XGBoost with tuning...
2025-10-09 16:59:31,585 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.05}
2025-10-09 16:59:31,695 - INFO - Training MLP with tuning...
2025-10-09 16:59:33,777 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.0001}
2025-10-09 16:59:33,810 - INFO - Training Lasso with tuning...
2025-10-09 16:59:38,150 - INFO - Best Lasso alpha: 10.0
2025-10-09 16:59:38,268 - INFO - Training Ridge with tuning...
2025-10-09 16:59:38,536 - INFO - Best Ridge alpha: 100.0
2025-10-09 16:59:38,594 - INFO - Training ARIMA...
2025-10-09 16:59:38,844 - INFO - TSLA: Selected Lasso (Dir Acc=54.4%, RMSE=3.00%)
2025-10-09 16:59:38,846 - INFO - Training Random Forest with tuning...


  RandomForest | RMSE:   3.04% | Dir Acc:  51.9%
  XGBoost      | RMSE:   3.03% | Dir Acc:  54.4%
  MLP          | RMSE:   4.00% | Dir Acc:  48.1%
  Lasso        | RMSE:   3.00% | Dir Acc:  54.4%
  Ridge        | RMSE:   5.19% | Dir Acc:  48.1%
  ARIMA        | RMSE:   3.00% | Dir Acc:  54.4%
  → Best: Lasso (Dir Acc: 54.4%)

[22/24] Training models for VGK...
----------------------------------------------------------------------


2025-10-09 17:04:31,997 - INFO - Best RF params: {'n_estimators': 200, 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_depth': 15}
2025-10-09 17:04:32,213 - INFO - Training XGBoost with tuning...
2025-10-09 17:10:16,936 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.05}
2025-10-09 17:10:17,076 - INFO - Training MLP with tuning...
2025-10-09 17:10:20,642 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.0001}
2025-10-09 17:10:20,780 - INFO - Training Lasso with tuning...
2025-10-09 17:10:25,885 - INFO - Best Lasso alpha: 1.0
2025-10-09 17:10:26,015 - INFO - Training Ridge with tuning...
2025-10-09 17:10:26,371 - INFO - Best Ridge alpha: 100.0
2025-10-09 17:10:26,501 - INFO - Training ARIMA...
2025-10-09 17:10:27,285 - INFO - VGK: Selected ARIMA (Dir Acc=55.7%, RMSE=0.78%)
2025-10-09 17:10:27,287 - INFO - Training Random Forest with tuning...


  RandomForest | RMSE:   1.00% | Dir Acc:  53.2%
  XGBoost      | RMSE:   1.36% | Dir Acc:  45.6%
  MLP          | RMSE:   1.92% | Dir Acc:  48.1%
  Lasso        | RMSE:   0.78% | Dir Acc:  55.7%
  Ridge        | RMSE:   2.39% | Dir Acc:  45.6%
  ARIMA        | RMSE:   0.78% | Dir Acc:  55.7%
  → Best: ARIMA (Dir Acc: 55.7%)

[23/24] Training models for XPP...
----------------------------------------------------------------------


2025-10-09 17:15:57,222 - INFO - Best RF params: {'n_estimators': 200, 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_depth': 25}
2025-10-09 17:15:57,333 - INFO - Training XGBoost with tuning...
2025-10-09 18:01:04,550 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.05}
2025-10-09 18:01:04,673 - INFO - Training MLP with tuning...
2025-10-09 18:01:08,070 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.001}
2025-10-09 18:01:08,154 - INFO - Training Lasso with tuning...
2025-10-09 18:01:11,773 - INFO - Best Lasso alpha: 10.0
2025-10-09 18:01:11,875 - INFO - Training Ridge with tuning...
2025-10-09 18:01:12,188 - INFO - Best Ridge alpha: 100.0
2025-10-09 18:01:12,261 - INFO - Training ARIMA...
2025-10-09 18:01:12,674 - INFO - XPP: Selected Ridge (Dir Acc=54.4%, RMSE=4.59%)
2025-10-09 18:01:12,674 - INFO - Training Random Forest with tuning...


  RandomForest | RMSE:   3.60% | Dir Acc:  44.3%
  XGBoost      | RMSE:   3.11% | Dir Acc:  43.0%
  MLP          | RMSE:   2.75% | Dir Acc:  45.6%
  Lasso        | RMSE:   2.25% | Dir Acc:  51.9%
  Ridge        | RMSE:   4.59% | Dir Acc:  54.4%
  ARIMA        | RMSE:   2.25% | Dir Acc:  51.9%
  → Best: Ridge (Dir Acc: 54.4%)

[24/24] Training models for YINN...
----------------------------------------------------------------------


2025-10-09 18:07:36,776 - INFO - Best RF params: {'n_estimators': 100, 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_depth': 15}
2025-10-09 18:07:37,042 - INFO - Training XGBoost with tuning...
2025-10-09 18:13:08,102 - INFO - Best XGB params: {'subsample': 0.8, 'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.1}
2025-10-09 18:13:08,223 - INFO - Training MLP with tuning...
2025-10-09 18:13:10,299 - INFO - Best MLP params: {'learning_rate_init': 0.001, 'hidden_layer_sizes': (100,), 'alpha': 0.0001}
2025-10-09 18:13:10,406 - INFO - Training Lasso with tuning...
2025-10-09 18:13:14,549 - INFO - Best Lasso alpha: 10.0
2025-10-09 18:13:14,625 - INFO - Training Ridge with tuning...
2025-10-09 18:13:14,948 - INFO - Best Ridge alpha: 100.0
2025-10-09 18:13:15,011 - INFO - Training ARIMA...
2025-10-09 18:13:15,612 - INFO - YINN: Selected Lasso (Dir Acc=51.9%, RMSE=3.38%)


  RandomForest | RMSE:   6.29% | Dir Acc:  43.0%
  XGBoost      | RMSE:   4.95% | Dir Acc:  38.0%
  MLP          | RMSE:   4.34% | Dir Acc:  45.6%
  Lasso        | RMSE:   3.38% | Dir Acc:  51.9%
  Ridge        | RMSE:   6.24% | Dir Acc:  51.9%
  ARIMA        | RMSE:   3.39% | Dir Acc:  51.9%
  → Best: Lasso (Dir Acc: 51.9%)

ENHANCED TRAINING COMPLETE!


In [10]:
import pandas as pd

# Report cell: summarizes the per-symbol / per-model metrics gathered during
# enhanced training. It relies on names created by earlier cells:
# `enhanced_summary`, `db_config`, `enhanced_trainer` and the `mysql.connector`
# import. The code below reads the columns 'symbol', 'model', 'dir_acc',
# 'rmse' and 'has_sentiment' from the summary records.
summary_df = pd.DataFrame(enhanced_summary)

if not summary_df.empty:
    print("\n" + "="*70)
    print("ENHANCED MODEL PERFORMANCE SUMMARY")
    print("="*70)
    
    # Average directional accuracy by model type
    print("\nDirectional Accuracy by Model Type:")
    print("-"*70)
    dir_acc_avg = summary_df.groupby('model')['dir_acc'].agg(['mean', 'std', 'min', 'max'])
    print(dir_acc_avg.round(2))
    
    # Average RMSE by model type
    print("\nRMSE (%) by Model Type:")
    print("-"*70)
    rmse_avg = summary_df.groupby('model')['rmse'].agg(['mean', 'std', 'min', 'max'])
    print(rmse_avg.round(3))
    
    # Best performers: keep each symbol's highest-dir_acc row, then take the top 5
    print("\nTop 5 Stocks by Directional Accuracy (Best Model):")
    print("-"*70)
    best_per_stock = summary_df.loc[summary_df.groupby('symbol')['dir_acc'].idxmax()]
    top_stocks = best_per_stock.nlargest(5, 'dir_acc')[['symbol', 'model', 'dir_acc', 'rmse']]
    for _, row in top_stocks.iterrows():
        print(f"{row['symbol']:6s} | {row['model']:12s} | "
              f"Dir Acc: {row['dir_acc']:5.1f}% | RMSE: {row['rmse']:6.2f}%")
    
    # Sentiment impact
    print("\nSentiment Impact:")
    print("-"*70)
    sentiment_comparison = summary_df.groupby('has_sentiment').agg({
        'dir_acc': 'mean',
        'rmse': 'mean'
    }).round(2)
    
    # Safely rename index based on actual values.
    # With two groups, groupby sorts the keys, so False comes before True.
    if len(sentiment_comparison) == 2:
        sentiment_comparison.index = ['No Sentiment', 'With Sentiment']
    elif len(sentiment_comparison) == 1:
        has_sent = sentiment_comparison.index[0]
        sentiment_comparison.index = ['With Sentiment' if has_sent else 'No Sentiment (All stocks)']
    
    print(sentiment_comparison)
    
    # Show distribution
    sent_counts = summary_df.groupby('has_sentiment')['symbol'].nunique()
    print(f"\nStocks with sentiment data: {sent_counts.get(True, 0)}")
    print(f"Stocks without sentiment data: {sent_counts.get(False, 0)}")
    
    # Model selection distribution
    selection_query = """
        SELECT symbol, selected_model_type, rmse, notes
        FROM model_selection
        WHERE selected_model_type LIKE '%Enhanced%'
        ORDER BY rmse ASC
    """
    conn = mysql.connector.connect(**db_config)
    try:
        selection_df = pd.read_sql(selection_query, conn)
    finally:
        # Release the connection even if the query raises, so a failed
        # report run does not leave an open MySQL session behind.
        conn.close()
    
    if not selection_df.empty:
        print("\nEnhanced Model Selection:")
        print("-"*70)
        model_counts = selection_df['selected_model_type'].value_counts()
        for model, count in model_counts.items():
            print(f"{model:30s}: {count} stocks")
    
    # Overall improvement (averaged over every symbol/model row in the summary)
    print("\n" + "="*70)
    print("KEY IMPROVEMENTS:")
    print("="*70)
    avg_dir_acc = summary_df['dir_acc'].mean()
    avg_rmse = summary_df['rmse'].mean()
    print(f"Average Directional Accuracy: {avg_dir_acc:.1f}% (Target: >55%)")
    print(f"Average RMSE: {avg_rmse:.2f}%")
    print(f"Models with >55% Dir Acc: {(summary_df['dir_acc'] > 55).sum()} / {len(summary_df)}")
    
    print("\n✓ All enhanced models trained!")
    print(f"✓ Model files saved in: {enhanced_trainer.model_save_dir}/")
    print("✓ Performance metrics saved to database")
    print("\nIf directional accuracy > 55%, ready for Week 4: Trading System!")
    print("If still < 55%, we may need more feature engineering or longer sentiment history.")
else:
    print("No training summary available")


ENHANCED MODEL PERFORMANCE SUMMARY

Directional Accuracy by Model Type:
----------------------------------------------------------------------
               mean   std    min    max
model                                  
ARIMA         54.27  4.89  43.04  62.03
Lasso         54.17  4.69  44.30  62.03
MLP           49.47  6.81  41.77  64.56
RandomForest  47.84  7.54  34.18  65.82
Ridge         49.05  6.49  32.91  62.03
XGBoost       47.42  5.87  34.18  60.76

RMSE (%) by Model Type:
----------------------------------------------------------------------
               mean    std    min     max
model                                    
ARIMA         1.704  0.753  0.566   3.385
Lasso         1.705  0.752  0.584   3.384
MLP           2.628  1.067  1.280   5.055
RandomForest  2.568  1.413  1.001   6.290
Ridge         4.044  2.447  1.600  11.696
XGBoost       2.474  1.229  0.902   5.764

Top 5 Stocks by Directional Accuracy (Best Model):
----------------------------------------------------