In [1]:
import numpy as np
import pandas as pd
import mysql.connector
from datetime import datetime
import logging
import joblib
import os
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import xgboost as xgb
from statsmodels.tsa.arima.model import ARIMA
import warnings
warnings.filterwarnings('ignore')

class ModelTrainer:
    def __init__(self, db_config, model_save_dir='trained_models'):
        self.db_config = db_config
        self.model_save_dir = model_save_dir
        self.setup_logging()
        self.create_model_directory()
        
    def setup_logging(self):
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler('model_training.log'),
                logging.StreamHandler()
            ]
        )
        self.logger = logging.getLogger(__name__)
    
    def create_model_directory(self):
        """Create directory to save trained models"""
        if not os.path.exists(self.model_save_dir):
            os.makedirs(self.model_save_dir)
            self.logger.info(f"Created model directory: {self.model_save_dir}")
    
    def connect_db(self):
        """Open a MySQL connection using ``self.db_config``.

        Returns:
            The connection object, or ``None`` when ``mysql.connector`` raises
            an error (the error is logged).
        """
        try:
            connection = mysql.connector.connect(**self.db_config)
        except mysql.connector.Error as err:
            self.logger.error(f"Database connection error: {err}")
            return None
        return connection
    
    def create_model_tables(self):
        """Ensure the ``model_performance`` and ``model_selection`` tables exist.

        Returns:
            True when both ``CREATE TABLE IF NOT EXISTS`` statements ran and
            were committed; False when no connection could be opened or MySQL
            reported an error (which is logged).
        """
        connection = self.connect_db()
        if not connection:
            return False

        db_cursor = connection.cursor()

        # Metrics recorded for each trained model
        performance_table = """
        CREATE TABLE IF NOT EXISTS model_performance (
            id INT AUTO_INCREMENT PRIMARY KEY,
            symbol VARCHAR(10),
            model_type VARCHAR(50),
            rmse DECIMAL(10,4),
            mae DECIMAL(10,4),
            r2_score DECIMAL(10,6),
            directional_accuracy DECIMAL(5,2),
            train_samples INT,
            test_samples INT,
            feature_count INT,
            model_path VARCHAR(255),
            trained_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            INDEX idx_symbol (symbol),
            INDEX idx_model_type (model_type)
        )
        """

        # The model chosen for each stock (symbol is UNIQUE: one row per stock)
        selection_table = """
        CREATE TABLE IF NOT EXISTS model_selection (
            id INT AUTO_INCREMENT PRIMARY KEY,
            symbol VARCHAR(10) UNIQUE,
            selected_model_type VARCHAR(50),
            model_path VARCHAR(255),
            rmse DECIMAL(10,4),
            selection_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            notes TEXT
        )
        """

        try:
            for ddl in (performance_table, selection_table):
                db_cursor.execute(ddl)
            connection.commit()
        except mysql.connector.Error as err:
            self.logger.error(f"Error creating model tables: {err}")
            return False
        else:
            self.logger.info("Model tables created successfully")
            return True
        finally:
            # Runs on every path, including the returns above.
            db_cursor.close()
            connection.close()
    
    def calculate_directional_accuracy(self, y_true, y_pred):
        """Return the percentage of steps whose direction was predicted correctly.

        Direction is derived from consecutive differences inside each series
        (``np.diff(...) > 0``), so a flat or falling step counts as "not up".
        Both inputs are flattened first: without that, a column vector of
        shape ``(n, 1)`` would be differenced along its length-1 axis and the
        result would silently be 0%.

        Args:
            y_true: Observed values (any array-like; flattened to 1-D).
            y_pred: Predicted values with the same number of elements.

        Returns:
            Accuracy between 0 and 100 as a plain ``float``; ``0.0`` when
            ``y_true`` holds fewer than two values.

        Raises:
            ValueError: If the inputs contain a different number of values.
        """
        actual = np.asarray(y_true).ravel()
        if actual.size <= 1:
            return 0.0

        predicted = np.asarray(y_pred).ravel()
        if predicted.size != actual.size:
            raise ValueError(
                f"y_true and y_pred differ in length: {actual.size} != {predicted.size}"
            )

        actual_up = np.diff(actual) > 0
        predicted_up = np.diff(predicted) > 0

        # Mean of the boolean match mask == fraction of correctly called steps.
        return float(np.mean(actual_up == predicted_up) * 100)
    
    def train_random_forest(self, X_train, y_train, X_test, y_test):
        """Fit a RandomForestRegressor and score it on the test split.

        Returns:
            Tuple ``(fitted_model, test_predictions, metrics)`` where
            ``metrics`` is the dict produced by ``calculate_metrics``.
        """
        self.logger.info("Training Random Forest...")

        hyperparams = {
            'n_estimators': 100,
            'max_depth': 20,
            'min_samples_split': 5,
            'min_samples_leaf': 2,
            'random_state': 42,  # fixed seed
            'n_jobs': -1,
        }
        forest = RandomForestRegressor(**hyperparams)
        forest.fit(X_train, y_train)

        predictions = forest.predict(X_test)
        scores = self.calculate_metrics(y_test, predictions)
        return forest, predictions, scores
    
    def train_xgboost(self, X_train, y_train, X_test, y_test):
        """Fit an XGBRegressor and score it on the test split.

        Returns:
            Tuple ``(fitted_model, test_predictions, metrics)`` where
            ``metrics`` is the dict produced by ``calculate_metrics``.
        """
        self.logger.info("Training XGBoost...")

        settings = dict(
            n_estimators=100,
            max_depth=10,
            learning_rate=0.1,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42,
            n_jobs=-1,
        )
        booster = xgb.XGBRegressor(**settings)
        booster.fit(X_train, y_train)

        predictions = booster.predict(X_test)
        return booster, predictions, self.calculate_metrics(y_test, predictions)
    
    def train_arima(self, y_train, y_test):
        """Train ARIMA model (uses only target values, not features)"""
        self.logger.info("Training ARIMA...")
        
        try:
            # ARIMA works on the time series directly
            model = ARIMA(y_train, order=(2, 1, 1))
            fitted_model = model.fit()
            
            # Forecast for test period
            forecast = fitted_model.forecast(steps=len(y_test))
            y_pred = np.array(forecast)
            
            metrics = self.calculate_metrics(y_test, y_pred)
            
            return fitted_model, y_pred, metrics
        except Exception as e:
            self.logger.error(f"ARIMA training failed: {e}")
            return None, None, None
    
    def train_mlp(self, X_train, y_train, X_test, y_test):
        """Fit an MLPRegressor (two hidden layers: 100 and 50 units) and score it.

        Returns:
            Tuple ``(fitted_model, test_predictions, metrics)`` where
            ``metrics`` is the dict produced by ``calculate_metrics``.
        """
        self.logger.info("Training MLP...")

        network_config = {
            'hidden_layer_sizes': (100, 50),
            'activation': 'relu',
            'solver': 'adam',
            'max_iter': 500,
            'random_state': 42,
            # Early stopping monitors a 10% validation fraction of the training data.
            'early_stopping': True,
            'validation_fraction': 0.1,
        }
        network = MLPRegressor(**network_config)
        network.fit(X_train, y_train)

        predictions = network.predict(X_test)
        scores = self.calculate_metrics(y_test, predictions)
        return network, predictions, scores
    
    def train_lasso(self, X_train, y_train, X_test, y_test):
        """Fit a Lasso regression (alpha=1.0) and score it on the test split.

        Returns:
            Tuple ``(fitted_model, test_predictions, metrics)`` where
            ``metrics`` is the dict produced by ``calculate_metrics``.
        """
        self.logger.info("Training Lasso...")

        regressor = Lasso(
            alpha=1.0,
            random_state=42,
            max_iter=5000,
        )
        regressor.fit(X_train, y_train)

        predictions = regressor.predict(X_test)
        return regressor, predictions, self.calculate_metrics(y_test, predictions)
    
    def calculate_metrics(self, y_true, y_pred):
        """Compute the evaluation metrics for one set of predictions.

        Returns:
            Dict with keys ``'rmse'``, ``'mae'``, ``'r2_score'`` and
            ``'directional_accuracy'``.
        """
        mse = mean_squared_error(y_true, y_pred)
        return {
            'rmse': np.sqrt(mse),  # root of the mean squared error
            'mae': mean_absolute_error(y_true, y_pred),
            'r2_score': r2_score(y_true, y_pred),
            'directional_accuracy': self.calculate_directional_accuracy(y_true, y_pred),
        }
    
    def save_model(self, model, symbol, model_type):
        """Write ``model`` to ``<model_save_dir>/<symbol>_<model_type>.pkl`` via joblib.

        Returns:
            The path of the file that was written.
        """
        destination = os.path.join(self.model_save_dir, f"{symbol}_{model_type}.pkl")
        joblib.dump(model, destination)
        return destination
    
    def save_performance_to_db(self, symbol, model_type, metrics, train_samples, 
                               test_samples, feature_count, model_path):
        """Save model performance metrics to database"""
        conn = self.connect_db()
        if not conn:
            return False
        
        cursor = conn.cursor()
        
        query = """
        INSERT INTO model_performance 
        (symbol, model_type, rmse, mae, r2_score, directional_accuracy,
         train_samples, test_samples, feature_count, model_path)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """
        
        values = (
            symbol,
            model_type,
            metrics['rmse'],
            metrics['mae'],
            metrics['r2_score'],
            metrics['directional_accuracy'],
            train_samples,
            test_samples,
            feature_count,
            model_path
        )
        
        try:
            cursor.execute(query, values)
            conn.commit()
            return True
        except mysql.connector.Error as e:
            self.logger.error(f"Error saving performance: {e}")
            return False
        finally:
            cursor.close()
            conn.close()
    
    def train_all_models(self, stock_data):
        """
        Train all 5 model types for a given stock
        Returns: dict of results for each model
        """
        symbol = stock_data['symbol']
        X_train = stock_data['X_train']
        X_test = stock_data['X_test']
        y_train = stock_data['y_train']
        y_test = stock_data['y_test']
        
        results = {}
        
        # 1. Random Forest
        try:
            model, y_pred, metrics = self.train_random_forest(X_train, y_train, X_test, y_test)
            if model:
                model_path = self.save_model(model, symbol, 'RandomForest')
                self.save_performance_to_db(
                    symbol, 'RandomForest', metrics,
                    len(X_train), len(X_test), X_train.shape[1], model_path
                )
                results['RandomForest'] = {'model': model, 'metrics': metrics, 'predictions': y_pred}
        except Exception as e:
            self.logger.error(f"Random Forest failed for {symbol}: {e}")
        
        # 2. XGBoost
        try:
            model, y_pred, metrics = self.train_xgboost(X_train, y_train, X_test, y_test)
            if model:
                model_path = self.save_model(model, symbol, 'XGBoost')
                self.save_performance_to_db(
                    symbol, 'XGBoost', metrics,
                    len(X_train), len(X_test), X_train.shape[1], model_path
                )
                results['XGBoost'] = {'model': model, 'metrics': metrics, 'predictions': y_pred}
        except Exception as e:
            self.logger.error(f"XGBoost failed for {symbol}: {e}")
        
        # 3. ARIMA
        try:
            model, y_pred, metrics = self.train_arima(y_train, y_test)
            if model:
                model_path = self.save_model(model, symbol, 'ARIMA')
                self.save_performance_to_db(
                    symbol, 'ARIMA', metrics,
                    len(y_train), len(y_test), 0, model_path
                )
                results['ARIMA'] = {'model': model, 'metrics': metrics, 'predictions': y_pred}
        except Exception as e:
            self.logger.error(f"ARIMA failed for {symbol}: {e}")
        
        # 4. MLP
        try:
            model, y_pred, metrics = self.train_mlp(X_train, y_train, X_test, y_test)
            if model:
                model_path = self.save_model(model, symbol, 'MLP')
                self.save_performance_to_db(
                    symbol, 'MLP', metrics,
                    len(X_train), len(X_test), X_train.shape[1], model_path
                )
                results['MLP'] = {'model': model, 'metrics': metrics, 'predictions': y_pred}
        except Exception as e:
            self.logger.error(f"MLP failed for {symbol}: {e}")
        
        # 5. Lasso
        try:
            model, y_pred, metrics = self.train_lasso(X_train, y_train, X_test, y_test)
            if model:
                model_path = self.save_model(model, symbol, 'Lasso')
                self.save_performance_to_db(
                    symbol, 'Lasso', metrics,
                    len(X_train), len(X_test), X_train.shape[1], model_path
                )
                results['Lasso'] = {'model': model, 'metrics': metrics, 'predictions': y_pred}
        except Exception as e:
            self.logger.error(f"Lasso failed for {symbol}: {e}")
        
        return results
    
    def select_best_model(self, symbol, results):
        """Select best model based on RMSE and save to database"""
        if not results:
            self.logger.warning(f"No valid models for {symbol}")
            return None
        
        best_model_type = None
        best_rmse = float('inf')
        best_model_path = None
        
        for model_type, data in results.items():
            rmse = data['metrics']['rmse']
            if rmse < best_rmse:
                best_rmse = rmse
                best_model_type = model_type
                best_model_path = f"{self.model_save_dir}/{symbol}_{model_type}.pkl"
        
        # Save selection to database
        conn = self.connect_db()
        if not conn:
            return None
        
        cursor = conn.cursor()
        
        query = """
        INSERT INTO model_selection (symbol, selected_model_type, model_path, rmse, notes)
        VALUES (%s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
            selected_model_type = VALUES(selected_model_type),
            model_path = VALUES(model_path),
            rmse = VALUES(rmse),
            selection_date = CURRENT_TIMESTAMP,
            notes = VALUES(notes)
        """
        
        notes = f"Best of {len(results)} models. Dir Acc: {results[best_model_type]['metrics']['directional_accuracy']:.1f}%"
        
        try:
            cursor.execute(query, (symbol, best_model_type, best_model_path, best_rmse, notes))
            conn.commit()
            self.logger.info(f"{symbol}: Selected {best_model_type} (RMSE={best_rmse:.2f})")
        except mysql.connector.Error as e:
            self.logger.error(f"Error saving model selection: {e}")
        finally:
            cursor.close()
            conn.close()
        
        return best_model_type

In [2]:
# Connection settings passed to mysql.connector.connect by ModelTrainer.
# NOTE(review): root user with an empty password - acceptable only for a local
# development database.
db_config = {
    'host': '127.0.0.1',
    'user': 'root',
    'password': '',
    'database': 'trading_system'
}

trainer = ModelTrainer(db_config)
tables_ready = trainer.create_model_tables()
print("✓ Model Trainer initialized")
# create_model_tables returns False on connection or SQL errors, so only
# report success when it actually succeeded.
if tables_ready:
    print("✓ Model performance tables created")
else:
    print("✗ Model performance tables could not be created (see model_training.log)")

2025-10-05 21:23:04,653 - INFO - Created model directory: trained_models
2025-10-05 21:23:04,814 - INFO - package: mysql.connector.plugins
2025-10-05 21:23:04,815 - INFO - plugin_name: caching_sha2_password
2025-10-05 21:23:04,816 - INFO - AUTHENTICATION_PLUGIN_CLASS: MySQLCachingSHA2PasswordAuthPlugin
2025-10-05 21:23:04,980 - INFO - Model tables created successfully


✓ Model Trainer initialized
✓ Model performance tables created


In [5]:
# Smoke test: train and compare all model types on a single stock.
print("Testing model training on AAPL...")
print("=" * 60)

# Load prepared data
# NOTE: pickle.load executes arbitrary code from the file; only load files
# produced by this project.
import pickle

with open('prepared_stock_data.pkl', 'rb') as f:
    all_stock_data = pickle.load(f)

print(f"✓ Loaded data for {len(all_stock_data)} stocks")

# Use the prepared data from feature engineering
test_symbol = 'AAPL'
if test_symbol not in all_stock_data:
    print(f"No data available for {test_symbol}")
else:
    print(f"Training all 5 models for {test_symbol}...")
    results = trainer.train_all_models(all_stock_data[test_symbol])

    print("\nModel Performance:")
    print("-" * 60)
    for model_type, data in results.items():
        metrics = data['metrics']
        rmse_part = f"RMSE: ${metrics['rmse']:8.2f}"
        mae_part = f"MAE: ${metrics['mae']:8.2f}"
        dir_part = f"Dir Acc: {metrics['directional_accuracy']:5.1f}%"
        print(f"{model_type:15s} | {rmse_part} | {mae_part} | {dir_part}")

    # Select best model
    best = trainer.select_best_model(test_symbol, results)
    print(f"\n✓ Best model for {test_symbol}: {best}")

2025-10-05 23:45:48,191 - INFO - Training Random Forest...


Testing model training on AAPL...
✓ Loaded data for 24 stocks
Training all 5 models for AAPL...


2025-10-05 23:45:56,394 - INFO - Training XGBoost...
2025-10-05 23:46:26,973 - INFO - Training ARIMA...
2025-10-05 23:46:27,531 - INFO - Training MLP...
2025-10-05 23:46:41,576 - INFO - Training Lasso...
2025-10-05 23:46:42,096 - INFO - AAPL: Selected RandomForest (RMSE=5.51)



Model Performance:
------------------------------------------------------------
RandomForest    | RMSE: $    5.51 | MAE: $    4.13 | Dir Acc:  48.7%
XGBoost         | RMSE: $    7.16 | MAE: $    5.07 | Dir Acc:  51.3%
ARIMA           | RMSE: $   30.46 | MAE: $   24.80 | Dir Acc:  52.6%
MLP             | RMSE: $    7.80 | MAE: $    6.17 | Dir Acc:  47.4%
Lasso           | RMSE: $    5.71 | MAE: $    4.46 | Dir Acc:  51.3%

✓ Best model for AAPL: RandomForest


In [6]:
# Train every model type for each stock, printing metrics as they arrive and
# collecting them in training_summary for the report cell.
print("\n" + "=" * 60)
print("Training models for all stocks...")
print("This will take 10-20 minutes depending on your CPU")
print("=" * 60 + "\n")

all_results = {}
training_summary = []

for idx, (symbol, stock_data) in enumerate(all_stock_data.items(), 1):
    print(f"\n[{idx}/{len(all_stock_data)}] Training models for {symbol}...")
    print("-" * 60)

    try:
        results = trainer.train_all_models(stock_data)

        if not results:
            print(f"  ✗ No models succeeded for {symbol}")
            continue

        all_results[symbol] = results

        # Print performance
        for model_type, data in results.items():
            metrics = data['metrics']
            print(
                f"  {model_type:12s} | RMSE: ${metrics['rmse']:7.2f} | "
                f"Dir Acc: {metrics['directional_accuracy']:5.1f}%"
            )

            training_summary.append(
                dict(
                    symbol=symbol,
                    model=model_type,
                    rmse=metrics['rmse'],
                    dir_acc=metrics['directional_accuracy'],
                )
            )

        # Select best model
        best = trainer.select_best_model(symbol, results)
        print(f"  → Best: {best}")

    except Exception as e:
        # Keep going with the next stock; only the start of the message is shown.
        print(f"  ✗ Training failed: {str(e)[:50]}")

print("\n" + "=" * 60)
print("TRAINING COMPLETE!")
print("=" * 60)


2025-10-05 23:51:30,029 - INFO - Training Random Forest...



Training models for all stocks...
This will take 10-20 minutes depending on your CPU


[1/24] Training models for AAPL...
------------------------------------------------------------


2025-10-05 23:51:37,550 - INFO - Training XGBoost...
2025-10-05 23:52:04,329 - INFO - Training ARIMA...
2025-10-05 23:52:04,656 - INFO - Training MLP...
2025-10-05 23:52:17,413 - INFO - Training Lasso...
2025-10-05 23:52:17,898 - INFO - AAPL: Selected RandomForest (RMSE=5.51)
2025-10-05 23:52:17,898 - INFO - Training Random Forest...


  RandomForest | RMSE: $   5.51 | Dir Acc:  48.7%
  XGBoost      | RMSE: $   7.16 | Dir Acc:  51.3%
  ARIMA        | RMSE: $  30.46 | Dir Acc:  52.6%
  MLP          | RMSE: $   7.80 | Dir Acc:  47.4%
  Lasso        | RMSE: $   5.71 | Dir Acc:  51.3%
  → Best: RandomForest

[2/24] Training models for AMD...
------------------------------------------------------------


2025-10-05 23:52:25,120 - INFO - Training XGBoost...
2025-10-05 23:52:51,185 - INFO - Training ARIMA...
2025-10-05 23:52:51,457 - INFO - Training MLP...
2025-10-05 23:53:00,801 - INFO - Training Lasso...
2025-10-05 23:53:01,371 - INFO - AMD: Selected Lasso (RMSE=5.09)
2025-10-05 23:53:01,382 - INFO - Training Random Forest...


  RandomForest | RMSE: $   9.75 | Dir Acc:  52.6%
  XGBoost      | RMSE: $  10.44 | Dir Acc:  51.3%
  ARIMA        | RMSE: $  39.05 | Dir Acc:  57.7%
  MLP          | RMSE: $  10.96 | Dir Acc:  51.3%
  Lasso        | RMSE: $   5.09 | Dir Acc:  52.6%
  → Best: Lasso

[3/24] Training models for AMZN...
------------------------------------------------------------


2025-10-05 23:53:10,663 - INFO - Training XGBoost...
2025-10-05 23:53:42,813 - INFO - Training ARIMA...
2025-10-05 23:53:43,150 - INFO - Training MLP...
2025-10-05 23:53:47,510 - INFO - Training Lasso...
2025-10-05 23:53:48,075 - INFO - AMZN: Selected XGBoost (RMSE=4.14)
2025-10-05 23:53:48,079 - INFO - Training Random Forest...


  RandomForest | RMSE: $   5.61 | Dir Acc:  46.2%
  XGBoost      | RMSE: $   4.14 | Dir Acc:  34.6%
  ARIMA        | RMSE: $  12.84 | Dir Acc:  46.2%
  MLP          | RMSE: $  12.65 | Dir Acc:  38.5%
  Lasso        | RMSE: $   7.99 | Dir Acc:  47.4%
  → Best: XGBoost

[4/24] Training models for BLK...
------------------------------------------------------------


2025-10-05 23:53:55,525 - INFO - Training XGBoost...
2025-10-05 23:54:19,267 - INFO - Training ARIMA...
2025-10-05 23:54:19,721 - INFO - Training MLP...
2025-10-05 23:54:39,660 - INFO - Training Lasso...
2025-10-05 23:54:40,353 - INFO - BLK: Selected Lasso (RMSE=18.06)
2025-10-05 23:54:40,353 - INFO - Training Random Forest...


  RandomForest | RMSE: $  86.63 | Dir Acc:  48.7%
  XGBoost      | RMSE: $  85.87 | Dir Acc:  47.4%
  ARIMA        | RMSE: $ 119.34 | Dir Acc:  46.2%
  MLP          | RMSE: $  31.05 | Dir Acc:  44.9%
  Lasso        | RMSE: $  18.06 | Dir Acc:  51.3%
  → Best: Lasso

[5/24] Training models for CHN...
------------------------------------------------------------


2025-10-05 23:54:47,664 - INFO - Training XGBoost...
2025-10-05 23:55:11,492 - INFO - Training ARIMA...
2025-10-05 23:55:11,775 - INFO - Training MLP...
2025-10-05 23:55:17,541 - INFO - Training Lasso...
2025-10-05 23:55:17,815 - INFO - CHN: Selected MLP (RMSE=1.03)
2025-10-05 23:55:17,820 - INFO - Training Random Forest...


  RandomForest | RMSE: $   3.38 | Dir Acc:  39.7%
  XGBoost      | RMSE: $   3.35 | Dir Acc:  37.2%
  ARIMA        | RMSE: $   3.24 | Dir Acc:  44.9%
  MLP          | RMSE: $   1.03 | Dir Acc:  65.4%
  Lasso        | RMSE: $   5.06 | Dir Acc:  37.2%
  → Best: MLP

[6/24] Training models for ERO...
------------------------------------------------------------


2025-10-05 23:55:25,474 - INFO - Training XGBoost...
2025-10-05 23:55:47,366 - INFO - Training ARIMA...
2025-10-05 23:55:47,648 - INFO - Training MLP...
2025-10-05 23:55:54,183 - INFO - Training Lasso...
2025-10-05 23:55:54,480 - INFO - ERO: Selected XGBoost (RMSE=0.87)
2025-10-05 23:55:54,480 - INFO - Training Random Forest...


  RandomForest | RMSE: $   0.96 | Dir Acc:  52.6%
  XGBoost      | RMSE: $   0.87 | Dir Acc:  41.0%
  ARIMA        | RMSE: $   1.90 | Dir Acc:  38.5%
  MLP          | RMSE: $   1.55 | Dir Acc:  38.5%
  Lasso        | RMSE: $   2.26 | Dir Acc:  56.4%
  → Best: XGBoost

[7/24] Training models for FXP...
------------------------------------------------------------


2025-10-05 23:56:01,440 - INFO - Training XGBoost...
2025-10-05 23:56:29,446 - INFO - Training ARIMA...
2025-10-05 23:56:29,799 - INFO - Training MLP...
2025-10-05 23:56:42,909 - INFO - Training Lasso...


  RandomForest | RMSE: $   1.90 | Dir Acc:  53.8%
  XGBoost      | RMSE: $   2.14 | Dir Acc:  61.5%
  ARIMA        | RMSE: $   1.01 | Dir Acc:  57.7%
  MLP          | RMSE: $   0.74 | Dir Acc:  50.0%
  Lasso        | RMSE: $   4.90 | Dir Acc:  52.6%


2025-10-05 23:56:43,505 - INFO - FXP: Selected MLP (RMSE=0.74)
2025-10-05 23:56:43,525 - INFO - Training Random Forest...


  → Best: MLP

[8/24] Training models for GOOGL...
------------------------------------------------------------


2025-10-05 23:56:51,708 - INFO - Training XGBoost...
2025-10-05 23:57:27,028 - INFO - Training ARIMA...
2025-10-05 23:57:27,290 - INFO - Training MLP...
2025-10-05 23:57:31,590 - INFO - Training Lasso...
2025-10-05 23:57:31,988 - INFO - GOOGL: Selected MLP (RMSE=7.98)
2025-10-05 23:57:31,992 - INFO - Training Random Forest...


  RandomForest | RMSE: $  26.90 | Dir Acc:  57.7%
  XGBoost      | RMSE: $  29.95 | Dir Acc:  65.4%
  ARIMA        | RMSE: $  39.21 | Dir Acc:  46.2%
  MLP          | RMSE: $   7.98 | Dir Acc:  56.4%
  Lasso        | RMSE: $  19.42 | Dir Acc:  50.0%
  → Best: MLP

[9/24] Training models for GXC...
------------------------------------------------------------


2025-10-05 23:57:42,386 - INFO - Training XGBoost...
2025-10-05 23:58:14,200 - INFO - Training ARIMA...
2025-10-05 23:58:14,446 - INFO - Training MLP...
2025-10-05 23:58:30,405 - INFO - Training Lasso...
2025-10-05 23:58:30,777 - INFO - GXC: Selected MLP (RMSE=7.27)
2025-10-05 23:58:30,780 - INFO - Training Random Forest...


  RandomForest | RMSE: $  11.19 | Dir Acc:  51.3%
  XGBoost      | RMSE: $  11.79 | Dir Acc:  51.3%
  ARIMA        | RMSE: $   9.58 | Dir Acc:  57.7%
  MLP          | RMSE: $   7.27 | Dir Acc:  55.1%
  Lasso        | RMSE: $  10.11 | Dir Acc:  51.3%
  → Best: MLP

[10/24] Training models for JPM...
------------------------------------------------------------


2025-10-05 23:58:39,973 - INFO - Training XGBoost...
2025-10-05 23:59:09,327 - INFO - Training ARIMA...
2025-10-05 23:59:09,665 - INFO - Training MLP...
2025-10-05 23:59:17,864 - INFO - Training Lasso...
2025-10-05 23:59:18,403 - INFO - JPM: Selected Lasso (RMSE=13.59)
2025-10-05 23:59:18,403 - INFO - Training Random Forest...


  RandomForest | RMSE: $  32.86 | Dir Acc:  50.0%
  XGBoost      | RMSE: $  35.60 | Dir Acc:  52.6%
  ARIMA        | RMSE: $  30.26 | Dir Acc:  53.8%
  MLP          | RMSE: $  14.32 | Dir Acc:  51.3%
  Lasso        | RMSE: $  13.59 | Dir Acc:  52.6%
  → Best: Lasso

[11/24] Training models for KR...
------------------------------------------------------------


2025-10-05 23:59:25,579 - INFO - Training XGBoost...
2025-10-05 23:59:53,085 - INFO - Training ARIMA...
2025-10-05 23:59:53,539 - INFO - Training MLP...
2025-10-06 00:00:01,503 - INFO - Training Lasso...
2025-10-06 00:00:01,834 - INFO - KR: Selected MLP (RMSE=1.86)
2025-10-06 00:00:01,834 - INFO - Training Random Forest...


  RandomForest | RMSE: $   2.36 | Dir Acc:  42.3%
  XGBoost      | RMSE: $   3.03 | Dir Acc:  46.2%
  ARIMA        | RMSE: $   5.20 | Dir Acc:  51.3%
  MLP          | RMSE: $   1.86 | Dir Acc:  48.7%
  Lasso        | RMSE: $   6.64 | Dir Acc:  57.7%
  → Best: MLP

[12/24] Training models for MDT...
------------------------------------------------------------


2025-10-06 00:00:09,558 - INFO - Training XGBoost...
2025-10-06 00:00:39,424 - INFO - Training ARIMA...
2025-10-06 00:00:39,767 - INFO - Training MLP...
2025-10-06 00:00:48,363 - INFO - Training Lasso...
2025-10-06 00:00:48,653 - INFO - MDT: Selected MLP (RMSE=2.18)
2025-10-06 00:00:48,653 - INFO - Training Random Forest...


  RandomForest | RMSE: $   3.49 | Dir Acc:  44.9%
  XGBoost      | RMSE: $   3.65 | Dir Acc:  52.6%
  ARIMA        | RMSE: $   4.90 | Dir Acc:  47.4%
  MLP          | RMSE: $   2.18 | Dir Acc:  47.4%
  Lasso        | RMSE: $   7.83 | Dir Acc:  38.5%
  → Best: MLP

[13/24] Training models for META...
------------------------------------------------------------


2025-10-06 00:00:56,163 - INFO - Training XGBoost...
2025-10-06 00:01:25,404 - INFO - Training ARIMA...
2025-10-06 00:01:25,803 - INFO - Training MLP...
2025-10-06 00:01:49,870 - INFO - Training Lasso...
2025-10-06 00:01:50,553 - INFO - META: Selected Lasso (RMSE=18.21)
2025-10-06 00:01:50,555 - INFO - Training Random Forest...


  RandomForest | RMSE: $  53.70 | Dir Acc:  42.3%
  XGBoost      | RMSE: $  59.73 | Dir Acc:  52.6%
  ARIMA        | RMSE: $  51.68 | Dir Acc:  57.7%
  MLP          | RMSE: $  23.91 | Dir Acc:  51.3%
  Lasso        | RMSE: $  18.21 | Dir Acc:  41.0%
  → Best: Lasso

[14/24] Training models for MSFT...
------------------------------------------------------------


2025-10-06 00:01:59,372 - INFO - Training XGBoost...
2025-10-06 00:02:31,711 - INFO - Training ARIMA...
2025-10-06 00:02:31,907 - INFO - Training MLP...
2025-10-06 00:02:32,687 - INFO - Training Lasso...
2025-10-06 00:02:32,821 - INFO - MSFT: Selected Lasso (RMSE=23.82)
2025-10-06 00:02:32,821 - INFO - Training Random Forest...


  RandomForest | RMSE: $  57.94 | Dir Acc:  44.9%
  XGBoost      | RMSE: $  59.85 | Dir Acc:  46.2%
  ARIMA        | RMSE: $  36.68 | Dir Acc:  43.6%
  MLP          | RMSE: $ 213.52 | Dir Acc:  53.8%
  Lasso        | RMSE: $  23.82 | Dir Acc:  44.9%
  → Best: Lasso

[15/24] Training models for NFLX...
------------------------------------------------------------


2025-10-06 00:02:39,289 - INFO - Training XGBoost...
2025-10-06 00:03:06,140 - INFO - Training ARIMA...
2025-10-06 00:03:06,461 - INFO - Training MLP...
2025-10-06 00:03:24,876 - INFO - Training Lasso...
2025-10-06 00:03:25,811 - INFO - NFLX: Selected Lasso (RMSE=19.62)
2025-10-06 00:03:25,814 - INFO - Training Random Forest...


  RandomForest | RMSE: $  70.40 | Dir Acc:  47.4%
  XGBoost      | RMSE: $  48.21 | Dir Acc:  52.6%
  ARIMA        | RMSE: $  40.38 | Dir Acc:  53.8%
  MLP          | RMSE: $  36.77 | Dir Acc:  50.0%
  Lasso        | RMSE: $  19.62 | Dir Acc:  43.6%
  → Best: Lasso

[16/24] Training models for NVDA...
------------------------------------------------------------


2025-10-06 00:03:33,920 - INFO - Training XGBoost...
2025-10-06 00:04:06,112 - INFO - Training ARIMA...
2025-10-06 00:04:06,603 - INFO - Training MLP...
2025-10-06 00:04:12,497 - INFO - Training Lasso...
2025-10-06 00:04:13,054 - INFO - NVDA: Selected MLP (RMSE=11.15)
2025-10-06 00:04:13,068 - INFO - Training Random Forest...


  RandomForest | RMSE: $  32.00 | Dir Acc:  48.7%
  XGBoost      | RMSE: $  35.91 | Dir Acc:  56.4%
  ARIMA        | RMSE: $  30.40 | Dir Acc:  41.0%
  MLP          | RMSE: $  11.15 | Dir Acc:  59.0%
  Lasso        | RMSE: $  13.10 | Dir Acc:  50.0%
  → Best: MLP

[17/24] Training models for OXY...
------------------------------------------------------------


2025-10-06 00:04:23,761 - INFO - Training XGBoost...
2025-10-06 00:04:49,030 - INFO - Training ARIMA...
2025-10-06 00:04:49,224 - INFO - Training MLP...
2025-10-06 00:04:58,034 - INFO - Training Lasso...
2025-10-06 00:04:58,609 - INFO - OXY: Selected ARIMA (RMSE=1.61)
2025-10-06 00:04:58,609 - INFO - Training Random Forest...


  RandomForest | RMSE: $   2.82 | Dir Acc:  52.6%
  XGBoost      | RMSE: $   2.34 | Dir Acc:  46.2%
  ARIMA        | RMSE: $   1.61 | Dir Acc:  48.7%
  MLP          | RMSE: $   1.69 | Dir Acc:  51.3%
  Lasso        | RMSE: $   2.83 | Dir Acc:  43.6%
  → Best: ARIMA

[18/24] Training models for PGJ...
------------------------------------------------------------


2025-10-06 00:05:06,034 - INFO - Training XGBoost...
2025-10-06 00:05:33,336 - INFO - Training ARIMA...
2025-10-06 00:05:33,842 - INFO - Training MLP...
2025-10-06 00:05:40,374 - INFO - Training Lasso...
2025-10-06 00:05:40,656 - INFO - PGJ: Selected MLP (RMSE=1.16)
2025-10-06 00:05:40,667 - INFO - Training Random Forest...


  RandomForest | RMSE: $   1.67 | Dir Acc:  44.9%
  XGBoost      | RMSE: $   1.77 | Dir Acc:  39.7%
  ARIMA        | RMSE: $   2.41 | Dir Acc:  44.9%
  MLP          | RMSE: $   1.16 | Dir Acc:  53.8%
  Lasso        | RMSE: $   5.21 | Dir Acc:  43.6%
  → Best: MLP

[19/24] Training models for RSP...
------------------------------------------------------------


2025-10-06 00:05:49,271 - INFO - Training XGBoost...
2025-10-06 00:06:18,418 - INFO - Training ARIMA...
2025-10-06 00:06:19,047 - INFO - Training MLP...
2025-10-06 00:06:28,517 - INFO - Training Lasso...
2025-10-06 00:06:28,673 - INFO - RSP: Selected MLP (RMSE=4.21)
2025-10-06 00:06:28,675 - INFO - Training Random Forest...


  RandomForest | RMSE: $   5.35 | Dir Acc:  53.8%
  XGBoost      | RMSE: $   5.69 | Dir Acc:  46.2%
  ARIMA        | RMSE: $   7.93 | Dir Acc:  53.8%
  MLP          | RMSE: $   4.21 | Dir Acc:  47.4%
  Lasso        | RMSE: $   7.87 | Dir Acc:  47.4%
  → Best: MLP

[20/24] Training models for SPY...
------------------------------------------------------------


2025-10-06 00:06:35,552 - INFO - Training XGBoost...
2025-10-06 00:07:11,200 - INFO - Training ARIMA...
2025-10-06 00:07:11,566 - INFO - Training MLP...
2025-10-06 00:07:16,906 - INFO - Training Lasso...
2025-10-06 00:07:17,054 - INFO - SPY: Selected Lasso (RMSE=12.03)
2025-10-06 00:07:17,056 - INFO - Training Random Forest...


  RandomForest | RMSE: $  42.64 | Dir Acc:  59.0%
  XGBoost      | RMSE: $  46.95 | Dir Acc:  51.3%
  ARIMA        | RMSE: $  41.74 | Dir Acc:  42.3%
  MLP          | RMSE: $  77.38 | Dir Acc:  42.3%
  Lasso        | RMSE: $  12.03 | Dir Acc:  55.1%
  → Best: Lasso

[21/24] Training models for TSLA...
------------------------------------------------------------


2025-10-06 00:07:24,613 - INFO - Training XGBoost...
2025-10-06 00:07:52,147 - INFO - Training ARIMA...
2025-10-06 00:07:52,439 - INFO - Training MLP...
2025-10-06 00:08:04,756 - INFO - Training Lasso...
2025-10-06 00:08:05,112 - INFO - TSLA: Selected Lasso (RMSE=12.40)
2025-10-06 00:08:05,114 - INFO - Training Random Forest...


  RandomForest | RMSE: $  43.16 | Dir Acc:  53.8%
  XGBoost      | RMSE: $  40.66 | Dir Acc:  51.3%
  ARIMA        | RMSE: $  48.72 | Dir Acc:  48.7%
  MLP          | RMSE: $  28.79 | Dir Acc:  56.4%
  Lasso        | RMSE: $  12.40 | Dir Acc:  52.6%
  → Best: Lasso

[22/24] Training models for VGK...
------------------------------------------------------------


2025-10-06 00:08:12,896 - INFO - Training XGBoost...
2025-10-06 00:08:43,855 - INFO - Training ARIMA...
2025-10-06 00:08:44,102 - INFO - Training MLP...
2025-10-06 00:08:48,253 - INFO - Training Lasso...
2025-10-06 00:08:48,344 - INFO - VGK: Selected ARIMA (RMSE=2.06)
2025-10-06 00:08:48,345 - INFO - Training Random Forest...


  RandomForest | RMSE: $   3.23 | Dir Acc:  51.3%
  XGBoost      | RMSE: $   3.85 | Dir Acc:  48.7%
  ARIMA        | RMSE: $   2.06 | Dir Acc:  43.6%
  MLP          | RMSE: $   4.61 | Dir Acc:  52.6%
  Lasso        | RMSE: $  11.33 | Dir Acc:  44.9%
  → Best: ARIMA

[23/24] Training models for XPP...
------------------------------------------------------------


2025-10-06 00:08:56,449 - INFO - Training XGBoost...
2025-10-06 00:09:37,207 - INFO - Training ARIMA...
2025-10-06 00:09:37,532 - INFO - Training MLP...
2025-10-06 00:09:46,234 - INFO - Training Lasso...
2025-10-06 00:09:46,423 - INFO - XPP: Selected MLP (RMSE=1.11)
2025-10-06 00:09:46,424 - INFO - Training Random Forest...


  RandomForest | RMSE: $   3.93 | Dir Acc:  47.4%
  XGBoost      | RMSE: $   3.75 | Dir Acc:  51.3%
  ARIMA        | RMSE: $   2.74 | Dir Acc:  44.9%
  MLP          | RMSE: $   1.11 | Dir Acc:  47.4%
  Lasso        | RMSE: $   8.62 | Dir Acc:  43.6%
  → Best: MLP

[24/24] Training models for YINN...
------------------------------------------------------------


2025-10-06 00:09:53,957 - INFO - Training XGBoost...
2025-10-06 00:10:37,244 - INFO - Training ARIMA...
2025-10-06 00:10:37,661 - INFO - Training MLP...
2025-10-06 00:10:49,217 - INFO - Training Lasso...
2025-10-06 00:10:49,339 - INFO - YINN: Selected MLP (RMSE=2.66)


  RandomForest | RMSE: $   5.11 | Dir Acc:  39.7%
  XGBoost      | RMSE: $   6.79 | Dir Acc:  37.2%
  ARIMA        | RMSE: $   6.40 | Dir Acc:  47.4%
  MLP          | RMSE: $   2.66 | Dir Acc:  51.3%
  Lasso        | RMSE: $   9.77 | Dir Acc:  48.7%
  → Best: MLP

TRAINING COMPLETE!


In [7]:
import pandas as pd

# Build a DataFrame from the per-model results collected during training.
# The report below reads the 'symbol', 'model', 'rmse' and 'dir_acc' columns.
summary_df = pd.DataFrame(training_summary)

if not summary_df.empty:
    print("\n" + "="*60)
    print("OVERALL TRAINING SUMMARY")
    print("="*60)

    # Best performers by model type: the single lowest-RMSE row per model.
    print("\nBest RMSE by Model Type:")
    print("-"*60)
    for model_type in summary_df['model'].unique():
        model_data = summary_df[summary_df['model'] == model_type]
        # idxmin() cannot select a row when every RMSE for this model type is
        # NaN, so skip the model type rather than abort the whole report.
        if model_data['rmse'].isna().all():
            continue
        best_row = model_data.loc[model_data['rmse'].idxmin()]
        print(f"{model_type:15s} | {best_row['symbol']:6s} | RMSE: ${best_row['rmse']:8.2f}")

    # Average performance: mean RMSE and mean directional accuracy per model,
    # taken over all symbols.
    print("\nAverage Performance by Model Type:")
    print("-"*60)
    avg_performance = summary_df.groupby('model').agg({
        'rmse': 'mean',
        'dir_acc': 'mean'
    }).round(2)
    print(avg_performance)

    # Best stocks: the ten symbols with the lowest RMSE averaged over all models.
    print("\nTop 10 Stocks by Average RMSE:")
    print("-"*60)
    stock_avg = summary_df.groupby('symbol')['rmse'].mean().sort_values().head(10)
    for symbol, rmse in stock_avg.items():
        print(f"{symbol:6s} | Avg RMSE: ${rmse:8.2f}")

    # Check model selection table: read back which model was stored per symbol.
    selection_query = """
        SELECT symbol, selected_model_type, rmse, notes
        FROM model_selection
        ORDER BY rmse ASC
    """
    conn = mysql.connector.connect(**db_config)
    try:
        selection_df = pd.read_sql(selection_query, conn)
    finally:
        # Always release the connection, even when the query itself fails.
        conn.close()

    # Count how many symbols ended up with each model type.
    print("\nSelected Models for Each Stock:")
    print("-"*60)
    model_counts = selection_df['selected_model_type'].value_counts()
    for model, count in model_counts.items():
        print(f"{model:15s}: {count} stocks")

    print("\n✓ All models trained and evaluated!")
    print(f"✓ Model files saved in: {trainer.model_save_dir}/")
    print("✓ Performance metrics saved to database")
    print("\nReady for Week 4: Trading System Implementation")
else:
    print("No training summary available")


OVERALL TRAINING SUMMARY

Best RMSE by Model Type:
------------------------------------------------------------
RandomForest    | ERO    | RMSE: $    0.96
XGBoost         | ERO    | RMSE: $    0.87
ARIMA           | FXP    | RMSE: $    1.01
MLP             | FXP    | RMSE: $    0.74
Lasso           | ERO    | RMSE: $    2.26

Average Performance by Model Type:
------------------------------------------------------------
               rmse  dir_acc
model                       
ARIMA         23.74    48.77
Lasso         10.48    48.24
MLP           21.10    50.48
RandomForest  21.35    48.93
XGBoost       21.40    48.82

Top 10 Stocks by Average RMSE:
------------------------------------------------------------
ERO    | Avg RMSE: $    1.51
FXP    | Avg RMSE: $    2.14
OXY    | Avg RMSE: $    2.26
PGJ    | Avg RMSE: $    2.45
CHN    | Avg RMSE: $    3.21
KR     | Avg RMSE: $    3.82
XPP    | Avg RMSE: $    4.03
MDT    | Avg RMSE: $    4.41
VGK    | Avg RMSE: $    5.02
YINN   | Avg RMSE: