In [46]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import RobustScaler, StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error, r2_score
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
import pickle
import logging
import os

In [47]:
logging.basicConfig(level=logging.INFO,format='%(asctime)s - %(levelname)s: %(message)s')
logger = logging.getLogger(__name__)

In [48]:
class StockPredictor:
    def __init__(self, symbol='AAPL', period='6mp'):
        self.symbol = symbol
        self.period = period
        self.data = None
        self.models = {}
        self.close_scaler = RobustScaler()
        self.feature_scaler = StandardScaler()
        
        # Setup logging
        logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s: %(message)s')
        self.logger = logging.getLogger(__name__)

    def fetch_data(self):
        """Fetch historical stock data using yfinance"""
        try:
            self.data = yf.download(self.symbol, period=self.period, interval='1d')
            if self.data.empty:
                self.logger.error(f"No data retrieved for {self.symbol}")
                return False
            
            self.logger.info(f"Successfully fetched {len(self.data)} rows for {self.symbol}")
            return True
        except Exception as e:
            self.logger.error(f"Failed to fetch data for {self.symbol}: {str(e)}")
            return False

    def add_technical_indicators(self):
        """Add technical indicators to the dataset"""
        if self.data is None or self.data.empty:
            self.logger.error("No data available for adding technical indicators")
            return False

        try:
            self.data['Daily_Return'] = self.data['Close'].pct_change()
            self.data['MA50'] = self.data['Close'].rolling(window=50).mean()
            self.data['MA200'] = self.data['Close'].rolling(window=200).mean()

            # Calculate RSI
            delta = self.data['Close'].diff()
            gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
            loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
            rs = gain / loss
            self.data['RSI'] = 100 - (100 / (1 + rs))
            
            # Calculate Volatility
            self.data['Volatility'] = self.data['Daily_Return'].rolling(window=20).std()
            
            # Drop NaN values and verify we still have data
            initial_size = len(self.data)
            self.data.dropna(inplace=True)
            final_size = len(self.data)
            
            self.logger.info(f"Removed {initial_size - final_size} rows with NaN values")
            
            if final_size == 0:
                self.logger.error("No data remaining after removing NaN values")
                return False
                
            return True
        except Exception as e:
            self.logger.error(f"Error adding technical indicators: {str(e)}")
            return False

    def prepare_data(self, look_back=60):
        """Prepare data for model training"""
        if self.data is None or self.data.empty:
            self.logger.error("No data available for preparation")
            return None, None

        try:
            self.data['Target'] = self.data['Close'].pct_change().shift(-1)
            features = ['Open', 'High', 'Low', 'Close', 'Volume']
            
            # Verify all required features exist
            missing_features = [f for f in features if f not in self.data.columns]
            if missing_features:
                self.logger.error(f"Missing required features: {missing_features}")
                return None, None

            X = self.data[features].values
            y = self.data['Target'].values[:-1]  # Remove last row as it will have NaN target

            if len(X) == 0 or len(y) == 0:
                self.logger.error("No data available after preprocessing")
                return None, None

            # Scale the features and target
            X_scaled = self.feature_scaler.fit_transform(X)
            y_scaled = self.close_scaler.fit_transform(y.reshape(-1, 1)).flatten()
            
            # Create sequences
            X_seq, y_seq = [], []
            for i in range(len(X_scaled) - look_back - 1):
                X_seq.append(X_scaled[i:(i + look_back)])
                y_seq.append(y_scaled[i + look_back])
            
            X_seq = np.array(X_seq)
            y_seq = np.array(y_seq)
            
            if len(X_seq) == 0 or len(y_seq) == 0:
                self.logger.error("No sequences created")
                return None, None
                
            self.logger.info(f"Created {len(X_seq)} sequences of length {look_back}")
            return X_seq, y_seq

        except Exception as e:
            self.logger.error(f"Error preparing data: {str(e)}")
            return None, None

    def create_lstm_model(self, input_shape):
        """Create and compile LSTM model"""
        model = Sequential([
            Bidirectional(LSTM(256, return_sequences=True), input_shape=input_shape),
            Dropout(0.3),
            Bidirectional(LSTM(256, return_sequences=False)),
            Dropout(0.3),
            Dense(64, activation='relu'),
            Dropout(0.2),
            Dense(1, activation='linear')
        ])
        model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
        return model

    def train_models(self, X, y):
        """Train all models"""
        # Train LSTM
        X_lstm = X.reshape(X.shape[0], X.shape[1], X.shape[2])
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.0001)
        ]
        
        lstm_model = self.create_lstm_model((X.shape[1], X.shape[2]))
        lstm_history = lstm_model.fit(
            X_lstm, y,
            epochs=100,
            batch_size=32,
            validation_split=0.2,
            callbacks=callbacks,
            verbose=1
        )

        # Train XGBoost
        xgb_model = xgb.XGBRegressor(
            n_estimators=100,
            learning_rate=0.01,
            max_depth=4,
            subsample=0.8,
            colsample_bytree=0.8
        )

        # Train Random Forest
        rf_model = RandomForestRegressor(
            n_estimators=100,
            max_depth=8,
            random_state=42
        )

        # Flatten data for tree-based models
        X_flat = X.reshape(X.shape[0], -1)
        xgb_model.fit(X_flat, y)
        rf_model.fit(X_flat, y)

        self.models = {
            'lstm': lstm_model,
            'xgboost': xgb_model,
            'random_forest': rf_model
        }

        return lstm_history

    def evaluate_models(self, X, y):
        """Evaluate all models"""
        X_flat = X.reshape(X.shape[0], -1)
        X_lstm = X.reshape(X.shape[0], X.shape[1], X.shape[2])

        predictions = {
            'lstm': self.models['lstm'].predict(X_lstm),
            'xgboost': self.models['xgboost'].predict(X_flat),
            'random_forest': self.models['random_forest'].predict(X_flat)
        }

        metrics = {}
        for name, pred in predictions.items():
            metrics[name] = {
                'mae': mean_absolute_error(y, pred),
                'mse': mean_squared_error(y, pred),
                'rmse': np.sqrt(mean_squared_error(y, pred)),
                'r2': r2_score(y, pred)
            }
        return metrics

    def save_models(self, output_dir='models'):
        """Save models and scalers to files"""
        os.makedirs(output_dir, exist_ok=True)
        
        self.models['lstm'].save(f'{output_dir}/lstm_model.keras')
        with open(f'{output_dir}/xgboost_model.pkl', 'wb') as f:
            pickle.dump(self.models['xgboost'], f)
        with open(f'{output_dir}/random_forest_model.pkl', 'wb') as f:
            pickle.dump(self.models['random_forest'], f)
        with open(f'{output_dir}/close_scaler.pkl', 'wb') as f:
            pickle.dump(self.close_scaler, f)
        with open(f'{output_dir}/feature_scaler.pkl', 'wb') as f:
            pickle.dump(self.feature_scaler, f)

In [49]:
predictor = StockPredictor(symbol='AAPL', period='6mo')
if predictor.fetch_data():
    predictor.add_technical_indicators()
    X, y = predictor.prepare_data(look_back=60)
    history = predictor.train_models(X, y)
    metrics = predictor.evaluate_models(X, y)
    
    for model_name, model_metrics in metrics.items():
        print(f"\n{model_name.upper()} Metrics:")
        for metric_name, value in model_metrics.items():
            print(f"{metric_name}: {value:.4f}")
    
    predictor.save_models()

[*********************100%***********************]  1 of 1 completed
2025-01-29 19:54:13,515 - INFO: Successfully fetched 126 rows for AAPL
2025-01-29 19:54:13,523 - INFO: Removed 126 rows with NaN values
2025-01-29 19:54:13,523 - ERROR: No data remaining after removing NaN values
2025-01-29 19:54:13,526 - ERROR: No data available for preparation


AttributeError: 'NoneType' object has no attribute 'reshape'