In [7]:
import os

# Move up to the parent directory only while the data folder is not reachable
# from the current directory. The unconditional os.chdir("..") climbed one more
# level every time this cell was re-run, which broke the relative "data/..."
# paths used by the cells below.
if not os.path.isdir(os.path.join("data", "processed")):
    os.chdir("..")

# Improved LSTM Model and Portfolio Optimization

This notebook implements an enhanced LSTM model with better architecture and robust portfolio optimization using modern portfolio theory with ESG constraints.

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import TimeSeriesSplit
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.optimize import minimize
%matplotlib inline

In [2]:
# One seed value shared by NumPy and TensorFlow so repeated runs start from the
# same random state.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [13]:
# Input files, relative to the working directory.
STOCK_DATA_CSV = "data/processed/MultiIndex_stock_data.csv"
ESG_SCORES_CSV = "data/raw_data/esg_scores.csv"

# The first two CSV rows form a two-level column header; the first column is
# used as the row index and converted to timestamps.
merged_df = pd.read_csv(STOCK_DATA_CSV, header=[0, 1], index_col=0)
merged_df.index = pd.to_datetime(merged_df.index)

# The first CSV column becomes the row index of the ESG table.
esg_scores = pd.read_csv(ESG_SCORES_CSV, index_col=0)

In [22]:
# Diagnostic prints: a quick look at the column structure of the price frame,
# the ESG table, and the sub-frame of the first entry in the outer column level.
column_levels = merged_df.columns.levels
first_stock = column_levels[0][0]

print("DataFrame Info:")
print(column_levels[0][:5])  # first five entries of the outer column level
print("\nColumn levels:")
print(column_levels)
print("\nESG Scores Info:")
print(esg_scores.head())
print("\nSample data for first stock:")
print(merged_df[first_stock].head())

DataFrame Info:
Index(['AAPL', 'ABT', 'ADBE', 'AMGN', 'AMZN'], dtype='object')

Column levels:
[['AAPL', 'ABT', 'ADBE', 'AMGN', 'AMZN', 'BA', 'BKNG', 'BLK', 'BMY', 'CAT', 'COST', 'CRM', 'CSCO', 'CVX', 'DHR', 'DIS', 'FIS', 'GE', 'GILD', 'GOOGL', 'HON', 'IBM', 'INTC', 'JPM', 'KO', 'LIN', 'LMT', 'MCD', 'MDT', 'META', 'MMM', 'MRK', 'MSFT', 'NFLX', 'NKE', 'NVDA', 'ORCL', 'PEP', 'PFE', 'PYPL', 'QCOM', 'SAP', 'SBUX', 'SPGI', 'T', 'TSLA', 'TXN', 'UNH', 'V', 'WMT', 'ZTS'], ['20DayRet', '20DayVol', 'Adj Close', 'Close', 'DailyRet', 'High', 'Low', 'Open', 'Volume', 'Z20DayRet', 'Z20DayVol']]

ESG Scores Info:
  ticker  esg_score
0   AAPL   0.587270
1   MSFT   0.875357
2  GOOGL   0.765997
3   AMZN   0.699329
4   TSLA   0.478009

Sample data for first stock:
            20DayRet  20DayVol  Adj Close      Close  DailyRet       High  \
Date                                                                        
2015-08-03 -0.060000  0.016946  26.496563  29.610001 -0.023578  30.642500   
2015-08-04 -0

## 1. Improved Data Preprocessing

Implement the preprocessing helpers: standardize the features (z-score scaling), slice them into fixed-length look-back windows, and set up time-series cross-validation splits.

In [28]:
def prepare_sequence_data(data, lookback=60, forecast_horizon=1):
    """
    Standardize `data` and cut it into sliding windows for sequence models.

    Parameters
    ----------
    data : array-like, shape (n_rows, n_features) or (n_rows,)
        Observations in time order. A 1-D input is treated as a single feature.
    lookback : int
        Number of consecutive rows in each input window.
    forecast_horizon : int
        Number of rows after each window used as the target.

    Returns
    -------
    X : np.ndarray, shape (n_windows, lookback, n_features)
        Scaled input windows.
    y : np.ndarray, shape (n_windows, forecast_horizon)
        Scaled values of column 0 for the rows that follow each window.
    scaler : StandardScaler
        Scaler fitted on all columns of `data`.

    Raises
    ------
    ValueError
        If `lookback` or `forecast_horizon` is smaller than 1, or if `data`
        has fewer than `lookback + forecast_horizon` rows.

    Notes
    -----
    The scaler is fitted on every row of `data`, so its statistics also cover
    rows that a later train/validation split assigns to validation.
    """
    if lookback < 1 or forecast_horizon < 1:
        raise ValueError(
            f"lookback and forecast_horizon must be >= 1, "
            f"got lookback={lookback}, forecast_horizon={forecast_horizon}"
        )

    n_windows = len(data) - lookback - forecast_horizon + 1
    if n_windows < 1:
        # Without this check the function returned empty arrays and the
        # failure only surfaced later as an unrelated shape error.
        raise ValueError(
            f"Need at least {lookback + forecast_horizon} rows to build one "
            f"window, got {len(data)}"
        )

    try:
        # The scaler needs a 2-D input; treat a 1-D series as one feature.
        if getattr(data, "ndim", 2) == 1:
            data = np.asarray(data).reshape(-1, 1)

        scaler = StandardScaler()
        scaled_data = scaler.fit_transform(data)

        X, y = [], []
        for start in range(n_windows):
            window_end = start + lookback
            X.append(scaled_data[start:window_end])
            # The target is always column 0 of the rows right after the window.
            y.append(scaled_data[window_end:window_end + forecast_horizon, 0])

        return np.array(X), np.array(y), scaler
    except Exception as e:
        print(f"Error in prepare_sequence_data: {str(e)}")
        print(f"Data shape: {data.shape if hasattr(data, 'shape') else 'no shape'}")
        raise

def create_time_series_cv(n_splits=5, n_samples=None):
    """
    Create a TimeSeriesSplit whose validation folds fit into the data.

    Parameters
    ----------
    n_splits : int
        Number of train/validation folds.
    n_samples : int, optional
        Number of samples that will be passed to `split()`. Defaults to
        `len(merged_df)`. Pass the length of the sequence array when it is
        shorter than the raw frame (windowing drops the first rows), so the
        fold size matches the array that is actually split.

    Returns
    -------
    TimeSeriesSplit
        Splitter with `test_size` set to the smaller of
        `n_samples // (n_splits + 1)` and 20% of `n_samples`.

    Raises
    ------
    ValueError
        If there are too few samples for even one sample per validation fold.
    """
    if n_samples is None:
        n_samples = len(merged_df)

    # Keep n_splits validation folds plus at least one fold of training data:
    # n_samples >= (n_splits + 1) * test_size.
    max_test_size = n_samples // (n_splits + 1)
    test_size = min(max_test_size, int(n_samples * 0.2))

    if test_size < 1:
        raise ValueError(
            f"Not enough samples ({n_samples}) for {n_splits} time-series splits"
        )

    return TimeSeriesSplit(n_splits=n_splits, test_size=test_size)

## 2. Enhanced LSTM Model Architecture

Implement a stacked bidirectional LSTM (two recurrent layers) with attention pooling over time steps, plus batch normalization and dropout for regularization.

In [29]:
def create_improved_lstm_model(input_shape, lstm_units=(64, 32), dense_units=32,
                               dropout_rate=0.2, learning_rate=0.001):
    """
    Build and compile a stacked bidirectional LSTM regressor with attention.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to `tf.keras.Input`
        (the notebook uses `(timesteps, n_features)`).
    lstm_units : sequence of int
        Units of each bidirectional LSTM layer, in order. Every layer returns
        full sequences and is followed by batch normalization and dropout.
    dense_units : int
        Width of the dense layer after the attention pooling.
    dropout_rate : float
        Dropout rate used after each recurrent block and the dense layer.
    learning_rate : float
        Learning rate of the Adam optimizer.

    Returns
    -------
    tf.keras.Model
        Model with one linear output, compiled with MSE loss and MAE metric.

    Raises
    ------
    ValueError
        If `lstm_units` is empty.
    """
    lstm_units = tuple(lstm_units)
    if not lstm_units:
        raise ValueError("lstm_units must contain at least one layer width")

    inputs = tf.keras.Input(shape=input_shape)

    x = inputs
    for units in lstm_units:
        x = layers.Bidirectional(layers.LSTM(units, return_sequences=True))(x)
        x = layers.BatchNormalization()(x)
        x = layers.Dropout(dropout_rate)(x)

    # Width of the recurrent output. Reading it from the tensor keeps the
    # attention block consistent with the last LSTM layer; it used to be a
    # hard-coded 64 that only matched a final LSTM of 32 units.
    feature_dim = int(x.shape[-1])

    # Attention layer: one score per time step, normalized over time ...
    attention = layers.Dense(1, activation='tanh')(x)
    attention = layers.Flatten()(attention)
    attention_weights = layers.Activation('softmax')(attention)
    # ... then copied across the feature axis so it can scale `x` elementwise.
    attention_weights = layers.RepeatVector(feature_dim)(attention_weights)
    attention_weights = layers.Permute([2, 1])(attention_weights)

    # Weighted sum over the time axis gives one vector per sample.
    sent_representation = layers.multiply([x, attention_weights])
    sent_representation = layers.Lambda(lambda t: tf.keras.backend.sum(t, axis=1))(sent_representation)

    # Dense layers
    x = layers.Dense(dense_units, activation='relu')(sent_representation)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(dropout_rate)(x)

    outputs = layers.Dense(1, activation='linear')(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='mse',
        metrics=['mae']
    )

    return model

In [30]:
def debug_tensor_shape(tensor, name):
    """
    Print shape, type, dtype and NaN presence of an array-like object.

    Parameters
    ----------
    tensor : object
        Value to inspect: NumPy array, TensorFlow tensor, pandas object, or
        anything else (then only the type is informative).
    name : str
        Label used as prefix of every printed line.

    Notes
    -----
    The NaN line is printed for pandas objects and for floating-point NumPy
    arrays. It always shows a single True/False value.
    """
    print(f"{name} shape: {tensor.shape if hasattr(tensor, 'shape') else 'No shape'}")
    print(f"{name} type: {type(tensor)}")

    is_ndarray = isinstance(tensor, np.ndarray)
    if is_ndarray or tf.is_tensor(tensor):
        print(f"{name} dtype: {tensor.dtype}")

    if hasattr(tensor, 'isna'):
        # pandas objects: collapse the per-element / per-column result to one flag
        print(f"{name} has NaN: {bool(np.asarray(tensor.isna()).any())}")
    elif is_ndarray and np.issubdtype(tensor.dtype, np.floating):
        # NumPy arrays have no .isna(); before this branch they were never checked
        print(f"{name} has NaN: {bool(np.isnan(tensor).any())}")

    print("-" * 50)

def test_model_batch(X_sample, y_sample, input_shape):
    """
    Smoke-test the model: build it and run a forward pass on two samples.

    Parameters
    ----------
    X_sample : array-like
        Input samples; only the first two are fed to the model.
    y_sample : array-like
        Matching targets; only inspected for the debug printout.
    input_shape : tuple
        Shape passed to `create_improved_lstm_model`.

    Returns
    -------
    bool
        True when model creation and prediction both succeed, False when any
        exception is raised (the message is printed, not re-raised).
    """
    print("Testing model with small batch...")
    try:
        probe_model = create_improved_lstm_model(input_shape)
        print("Model created successfully")

        for array, label in ((X_sample, "X_sample"), (y_sample, "y_sample")):
            debug_tensor_shape(array, label)

        sample_output = probe_model.predict(X_sample[:2])
        print("Forward pass successful")
        debug_tensor_shape(sample_output, "test_pred")
    except Exception as err:
        print(f"Error in test_model_batch: {err!s}")
        return False

    return True

In [None]:
# Suppress TensorFlow deprecation warnings
import logging
logging.getLogger('tensorflow').setLevel(logging.ERROR)

# Window length used to build the sequences. Sample i of X ends right before
# row i + LOOKBACK of the original data, which is the row its target comes from.
LOOKBACK = 60

# NOTE(review): `stock_features` is not defined in any cell shown in this
# notebook; it has to exist in the kernel before this cell runs. It is used as
# a mapping ticker -> DataFrame of features whose column 0 is the target.
# NOTE(review): prepare_sequence_data() fits its scaler on the whole series
# before the folds are split, so validation rows influence the scaling.

# Train models with cross-validation
cv = create_time_series_cv()
stock_predictions = {}
stock_models = {}

try:
    for ticker in stock_features.keys():
        print(f"\nProcessing {ticker}...")
        features = stock_features[ticker].values
        print(f"Feature shape: {features.shape}")

        # Prepare sequences
        try:
            X, y, scaler = prepare_sequence_data(features, lookback=LOOKBACK)
            print(f"X shape: {X.shape}, y shape: {y.shape}")

            # Test model with small batch first
            print("\nTesting model initialization...")
            try:
                # Create model once for testing
                model = create_improved_lstm_model(input_shape=(X.shape[1], X.shape[2]))
                print("Model architecture created successfully")

                # Test forward pass with small batch
                test_batch = X[:2]
                print(f"Test batch shape: {test_batch.shape}")
                test_pred = model.predict(test_batch, verbose=0)
                print(f"Test prediction shape: {test_pred.shape}")
                print("Model test successful")

                # Clean up test model to free memory
                del model
                tf.keras.backend.clear_session()

            except Exception as e:
                print(f"Model test failed: {str(e)}")
                continue

            # Initialize lists for cross-validation results
            val_predictions = []
            val_indices = []

            # Cross-validation training
            for fold, (train_idx, val_idx) in enumerate(cv.split(X)):
                # Report the real number of folds instead of a hard-coded 5
                print(f"\nFold {fold + 1}/{cv.n_splits}")
                try:
                    X_train, X_val = X[train_idx], X[val_idx]
                    y_train, y_val = y[train_idx], y[val_idx]

                    print(f"Training set shape: X={X_train.shape}, y={y_train.shape}")
                    print(f"Validation set shape: X={X_val.shape}, y={y_val.shape}")

                    # Create fresh model for each fold
                    model = create_improved_lstm_model(input_shape=(X.shape[1], X.shape[2]))

                    # Callbacks
                    callbacks = [
                        tf.keras.callbacks.EarlyStopping(
                            monitor='val_loss',
                            patience=10,
                            restore_best_weights=True
                        ),
                        tf.keras.callbacks.ReduceLROnPlateau(
                            monitor='val_loss',
                            factor=0.5,
                            patience=5,
                            min_lr=0.0001
                        )
                    ]

                    # Train the model
                    history = model.fit(
                        X_train, y_train,
                        epochs=50,
                        batch_size=32,
                        validation_data=(X_val, y_val),
                        callbacks=callbacks,
                        verbose=1
                    )

                    # Store predictions
                    fold_predictions = model.predict(X_val, verbose=0)
                    val_predictions.extend(fold_predictions.flatten())
                    val_indices.extend(val_idx)

                    # Print fold metrics
                    fold_mse = model.evaluate(X_val, y_val, verbose=0)[0]
                    print(f"Fold {fold + 1} MSE: {fold_mse:.6f}")

                    # Clean up fold model to free memory
                    del model
                    tf.keras.backend.clear_session()

                except Exception as e:
                    print(f"Error in fold {fold + 1}: {str(e)}")
                    continue

            if len(val_predictions) > 0:
                # The scaler was fitted on every feature column, but the model
                # predicts only column 0. Undo the standardization with that
                # column's statistics; scaler.inverse_transform() expects one
                # column per fitted feature and cannot take an (n, 1) array.
                predictions_unscaled = (
                    np.array(val_predictions) * scaler.scale_[0] + scaler.mean_[0]
                )

                # Store predictions
                # Assumes the feature frame has the same rows as merged_df —
                # TODO confirm once the cell defining stock_features is restored.
                stock_predictions[ticker] = pd.Series(
                    predictions_unscaled,
                    index=merged_df.index[LOOKBACK:][val_indices]
                )

                # Print final metrics
                actual_values = features[LOOKBACK:, 0][val_indices]
                final_mse = np.mean((stock_predictions[ticker].to_numpy() - actual_values) ** 2)
                print(f"\n{ticker} - Final MSE: {final_mse:.6f}")
            else:
                print(f"\nNo valid predictions for {ticker}")

        except Exception as e:
            print(f"Error processing {ticker}: {str(e)}")
            continue

except Exception as e:
    print(f"Error in main training loop: {str(e)}")
    raise

# Print summary
print("\nTraining Summary:")
print(f"Successfully processed stocks: {len(stock_predictions)}/{len(stock_features)}")
for ticker in stock_predictions.keys():
    print(f"{ticker}: {len(stock_predictions[ticker])} predictions")

# Save predictions if we have any
if len(stock_predictions) > 0:
    predictions_df = pd.DataFrame(stock_predictions)
    predictions_df.to_csv("data/processed/lstm_predictions.csv")
    print("\nPredictions saved to lstm_predictions.csv")
else:
    print("\nNo predictions to save")


Processing AAPL...
Feature shape: (2370, 8)
X shape: (2310, 60, 8), y shape: (2310, 1)

Testing model initialization...
Model architecture created successfully
Test batch shape: (2, 60, 8)
Test prediction shape: (2, 1)
Model test successful
Test prediction shape: (2, 1)
Model test successful

Fold 1/5
Training set shape: X=(335, 60, 8), y=(335, 1)
Validation set shape: X=(395, 60, 8), y=(395, 1)

Fold 1/5
Training set shape: X=(335, 60, 8), y=(335, 1)
Validation set shape: X=(395, 60, 8), y=(395, 1)
Epoch 1/50
Epoch 1/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 173ms/step - loss: 2.4876 - mae: 1.3116 - val_loss: 0.5827 - val_mae: 0.7550 - learning_rate: 0.0010
Epoch 2/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 173ms/step - loss: 2.4876 - mae: 1.3116 - val_loss: 0.5827 - val_mae: 0.7550 - learning_rate: 0.0010
Epoch 2/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 85ms/step - loss: 1.8684 - mae: 1.1493 - val_loss: 0.5

## 3. Portfolio Optimization Setup

Prepare the predicted returns and risk metrics for portfolio optimization with ESG constraints.

In [None]:
# Period-over-period percentage change of the predicted series; rows that
# contain any NaN are dropped before the statistics are computed.
returns = predictions_df.pct_change().dropna()
exp_returns, cov_matrix = returns.mean(), returns.cov()

# Add ESG constraints
esg_threshold = 0.6   # minimum score for a row to count as "high ESG"
min_esg_weight = 0.4  # minimum total weight required in high-ESG holdings

# Row labels of esg_scores whose score reaches the threshold
high_esg_stocks = esg_scores.loc[
    lambda scores: scores['esg_score'] >= esg_threshold
].index

def portfolio_stats(weights):
    """
    Compute return, risk, Sharpe ratio and ESG score of a portfolio.

    Parameters
    ----------
    weights : array-like
        Asset weights in the order of `exp_returns.index`.

    Returns
    -------
    tuple
        `(portfolio_return, portfolio_risk, sharpe_ratio, portfolio_esg)`.
        `sharpe_ratio` is the return divided by the risk (no risk-free rate)
        and is NaN when the risk is zero. `portfolio_esg` is the
        weight-weighted sum of ESG scores; assets without a score add 0.

    Notes
    -----
    Reads the module-level `exp_returns`, `cov_matrix` and `esg_scores`.
    ESG scores are matched to assets by ticker. The ticker is taken from the
    `ticker` column when present, otherwise from the index of `esg_scores`.
    """
    weights = np.asarray(weights, dtype=float)

    portfolio_return = np.sum(exp_returns * weights)
    portfolio_risk = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))
    sharpe_ratio = portfolio_return / portfolio_risk if portfolio_risk > 0 else np.nan

    # Align ESG scores with the asset order of the weights. Multiplying the raw
    # column with the weights pairs them by row position, which mixes up
    # tickers whenever the ESG table is ordered differently from the returns.
    if 'ticker' in esg_scores.columns:
        esg_by_ticker = esg_scores.set_index('ticker')['esg_score']
    else:
        esg_by_ticker = esg_scores['esg_score']
    esg_by_ticker = esg_by_ticker[~esg_by_ticker.index.duplicated(keep='first')]
    esg_aligned = esg_by_ticker.reindex(exp_returns.index).to_numpy(dtype=float)

    portfolio_esg = np.nansum(esg_aligned * weights)
    return portfolio_return, portfolio_risk, sharpe_ratio, portfolio_esg

def optimize_portfolio(target_return=None):
    """
    Find minimum-volatility weights under budget, ESG and weight-cap limits.

    Parameters
    ----------
    target_return : float, optional
        When given, the portfolio's expected return is constrained to equal
        this value.

    Returns
    -------
    np.ndarray or None
        Weights in the order of `exp_returns.index`, or None when there are no
        assets or the solver does not report success.

    Notes
    -----
    Reads the module-level `exp_returns`, `cov_matrix`, `esg_scores`,
    `esg_threshold` and `min_esg_weight`. Constraints: weights sum to 1, each
    weight lies in [0, 0.2], and assets whose ESG score is at least
    `esg_threshold` hold at least `min_esg_weight` in total. Assets without an
    ESG score do not count as high-ESG.
    """
    tickers = exp_returns.index
    n_assets = len(tickers)
    if n_assets == 0:
        return None

    mean_returns = exp_returns.to_numpy(dtype=float)
    covariance = np.asarray(cov_matrix, dtype=float)

    # Build the high-ESG mask in the asset order of the weight vector. A mask
    # built in the row order of esg_scores selects the wrong positions whenever
    # that table is ordered differently from the returns.
    if 'ticker' in esg_scores.columns:
        esg_by_ticker = esg_scores.set_index('ticker')['esg_score']
    else:
        esg_by_ticker = esg_scores['esg_score']
    esg_by_ticker = esg_by_ticker[~esg_by_ticker.index.duplicated(keep='first')]
    # Comparisons with NaN are False, so assets without a score are excluded.
    is_high_esg = (esg_by_ticker.reindex(tickers) >= esg_threshold).to_numpy()

    initial_weights = np.full(n_assets, 1.0 / n_assets)

    constraints = [
        {'type': 'eq', 'fun': lambda x: np.sum(x) - 1},  # Sum of weights = 1
        {'type': 'ineq', 'fun': lambda x: np.sum(x[is_high_esg]) - min_esg_weight}  # ESG constraint
    ]

    if target_return is not None:
        constraints.append({
            'type': 'eq',
            'fun': lambda x: mean_returns @ x - target_return
        })

    bounds = tuple((0, 0.2) for _ in range(n_assets))

    def objective(candidate):
        # Portfolio volatility; clamp at zero so rounding noise in the
        # quadratic form cannot produce sqrt of a tiny negative number.
        return np.sqrt(max(float(candidate @ covariance @ candidate), 0.0))

    result = minimize(objective, initial_weights, method='SLSQP',
                     bounds=bounds, constraints=constraints)

    return result.x if result.success else None

# Sweep 50 evenly spaced return targets between the smallest and the largest
# expected return and keep every target the optimizer could solve.
target_returns = np.linspace(exp_returns.min(), exp_returns.max(), 50)
efficient_portfolios = []

for target in target_returns:
    weights = optimize_portfolio(target)
    if weights is None:
        # No solution for this target; leave it out of the frontier.
        continue

    stats = portfolio_stats(weights)
    frontier_return, frontier_risk, frontier_sharpe, frontier_esg = stats
    efficient_portfolios.append(dict(zip(
        ('Return', 'Risk', 'Sharpe', 'ESG Score', 'Weights'),
        (frontier_return, frontier_risk, frontier_sharpe, frontier_esg, weights),
    )))

## 4. Portfolio Analysis and Visualization

Analyze the optimized portfolios and create visualizations of the efficient frontier and portfolio weights.

In [None]:
# Stop with a clear message when the sweep produced nothing; otherwise the
# empty DataFrame below fails later with an unrelated KeyError on 'Risk'.
if not efficient_portfolios:
    raise ValueError(
        "No efficient portfolios were found: every optimization run failed, "
        "so there is nothing to plot or select."
    )

# Convert efficient portfolios to DataFrame
ef_df = pd.DataFrame([{
    'Return': p['Return'],
    'Risk': p['Risk'],
    'Sharpe': p['Sharpe'],
    'ESG Score': p['ESG Score']
} for p in efficient_portfolios])

# Plot efficient frontier
plt.figure(figsize=(12, 8))
plt.scatter(ef_df['Risk'], ef_df['Return'], c=ef_df['ESG Score'], 
           cmap='viridis', s=50)
plt.colorbar(label='ESG Score')
plt.xlabel('Portfolio Risk (Volatility)')
plt.ylabel('Expected Return')
plt.title('Efficient Frontier with ESG Scores')
plt.show()

# Find optimal portfolio (highest Sharpe ratio)
# ef_df keeps the default 0..n-1 index, so the label returned by idxmax() is
# also the position in the efficient_portfolios list.
optimal_idx = ef_df['Sharpe'].idxmax()
optimal_portfolio = efficient_portfolios[optimal_idx]

# Plot optimal portfolio weights
optimal_weights = pd.Series(
    optimal_portfolio['Weights'], 
    index=exp_returns.index
).sort_values(ascending=True)

plt.figure(figsize=(15, 8))
optimal_weights.plot(kind='bar')
plt.title('Optimal Portfolio Weights')
plt.xlabel('Stocks')
plt.ylabel('Weight')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Print portfolio statistics
print("\nOptimal Portfolio Statistics:")
print(f"Expected Return: {optimal_portfolio['Return']:.4f}")
print(f"Risk: {optimal_portfolio['Risk']:.4f}")
print(f"Sharpe Ratio: {optimal_portfolio['Sharpe']:.4f}")
print(f"ESG Score: {optimal_portfolio['ESG Score']:.4f}")

# Calculate maximum drawdown
def calculate_max_drawdown(returns):
    """
    Largest peak-to-trough loss of a return series, as a non-positive fraction.

    Parameters
    ----------
    returns : pd.Series, pd.DataFrame or array-like
        Simple per-period returns in time order. A DataFrame is processed
        column by column; other array-likes are converted to a Series.

    Returns
    -------
    float or pd.Series
        Minimum of `wealth / running_peak - 1`, where wealth starts at 1.
        0.0 means the wealth never fell below an earlier peak or the starting
        value; NaN is returned for empty input. For a DataFrame, one value per
        column.
    """
    if not isinstance(returns, (pd.Series, pd.DataFrame)):
        returns = pd.Series(np.asarray(returns, dtype=float))

    cumulative = (1 + returns).cumprod()
    # The starting wealth of 1 counts as the first peak. Without the lower
    # bound, a loss in the very first period set the initial "peak" and the
    # drop from the starting capital was not counted.
    running_peak = cumulative.expanding().max().clip(lower=1.0)
    drawdowns = cumulative / running_peak - 1
    return drawdowns.min()

# Calculate portfolio returns
# The weights are in the column order of `returns`, so a positional dot
# product pairs each asset with its own weight.
portfolio_returns = returns.dot(optimal_portfolio['Weights'])
max_drawdown = calculate_max_drawdown(portfolio_returns)
print(f"Maximum Drawdown: {max_drawdown:.4f}")

# Save optimal portfolio
# Match ESG scores to the weights by ticker. The ESG table as loaded has an
# integer row index, so using its 'esg_score' column directly would pair
# ticker-labelled weights with integer-labelled scores and leave NaN in every
# row of the saved table.
if 'ticker' in esg_scores.columns:
    esg_by_ticker = esg_scores.set_index('ticker')['esg_score']
else:
    esg_by_ticker = esg_scores['esg_score']
esg_by_ticker = esg_by_ticker[~esg_by_ticker.index.duplicated(keep='first')]

optimal_portfolio_df = pd.DataFrame({
    'Weight': optimal_weights,
    'ESG Score': esg_by_ticker.reindex(optimal_weights.index)
})
optimal_portfolio_df.to_csv('data/processed/optimal_portfolio.csv')