Final Python Code For Research Implementation

In [None]:
!pip install yfinance
!pip install PyPortfolioOpt
!pip install plotly
!pip install --upgrade tensorflow

Collecting PyPortfolioOpt
  Downloading pyportfolioopt-1.5.6-py3-none-any.whl.metadata (22 kB)
Collecting ecos<3.0.0,>=2.0.14 (from PyPortfolioOpt)
  Downloading ecos-2.0.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.0 kB)
Downloading pyportfolioopt-1.5.6-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ecos-2.0.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (220 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m220.1/220.1 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ecos, PyPortfolioOpt
Successfully installed PyPortfolioOpt-1.5.6 ecos-2.0.14


In [3]:
import yfinance as yf
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, optimizers, callbacks
from sklearn.preprocessing import RobustScaler
from sklearn.decomposition import PCA
from pypfopt import EfficientCVaR, objective_functions
from pypfopt.risk_models import CovarianceShrinkage
import plotly.graph_objects as go
from datetime import datetime, timedelta
import warnings
from sklearn.model_selection import TimeSeriesSplit
import plotly.express as px

warnings.filterwarnings("ignore", category=UserWarning)

def add_technicals(df):
    """Derive a fixed set of technical-indicator features from an OHLCV frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Close', 'High', 'Low' and 'Volume' columns. Gaps in
        those columns are forward- then backward-filled before use. The input
        frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns ['Returns', 'RSI', 'MACD', 'Signal', 'ATR', 'OBV', 'CMF'] on
        the input index. Rows that still contain NaN in any of these feature
        columns after cleaning are dropped.

    Raises
    ------
    KeyError
        If one of the required input columns is missing.
    """
    df = df.copy()
    close = df['Close'].ffill().bfill()
    high = df['High'].ffill().bfill()
    low = df['Low'].ffill().bfill()
    volume = df['Volume'].ffill().bfill()

    # Daily percentage change, bounded to +/-10% to damp outliers.
    df['Returns'] = close.pct_change().clip(-0.1, 0.1).fillna(0)

    # RSI: zero averages are replaced by a tiny epsilon so the ratio is finite.
    delta = close.diff().fillna(0)
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)

    avg_gain = gain.rolling(14, min_periods=1).mean().replace(0, 1e-8)
    avg_loss = loss.rolling(14, min_periods=1).mean().replace(0, 1e-8)

    rs = (avg_gain / avg_loss).replace([np.inf, -np.inf], 1)
    df['RSI'] = 100 - (100 / (1 + rs)).clip(0, 100)

    # MACD (12/26 EMA difference), clipped to 10% of the close's std-dev.
    macd_scale = close.std() * 0.1
    fast_ema = close.ewm(span=12, adjust=False).mean()
    slow_ema = close.ewm(span=26, adjust=False).mean()
    df['MACD'] = (fast_ema - slow_ema).clip(-macd_scale, macd_scale)
    df['Signal'] = df['MACD'].ewm(span=9, adjust=False).mean()

    # ATR expressed as a fraction of the close. min_periods=1 (as for RSI)
    # gives the warm-up rows a value based only on past data instead of
    # leaving NaNs that would later be back-filled from the future.
    prev_close = close.shift()
    tr = pd.concat([
        high - low,
        (high - prev_close).abs(),
        (low - prev_close).abs()
    ], axis=1).max(axis=1)
    df['ATR'] = (tr.rolling(14, min_periods=1).mean() / close).clip(0, 0.2)

    # On-balance volume: signed volume accumulated over time.
    df['OBV'] = (np.sign(close.diff()) * volume).fillna(0).cumsum()

    # Chaikin Money Flow. The money-flow multiplier is
    # ((close - low) - (high - close)) / (high - low); bars with a zero range
    # contribute a multiplier of 0 rather than dividing by zero.
    bar_range = (high - low).replace(0, np.nan)
    mf_multiplier = ((2 * close - low - high) / bar_range).fillna(0).clip(-1, 1)
    volume_sum = volume.rolling(20, min_periods=1).sum().replace(0, 1e-8)
    df['CMF'] = (mf_multiplier * volume).rolling(20, min_periods=1).sum() / volume_sum

    # Clean only the feature columns so a NaN in an unrelated input column
    # (e.g. 'Open') cannot cause otherwise valid rows to be dropped.
    feature_cols = ['Returns', 'RSI', 'MACD', 'Signal', 'ATR', 'OBV', 'CMF']
    features = df[feature_cols].replace([np.inf, -np.inf], np.nan)
    features = features.ffill().bfill()

    return features.dropna()

class DataEngine:
    """Downloads OHLCV history with yfinance and keeps per-ticker features.

    Attributes
    ----------
    tickers : list
        Ticker symbols passed to the constructor.
    start, end : str
        Date bounds passed to ``yf.download`` for the initial fetch.
    raw : dict
        Maps each ticker to its raw OHLCV frame. Kept so that indicators can
        be recomputed from the full history when new rows are appended.
    data : dict
        Maps each ticker to the feature frame returned by ``add_technicals``.
    """

    OHLCV_COLUMNS = ['Open', 'High', 'Low', 'Close', 'Volume']

    def __init__(self, tickers, start_date, end_date):
        self.tickers = tickers
        self.start = start_date
        self.end = end_date
        self.raw = {}
        self.data = self._fetch_data()

    @staticmethod
    def _ticker_frame(downloaded, ticker):
        """Extract one ticker's OHLCV columns from a downloaded frame.

        Handles two-level columns with the ticker on either level, as well as
        flat columns. Returns ``None`` when the ticker or any of the OHLCV
        columns cannot be found.
        """
        if downloaded is None or len(downloaded.columns) == 0:
            return None

        columns = downloaded.columns
        if isinstance(columns, pd.MultiIndex):
            if ticker in columns.get_level_values(0):
                frame = downloaded[ticker]
            elif columns.nlevels > 1 and ticker in columns.get_level_values(1):
                frame = downloaded.xs(ticker, axis=1, level=1)
            else:
                return None
        else:
            # Flat columns: assumed to be a single-ticker download.
            frame = downloaded

        wanted = DataEngine.OHLCV_COLUMNS
        if any(col not in frame.columns for col in wanted):
            return None
        return frame[wanted].copy()

    def _fetch_data(self):
        """Download the full history and build the per-ticker feature dict.

        Raises
        ------
        KeyError
            If the download contains no columns for one of the tickers.
        """
        data = yf.download(self.tickers, start=self.start, end=self.end,
                         group_by='ticker', progress=False)
        clean_data = {}
        for t in self.tickers:
            ohlcv = self._ticker_frame(data, t)
            if ohlcv is None:
                raise KeyError(f"No price data returned for ticker {t!r}")
            self.raw[t] = ohlcv
            features = add_technicals(ohlcv)
            if features.isnull().any().any():
                features = features.dropna()  # Drop any remaining NaNs
            clean_data[t] = features
        return clean_data

    def update_data(self):
        """Append the most recent week of prices and recompute the features.

        The download uses ``group_by='ticker'`` so that tickers sit on the
        first column level. New rows are merged into the stored raw OHLCV
        history (newest row wins on duplicate dates) and the indicators are
        recomputed from that merged history, not from the stored features.
        Tickers for which nothing new was returned are left untouched.
        """
        new_start = (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d')
        new_data = yf.download(self.tickers, start=new_start,
                               group_by='ticker', progress=False)
        for t in self.tickers:
            fresh = self._ticker_frame(new_data, t)
            if fresh is None:
                continue
            # Rows that are entirely NaN carry no information and must not
            # overwrite valid rows already held for the same date.
            fresh = fresh.dropna(how='all')
            if fresh.empty:
                continue

            previous = self.raw.get(t)
            merged = fresh if previous is None else pd.concat([previous, fresh], axis=0)
            merged = merged[~merged.index.duplicated(keep='last')].sort_index()

            self.raw[t] = merged
            self.data[t] = add_technicals(merged)

class TCNModel(tf.keras.Model):
    """Small causal-convolution network producing one tanh-bounded output.

    Layer order in the forward pass: Conv1D -> BatchNorm -> Conv1D (twice the
    filters) -> BatchNorm -> global average pooling -> dropout -> Dense(1).

    Parameters
    ----------
    num_features : int
        Accepted for interface purposes; not used when building the layers.
    filters : int
        Filter count of the first convolution; the second uses ``filters*2``.
    kernel_size : int
        Kernel width shared by both convolutions.
    dropout : float
        Rate passed to the Dropout layer.
    """

    def __init__(self, num_features, filters=64, kernel_size=3, dropout=0.2):
        super().__init__()
        # Both convolutions share everything except their filter count.
        conv_options = dict(padding='causal', activation='relu')
        self.conv1 = layers.Conv1D(filters, kernel_size, **conv_options)
        self.bn1 = layers.BatchNormalization()
        self.conv2 = layers.Conv1D(filters*2, kernel_size, **conv_options)
        self.bn2 = layers.BatchNormalization()
        self.gap = layers.GlobalAveragePooling1D()
        self.dense = layers.Dense(1, activation='tanh')
        self.dropout = layers.Dropout(dropout)

    def call(self, inputs):
        """Run ``inputs`` through the layer stack and return the Dense output."""
        hidden = inputs
        pipeline = (self.conv1, self.bn1, self.conv2, self.bn2, self.gap, self.dropout)
        for stage in pipeline:
            hidden = stage(hidden)
        return self.dense(hidden)

class PortfolioOptimizer:
    """Computes portfolio weights from a frame of per-asset returns.

    Parameters
    ----------
    returns : pandas.DataFrame
        One column per asset. The frame passed in is never modified.
    seed : int or None, default 0
        Seed for the tiny jitter added to the cleaned returns. A fixed default
        makes repeated runs reproducible; pass ``None`` for fresh randomness.

    Attributes
    ----------
    returns : pandas.DataFrame
        Cleaned copy of the input: NaN -> 0, +/-inf -> +/-1e-5, all-zero
        columns removed, plus uniform jitter in [-1e-8, 1e-8).
    """

    def __init__(self, returns, seed=0):
        # Work on an explicit float copy: ``to_numpy()`` may hand back a view
        # of the caller's data, and the in-place clean-up below would then
        # either overwrite the caller's frame or fail on a read-only buffer.
        returns_np = returns.to_numpy(dtype=float, copy=True)

        # Handle extreme values and NaNs
        np.nan_to_num(returns_np, copy=False, nan=0.0, posinf=1e-5, neginf=-1e-5)

        # Remove columns with all zeros
        valid_cols = np.where(np.any(returns_np != 0, axis=0))[0]
        self.returns = pd.DataFrame(
            returns_np[:, valid_cols],
            columns=returns.columns[valid_cols],
            index=returns.index
        )

        # Add small epsilon to avoid perfect zero returns (seeded for
        # reproducibility).
        rng = np.random.default_rng(seed)
        self.returns += rng.uniform(-1e-8, 1e-8, size=self.returns.shape)

    def optimize_weights(self):
        """Return ``{asset: weight}`` from ``EfficientCVaR.min_cvar()``.

        Falls back to equal weights when there are fewer than 10 rows, no
        usable columns, or the optimiser raises. Weights are clamped at zero,
        normalised, filtered to those above 1% and rounded to 3 decimals.
        """
        if self.returns.empty or len(self.returns) < 10:
            return self.equal_weights_fallback()

        try:
            # Regularize expected returns
            mu = self.returns.mean().clip(-0.1, 0.1)

            # Bounds of (0.05, 0.4) cannot sum to 1 for fewer than 3 or more
            # than 20 assets, so relax them just enough to stay feasible.
            n_assets = self.returns.shape[1]
            lower_bound = min(0.05, 1.0 / n_assets)
            upper_bound = max(0.4, 1.0 / n_assets)

            optimizer = EfficientCVaR(
                expected_returns=mu,
                returns=self.returns,
                beta=0.95,
                weight_bounds=(lower_bound, upper_bound)
            )
            optimizer.add_objective(objective_functions.L2_reg, gamma=0.1)
            weights = optimizer.min_cvar()

            # Ensure numerical stability in weights
            weights = {k: max(v, 0) for k,v in weights.items()}
            total = sum(weights.values())
            return {k: round(v/total, 3) for k,v in weights.items() if v/total > 0.01}

        except Exception as e:
            # Deliberate best-effort: any optimiser failure yields equal weights.
            print(f"Optimization failed: {str(e)}, using fallback")
            return self.equal_weights_fallback()

    def equal_weights_fallback(self):
        """Return equal weights over the remaining columns (``{}`` if none)."""
        n = len(self.returns.columns)
        if n == 0:
            return {}
        equal_weight = 1.0 / n
        return {col: round(equal_weight, 3) for col in self.returns.columns}

class AIWealthManager:
    """Ties together data download, per-ticker models and portfolio weights.

    Attributes
    ----------
    tickers : list
        Input tickers with empty strings removed.
    data_engine : DataEngine
        Source of the per-ticker feature frames (``data_engine.data``).
    scalers : dict
        One fitted ``RobustScaler`` per ticker, created by ``_prepare_data``.
        A separate scaler per ticker is required so that signals for a ticker
        are scaled with that ticker's own statistics.
    models : dict
        One ``TCNModel`` per ticker.
    """

    # Number of trailing rows fed to a model when generating a signal; matches
    # the default ``window`` of ``_prepare_data``.
    SIGNAL_WINDOW = 30

    def __init__(self, tickers, start_date, end_date):
        self.tickers = [t for t in tickers if t != '']  # Clean empty tickers
        # Use the cleaned list so empty symbols are never requested.
        self.data_engine = DataEngine(self.tickers, start_date, end_date)
        self.scalers = {}
        self._trained = set()  # tickers whose model has been fitted
        self.models = {
            t: TCNModel(num_features=self.data_engine.data[t].shape[1])
            for t in self.tickers
        }

    @staticmethod
    def _make_windows(features, target, window, horizon):
        """Build aligned (X, y) arrays from a feature matrix and a target.

        Sample ``i`` uses rows ``i .. i+window-1`` as input and the target
        value ``horizon`` rows after the last input row. Only samples whose
        target actually exists are produced, and any sample containing NaN in
        its window or target is removed from both arrays together.
        """
        features = np.asarray(features, dtype=float)
        target = np.asarray(target, dtype=float)

        n_samples = len(features) - window - horizon + 1
        if n_samples <= 0:
            n_cols = features.shape[1] if features.ndim == 2 else 0
            return np.empty((0, window, n_cols)), np.empty((0,))

        X = np.stack([features[i:i + window] for i in range(n_samples)])
        first_target = window - 1 + horizon
        y = target[first_target:first_target + n_samples]

        keep = ~np.isnan(X).any(axis=(1, 2)) & ~np.isnan(y)
        return X[keep], y[keep]

    def _prepare_data(self, ticker, window=30, horizon=5):
        """Scale a ticker's features and return training sequences ``(X, y)``.

        Fits a fresh ``RobustScaler`` for the ticker and stores it in
        ``self.scalers``. ``y`` is the 'Returns' value ``horizon`` rows after
        the end of each window.
        """
        df = self.data_engine.data[ticker]
        if len(df) == 0:
            return np.empty((0, window, df.shape[1])), np.empty((0,))

        # Scale features with a scaler dedicated to this ticker
        scaler = RobustScaler()
        scaled = scaler.fit_transform(df.values)
        self.scalers[ticker] = scaler

        return self._make_windows(scaled, df['Returns'].to_numpy(), window, horizon)

    def train_models(self, epochs=50):
        """Fit each ticker's model over 3 expanding time-series folds."""
        n_splits = 3
        for ticker in self.tickers:
            X, y = self._prepare_data(ticker)
            # TimeSeriesSplit needs more samples than splits.
            if len(X) <= n_splits:
                print(f"Skipping {ticker} due to insufficient data.")
                continue

            tscv = TimeSeriesSplit(n_splits=n_splits)
            model = self.models[ticker]

            for train_idx, val_idx in tscv.split(X):
                X_train, X_val = X[train_idx], X[val_idx]
                y_train, y_val = y[train_idx], y[val_idx]

                # Re-compiling per fold starts each fold with a fresh optimizer
                # while the model weights carry over.
                model.compile(
                    optimizer=optimizers.Adam(0.001),
                    loss='mse',
                    metrics=['mae']
                )

                model.fit(
                    X_train, y_train,
                    validation_data=(X_val, y_val),
                    epochs=epochs,
                    batch_size=32,
                    # Keep the best validation weights rather than the ones
                    # from ``patience`` epochs after the best.
                    callbacks=[callbacks.EarlyStopping(patience=3, restore_best_weights=True)],
                    verbose=0
                )

            self._trained.add(ticker)

    def generate_signals(self):
        """Return ``{ticker: prediction}`` from the latest window of features.

        Tickers with fewer than ``SIGNAL_WINDOW`` rows, or whose model has not
        been trained, are skipped with a message.
        """
        signals = {}
        for ticker in self.tickers:
            frame = self.data_engine.data[ticker]
            if len(frame) < self.SIGNAL_WINDOW:
                print(f"Skipping {ticker} due to insufficient data.")
                continue
            if ticker not in self._trained or ticker not in self.scalers:
                print(f"Skipping {ticker}: model has not been trained.")
                continue

            latest = frame.iloc[-self.SIGNAL_WINDOW:].values
            scaled = self.scalers[ticker].transform(latest)
            prediction = self.models[ticker].predict(scaled[np.newaxis, ...])[0][0]
            signals[ticker] = float(prediction)
        return signals

    def construct_portfolio(self):
        """Build a returns frame across tickers and optimise its weights."""
        returns = pd.DataFrame({
            t: self.data_engine.data[t]['Returns'] for t in self.tickers
        }).dropna(how='all').fillna(0)

        if returns.empty:
            returns = pd.DataFrame(0.0001,  # Small non-zero value
                index=[pd.Timestamp.now()],
                columns=self.tickers
            )

        optimizer = PortfolioOptimizer(returns)
        return optimizer.optimize_weights()

    def visualize(self, weights):
        """Show a donut chart of the given ``{ticker: weight}`` mapping."""
        fig = go.Figure()
        fig.add_trace(go.Pie(
            labels=list(weights.keys()),
            values=list(weights.values()),
            hole=0.4,
            marker_colors=px.colors.qualitative.Plotly
        ))
        fig.update_layout(title='Optimized Portfolio Allocation')
        fig.show()

if __name__ == "__main__":
    # Seed NumPy and TensorFlow so model initialisation and training are
    # repeatable across Restart & Run All.
    seed = 42
    np.random.seed(seed)
    tf.random.set_seed(seed)

    # Test with liquid ETFs
    tickers = ['SPY', 'QQQ', 'GLD', 'TLT', 'VTI', 'IWM']
    start = '2015-01-01'
    end = datetime.now().strftime('%Y-%m-%d')

    # Download history, refresh the latest week, then train one model per ticker.
    manager = AIWealthManager(tickers, start, end)
    manager.data_engine.update_data()
    manager.train_models(epochs=30)

    # Portfolio weights are derived from historical returns.
    portfolio = manager.construct_portfolio()
    print("\nOptimized Portfolio:")
    print(pd.Series(portfolio).sort_values(ascending=False))

    manager.visualize(portfolio)


YF.download() has changed argument auto_adjust default to True

Optimized Portfolio:
TLT    0.235
GLD    0.221
SPY    0.148
VTI    0.144
IWM    0.129
QQQ    0.124
dtype: float64
