In [7]:
# Portfolio Risk Analysis Platform
# AI-driven portfolio risk and analytics platform for professional investment managers

import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
from tabulate import tabulate
# Silence every warning category for the rest of the kernel session.
# NOTE(review): this blanket filter also hides deprecation notices from the
# imported libraries — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# Set up plotting style (global matplotlib style sheet + seaborn colour palette)
# NOTE(review): presumably needs a matplotlib release that ships the
# 'seaborn-v0_8' style sheet — confirm against the pinned environment.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")


In [16]:
class Utils:
    """
    Stateless helpers that download price history through ``yfinance``.

    All methods are static. Dates are exchanged as ``'%Y-%m-%d'`` strings and
    every download is best-effort: a symbol whose request raises is reported
    with ``print`` and left out of the result instead of aborting the batch.
    """

    # Look-back window (in years) used when the caller gives no start date.
    DEFAULT_LOOKBACK_YEARS = 6
    # Benchmark tickers downloaded by get_market_data().
    MARKET_SYMBOLS = ('SPY', 'QQQ', 'VTI', 'GLD', 'BTC-USD', 'ETH-USD')

    @staticmethod
    def _default_start():
        """Return the default start date (today minus the look-back window) as a string."""
        lookback = pd.DateOffset(years=Utils.DEFAULT_LOOKBACK_YEARS)
        return (datetime.now() - lookback).strftime('%Y-%m-%d')

    @staticmethod
    def _download_history(symbols, start_date, end_date):
        """Download history for each symbol; return ``{symbol: DataFrame}`` for those that succeeded."""
        history = {}
        for symbol in symbols:
            try:
                history[symbol] = yf.Ticker(symbol).history(start=start_date, end=end_date)
            except Exception as e:
                # Deliberate best-effort: one bad ticker must not abort the whole batch.
                print(f"Error fetching {symbol}: {e}")
        return history

    @staticmethod
    def fetch_price_data(portfolio_data, start_date=None, end_date=None):
        """
        Fetch price data for all symbols in the portfolio.

        Args:
            portfolio_data: DataFrame with a ``symbol`` column and, optionally,
                a ``purchase_date`` column.
            start_date: First date to request (``'%Y-%m-%d'``). When omitted,
                the earlier of the default look-back start and the earliest
                parseable purchase date is used.
            end_date: Last date to request; defaults to today.

        Returns:
            dict mapping each unique symbol to the DataFrame returned by
            ``yfinance`` (symbols whose download raised are absent).
        """
        if not start_date:
            start_date = Utils._default_start()
            # A missing column or an all-NaT column simply keeps the default window.
            if 'purchase_date' in portfolio_data:
                earliest = pd.to_datetime(portfolio_data['purchase_date']).min()
                if not pd.isna(earliest):
                    # ISO-formatted strings order the same way as the dates they encode.
                    start_date = min(start_date, earliest.strftime('%Y-%m-%d'))
        end_date = end_date or datetime.now().strftime('%Y-%m-%d')

        symbols = list(portfolio_data['symbol'].unique())

        print(f"Fetching price data from {start_date} to {end_date}")
        return Utils._download_history(symbols, start_date, end_date)

    @staticmethod
    def get_market_data(start_date=None, end_date=None):
        """
        Fetch market data for SPY and the other benchmark tickers.

        Args:
            start_date: First date to request; defaults to the look-back start.
                (Previously this argument was overwritten and had no effect.)
            end_date: Last date to request; defaults to today.

        Returns:
            dict mapping each benchmark symbol to its history DataFrame.
        """
        start_date = start_date or Utils._default_start()
        end_date = end_date or datetime.now().strftime('%Y-%m-%d')
        return Utils._download_history(Utils.MARKET_SYMBOLS, start_date, end_date)

    @staticmethod
    def get_market_returns(start_date=None, end_date=None):
        """
        Fetch market data and return market returns.

        Returns:
            dict mapping each benchmark symbol to the percentage change of its
            ``Close`` column (first row dropped). Symbols with no rows or no
            ``Close`` column are omitted.
        """
        market_data = Utils.get_market_data(start_date, end_date)
        market_returns = {}
        for symbol, prices in market_data.items():
            if 'Close' in prices and len(prices) > 0:
                market_returns[symbol] = prices['Close'].pct_change().dropna()

        return market_returns


In [17]:
class PortfolioAnalyzer:
    """
    AI-driven portfolio risk and analytics platform for professional investment managers.
    Provides comprehensive risk analysis, performance metrics, and scenario testing.

    Typical flow: ``load_portfolio_from_inputs`` -> ``fetch_price_data`` ->
    ``calculate_returns``.
    """
    def __init__(self):
        self.portfolio_data = None      # DataFrame of holdings (symbol, shares, purchase_date)
        self.price_data = {}            # {symbol: price-history DataFrame}
        self.returns_data = {}          # {symbol: return Series}, plus 'PORTFOLIO'
        self.risk_metrics = {}
        self.performance_metrics = {}
        self.portfolio_returns = None   # set by calculate_returns()

    def load_portfolio_from_inputs(self, inputs_path="../inputs/portfolio1/portfolio_data.csv"):
        """Read the holdings CSV at ``inputs_path``, store it and return it."""
        self.portfolio_data = pd.read_csv(inputs_path)
        print(f"Loaded portfolio with {len(self.portfolio_data)} positions from {inputs_path}")
        return self.portfolio_data

    def calculate_returns(self):
        """
        Compute per-symbol returns from ``Close`` prices and the weighted
        portfolio return series (stored under the ``'PORTFOLIO'`` key).

        Returns:
            dict mapping symbol (and ``'PORTFOLIO'``) to a return Series.

        Raises:
            ValueError: if no holding has usable price data.
        """
        for symbol, prices in self.price_data.items():
            # A frame without a Close column cannot yield returns; skip it
            # instead of failing the whole run with a KeyError.
            if 'Close' not in prices:
                continue
            self.returns_data[symbol] = prices['Close'].pct_change().dropna()

        # Calculate portfolio returns (weighted by market value)
        self.portfolio_returns = self._calculate_portfolio_returns()
        self.returns_data['PORTFOLIO'] = self.portfolio_returns

        return self.returns_data

    def fetch_price_data(self):
        """Download price history for every holding via ``Utils`` and store it."""
        self.price_data = Utils.fetch_price_data(portfolio_data=self.portfolio_data)
        return self.price_data

    @staticmethod
    def _align_purchase_date(raw_date, index):
        """
        Parse ``raw_date`` and make its timezone comparable with ``index``.

        Returns ``None`` when the date is missing or unparseable-as-NaT, which
        callers treat as "no purchase-date restriction".
        """
        purchase_date = pd.to_datetime(raw_date)
        if purchase_date is None or pd.isna(purchase_date):
            return None

        index_tz = getattr(index, 'tz', None)
        if index_tz is not None:
            # tz-aware index: attach the index timezone to a naive date, or
            # convert an already-aware one.
            if purchase_date.tzinfo is None:
                return purchase_date.tz_localize(index_tz)
            return purchase_date.tz_convert(index_tz)

        # tz-naive index: only an aware timestamp needs converting; calling
        # tz_convert on a naive one raises TypeError.
        if purchase_date.tzinfo is not None:
            return purchase_date.tz_convert(None)
        return purchase_date

    def _calculate_portfolio_returns(self):
        """
        Build the portfolio return series as the value-weighted sum of each
        holding's returns since its purchase date.

        Weights are each symbol's current market value (shares x last Close)
        over the portfolio total. Several lots of one symbol are pooled: their
        values are summed and the longest return history among them is used.
        Dates on which a holding has no return contribute 0 for that holding.

        Raises:
            ValueError: if the total market value is zero (no usable holdings).
        """
        current_values = {}
        returns_dict = {}

        for _, row in self.portfolio_data.iterrows():
            symbol = row['symbol']
            shares = row['shares']

            # Skip if no price data
            prices = self.price_data.get(symbol)
            if prices is None or prices.empty:
                continue

            # Restrict price history to after purchase date (when one is known)
            purchase_date = self._align_purchase_date(row.get('purchase_date', None), prices.index)
            if purchase_date is not None:
                prices = prices[prices.index >= purchase_date]

            if prices.empty:
                continue

            # Compute returns since purchase; keep the longest series per symbol
            returns = prices['Close'].pct_change().dropna()
            if symbol not in returns_dict or len(returns) > len(returns_dict[symbol]):
                returns_dict[symbol] = returns

            # Accumulate (not overwrite) so repeated lots keep weights summing to 1
            current_price = prices['Close'].iloc[-1]
            current_values[symbol] = current_values.get(symbol, 0) + shares * current_price

        total_value = sum(current_values.values())
        if total_value == 0:
            raise ValueError("No valid holdings found or price data missing.")

        # Weight each return series by its share of total portfolio value
        weighted_returns = [
            returns_dict[sym] * (value / total_value)
            for sym, value in current_values.items()
        ]

        # Combine (outer join) and fill missing returns with 0 before summing
        portfolio_df = pd.concat(weighted_returns, axis=1).fillna(0)
        return portfolio_df.sum(axis=1)


In [18]:
import numpy as np
import pandas as pd

class PerformanceMetrics:
    """
    Class for calculating comprehensive performance metrics including returns,
    risk-adjusted returns, and market capture ratios.

    Capture ratios are measured against the ``'SPY'`` series of the market
    returns. Annualisation assumes 252 observations per year.
    """

    # Column order of the DataFrame produced by calculate_performance_metrics().
    _COLUMNS = [
        'Symbol', 'Total Return', 'Annualized Return', 'Volatility',
        'Sharpe Ratio', 'Max Drawdown', 'Upside Capture', 'Downside Capture',
    ]

    def __init__(self, portfolio_df=None, returns_data=None, price_data=None, risk_metrics=None,
                 market_returns=None):
        """
        Args:
            portfolio_df: Accepted for call-site symmetry; not used here.
            returns_data: dict mapping symbol to a return Series.
            price_data: dict mapping symbol to a price DataFrame; stored as-is
                so downstream code can read it from this object.
            risk_metrics: Accepted for call-site symmetry; not used here.
            market_returns: Optional ``{symbol: return Series}`` benchmark
                dict. When omitted it is downloaded via
                ``Utils.get_market_returns()``.
        """
        self.returns_data = returns_data or {}
        self.price_data = price_data
        self.performance_metrics = pd.DataFrame()
        if market_returns is None:
            market_returns = Utils.get_market_returns()
        self.market_returns = market_returns
        if 'SPY' not in self.market_returns:
            # Without the benchmark the capture ratios are reported as NaN
            # rather than failing construction.
            print("Warning: no SPY returns available; capture ratios will be NaN")
        self.sp500_returns = self.market_returns.get('SPY', pd.Series(dtype=float))

    @staticmethod
    def _capture_ratio(asset_returns, benchmark_returns, up=True):
        """Mean asset return over mean benchmark return on the benchmark's up (or down) days."""
        mask = benchmark_returns > 0 if up else benchmark_returns < 0
        benchmark_days = benchmark_returns[mask]
        if len(benchmark_days) == 0:
            return np.nan
        return asset_returns.loc[benchmark_days.index].mean() / benchmark_days.mean()

    def calculate_performance_metrics(self, risk_free_rate=0.0):
        """
        Calculate comprehensive performance metrics and return as a DataFrame.

        Args:
            risk_free_rate: Annual rate subtracted from the annualised return
                in the Sharpe ratio. The default of 0.0 reproduces the plain
                return/volatility ratio.

        Returns:
            DataFrame indexed by ``Symbol`` with one row per non-empty return
            series; empty (but with the usual columns) when there is nothing
            to report. The result is also stored on ``performance_metrics``.
        """
        metrics = []

        for symbol, returns in self.returns_data.items():
            if len(returns) == 0:
                continue

            # === Basic metrics ===
            total_return = (1 + returns).prod() - 1
            annualized_return = (1 + total_return) ** (252 / len(returns)) - 1
            volatility = returns.std() * np.sqrt(252)
            sharpe_ratio = (annualized_return - risk_free_rate) / volatility if volatility > 0 else np.nan

            # === Drawdown analysis ===
            # The wealth curve implicitly starts at 1.0 before the first return,
            # so the running peak is floored at 1.0; otherwise a loss on the
            # first observations would never register as a drawdown.
            cumulative_returns = (1 + returns).cumprod()
            running_max = cumulative_returns.cummax().clip(lower=1.0)
            drawdown = cumulative_returns / running_max - 1
            max_drawdown = drawdown.min()

            # === Market capture ratios ===
            upside_capture = downside_capture = np.nan
            common_dates = returns.index.intersection(self.sp500_returns.index)
            if len(common_dates) > 0:
                aligned_returns = returns.loc[common_dates]
                aligned_sp500 = self.sp500_returns.loc[common_dates]
                upside_capture = self._capture_ratio(aligned_returns, aligned_sp500, up=True)
                downside_capture = self._capture_ratio(aligned_returns, aligned_sp500, up=False)

            metrics.append({
                'Symbol': symbol,
                'Total Return': total_return,
                'Annualized Return': annualized_return,
                'Volatility': volatility,
                'Sharpe Ratio': sharpe_ratio,
                'Max Drawdown': max_drawdown,
                'Upside Capture': upside_capture,
                'Downside Capture': downside_capture
            })

        # Explicit columns keep set_index working when no rows were produced.
        df = pd.DataFrame(metrics, columns=self._COLUMNS).set_index('Symbol')
        self.performance_metrics = df
        return df


In [19]:
class RiskMetrics:
    """
    Calculate comprehensive multi-period risk metrics for each asset:
    Volatility, VaR, CVaR, Beta, and Correlation vs S&P 500.

    The ``'SPY'`` series of the market returns is used as the S&P 500 proxy.
    Every metric is computed on the dates an asset shares with that series.
    """

    # Metric keys produced for every period, in output column order.
    _METRIC_NAMES = ('Vol', 'VaR95', 'VaR99', 'CVaR95', 'Beta', 'Corr')

    def __init__(self, portfolio_df=None, returns_data=None, price_data=None, market_returns=None):
        """
        Args:
            portfolio_df: Accepted for call-site symmetry; not used here.
            returns_data: dict mapping symbol to a return Series.
            price_data: Accepted for call-site symmetry; not used here.
            market_returns: Optional ``{symbol: return Series}`` benchmark
                dict. When omitted it is downloaded via
                ``Utils.get_market_returns()``.
        """
        self.returns_data = returns_data or {}
        self.risk_metrics = pd.DataFrame()
        if market_returns is None:
            market_returns = Utils.get_market_returns()
        self.market_returns = market_returns
        if 'SPY' not in self.market_returns:
            # Without the benchmark there are no shared dates, so every metric is NaN.
            print("Warning: no SPY returns available; risk metrics will be NaN")
        self.sp500_returns = self.market_returns.get('SPY', pd.Series(dtype=float))

        # Trailing windows (in trading days)
        self.periods = {
            '3M': 63,
            '1Y': 252,
            '3Y': 756,
            '5Y': 1260,
            'Max': None
        }

    # ---------- Basic Metrics ----------

    @staticmethod
    def calc_volatility(returns):
        """Annualized volatility (sample std x sqrt(252)); NaN for fewer than 2 points."""
        return returns.std() * np.sqrt(252) if len(returns) > 1 else np.nan

    @staticmethod
    def calc_var(returns, level=0.05):
        """Historical Value at Risk (VaR): the ``level`` quantile of returns; NaN for fewer than 2 points."""
        return np.percentile(returns, 100 * level) if len(returns) > 1 else np.nan

    @staticmethod
    def calc_cvar(returns, level=0.05):
        """Conditional Value at Risk (CVaR): mean of the returns at or below the VaR threshold."""
        if len(returns) < 2:
            return np.nan
        var = np.percentile(returns, 100 * level)
        tail = returns[returns <= var]
        return tail.mean() if len(tail) > 0 else np.nan

    # ---------- Helper ----------

    def _align_with_market(self, returns):
        """Align symbol returns with S&P500 dates; both results are empty when no date is shared."""
        common_dates = returns.index.intersection(self.sp500_returns.index)
        if len(common_dates) == 0:
            return returns.iloc[0:0], self.sp500_returns.iloc[0:0]
        return returns.loc[common_dates], self.sp500_returns.loc[common_dates]

    @classmethod
    def _nan_metrics(cls):
        """Return a metrics dict with every value set to NaN."""
        return {name: np.nan for name in cls._METRIC_NAMES}

    # ---------- Multi-period Metrics ----------

    def calc_metrics_over_periods(self, returns):
        """
        Compute all metrics (vol, VaR, CVaR, beta, corr) over trailing windows.
        Returns a dict of dicts: {period: {metric_name: value}}

        A period whose window is longer than the shared history, or that has
        fewer than 2 observations, gets NaN for every metric.
        """
        aligned_r, aligned_m = self._align_with_market(returns)
        results = {}

        for label, window in self.periods.items():
            if window and len(aligned_r) < window:
                results[label] = self._nan_metrics()
                continue

            # use trailing slice or full overlap for Max
            r_win = aligned_r.iloc[-window:] if window else aligned_r
            m_win = aligned_m.iloc[-window:] if window else aligned_m
            if len(r_win) < 2:
                results[label] = self._nan_metrics()
                continue

            # Covariance and market variance come from the same sample
            # covariance matrix so both use the same degrees of freedom;
            # mixing np.cov with np.var would scale beta by n / (n - 1).
            cov_matrix = np.cov(r_win, m_win)
            var_m = cov_matrix[1, 1]
            beta = cov_matrix[0, 1] / var_m if var_m > 0 else np.nan
            corr = np.corrcoef(r_win, m_win)[0, 1] if len(r_win) > 2 else np.nan

            results[label] = {
                'Vol': self.calc_volatility(r_win),
                'VaR95': self.calc_var(r_win, 0.05),
                'VaR99': self.calc_var(r_win, 0.01),
                'CVaR95': self.calc_cvar(r_win, 0.05),
                'Beta': beta,
                'Corr': corr
            }

        return results

    # ---------- Master ----------

    def calculate_risk_metrics(self):
        """
        Compute all multi-period risk metrics for each symbol.

        Symbols with fewer than 30 return observations are skipped.

        Returns:
            DataFrame indexed by ``Symbol`` with one ``"<metric> <period>"``
            column per combination; empty (but with those columns) when no
            symbol qualifies. The result is also stored on ``risk_metrics``.
        """
        metrics = []

        for symbol, returns in self.returns_data.items():
            if len(returns) < 30:
                continue

            row = {'Symbol': symbol}
            # Flatten metrics by appending period to column name
            for period, vals in self.calc_metrics_over_periods(returns).items():
                for k, v in vals.items():
                    row[f"{k} {period}"] = v

            metrics.append(row)

        # Explicit columns keep set_index working when no rows were produced.
        columns = ['Symbol'] + [
            f"{name} {period}" for period in self.periods for name in self._METRIC_NAMES
        ]
        df = pd.DataFrame(metrics, columns=columns).set_index('Symbol')
        self.risk_metrics = df
        return df


In [28]:

# ---- Banner ----
banner = "=" * 60
print(banner)
print("STOCK + ETF ANALYSIS")
print(banner)

# ---- Load holdings, download prices, derive returns ----
base = PortfolioAnalyzer()
portfolio_df = base.load_portfolio_from_inputs()
price_data = base.fetch_price_data()
returns_data = base.calculate_returns()
market_data = Utils.get_market_data()

# ---- Performance and risk tables (both calculators take the same inputs) ----
shared_inputs = dict(portfolio_df=portfolio_df, price_data=price_data, returns_data=returns_data)

perf_metrics = PerformanceMetrics(**shared_inputs)
perf = perf_metrics.calculate_performance_metrics()

risk_metrics = RiskMetrics(**shared_inputs)
risk = risk_metrics.calculate_risk_metrics()

# ---- Summary ----
print("\n=== Portfolio Summary ===")
symbol_list = ", ".join(perf_metrics.returns_data.keys())
print("Symbols:", symbol_list)

# Normalise both results to DataFrames (a dict result is transposed so that
# symbols become rows; a DataFrame is used unchanged).
perf_df = pd.DataFrame(perf).T if isinstance(perf, dict) else perf
risk_df = pd.DataFrame(risk).T if isinstance(risk, dict) else risk

# Print the first rows of each table as a GitHub-style grid.
for heading, frame in (
    ("\n--- Performance Metrics (sample) ---", perf_df),
    ("\n--- Risk Metrics (sample) ---", risk_df),
):
    print(heading)
    print(tabulate(frame.head(), headers="keys", tablefmt="github", floatfmt=".4f"))


# Full risk table, with Symbol as an ordinary column.
print(tabulate(risk.reset_index(), headers='keys', tablefmt='github', showindex=False))



STOCK + ETF ANALYSIS
Loaded portfolio with 16 positions from ../inputs/portfolio1/portfolio_data.csv
Fetching price data from 2019-10-19 to 2025-10-19

=== Portfolio Summary ===
Symbols: AAPL, MSFT, GOOGL, SPY, QQQ, VTI, TSLA, NVDA, JPM, BAC, VEA, VWO, JNJ, PFE, KO, PG, PORTFOLIO

--- Performance Metrics (sample) ---
| Symbol   |   Total Return |   Annualized Return |   Volatility |   Sharpe Ratio |   Max Drawdown |   Upside Capture |   Downside Capture |
|----------|----------------|---------------------|--------------|----------------|----------------|------------------|--------------------|
| AAPL     |         3.3564 |              0.2792 |       0.3184 |         0.8768 |        -0.3336 |           1.2606 |             1.1743 |
| MSFT     |         2.9134 |              0.2565 |       0.2943 |         0.8716 |        -0.3715 |           1.2000 |             1.1279 |
| GOOGL    |         3.0995 |              0.2663 |       0.3207 |         0.8302 |        -0.4432 |           1.2452

In [None]:
import os

# Write every result table of the analysis to CSV under output_dir.
output_dir = "outputs/portfolio1"
os.makedirs(output_dir, exist_ok=True)

# One column per symbol (plus PORTFOLIO), aligned on date.
returns_df = pd.DataFrame(perf_metrics.returns_data)
returns_df.to_csv(os.path.join(output_dir, "returns.csv"))

perf_df.to_csv(os.path.join(output_dir, "performance.csv"))

risk_df.to_csv(os.path.join(output_dir, "risk.csv"))

# Prefer the price histories carried by the metrics object; fall back to the
# notebook-level price_data when the object has none, so that prices.csv is
# not silently skipped.
prices_by_symbol = getattr(perf_metrics, "price_data", None) or price_data

# Stack all price histories into one long DataFrame, tagging each row with
# its ticker in a "Symbol" column (the date index is kept as-is).
price_panels = [
    history.assign(Symbol=sym)
    for sym, history in prices_by_symbol.items()
    if history is not None and not history.empty
]
if price_panels:
    prices_df = pd.concat(price_panels)
    prices_df.to_csv(os.path.join(output_dir, "prices.csv"))
    print(f"✅ Saved prices to {output_dir}/prices.csv")
else:
    # pd.concat raises on an empty list, so report instead of crashing.
    print("No price history available; prices.csv was not written")
