# Equity Factor Models

This notebook constructs equity factor models similar to the crypto factor models:
- **SMB (Size)**: Small minus Big based on market cap
- **Value**: Based on earnings yield
- **Momentum**: 12-1 month trailing returns
- **Growth**: Based on revenue growth
- **Quality**: Composite of cross-sectional z-scores: z(ROE) + z(Gross Margin) - z(Debt/Equity)
- **Market**: Cap-weighted returns of the Russell 1000 proxy universe

In [None]:
# Imports
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from typing import Optional
import warnings
warnings.filterwarnings('ignore')

import yfinance as yf

# Local imports
from utils import FactorModel, get_equity_data


## Data Pull
Pull price data from yfinance. Fundamental data (earningsyield, pricetobook, etc.) is deferred to a future iteration because yfinance lacks historical structured fundamentals.

In [None]:
# Pull equity price data via yfinance
# Use a representative universe of large/mid cap tickers
print('Pulling equity price data via yfinance...')

# Russell 1000 representative tickers — expand as needed
tickers = [
    'AAPL', 'MSFT', 'AMZN', 'NVDA', 'GOOGL', 'META', 'TSLA', 'BRK-B', 'UNH', 'JNJ',
    'V', 'XOM', 'JPM', 'PG', 'MA', 'HD', 'CVX', 'MRK', 'ABBV', 'LLY',
    'PEP', 'KO', 'COST', 'AVGO', 'TMO', 'MCD', 'WMT', 'CSCO', 'ACN', 'ABT',
    'DHR', 'NEE', 'LIN', 'ADBE', 'TXN', 'PM', 'RTX', 'HON', 'UNP', 'LOW',
    'INTC', 'AMD', 'QCOM', 'COP', 'BMY', 'UPS', 'CAT', 'GE', 'BA', 'SPGI',
    'DE', 'AMAT', 'GS', 'ELV', 'ADP', 'BLK', 'SYK', 'ISRG', 'MDLZ', 'GILD',
    'LRCX', 'CB', 'REGN', 'VRTX', 'ADI', 'ZTS', 'SO', 'PGR', 'CI', 'CME',
]

# Download all at once for efficiency
raw = yf.download(tickers, start='2020-01-01', progress=True, auto_adjust=True)

# Pick the per-ticker close/volume tables out of the download; when there is no
# 'Close' level the whole frame is treated as closes and volume is unavailable.
close_df = raw['Close'] if 'Close' in raw.columns.get_level_values(0) else raw
volume_df = raw['Volume'] if 'Volume' in raw.columns.get_level_values(0) else None

# Keep only requested tickers that actually came back, in the order of `tickers`
available = [t for t in tickers if t in close_df.columns]
if not available:
    # Fail with a clear message instead of a KeyError on the missing 'date' column below
    raise RuntimeError('yfinance returned no price data for the requested tickers')

# Build long-format price DataFrame.
# Vectorized reshape (wide date x ticker -> one row per ticker/date) instead of
# appending one dict per observation; rows without a price are dropped as before.
price_df = (
    close_df[available]
    .rename_axis(index='date', columns='ticker')
    .reset_index()
    .melt(id_vars='date', var_name='ticker', value_name='price')
    .dropna(subset=['price'])
)

price_columns = ['ticker', 'date', 'price']
if volume_df is not None:
    volume_tickers = [t for t in available if t in volume_df.columns]
    if volume_tickers:
        volume_long = (
            volume_df[volume_tickers]
            .rename_axis(index='date', columns='ticker')
            .reset_index()
            .melt(id_vars='date', var_name='ticker', value_name='volume')
        )
        # Left join: tickers without volume data keep their price rows with NaN volume
        price_df = price_df.merge(volume_long, on=['ticker', 'date'], how='left')
        price_columns.append('volume')

price_df = price_df[price_columns].reset_index(drop=True)
price_df['date'] = pd.to_datetime(price_df['date'])
price_df['ticker'] = price_df['ticker'].str.upper()
print(f'Loaded {len(price_df):,} price records for {price_df["ticker"].nunique():,} tickers')
print(f'Date range: {price_df["date"].min()} to {price_df["date"].max()}')
price_df.head()


In [None]:
# Fundamental data (earningsyield, pricetobook, roe, etc.) NOT available from yfinance
# in a reliable historical structured format. Deferring to future iteration.
# For now, create empty fundamental_df placeholder so downstream cells can run with price-only factors.
# NOTE(review): this placeholder is long-format (one 'metric'/'value' pair per row), while the
# later data-preparation and factor cells read columns such as 'marketcap' and 'earningsyield'
# directly — confirm the intended schema before wiring in a real fundamentals source.

print('Fundamental data: DEFERRED — using price-only factors')
fundamental_df = pd.DataFrame(columns=['ticker', 'date', 'metric', 'value'])


In [None]:
# Company metadata — derive from yfinance info or use static mapping
# For now, use a simple sector mapping for the tickers we downloaded
print('Building company metadata...')

metadata_records = []
for ticker in tickers:
    # Start from blank fields so a failed lookup still yields exactly one row per ticker
    record = {'ticker': ticker, 'company_name': '', 'sector': '', 'exchange': ''}
    try:
        info = yf.Ticker(ticker).info
        record.update(
            company_name=info.get('shortName', ''),
            sector=info.get('sector', ''),
            exchange=info.get('exchange', ''),
        )
    except Exception as exc:
        # Best-effort lookup: keep the blank row, but report which ticker failed
        # instead of silently hiding the problem.
        print(f'  Warning: metadata lookup failed for {ticker}: {exc}')
    metadata_records.append(record)

metadata_df = pd.DataFrame(metadata_records)
print(f'Loaded metadata for {len(metadata_df):,} companies')
metadata_df.head()


## Data Preparation
Merge price and fundamental data, create Russell 1000 universe

In [None]:
# Resample price data to monthly (end of month)
print('Resampling price data to monthly...')

# Only index by date once, so re-running this cell does not fail on a missing 'date' column
if 'date' in price_df.columns:
    price_df = price_df.set_index('date')

# 'volume' is absent when the download carried no volume data; aggregate it only if present
monthly_aggs = {'price': 'last'}
if 'volume' in price_df.columns:
    monthly_aggs['volume'] = 'sum'

# An explicit MonthEnd offset is used instead of the 'M' alias, which pandas
# deprecated in 2.2 in favour of 'ME'; the offset object works on either side.
price_monthly = (
    price_df.groupby('ticker')
    .resample(pd.offsets.MonthEnd())
    .agg(monthly_aggs)
    .reset_index()
)

print(f'Monthly price data: {len(price_monthly):,} rows')

# Calculate monthly returns (per ticker, on month-end prices)
price_monthly = price_monthly.sort_values(['ticker', 'date'])
price_monthly['return_1m'] = price_monthly.groupby('ticker')['price'].pct_change(1)
price_monthly['return_12m'] = price_monthly.groupby('ticker')['price'].pct_change(12)

price_monthly.head(15)

In [None]:
# Expand annual fundamental data to monthly (forward fill until next annual report)
print('Processing fundamental data...')

fundamental_df['date'] = pd.to_datetime(fundamental_df['date'])

# Get the date range we need (from price data)
min_date = price_monthly['date'].min()
max_date = price_monthly['date'].max()
# Month-end calendar spanning the price history. An explicit MonthEnd offset is used
# instead of the 'M' alias, which pandas deprecated in 2.2 in favour of 'ME'.
all_months = pd.date_range(start=min_date, end=max_date, freq=pd.offsets.MonthEnd())

print(f'Expanding fundamentals to monthly from {min_date} to {max_date}')

# For each ticker, create monthly rows and forward fill from annual data
def expand_to_monthly(group, months=None):
    """Expand one ticker's fundamental reports onto a month-end calendar.

    Each month takes the values of the most recent report dated on or before
    it. Reports therefore do not have to fall exactly on a month end, and a
    report dated before the first month is still carried into the window
    (an exact-date join would silently drop both).

    Args:
        group: One ticker's rows with a 'date' column plus fundamental columns.
            The 'ticker' column is used when present; otherwise the group key
            set by ``groupby.apply`` (``group.name``) is used.
        months: Month-end dates to expand onto. Defaults to the notebook-level
            ``all_months`` calendar.

    Returns:
        DataFrame with columns 'date', 'ticker', then the fundamental columns,
        one row per month. Months before the first report hold NaN.
    """
    if months is None:
        months = all_months

    # Newer pandas may exclude the grouping column from the frame handed to apply
    ticker = group['ticker'].iloc[0] if 'ticker' in group.columns else group.name

    reports = (
        group.drop(columns=['ticker'], errors='ignore')
        .dropna(subset=['date'])
        .sort_values('date')
        .ffill()  # a gap in a later report inherits the previous report's value
        .drop_duplicates(subset='date', keep='last')  # reindex needs unique dates
        .set_index('date')
    )

    # As-of lookup: latest report at or before each month end
    monthly_df = reports.reindex(pd.DatetimeIndex(months), method='ffill')
    monthly_df = monthly_df.rename_axis('date').reset_index()
    monthly_df.insert(1, 'ticker', ticker)
    return monthly_df

if fundamental_df.empty:
    # Nothing to expand (e.g. the deferred-fundamentals placeholder): keep an empty
    # frame with the same columns so the merge keys still exist.
    fundamental_monthly = fundamental_df.iloc[0:0].copy()
else:
    fundamental_monthly = fundamental_df.groupby('ticker').apply(expand_to_monthly).reset_index(drop=True)

if 'marketcap' in fundamental_monthly.columns:
    fundamental_monthly = fundamental_monthly.dropna(subset=['marketcap'])  # Drop rows before first fundamental data
else:
    # Without this guard dropna raises KeyError; say what is missing instead
    print("Warning: fundamental data has no 'marketcap' column; cells that rank or weight by market cap will have no usable rows")

print(f'Monthly fundamental data: {len(fundamental_monthly):,} rows, {fundamental_monthly["ticker"].nunique()} tickers')
print(f'Date range: {fundamental_monthly["date"].min()} to {fundamental_monthly["date"].max()}')
fundamental_monthly.head()

In [None]:
# Merge price and fundamental data
print('Merging price and fundamental data...')

# Inner join keeps only ticker-months present in both tables; company metadata is
# then attached per ticker without dropping rows that have no metadata.
df = (
    price_monthly
    .merge(fundamental_monthly, how='inner', on=['ticker', 'date'])
    .merge(metadata_df, how='left', on='ticker')
)

print(f'Master dataset: {len(df):,} rows, {df["ticker"].nunique():,} tickers')
print(f'Date range: {df["date"].min()} to {df["date"].max()}')
print(f'Columns: {df.columns.tolist()}')
df.head()

In [None]:
# Create Russell 1000 proxy - top 1000 stocks by market cap each month
print('Creating Russell 1000 universe...')

def get_russell1000_universe(group):
    """Select one date's universe: the 1000 largest names by market cap.

    Rows without a 'marketcap' value are removed. When at least 1000 rows
    remain, only the 1000 largest are kept (largest first); otherwise every
    remaining row is kept in its original order. The result carries an
    'in_russell1000' column set to True.
    """
    universe_size = 1000
    members = group.dropna(subset=['marketcap'])
    if len(members) >= universe_size:
        members = members.nlargest(universe_size, 'marketcap')
    return members.assign(in_russell1000=True)

# Run the selection separately for every date; reset_index(drop=True) discards the
# index that groupby.apply builds and leaves a plain 0..n-1 row index.
russell_universe = df.groupby('date').apply(get_russell1000_universe).reset_index(drop=True)
print(f'Russell 1000 universe: {len(russell_universe):,} rows')

# Filter to Russell 1000 only
# (get_russell1000_universe has already dropped the excluded rows, so this is an
# independent copy that later cells add columns to)
df_russell = russell_universe.copy()
print(f'Filtered to Russell 1000: {len(df_russell):,} rows, {df_russell["ticker"].nunique():,} unique tickers')

## Utility Functions

In [None]:
def cumulative_returns(factor_returns):
    """Turn a {date: return} mapping into a frame with compounded returns.

    Returns a DataFrame with columns 'date', 'value' (the per-period return)
    and 'cumulative_returns' (compounded growth minus one), in the mapping's
    iteration order.
    """
    rows = [(period, ret) for period, ret in factor_returns.items()]
    frame = pd.DataFrame(rows, columns=['date', 'value'])
    growth = frame['value'].add(1).cumprod()
    frame['cumulative_returns'] = growth.sub(1)
    return frame

def calculate_factor_metrics(factor_returns_dict, factor_name):
    """Summarize a monthly factor return series.

    Args:
        factor_returns_dict: Mapping of {date: return}; the dates must support
            subtraction and ``strftime`` (e.g. ``pd.Timestamp``).
        factor_name: Label stored in the result under 'factor'.

    Returns:
        Tuple ``(metrics, returns_df)``. ``metrics`` is a dict with the keys
        'factor', 'annualized_return', 'cumulative_returns', 'sharpe_ratio',
        'sortino_ratio', 'years', 'start_date' and 'end_date'; ``returns_df``
        is the frame produced by ``cumulative_returns``.

    Raises:
        ValueError: If ``factor_returns_dict`` is empty.
    """
    if not factor_returns_dict:
        # Previously surfaced as an opaque IndexError on the empty frame
        raise ValueError(
            f"No returns available for factor '{factor_name}'; "
            "check that the upstream data produced at least one period"
        )

    returns_df = cumulative_returns(factor_returns_dict)
    raw_returns = pd.Series(factor_returns_dict)

    # sqrt(12) annualizes statistics computed on monthly returns
    sharpe = (raw_returns.mean() / raw_returns.std()) * np.sqrt(12)
    downside = raw_returns[raw_returns < 0]
    sortino = (raw_returns.mean() / downside.std()) * np.sqrt(12) if len(downside) > 0 else np.nan

    # Use the earliest/latest date rather than relying on insertion order
    dates = list(factor_returns_dict.keys())
    start, end = min(dates), max(dates)
    days = (end - start).days
    years = days / 365
    cumulative = returns_df['cumulative_returns'].iloc[-1]
    # A single period (or zero-day span) has no horizon to annualize over, and a
    # total loss has no real root; both report NaN instead of raising.
    # NOTE(review): the first-to-last date span is one period shorter than the number
    # of compounded returns, which slightly overstates the annualized figure — confirm
    # whether len(returns) / 12 is the intended horizon.
    annualized = ((1 + cumulative) ** (1 / years)) - 1 if cumulative > -1 and years > 0 else np.nan

    return {
        'factor': factor_name,
        'annualized_return': annualized,
        'cumulative_returns': cumulative,
        'sharpe_ratio': sharpe,
        'sortino_ratio': sortino,
        'years': years,
        'start_date': start.strftime('%Y-%m-%d'),
        'end_date': end.strftime('%Y-%m-%d'),
    }, returns_df

def plot_factor_performance(returns_df, factor_name, results_dict, long_only_df=None, short_only_df=None):
    """Plot cumulative factor returns with an inset of headline metrics.

    Args:
        returns_df: Frame with 'date' and 'cumulative_returns' for the long-short factor.
        factor_name: Name used in the chart title.
        results_dict: Metrics dict; reads 'annualized_return', 'sharpe_ratio',
            'start_date' and 'end_date'.
        long_only_df: Optional frame (same columns) for the long leg.
        short_only_df: Optional frame (same columns) for the short leg.
    """
    fig, ax1 = plt.subplots(figsize=(12, 6))

    # The factor line is always drawn; the legs are drawn only when supplied
    ax1.plot(returns_df['date'], returns_df['cumulative_returns'], linewidth=1.5, color='tab:blue', label='Factor (L-S)')
    optional_legs = (
        (long_only_df, 'tab:green', 'Long Only'),
        (short_only_df, 'tab:red', 'Short Only'),
    )
    for leg_df, leg_color, leg_label in optional_legs:
        if leg_df is None:
            continue
        ax1.plot(leg_df['date'], leg_df['cumulative_returns'], linewidth=1.5, color=leg_color, label=leg_label)

    ax1.set_xlabel('Date')
    ax1.set_ylabel('Cumulative Returns')
    ax1.legend(loc='upper left')
    ax1.grid(True, alpha=0.3)
    ax1.yaxis.set_major_formatter(plt.FuncFormatter(lambda value, _pos: '{:.0%}'.format(value)))
    plt.title(f'{factor_name} Factor Performance')

    # Metrics box in the top-left corner, in axes coordinates
    metrics_text = '\n'.join([
        f"Ann. Return: {results_dict['annualized_return']:.1%}",
        f"Sharpe: {results_dict['sharpe_ratio']:.2f}",
        f"Period: {results_dict['start_date']} to {results_dict['end_date']}",
    ])
    box_style = dict(boxstyle='round', facecolor='white', alpha=0.8)
    ax1.text(0.02, 0.98, metrics_text, transform=ax1.transAxes, fontsize=9, verticalalignment='top', bbox=box_style)
    plt.tight_layout()
    plt.show()

print('Utility functions loaded')

## SMB Factor (Size)
Small minus Big - long small caps, short large caps

In [None]:
print('Building SMB Factor...')
BREAKPOINT = 0.3  # fraction of each date's cross-section placed in each leg
MIN_STOCKS = 10  # Reduced from 50 since we only have ~71 stocks with fundamental data

# {date: return} for the long-short spread, the long (small-cap) leg and the short (big-cap) leg
smb_factor_returns, smb_long_returns, smb_short_returns = {}, {}, {}

for date, group in df_russell.groupby('date'):
    group = group.dropna(subset=['marketcap', 'return_1m'])
    # Skip dates with too thin a cross-section to form both legs
    if len(group) < MIN_STOCKS:
        continue
    # Each leg holds BREAKPOINT of the names, but never fewer than 5
    n_select = max(int(len(group) * BREAKPOINT), 5)
    small_caps = group.nsmallest(n_select, 'marketcap')
    big_caps = group.nlargest(n_select, 'marketcap')
    # Equal-weighted leg returns.
    # NOTE(review): the ranking variable and return_1m come from the same row — confirm
    # marketcap is known before the return period, otherwise this sort has look-ahead.
    small_return = small_caps['return_1m'].mean()
    big_return = big_caps['return_1m'].mean()
    smb_factor_returns[date] = small_return - big_return
    smb_long_returns[date] = small_return
    smb_short_returns[date] = big_return

print(f"Generated {len(smb_factor_returns)} periods of SMB returns")
smb_results, smb_returns_df = calculate_factor_metrics(smb_factor_returns, 'SMB')
smb_long_df, smb_short_df = cumulative_returns(smb_long_returns), cumulative_returns(smb_short_returns)
print(f"SMB: Ann. Return: {smb_results['annualized_return']:.1%}, Sharpe: {smb_results['sharpe_ratio']:.2f}")
plot_factor_performance(smb_returns_df, 'SMB (Size)', smb_results, smb_long_df, smb_short_df)

## Value Factor
Long high earnings yield (cheap), short low earnings yield (expensive)

In [None]:
print('Building Value Factor...')
BREAKPOINT = 0.3  # fraction of each date's cross-section placed in each leg
MIN_STOCKS = 10  # minimum names required on a date to form the portfolios

# {date: return} for the long-short spread, the long (high yield) leg and the short (low yield) leg
value_factor_returns, value_long_returns, value_short_returns = {}, {}, {}

for date, group in df_russell.groupby('date'):
    group = group.dropna(subset=['earningsyield', 'return_1m'])
    # Keep only earnings yields strictly between -1 and 1 (drops extreme values)
    group = group[(group['earningsyield'] > -1) & (group['earningsyield'] < 1)]
    if len(group) < MIN_STOCKS:
        continue
    # Each leg holds BREAKPOINT of the names, but never fewer than 5
    n_select = max(int(len(group) * BREAKPOINT), 5)
    value_stocks = group.nlargest(n_select, 'earningsyield')
    # Despite the name, this is the low-earnings-yield (expensive) leg, not the Growth factor
    growth_stocks = group.nsmallest(n_select, 'earningsyield')
    # Equal-weighted leg returns
    value_return = value_stocks['return_1m'].mean()
    growth_return = growth_stocks['return_1m'].mean()
    value_factor_returns[date] = value_return - growth_return
    value_long_returns[date] = value_return
    value_short_returns[date] = growth_return

print(f"Generated {len(value_factor_returns)} periods of Value returns")
value_results, value_returns_df = calculate_factor_metrics(value_factor_returns, 'Value')
value_long_df, value_short_df = cumulative_returns(value_long_returns), cumulative_returns(value_short_returns)
print(f"Value: Ann. Return: {value_results['annualized_return']:.1%}, Sharpe: {value_results['sharpe_ratio']:.2f}")
plot_factor_performance(value_returns_df, 'Value (Earnings Yield)', value_results, value_long_df, value_short_df)

## Momentum Factor
12-1 month momentum - long winners, short losers

In [None]:
print('Building Momentum Factor...')
MIN_STOCKS = 10  # minimum names required on a date to form the portfolios

# Formation signal: price one row back divided by price twelve rows back, minus one,
# computed per ticker so the current month's return is not part of the signal.
# NOTE(review): shift() counts rows, not calendar months — this assumes one row per
# ticker per month with no gaps; confirm for the merged universe.
df_russell = df_russell.sort_values(['ticker', 'date'])
df_russell['price_lag1'] = df_russell.groupby('ticker')['price'].shift(1)
df_russell['price_lag12'] = df_russell.groupby('ticker')['price'].shift(12)
df_russell['momentum_12_1'] = (df_russell['price_lag1'] / df_russell['price_lag12']) - 1

BREAKPOINT = 0.3  # fraction of each date's cross-section placed in each leg
# {date: return} for the long-short spread, the long (winners) leg and the short (losers) leg
momentum_factor_returns, momentum_long_returns, momentum_short_returns = {}, {}, {}

for date, group in df_russell.groupby('date'):
    # Rows without a full lookback have NaN momentum and are dropped here
    group = group.dropna(subset=['momentum_12_1', 'return_1m'])
    if len(group) < MIN_STOCKS:
        continue
    # Each leg holds BREAKPOINT of the names, but never fewer than 5
    n_select = max(int(len(group) * BREAKPOINT), 5)
    winners = group.nlargest(n_select, 'momentum_12_1')
    losers = group.nsmallest(n_select, 'momentum_12_1')
    # Equal-weighted leg returns
    winner_return = winners['return_1m'].mean()
    loser_return = losers['return_1m'].mean()
    momentum_factor_returns[date] = winner_return - loser_return
    momentum_long_returns[date] = winner_return
    momentum_short_returns[date] = loser_return

print(f"Generated {len(momentum_factor_returns)} periods of Momentum returns")
momentum_results, momentum_returns_df = calculate_factor_metrics(momentum_factor_returns, 'Momentum')
momentum_long_df, momentum_short_df = cumulative_returns(momentum_long_returns), cumulative_returns(momentum_short_returns)
print(f"Momentum: Ann. Return: {momentum_results['annualized_return']:.1%}, Sharpe: {momentum_results['sharpe_ratio']:.2f}")
plot_factor_performance(momentum_returns_df, 'Momentum (12-1)', momentum_results, momentum_long_df, momentum_short_df)

## Growth Factor
Long high revenue growth, short low revenue growth

In [None]:
print('Building Growth Factor...')
BREAKPOINT = 0.3  # fraction of each date's cross-section placed in each leg
MIN_STOCKS = 10  # minimum names required on a date to form the portfolios

# {date: return} for the long-short spread, the long (high growth) leg and the short (low growth) leg
growth_factor_returns, growth_long_returns, growth_short_returns = {}, {}, {}

for date, group in df_russell.groupby('date'):
    group = group.dropna(subset=['revenuegrowth', 'return_1m'])
    # Keep only revenue growth strictly between -1 and 5 (drops extreme values)
    group = group[(group['revenuegrowth'] > -1) & (group['revenuegrowth'] < 5)]
    if len(group) < MIN_STOCKS:
        continue
    # Each leg holds BREAKPOINT of the names, but never fewer than 5
    n_select = max(int(len(group) * BREAKPOINT), 5)
    high_growth = group.nlargest(n_select, 'revenuegrowth')
    low_growth = group.nsmallest(n_select, 'revenuegrowth')
    # Equal-weighted leg returns
    high_growth_return = high_growth['return_1m'].mean()
    low_growth_return = low_growth['return_1m'].mean()
    growth_factor_returns[date] = high_growth_return - low_growth_return
    growth_long_returns[date] = high_growth_return
    growth_short_returns[date] = low_growth_return

print(f"Generated {len(growth_factor_returns)} periods of Growth returns")
growth_results, growth_returns_df = calculate_factor_metrics(growth_factor_returns, 'Growth')
growth_long_df, growth_short_df = cumulative_returns(growth_long_returns), cumulative_returns(growth_short_returns)
print(f"Growth: Ann. Return: {growth_results['annualized_return']:.1%}, Sharpe: {growth_results['sharpe_ratio']:.2f}")
plot_factor_performance(growth_returns_df, 'Growth (Revenue)', growth_results, growth_long_df, growth_short_df)

## Quality Factor
Composite: z(ROE) + z(Gross Margin) - z(Debt/Equity)

In [None]:
print('Building Quality Factor...')
MIN_STOCKS = 10

def calculate_quality_score(group):
    """Add per-metric z-scores and a composite 'quality_score' to a copy of *group*.

    For each of 'roe', 'grossmargin' and 'debttoequity' present in the frame, a
    '<metric>_z' column is added holding the z-score within the frame (0 when
    the standard deviation is not positive). The composite is
    roe_z + grossmargin_z - debttoequity_z, with a missing metric counting as 0.
    The input frame is left unmodified.
    """
    scored = group.copy()
    for metric in ('roe', 'grossmargin', 'debttoequity'):
        if metric not in scored.columns:
            continue
        values = scored[metric]
        center = values.mean()
        spread = values.std()
        if spread > 0:
            scored[f'{metric}_z'] = (values - center) / spread
        else:
            # Covers zero and NaN spread alike: the metric then contributes nothing
            scored[f'{metric}_z'] = 0

    # Higher ROE and gross margin raise the score; higher leverage lowers it
    rewarded = scored.get('roe_z', 0) + scored.get('grossmargin_z', 0)
    scored['quality_score'] = rewarded - scored.get('debttoequity_z', 0)
    return scored

# Score every date's cross-section separately so z-scores are relative to that date's peers;
# reset_index(drop=True) discards the index that groupby.apply builds.
df_russell = df_russell.groupby('date').apply(calculate_quality_score).reset_index(drop=True)

BREAKPOINT = 0.3  # fraction of each date's cross-section placed in each leg
# {date: return} for the long-short spread, the long (high quality) leg and the short (low quality) leg
quality_factor_returns, quality_long_returns, quality_short_returns = {}, {}, {}

for date, group in df_russell.groupby('date'):
    group = group.dropna(subset=['quality_score', 'return_1m'])
    if len(group) < MIN_STOCKS:
        continue
    # Each leg holds BREAKPOINT of the names, but never fewer than 5
    n_select = max(int(len(group) * BREAKPOINT), 5)
    high_quality = group.nlargest(n_select, 'quality_score')
    low_quality = group.nsmallest(n_select, 'quality_score')
    # Equal-weighted leg returns
    high_quality_return = high_quality['return_1m'].mean()
    low_quality_return = low_quality['return_1m'].mean()
    quality_factor_returns[date] = high_quality_return - low_quality_return
    quality_long_returns[date] = high_quality_return
    quality_short_returns[date] = low_quality_return

print(f"Generated {len(quality_factor_returns)} periods of Quality returns")
quality_results, quality_returns_df = calculate_factor_metrics(quality_factor_returns, 'Quality')
quality_long_df, quality_short_df = cumulative_returns(quality_long_returns), cumulative_returns(quality_short_returns)
print(f"Quality: Ann. Return: {quality_results['annualized_return']:.1%}, Sharpe: {quality_results['sharpe_ratio']:.2f}")
plot_factor_performance(quality_returns_df, 'Quality (ROE+GM-D/E)', quality_results, quality_long_df, quality_short_df)

## Market Factor
Russell 1000 cap-weighted returns

In [None]:
print('Building Market Factor...')
MIN_STOCKS = 10  # minimum names required on a date to compute the market return

# {date: cap-weighted return of the universe}
market_factor_returns = {}

for date, group in df_russell.groupby('date'):
    group = group.dropna(subset=['marketcap', 'return_1m'])
    if len(group) < MIN_STOCKS:
        continue
    # Market-cap-weighted average of the constituents' one-month returns.
    # NOTE(review): weights and return_1m come from the same row — confirm marketcap is a
    # start-of-period value, otherwise the weights already reflect the month's move.
    total_mcap = group['marketcap'].sum()
    weighted_return = (group['marketcap'] * group['return_1m']).sum() / total_mcap
    market_factor_returns[date] = weighted_return

print(f"Generated {len(market_factor_returns)} periods of Market returns")
market_results, market_returns_df = calculate_factor_metrics(market_factor_returns, 'Market')
print(f"Market: Ann. Return: {market_results['annualized_return']:.1%}, Sharpe: {market_results['sharpe_ratio']:.2f}")
# Long-only series, so no separate long/short legs are plotted
plot_factor_performance(market_returns_df, 'Market (Cap-Weighted)', market_results)

## Factor Summary & Correlation

In [None]:
print('=' * 60)
print('EQUITY FACTOR SUMMARY')
print('=' * 60)

# One pre-formatted row per factor, in the order the factors were built above
summary_df = pd.DataFrame([
    {
        'Factor': label,
        'Ann. Return': f"{metrics['annualized_return']:.1%}",
        'Sharpe': f"{metrics['sharpe_ratio']:.2f}",
        'Cum. Return': f"{metrics['cumulative_returns']:.1%}",
    }
    for label, metrics in [
        ('SMB', smb_results),
        ('Value', value_results),
        ('Momentum', momentum_results),
        ('Growth', growth_results),
        ('Quality', quality_results),
        ('Market', market_results),
    ]
])
print(summary_df.to_string(index=False))

In [None]:
# Factor correlation matrix
# Map each factor label to its {date: return} series
factor_returns_dict = {
    'SMB': smb_factor_returns, 'Value': value_factor_returns, 'Momentum': momentum_factor_returns,
    'Growth': growth_factor_returns, 'Quality': quality_factor_returns, 'Market': market_factor_returns,
}

# One single-column, date-indexed frame per factor, aligned side by side on date.
# dropna() keeps only dates on which every factor has a return.
factor_dfs = [pd.DataFrame(list(r.items()), columns=['date', n]).set_index('date') for n, r in factor_returns_dict.items()]
all_factors_df = pd.concat(factor_dfs, axis=1).dropna()

print('Factor Correlation Matrix:')
print(all_factors_df.corr().round(2))

# Plot comparison
fig, ax = plt.subplots(figsize=(14, 8))
colors = {'SMB': 'tab:blue', 'Value': 'tab:orange', 'Momentum': 'tab:green', 'Growth': 'tab:red', 'Quality': 'tab:purple', 'Market': 'tab:gray'}
for col in all_factors_df.columns:
    # Compounded over the common dates only, so these curves can differ from the
    # per-factor charts, which use each factor's own full history
    cum_ret = (1 + all_factors_df[col]).cumprod() - 1
    ax.plot(cum_ret.index, cum_ret.values, linewidth=1.5, color=colors[col], label=col)
ax.legend(loc='upper left')
ax.grid(True, alpha=0.3)
# Show the y axis as whole percentages
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: '{:.0%}'.format(x)))
plt.title('Equity Factor Cumulative Returns')
plt.tight_layout()
plt.show()

In [None]:
# Export
# Writes the date-aligned factor returns (one column per factor) to the working directory
all_factors_df.to_csv('equity_factor_returns.csv')
print('Factor returns saved to equity_factor_returns.csv')

# Bundle each factor's raw {date: return} series with its summary metrics
equity_models = {
    'smb': {'returns': smb_factor_returns, 'results': smb_results},
    'value': {'returns': value_factor_returns, 'results': value_results},
    'momentum': {'returns': momentum_factor_returns, 'results': momentum_results},
    'growth': {'returns': growth_factor_returns, 'results': growth_results},
    'quality': {'returns': quality_factor_returns, 'results': quality_results},
    'market': {'returns': market_factor_returns, 'results': market_results},
}
print('Equity factor models ready!')

## Portfolio Factor Exposure Analysis
Measure how much exposure a portfolio has to each factor by regressing portfolio returns against factor returns.

In [None]:
import statsmodels.api as sm

def analyze_portfolio_factor_exposure(portfolio_returns, factor_returns_dict, portfolio_name="Portfolio"):
    """
    Regress portfolio returns against factor returns to measure factor exposures (betas).

    The portfolio and factor series are aligned on their common dates, rows with
    NaN or infinite values are removed, and an OLS model with an intercept
    (reported as 'Alpha') is fitted. A summary is printed as a side effect.

    Args:
        portfolio_returns: dict of {date: return} for the portfolio
        factor_returns_dict: dict of {factor_name: {date: return}}
        portfolio_name: name for display

    Returns:
        Tuple (results_df, model): results_df has one row per regressor with the
        columns 'Factor', 'Beta', 't-stat' and 'p-value' ('Alpha' first, then the
        factors in the order given); model is the fitted statsmodels results object.
        Returns (None, None) when fewer than 10 usable overlapping periods remain.
    """
    # Convert to DataFrames
    portfolio_df = pd.DataFrame(list(portfolio_returns.items()), columns=['date', 'portfolio']).set_index('date')

    factor_frames = [
        pd.DataFrame(list(returns.items()), columns=['date', name]).set_index('date')
        for name, returns in factor_returns_dict.items()
    ]
    factors_df = pd.concat(factor_frames, axis=1)

    # Merge and align dates
    merged = portfolio_df.merge(factors_df, left_index=True, right_index=True, how='inner')

    # Drop rows with NaN or inf values
    merged = merged.replace([np.inf, -np.inf], np.nan).dropna()

    if len(merged) < 10:
        print(f"Warning: Only {len(merged)} overlapping periods after removing NaN/inf")
        return None, None

    # Run regression: portfolio = alpha + beta1*factor1 + beta2*factor2 + ...
    y = merged['portfolio']
    X = merged.drop(columns=['portfolio'])
    # has_constant='add' guarantees the 'const' column exists even if a factor happens
    # to be constant over the sample (the default would silently skip the intercept).
    X = sm.add_constant(X, has_constant='add')

    model = sm.OLS(y, X).fit()

    # Build results table, looking coefficients up by column name rather than by
    # position so a label can never be attached to the wrong estimate.
    display_names = {'const': 'Alpha'}
    results = [
        {
            'Factor': display_names.get(column, column),
            'Beta': model.params[column],
            't-stat': model.tvalues[column],
            'p-value': model.pvalues[column],
        }
        for column in X.columns
    ]

    results_df = pd.DataFrame(results)

    # Print results
    print(f"\n{'='*60}")
    print(f"FACTOR EXPOSURE ANALYSIS: {portfolio_name}")
    print(f"{'='*60}")
    print(f"Periods: {len(merged)} months")
    print(f"R-squared: {model.rsquared:.3f}")
    print(f"Adj R-squared: {model.rsquared_adj:.3f}")
    print(f"\nFactor Loadings (Betas):")
    print("-" * 50)
    for _, row in results_df.iterrows():
        sig = "***" if row['p-value'] < 0.01 else "**" if row['p-value'] < 0.05 else "*" if row['p-value'] < 0.1 else ""
        print(f"  {row['Factor']:12s}: {row['Beta']:+.4f}  (t={row['t-stat']:+.2f}) {sig}")

    print("-" * 50)
    print("Significance: *** p<0.01, ** p<0.05, * p<0.1")
    print("\nInterpretation:")
    print("  Beta > 0: Portfolio has positive exposure to this factor")
    print("  Beta = 1: Portfolio moves 1:1 with the factor")
    print("  Beta < 0: Portfolio moves opposite to the factor")

    return results_df, model

# Create the factor returns dict for analysis
# {factor label: {date: return}} — the same long-short series used for the correlation
# matrix, passed as the regressors to analyze_portfolio_factor_exposure
equity_factor_returns = {
    'SMB': smb_factor_returns,
    'Value': value_factor_returns,
    'Momentum': momentum_factor_returns,
    'Growth': growth_factor_returns,
    'Quality': quality_factor_returns,
    'Market': market_factor_returns,
}

print("Factor exposure analysis function ready!")

In [None]:
# Example: Analyze a sample portfolio's factor exposures
# Replace 'my_portfolio_returns' with your portfolio's monthly returns as {date: return}

# Example 1: Analyze the Quality long-only portfolio against all factors
# Both results are None when fewer than 10 overlapping periods are available
quality_long_exposure, quality_long_model = analyze_portfolio_factor_exposure(
    portfolio_returns=quality_long_returns,
    factor_returns_dict=equity_factor_returns,
    portfolio_name="Quality Long-Only"
)

In [None]:
# Example 2: Analyze a custom portfolio
# To use with your own portfolio, create a dict of {date: monthly_return}.
# The dates must match the factor return dates exactly, because the series are
# inner-joined on date before the regression.
# 
# my_portfolio_returns = {
#     pd.Timestamp('2023-01-31'): 0.05,   # 5% return in Jan 2023
#     pd.Timestamp('2023-02-28'): -0.02,  # -2% return in Feb 2023
#     # ... more months
# }
# 
# exposure_df, model = analyze_portfolio_factor_exposure(
#     portfolio_returns=my_portfolio_returns,
#     factor_returns_dict=equity_factor_returns,
#     portfolio_name="My Portfolio"
# )

# Example: Analyze the Value long-only portfolio
# Both results are None when fewer than 10 overlapping periods are available
value_long_exposure, value_long_model = analyze_portfolio_factor_exposure(
    portfolio_returns=value_long_returns,
    factor_returns_dict=equity_factor_returns,
    portfolio_name="Value Long-Only"
)