In [1]:
pip install --upgrade yfinance==0.2.54

Collecting yfinance==0.2.54
  Downloading yfinance-0.2.54-py2.py3-none-any.whl.metadata (5.8 kB)
Downloading yfinance-0.2.54-py2.py3-none-any.whl (108 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m108.7/108.7 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: yfinance
  Attempting uninstall: yfinance
    Found existing installation: yfinance 0.2.52
    Uninstalling yfinance-0.2.52:
      Successfully uninstalled yfinance-0.2.52
Successfully installed yfinance-0.2.54


In [4]:
pip install arch

Collecting arch
  Downloading arch-7.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading arch-7.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (985 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m985.3/985.3 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: arch
Successfully installed arch-7.2.0


In [5]:
import yfinance as yf
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
import numpy as np
import statsmodels.api as sm  # for CAPM and Fama-French regressions
import pandas_datareader
import matplotlib.pyplot as plt
from arch import arch_model
import itertools

In [25]:
# Purpose: estimate each stock's daily Fama–French 3-factor alpha (the OLS
# intercept of excess return on Mkt-RF, SMB, HML) and keep the 6 highest.

# Step 1: Define parameters
symbols = ["AAPL", "MSFT", "GOOG", "BRK-B", "JPM", "JNJ", "V", "PG", "UNH"]
start_date = '2020-01-01'
end_date = '2023-12-31'
ff_filepath = '/content/F-F_Research_Data_Factors_daily.CSV'

# Step 2: Load Fama–French 3-factor data
# skiprows/header/names assume the usual layout of the daily factor file
# (text preamble, then a header row, then YYYYMMDD rows) — TODO confirm if the
# file was obtained from a different source.
ff = pd.read_csv(ff_filepath, skiprows=3, header=0, names=['Date', 'Mkt-RF', 'SMB', 'HML', 'RF'])
# Rows whose first field is not a YYYYMMDD date (e.g. a trailing footer line)
# become NaT and are dropped instead of aborting the whole load.
ff['Date'] = pd.to_datetime(ff['Date'], format='%Y%m%d', errors='coerce')
ff = ff.dropna(subset=['Date']).set_index('Date')
# Factors are divided by 100 — assumes the file quotes them in percent.
ff = ff.astype(float) / 100

# Step 3: Download stock data
# auto_adjust is set explicitly so 'Close' is dividend/split adjusted no matter
# which yfinance default is in effect.
data = yf.download(symbols, start=start_date, end=end_date, auto_adjust=True)['Close']

# A ticker that failed to download would otherwise empty the sample silently
# through the dropna() below, so fail loudly here instead.
missing_symbols = [s for s in symbols if s not in data.columns or data[s].isna().all()]
if missing_symbols:
    raise ValueError(f"No price data downloaded for: {missing_symbols}")

# Step 4: Compute daily stock returns
# fill_method=None: do not forward-fill price gaps into artificial 0% returns
# (the implicit padding default is deprecated in pandas).
stock_returns = data.pct_change(fill_method=None).dropna()

# Step 5: Merge stock returns with Fama–French factors (dates present in both)
merged_data = pd.merge(stock_returns, ff, left_index=True, right_index=True, how='inner')
if merged_data.empty:
    raise ValueError("No overlapping dates between stock returns and Fama-French factors")

# Step 6: Compute excess returns for each stock (stock return minus risk-free rate)
for symbol in symbols:
    merged_data[symbol + '_excess'] = merged_data[symbol] - merged_data['RF']

# Step 7: Run regressions for each stock and extract alpha
# The regressor matrix is identical for every stock, so build it once.
X = sm.add_constant(merged_data[['Mkt-RF', 'SMB', 'HML']])  # constant term captures alpha
regression_results = {}
alphas = {}
for symbol in symbols:
    # Dependent variable: the stock's excess return
    y = merged_data[symbol + '_excess']
    model = sm.OLS(y, X).fit()
    regression_results[symbol] = model
    alphas[symbol] = model.params['const']  # alpha is the constant term

# Step 8: Create a DataFrame to rank stocks by alpha (highest first)
alpha_df = pd.DataFrame(list(alphas.items()), columns=['Stock', 'Alpha'])
alpha_df = alpha_df.sort_values(by='Alpha', ascending=False)
print("Estimated Daily Alphas:")
print(alpha_df)

# Step 9: Choose the top 6 stocks with the highest alpha
top6 = alpha_df.head(6)
print("\nSelected basket of 6 stocks based on highest alpha:")
print(top6)


[*********************100%***********************]  9 of 9 completed


Estimated Daily Alphas:
   Stock     Alpha
0   AAPL  0.000595
1   MSFT  0.000516
2   GOOG  0.000383
8    UNH  0.000352
3  BRK-B  0.000076
7     PG  0.000030
6      V -0.000042
5    JNJ -0.000048
4    JPM -0.000087

Selected basket of 6 stocks based on highest alpha:
   Stock     Alpha
0   AAPL  0.000595
1   MSFT  0.000516
2   GOOG  0.000383
8    UNH  0.000352
3  BRK-B  0.000076
7     PG  0.000030


In [24]:
# Purpose: for every 6-stock subset of `symbols`, build an equal-weighted
# portfolio, regress its excess return on the Fama–French 3 factors, and rank
# the subsets by the regression intercept (portfolio alpha).

# -------------------------------
# 1. Define Parameters and Symbols
# -------------------------------
symbols = ["AAPL", "MSFT", "GOOG", "BRK-B", "JPM", "JNJ", "V", "PG", "UNH"]
start_date = '2020-01-01'
end_date = '2023-12-31'
ff_filepath = '/content/F-F_Research_Data_Factors_daily.CSV'  # adjust path as needed

# -------------------------------
# 2. Load Fama–French 3-Factor Data
# -------------------------------
# skiprows/header/names assume the usual layout of the daily factor file
# (text preamble, then a header row, then YYYYMMDD rows); adjust skiprows if
# the file's header is not standard.
ff = pd.read_csv(ff_filepath, skiprows=3, header=0, names=['Date', 'Mkt-RF', 'SMB', 'HML', 'RF'])
# Rows whose first field is not a YYYYMMDD date (e.g. a trailing footer line)
# become NaT and are dropped instead of aborting the whole load.
ff['Date'] = pd.to_datetime(ff['Date'], format='%Y%m%d', errors='coerce')
ff = ff.dropna(subset=['Date']).set_index('Date')
# Factors are divided by 100 — assumes the file quotes them in percent.
ff = ff.astype(float) / 100

# -------------------------------
# 3. Download Stock Data and Compute Returns
# -------------------------------
# auto_adjust is set explicitly so 'Close' is dividend/split adjusted no matter
# which yfinance default is in effect.
data = yf.download(symbols, start=start_date, end=end_date, auto_adjust=True)['Close']

# A ticker that failed to download would otherwise empty the sample silently
# through the dropna() below, so fail loudly here instead.
missing_symbols = [s for s in symbols if s not in data.columns or data[s].isna().all()]
if missing_symbols:
    raise ValueError(f"No price data downloaded for: {missing_symbols}")

# fill_method=None: do not forward-fill price gaps into artificial 0% returns
# (the implicit padding default is deprecated in pandas).
stock_returns = data.pct_change(fill_method=None).dropna()  # daily returns

# -------------------------------
# 4. Merge Stock Returns with F-F Factors
# -------------------------------
# Merge on dates common to both datasets
merged_data = pd.merge(stock_returns, ff, left_index=True, right_index=True, how='inner')
if merged_data.empty:
    raise ValueError("No overlapping dates between stock returns and Fama-French factors")

# Compute excess returns for each stock: stock return minus risk-free rate ('RF')
for symbol in symbols:
    merged_data[symbol + '_excess'] = merged_data[symbol] - merged_data['RF']

# -------------------------------
# 5. Generate All Combinations of 6 Stocks
# -------------------------------
combinations = list(itertools.combinations(symbols, 6))

# -------------------------------
# 6. For Each Combination: Build Portfolio and Run OLS Regression
# -------------------------------
# The regressors are the same for every portfolio, so build the design matrix
# once instead of once per combination.
X = sm.add_constant(merged_data[['Mkt-RF', 'SMB', 'HML']])  # constant term captures alpha

# Note: with a fixed X, OLS coefficients are linear in y, so an equal-weighted
# portfolio's alpha equals the average of its members' individual alphas.
results = []  # To store (combination, portfolio_alpha)

for combo in combinations:
    # Equal-weighted portfolio: each day, average the members' excess returns.
    portfolio_excess = merged_data[[stock + '_excess' for stock in combo]].mean(axis=1)

    # Dependent variable: the portfolio's excess return.
    y = portfolio_excess

    # Run the OLS regression and keep the intercept as the portfolio alpha.
    model = sm.OLS(y, X).fit()
    portfolio_alpha = model.params['const']

    results.append((combo, portfolio_alpha))

# -------------------------------
# 7. Sort and Display the Results
# -------------------------------
# Convert results to a DataFrame for easier viewing, highest alpha first
results_df = pd.DataFrame(results, columns=['Combination', 'Portfolio Alpha'])
results_df = results_df.sort_values(by='Portfolio Alpha', ascending=False)

print("Total combinations:", len(combinations))
print(results_df.to_string(index=False))


[*********************100%***********************]  9 of 9 completed


Total combinations: 84
                        Combination  Portfolio Alpha
 (AAPL, MSFT, GOOG, BRK-B, PG, UNH)         0.000326
  (AAPL, MSFT, GOOG, BRK-B, V, UNH)         0.000314
(AAPL, MSFT, GOOG, BRK-B, JNJ, UNH)         0.000313
(AAPL, MSFT, GOOG, BRK-B, JPM, UNH)         0.000306
     (AAPL, MSFT, GOOG, V, PG, UNH)         0.000306
   (AAPL, MSFT, GOOG, JNJ, PG, UNH)         0.000305
   (AAPL, MSFT, GOOG, JPM, PG, UNH)         0.000298
    (AAPL, MSFT, GOOG, JNJ, V, UNH)         0.000293
    (AAPL, MSFT, GOOG, JPM, V, UNH)         0.000286
  (AAPL, MSFT, GOOG, JPM, JNJ, UNH)         0.000285
   (AAPL, MSFT, GOOG, BRK-B, V, PG)         0.000260
 (AAPL, MSFT, GOOG, BRK-B, JNJ, PG)         0.000259
    (AAPL, MSFT, BRK-B, V, PG, UNH)         0.000255
  (AAPL, MSFT, BRK-B, JNJ, PG, UNH)         0.000254
 (AAPL, MSFT, GOOG, BRK-B, JPM, PG)         0.000252
  (AAPL, MSFT, BRK-B, JPM, PG, UNH)         0.000247
  (AAPL, MSFT, GOOG, BRK-B, JNJ, V)         0.000247
   (AAPL, MSFT, BRK-B, 