In [1]:
# Downloads US stock prices from Yahoo Finance, prepares "Total Return" proxies & returns, saves files, and makes a check plot.
from pathlib import Path
from datetime import date
import itertools
import numpy as np
import pandas as pd
import yfinance as yf

In [3]:
# --- Sample window passed to the downloader ---
START_DATE = "2020-01-01"   # first requested day; chosen so the sample has >1000 daily observations
END_DATE = "2025-09-30"     # last requested day of the collection window

# --- Model / return settings ---
K = 3                       # number of factors (keep = 3 for the assignment)
GAMMA = 0.25                # shrinkage weight, must satisfy 0 < GAMMA < 0.5
USE_LOG_RETURNS = True      # True: log returns; False: simple returns

# --- Universe: ticker symbol -> company name ---
TICKERS = {
    "AAPL": "Apple Inc.",
    "MSFT": "Microsoft Corporation",
    "GOOGL": "Alphabet Inc. (Class A)",
    "AMZN": "Amazon.com, Inc.",
    "META": "Meta Platforms, Inc.",
    "TSLA": "Tesla, Inc.",
    "BRK-B": "Berkshire Hathaway Inc. (Class B)",
    "NVDA": "NVIDIA Corporation",
    "JPM": "JPMorgan Chase & Co.",
    "JNJ": "Johnson & Johnson",
}

# Symbols only, in the insertion order of TICKERS, for the download call
TICKER_LIST = list(TICKERS)

def download_stock_data(tickers: list, start_date: str, end_date: str) -> pd.DataFrame:
    """
    Fetch daily, dividend- and split-adjusted price data from Yahoo Finance.

    Args:
        tickers: List of stock ticker symbols
        start_date: Start date in 'YYYY-MM-DD' format
        end_date: End date in 'YYYY-MM-DD' format
            NOTE(review): yfinance documents `end` as exclusive, so this day
            itself may not be part of the result -- confirm.

    Returns:
        Whatever `yf.download` returns for these arguments; columns are
        requested grouped by ticker.
    """
    request_options = {
        "start": start_date,
        "end": end_date,
        "interval": "1d",
        "auto_adjust": True,   # Provides dividend & split adjusted OHLC
        "group_by": "ticker",  # ticker on the outer column level, field on the inner
        "threads": True,
        "progress": False,     # keep the notebook output free of progress bars
    }
    return yf.download(tickers, **request_options)

def extract_adjusted_closing_prices(stock_data: pd.DataFrame, use_log_returns: bool = True) -> pd.DataFrame:
    """
    Compute daily returns from the adjusted closing prices in stock data.

    Despite its name, this function does not return price levels: it pulls the
    'Close' field for every ticker, cleans the index, and returns period-over-
    period returns.

    Args:
        stock_data: MultiIndex DataFrame with (ticker, field) column structure;
            the 'Close' field is selected from the second column level
        use_log_returns: True (default) for log returns ln(P_t) - ln(P_{t-1});
            False for simple returns (P_t - P_{t-1}) / P_{t-1}

    Returns:
        DataFrame of returns with one column per ticker. The first row and any
        row in which at least one ticker has a missing value are dropped.

    Raises:
        KeyError: if 'Close' is not present on the second column level.
    """
    # Extract 'Close' slice from MultiIndex and put rows in index order
    close_prices = stock_data.xs("Close", axis=1, level=1).sort_index()

    # Remove any duplicate indices (keeping first occurrence)
    close_prices = close_prices.loc[~close_prices.index.duplicated(keep="first")]

    # Remove rows where all tickers have NaN values
    close_prices = close_prices.dropna(how="all")

    # Compute only the requested kind of return; neither branch forward-fills
    # gaps, so a missing price yields NaN returns that are dropped below.
    if use_log_returns:
        returns = np.log(close_prices).diff()
    else:
        returns = close_prices / close_prices.shift(1) - 1.0

    # Keep only rows where every ticker has a return
    return returns.dropna()

In [4]:
# Fetch the raw Yahoo Finance frame for every configured ticker
USA_Data = download_stock_data(TICKER_LIST, START_DATE, END_DATE)

# Note: despite its name, USA_Data_Close holds the returns produced by
# extract_adjusted_closing_prices, not price levels. reset_index() moves the
# date index into an ordinary column.
USA_Data_Close = extract_adjusted_closing_prices(USA_Data).reset_index()
USA_Data_Close.head()

Ticker,Date,GOOGL,AMZN,JPM,NVDA,TSLA,BRK-B,JNJ,MSFT,AAPL,META
0,2020-01-03,-0.005245,-0.012213,-0.013284,-0.016135,0.029203,-0.009724,-0.011645,-0.01253,-0.00977,-0.005305
1,2020-01-06,0.026305,0.014776,-0.000796,0.004185,0.019072,0.003575,-0.001248,0.002582,0.007937,0.018658
2,2020-01-07,-0.001934,0.002089,-0.017147,0.012034,0.038067,-0.004725,0.006088,-0.00916,-0.004714,0.002161
3,2020-01-08,0.007092,-0.007839,0.007771,0.001874,0.048033,0.00031,-0.000138,0.015803,0.015958,0.010087
4,2020-01-09,0.010443,0.004788,0.003645,0.010923,-0.022189,0.011702,0.002962,0.012416,0.021019,0.014209


In [5]:
# Persist the returns table to an Excel workbook in the current working
# directory; index=False leaves out the integer row index.
USA_Data_Close.to_excel(excel_writer="USAStocksReturns.xlsx", index=False)