In [1]:
# Downloads JSE prices from Yahoo, prepares "Total Return" proxies & returns, saves files, and makes a check plot.
from pathlib import Path
from datetime import date
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt

In [16]:
# ---------- CONFIG ----------
START = "1998-01-01"     # ensures >1000 daily obs
END = None               # None = up to today
OUTDIR = Path("data")
PLOT_ASSETS = ["NPN.JO", "MTN.JO", "SOL.JO"]  # any 3 you want to show

TICKERS = {
    "ABG.JO": "Absa Group Ltd",
    "SBK.JO": "Standard Bank Group Ltd",
    "FSR.JO": "FirstRand Ltd",
    "SLM.JO": "Sanlam Ltd",
    "SOL.JO": "Sasol Ltd",
    "SHP.JO": "Shoprite Holdings Ltd",
    "WHL.JO": "Woolworths Holdings Ltd",
    "MRP.JO": "Mr Price Group Ltd",
    "BVT.JO": "Bidvest Group Ltd",
    "GFI.JO": "Gold Fields Ltd",
    "NED.JO": "Nedbank Group Ltd",
    "INL.JO": "Investec Ltd",
    "CLS.JO": "Clicks Group Ltd",
    "GRT.JO": "Growthpoint Properties Ltd",
    "MTN.JO": "MTN Group Ltd",
    "IMP.JO": "Impala Platinum Holdings Ltd",
    "NPN.JO": "Naspers Ltd (N Shares)",
    "CFR.JO": "Compagnie Financière Richemont SA",
    "HAR.JO": "Harmony Gold Mining Company Ltd",
    "APN.JO": "Aspen Pharmacare Holdings Ltd",
}

# Convenience: list of tickers in stable order (e.g., for loops / downloads)
TICKER_LIST = list(TICKERS.keys())

def main():
    OUTDIR.mkdir(parents=True, exist_ok=True)

    # 1) Download daily adjusted data (auto_adjust=True gives dividend & split adjusted OHLC)
    df_raw = yf.download(
        TICKER_LIST,
        start=START,
        end=END,
        interval="1d",
        auto_adjust=True,
        group_by="ticker",
        threads=True,
        progress=False,
    )

    # 2) Extract adjusted closing prices (with auto_adjust=True, 'Close' is adjusted)
    # df_raw has a MultiIndex of (ticker, field). We take the 'Close' slice.
    close = df_raw.xs("Close", axis=1, level=1).sort_index()
    close = close.loc[~close.index.duplicated(keep="first")]  # just in case
    close = close.dropna(how="all")  # drop rows where all tickers are NaN

    # Drop tickers that completely failed to download
    non_empty_cols = [c for c in close.columns if close[c].dropna().shape[0] > 0]
    close = close[non_empty_cols]

    # 3) Build a "Total Return" index proxy by rebasing adjusted close to 100
    tri = (close / close.iloc[0]) * 100.0

    # 4) Compute returns
    returns = close.pct_change().dropna()       # simple returns r_t = (I_t - I_{t-1}) / I_{t-1}
    logrets = np.log(close).diff().dropna()     # log returns

    # 5) Basic sanity checks
    print(f"Universe downloaded: {list(close.columns)}")
    print(f"Observations: prices={len(close):,}, returns={len(returns):,}")

    # 6) Save tidy files for the assignment
    close.to_parquet(OUTDIR / "jse_prices_adjclose.parquet")
    returns.to_parquet(OUTDIR / "jse_returns_simple.parquet")
    logrets.to_parquet(OUTDIR / "jse_returns_log.parquet")
    tri.to_parquet(OUTDIR / "jse_total_return_index_rebased100.parquet")

    close.to_csv(OUTDIR / "jse_prices_adjclose.csv", index=True)
    returns.to_csv(OUTDIR / "jse_returns_simple.csv", index=True)
    logrets.to_csv(OUTDIR / "jse_returns_log.csv", index=True)
    tri.to_csv(OUTDIR / "jse_total_return_index_rebased100.csv", index=True)

    # 7) Plot: “Total Return” proxy for three assets (satisfies Q1 plotting requirement)
    to_plot = [t for t in PLOT_ASSETS if t in tri.columns][:3]
    ax = tri[to_plot].plot(figsize=(10, 6), lw=1.7)
    ax.set_title("JSE: Total Return (Adj Close rebased to 100)")
    ax.set_ylabel("Index (100 = first date)")
    ax.set_xlabel("Date")
    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    # 8) Optional: weekly/monthly variants (uncomment if you want different frequencies)
    # close_w = close.resample("W-FRI").last()
    # close_m = close.resample("M").last()
    # close_w.to_parquet(OUTDIR / "jse_prices_adjclose_weekly.parquet")
    # close_m.to_parquet(OUTDIR / "jse_prices_adjclose_monthly.parquet")

if __name__ == "__main__":
    main()

Universe downloaded: ['CFR.JO', 'GRT.JO', 'BVT.JO', 'SLM.JO', 'HAR.JO', 'MTN.JO', 'MRP.JO', 'WHL.JO', 'APN.JO', 'GFI.JO', 'SHP.JO', 'SBK.JO', 'IMP.JO', 'FSR.JO', 'ABG.JO', 'SOL.JO', 'CLS.JO', 'INL.JO', 'NED.JO', 'NPN.JO']
Observations: prices=6,583, returns=600


<Figure size 1000x600 with 1 Axes>

In [20]:
pip install --user mitosheet

Note: you may need to restart the kernel to use updated packages.
