In [2]:
!pip install --quiet yfinance beautifulsoup4 requests lxml

In [4]:
from pathlib import Path
RAW_DIR = Path("data/raw")
RAW_DIR.mkdir(parents=True, exist_ok=True)
print("✓ Folder created at", RAW_DIR.resolve())


✓ Folder created at C:\Users\eesha\Downloads\Core Data Set BE\data\raw


In [26]:
# --- imports & folder ---
from pathlib import Path
import io, re, requests, pandas as pd, yfinance as yf
from bs4 import BeautifulSoup

RAW_DIR = Path("data/raw")
RAW_DIR.mkdir(parents=True, exist_ok=True)
print("✓ Folder ready at", RAW_DIR.resolve())

# ---------- 1.  helpers ----------
def fetch_boedata(code):
    url = (
        "https://www.bankofengland.co.uk/boeapps/database/Rates"
        f"?SeriesCodes={code}&Filter=N&CSVF=TN"
    )
    df = pd.read_csv(io.BytesIO(requests.get(url, timeout=15).content))
    df.columns = ["date", code]
    df["date"] = pd.to_datetime(df["date"])
    return df

def fetch_ons(code, dataset):
    """ONS Beta v1 endpoint that returns observations directly."""
    url = (
        f"https://api.beta.ons.gov.uk/v1/timeseries/{code}/observations"
        f"?datasetId={dataset}&geography=K02000001"
    )
    js = requests.get(url, timeout=15).json()
    obs = [(o["date"], o["observation"]) for o in js["observations"]]
    df  = pd.DataFrame(obs, columns=["date", code])
    df["date"] = pd.to_datetime(df["date"].str.replace(" ", "-") + "-01")
    df[code]   = pd.to_numeric(df[code], errors="coerce")
    return df

# ---------- 2.  series mapping ----------
series_map = {
    # ONS   (code, datasetId)
    ("BKTL", "qna"):  "gdp_yoy",             # GDP YoY %
    ("L55O", "mm23"): "cpi_yoy",             # CPI YoY %
    ("MGSX", "lms"):  "unemployment_pct",    # Unemployment %
    ("LNMQ", "lms"):  "weekly_earnings_yoy", # Earnings YoY %
    # BoE
    ("IUDMNPY", "boe"): "ois_7d",            # 7‑day OIS
    ("IUDERB3A","boe"): "bank_rate",         # Official Bank Rate
}

# ---------- 3.  download loop ----------
for (code, src), nice in series_map.items():
    df = fetch_boedata(code) if src == "boe" else fetch_ons(code, src)
    df.rename(columns={code: nice}, inplace=True)
    df.to_csv(RAW_DIR / f"{nice}.csv", index=False)
    print(f"✓ {nice}.csv saved ({len(df)} rows)")

# ---------- 4.  FTSE All‑Share ----------
ftse = yf.download("^FTAS", start="2000-01-01", interval="1mo", group_by="ticker")
ftse_close = ftse["Close"].reset_index()
ftse_close.rename(columns={"Date": "date", "^FTAS": "ftse_allshare"}, inplace=True)
ftse_close.to_csv(RAW_DIR / "ftse_allshare.csv", index=False)
print("✓ ftse_allshare.csv saved")

# ---------- 5.  scrape MPR / FSR ----------
def _scrape(url, label):
    html = requests.get(url, timeout=15).text
    soup = BeautifulSoup(html, "html.parser")
    patt = re.compile(rf"{label}.*?-\s+([A-Za-z]+)\s+(\d{{4}})", re.I)
    dates = {
        pd.to_datetime(" ".join(m.groups()))
        for a in soup.find_all("a") if (m := patt.search(a.get_text(strip=True)))
    }
    return pd.DataFrame({"date": sorted(dates), "event": label[:3].upper()})

fsr = _scrape(
    "https://www.bankofengland.co.uk/sitemap/financial-stability-report",
    "Financial Stability Report",
)
mpr = _scrape(
    "https://www.bankofengland.co.uk/sitemap/monetary-policy-report",
    "Monetary Policy Report",
)

calendar = (
    pd.concat([fsr, mpr])
    .sort_values("date")
    .reset_index(drop=True)
)
calendar.to_csv(RAW_DIR / "mpr_fsr_calendar.csv", index=False)
print("✓ mpr_fsr_calendar.csv saved with", len(calendar), "rows")

# ---------- 6.  final check ----------
print("\nFiles inside data/raw/:")
for p in RAW_DIR.glob("*.csv"):
    print("  •", p.name)


✓ Folder ready at C:\Users\eesha\Downloads\Core Data Set BE\data\raw


TypeError: string indices must be integers, not 'str'

In [28]:
print(requests.get("https://api.beta.ons.gov.uk/v1/timeseries/BKTL/observations?datasetId=qna").text[:400])


"No API is defined for GET /timeseries/BKTL/observations"
