## This notebook demonstrates how to programmatically extract foreign exchange (FX) time-series data from Yahoo Finance using Python.
## It includes a reusable function that supports both single and multiple FX pairs, downloads historical OHLC price data from 2020 onwards, and standardizes the output into a structured dataset for downstream analysis such as trend detection, volatility measurement, and FX risk visualization.

# FX Data Flow Overview

In [2]:
# If you don't have these installed, run this once in your terminal:
# pip install yfinance pandas

import yfinance as yf      # Used to download data from Yahoo Finance
import pandas as pd        # Used to store and work with the data in tables (DataFrames)



In [3]:
# Lookup table: friendly pair name -> Yahoo Finance ticker symbol.
# The keys are the names callers pass to fetch_fx_timeseries;
# the values are what gets sent to Yahoo Finance.
# Note that Yahoo abbreviates USD-based pairs: USD/INR is simply "INR=X".
# Extend the table by adding further "NAME": "TICKER" entries.
FX_TICKERS = {
    "USDINR": "INR=X",
    "EURINR": "EURINR=X",
    "GBPINR": "GBPINR=X",
    "JPYINR": "JPYINR=X",
}


In [4]:
def fetch_fx_timeseries(
    pairs,                  # Can be ONE pair (string) or MANY pairs (list/tuple/set)
    start="2020-01-01",     # Starting date for the data
    end=None,               # Ending date (None means "up to today")
    interval="1d",          # Data frequency: "1d" = daily, "1h" = hourly, etc.
    auto_adjust=False,      # If True, Yahoo may adjust close prices; for FX usually not needed
):
    """
    Download FX time series from Yahoo Finance and return them as one tidy table.

    Parameters
    ----------
    pairs : str or list/tuple/set of str
        Friendly pair name(s) that exist as keys in FX_TICKERS, e.g. "USDINR"
        or ["USDINR", "EURINR"]. Names are stripped and upper-cased before the
        lookup; repeated names are requested only once.
    start, end, interval, auto_adjust
        Passed straight through to yf.download.

    Returns
    -------
    pandas.DataFrame
        One row per (pair, date) with columns, in this order:
        date, pair, open, high, low, close, adj_close, volume.
        Columns Yahoo did not provide (e.g. adj_close when auto_adjust=True)
        are present but filled with pd.NA. Rows in which every price field is
        missing are dropped. Rows are sorted by pair, then date.

    Raises
    ------
    TypeError
        If pairs is not a str/list/tuple/set, or contains a non-string element.
    ValueError
        If pairs is empty or contains a name that is not in FX_TICKERS.
    RuntimeError
        If the download returns nothing usable for any requested pair.
    """

    # -----------------------------
    # 1) Make sure "pairs" is always a list
    # -----------------------------

    if isinstance(pairs, str):
        pairs = [pairs]                      # "USDINR" -> ["USDINR"]
    elif isinstance(pairs, (list, tuple, set)):
        pairs = list(pairs)
    else:
        raise TypeError("pairs must be a string (single pair) or a list/tuple/set of pairs.")

    if not pairs:
        raise ValueError("pairs cannot be empty.")

    # -----------------------------
    # 2) Convert friendly pair names to Yahoo tickers
    # -----------------------------

    tickers = []             # Yahoo tickers in request order, without repeats
    ticker_to_pair = {}      # Maps "INR=X" back to "USDINR" for labeling

    for p in pairs:
        # Fail with a clear TypeError instead of an AttributeError on .strip()
        if not isinstance(p, str):
            raise TypeError(f"Each pair must be a string, got {type(p).__name__}.")

        p = p.strip().upper()  # Clean up text: remove spaces, force uppercase

        if p not in FX_TICKERS:
            raise ValueError(f"Unknown pair '{p}'. Available: {sorted(FX_TICKERS.keys())}")

        t = FX_TICKERS[p]      # Convert "USDINR" -> "INR=X"

        # A repeated pair would otherwise be appended twice below and every
        # one of its rows would show up twice in the result.
        if t in ticker_to_pair:
            continue

        tickers.append(t)
        ticker_to_pair[t] = p

    # -----------------------------
    # 3) Download from Yahoo Finance (one request can fetch many tickers)
    # -----------------------------

    raw = yf.download(
        tickers=" ".join(tickers),  # Turn list into a string like "INR=X EURINR=X"
        start=start,
        end=end,
        interval=interval,
        group_by="ticker",          # Keeps data grouped by ticker when multiple tickers are requested
        auto_adjust=auto_adjust,
        threads=True,               # Faster download when multiple tickers
        progress=False,             # No download progress bar
    )

    if raw is None or raw.empty:
        raise RuntimeError("No data returned. Check tickers/dates/interval.")

    # -----------------------------
    # 4) Convert the downloaded data into a tidy table
    # -----------------------------

    frames = []   # One tidy DataFrame per ticker that actually has data

    if isinstance(raw.columns, pd.MultiIndex):
        # Columns look like (INR=X, Open), (INR=X, Close), (EURINR=X, Open), ...
        # Find the column level that holds the ticker symbols rather than
        # assuming it is always level 0.
        requested = set(tickers)
        ticker_level = next(
            (
                lvl
                for lvl in range(raw.columns.nlevels)
                if requested & set(raw.columns.get_level_values(lvl))
            ),
            0,
        )
        available = set(raw.columns.get_level_values(ticker_level))

        for t in tickers:
            # Some tickers might fail and not appear; skip them safely
            if t not in available:
                continue

            per_ticker = raw.xs(t, axis=1, level=ticker_level)
            tidy = _tidy_ticker_frame(per_ticker, ticker_to_pair[t])

            # A ticker whose rows were all empty contributes nothing
            if not tidy.empty:
                frames.append(tidy)
    else:
        # Flat columns (Open, High, Low, Close...): a single-ticker result
        tidy = _tidy_ticker_frame(raw, ticker_to_pair[tickers[0]])
        if not tidy.empty:
            frames.append(tidy)

    # Without this guard pd.concat([]) raises an unhelpful
    # "No objects to concatenate" ValueError.
    if not frames:
        raise RuntimeError("No data returned. Check tickers/dates/interval.")

    out = pd.concat(frames, ignore_index=True)

    # -----------------------------
    # 5) Ensure all expected columns exist, even if Yahoo didn't provide them
    # -----------------------------

    expected = ["date", "pair", "open", "high", "low", "close", "adj_close", "volume"]

    for c in expected:
        if c not in out.columns:
            out[c] = pd.NA  # Fill missing columns with empty values

    # Keep only the expected columns in the exact order we want,
    # then sort rows by pair name and date
    out = out[expected]
    out = out.sort_values(["pair", "date"]).reset_index(drop=True)

    return out


def _tidy_ticker_frame(ticker_df, pair_name):
    """Turn one ticker's date-indexed frame into tidy rows labelled with pair_name."""
    tidy = ticker_df.copy()

    # Make column names lowercase: Open -> open, Adj Close -> adj_close
    tidy.columns = [str(c).lower().replace(" ", "_") for c in tidy.columns]

    # When several tickers are downloaded together they share one date index,
    # so a ticker with no quote on a given date gets a row of NaNs. Drop rows
    # in which every price field is missing.
    price_cols = [
        c for c in ("open", "high", "low", "close", "adj_close") if c in tidy.columns
    ]
    if price_cols:
        tidy = tidy.dropna(subset=price_cols, how="all")

    # Name the index "date" before turning it into a column, so the result is
    # the same whatever the index was called ("Date", "Datetime" or unnamed).
    tidy = tidy.rename_axis("date").reset_index()

    # Add a column called 'pair' so we know which pair this data belongs to
    tidy.insert(1, "pair", pair_name)
    return tidy



In [5]:

# -----------------------------
# Example usage
# In a notebook kernel __name__ is "__main__", so the body below executes
# every time this cell is run; the guard only has an effect if this code is
# exported to a .py module and imported from elsewhere.
# -----------------------------
if __name__ == "__main__":

    # Example 1: one pair -> download, report the row count, save as CSV
    pair = "USDINR"
    single_df = fetch_fx_timeseries(pair, start="2020-01-01")
    print(f"Single pair rows: {len(single_df)}")
    single_df.to_csv(f"{pair}_from_2020.csv", index=False)

    # Example 2: several pairs in a single call (uncomment to run)
    # multi_df = fetch_fx_timeseries(["USDINR", "EURINR", "GBPINR"], start="2020-01-01")
    # print("Multiple pairs rows:", len(multi_df))
    # multi_df.to_csv("fx_multi_from_2020.csv", index=False)

Single pair rows: 1582
