In [5]:
# --- bootstrap: find src/quantlib ---
import sys

def add_src_to_path(pkg="quantlib", levels_up=3):
    """Put the ``src`` directory that contains ``pkg`` at the front of ``sys.path``.

    Looks for ``<dir>/src/<pkg>/__init__.py`` where ``<dir>`` is the current
    working directory and then up to ``levels_up`` of its ancestors, nearest
    first. The first match wins.

    Args:
        pkg: Name of the package directory expected under ``src``.
        levels_up: Maximum number of ancestor directories to inspect in
            addition to the working directory itself.

    Returns:
        None. On success ``sys.path`` is modified and a one-line
        confirmation is printed.

    Raises:
        FileNotFoundError: If none of the candidate ``src`` directories
            contains ``pkg``.
    """
    # Imported locally: this bootstrap cell runs before the notebook's main
    # import cell, so ``Path`` is not defined yet on a fresh kernel.
    from pathlib import Path

    here = Path.cwd().resolve()
    # Slice instead of indexing: a shallow working directory can have fewer
    # than ``levels_up`` ancestors, and ``here.parents[i]`` would raise
    # IndexError before any candidate was even checked.
    ancestors = list(here.parents)[:max(levels_up, 0)]
    for base in [here, *ancestors]:
        p = base / "src"
        if (p / pkg / "__init__.py").exists():
            entry = str(p)
            # Re-running the cell must not pile up duplicate entries.
            if entry in sys.path:
                sys.path.remove(entry)
            sys.path.insert(0, entry)
            print(f"[OK] sys.path += {p}")
            return
    raise FileNotFoundError(f"src/{pkg} not found")

# Run the bootstrap now, with its defaults, so the `quantlib` imports in the
# following import cell can be resolved from the project's src/ directory.
add_src_to_path()


[OK] sys.path += C:\Users\quantbase\Desktop\quant\src


In [6]:
# --- 0) Imports & paths -------------------------------------------------------
from datetime import date, timedelta
import pathlib, importlib
from pathlib import Path
import pandas as pd
from pandas.tseries.offsets import BDay
from jugaad_data.nse import stock_df  # historical OHLCV to DataFrame
from quantlib.io import load_index_and_merge
from quantlib import io as qio

In [7]:
# Re-execute quantlib/io.py and refresh the `qio` module object in place.
# Gotcha: only `qio` is refreshed. The name `load_index_and_merge`, bound
# earlier via `from quantlib.io import load_index_and_merge`, keeps pointing
# at the function object from before the reload.
importlib.reload(qio)

<module 'quantlib.io' from 'C:\\Users\\quantbase\\Desktop\\quant\\src\\quantlib\\io.py'>

In [8]:
# --- 1) Run configuration: project root and dated run folder ------------------
ROOT = Path(r"C:\Users\quantbase\Desktop\quant")
RUN_DATE = "26-09-2025"  # run label, formatted DD-MM-YYYY
RUN_DIR = ROOT.joinpath(RUN_DATE)

# Sub-folders of the run: raw downloads, intermediate artifacts, figures.
RAW, INT, FIG = [RUN_DIR / leaf for leaf in ("data_raw", "data_int", "figures")]

# Create whatever is missing; re-running the cell is harmless (exist_ok=True).
for p in [RAW, INT, FIG]:
    p.mkdir(parents=True, exist_ok=True)

In [9]:
# --- 2) User inputs -----------------------------------------------------------
# `symbols` may be a single NSE ticker string or a list of tickers; it is
# normalised to a list just below.
# (Keep these as NSE tickers; a "friendly name → ticker" map can be added later.)
symbols = ["SBIN", "HDFCBANK", "BAJFINANCE","NEWGEN","IRCTC","AEROFLEX"]  # e.g., basket
t_days  = 252                                          # e.g., past 1Y of business days
series  = "EQ"                                         # forwarded to stock_df(series=...)

# Honour the documented single-string form ("SBIN" -> ["SBIN"]). Without this,
# a bare string would be iterated character by character by the fetch loop and
# len(symbols) would report the number of characters.
symbols = [symbols] if isinstance(symbols, str) else list(symbols)

# Look-back window [from_dt, to_dt_d]: today at midnight, minus t_days business days.
# NOTE(review): the window is anchored on today's date, not on RUN_DATE, so a
# re-run on a later day fetches a different range into the same run folder — confirm
# that this is intended.
to_dt   = pd.Timestamp.today().normalize()
from_dt = (to_dt - BDay(t_days)).date()
to_dt_d = to_dt.date()

print(f"Fetching {len(symbols)} symbol(s) from {from_dt} to {to_dt_d} (series={series}) into {RAW}")

Fetching 6 symbol(s) from 2024-10-11 to 2025-09-30 (series=EQ) into C:\Users\quantbase\Desktop\quant\26-09-2025\data_raw


In [10]:
# --- 3) Fetch loop: one CSV per symbol ---------------------------------------
fetched = []   # symbols whose CSV was written
errors  = []   # (symbol, message) for every symbol whose fetch/save raised

for sym in symbols:
    try:
        df = stock_df(symbol=sym, from_date=from_dt, to_date=to_dt_d, series=series)
        if df.empty:
            # Nothing came back: report it and move on without writing a file.
            print(f"⚠️ Empty frame for {sym} (maybe too-new listing or no trades in range).")
            continue

        # Parse DATE, order chronologically and index by it. No column is
        # dropped, so the CSV holds everything stock_df returned, re-sorted.
        df["DATE"] = pd.to_datetime(df["DATE"])
        df = df.sort_values("DATE")
        df = df.set_index("DATE")

        out_csv = RAW / f"{sym}.csv"
        df.to_csv(out_csv, index=True, date_format="%Y-%m-%d")
        fetched.append(sym)
    except Exception as e:
        # Best-effort: one failing symbol must not abort the rest of the basket.
        print(f"❌ Error fetching {sym}: {e}")
        errors.append((sym, str(e)))

print(f"Done. OK: {len(fetched)}  Failed: {len(errors)}")

Done. OK: 6  Failed: 0


In [16]:


# --- 4) Build Close matrix (anchor left-join) --------------------------------
# For every fetched symbol, read back the CSV saved above and reduce it to a
# clean, date-indexed 'close' column.
per_symbol = {}
for sym in fetched:
    dfi = pd.read_csv(RAW / f"{sym}.csv", parse_dates=["DATE"])
    if "CLOSE" not in dfi.columns:
        print(f"⚠️ {sym} missing CLOSE; skipping.")
        continue
    dfi = (
        dfi[["DATE", "CLOSE"]]
        .rename(columns={"DATE": "date", "CLOSE": "close"})
        # Coerce BEFORE dropping NaNs: values that cannot be parsed become NaN
        # here and must be removed by the dropna that follows, otherwise they
        # stay in the frame and inflate the row count used to pick the anchor.
        .assign(close=lambda d: pd.to_numeric(d["close"], errors="coerce"))
        .dropna(subset=["date", "close"])
        # Stable sort so that "last" below means last in file order.
        .sort_values("date", kind="stable")
        # A repeated date would multiply rows in the left-join further down;
        # keep a single row per date.
        .drop_duplicates(subset="date", keep="last")
        .set_index("date")
    )
    per_symbol[sym] = dfi

if not per_symbol:
    raise RuntimeError("No usable symbols fetched; nothing to build.")

# Anchor = symbol with the most rows (first one wins on ties). Its dates define
# the rows of the panel; every other symbol is left-joined onto them.
anchor_sym = max(per_symbol, key=lambda name: len(per_symbol[name]))
prices_close = per_symbol[anchor_sym][["close"]].rename(columns={"close": anchor_sym})

for sym, dfi in per_symbol.items():
    if sym == anchor_sym:
        continue
    prices_close = prices_close.join(dfi["close"].rename(sym), how="left")

# Non-positive closes are replaced by NaN; the rows themselves are kept.
prices_close = prices_close.where(prices_close > 0)

# --- 5) Save artifacts & quick coverage --------------------------------------
prices_close.to_csv(INT / "prices_close_anchor_leftjoin.csv")
prices_close.to_pickle(INT / "prices_close_anchor_leftjoin.pkl")


Done. OK: 6  Failed: 0


In [17]:
# Coverage summary: per symbol, the number of non-missing closes and the
# first/last date on which one is present.
cov = []
for sym, col in prices_close.items():
    s = col.dropna()
    if s.empty:
        first_day = last_day = None
    else:
        first_day, last_day = s.index.min().date(), s.index.max().date()
    cov.append((sym, len(s), first_day, last_day))

coverage = pd.DataFrame(cov, columns=["symbol","rows","start","end"])
coverage = coverage.sort_values("rows", ascending=False)
coverage.to_csv(INT / "coverage.csv", index=False)

print(f"\nAnchor: {anchor_sym}")
print(f"Panel shape: {prices_close.shape} (dates x symbols)")
print(f"Saved: {INT/'prices_close_anchor_leftjoin.pkl'}, {INT/'coverage.csv'}")


Anchor: SBIN
Panel shape: (241, 6) (dates x symbols)
Saved: C:\Users\quantbase\Desktop\quant\26-09-2025\data_int\prices_close_anchor_leftjoin.pkl, C:\Users\quantbase\Desktop\quant\26-09-2025\data_int\coverage.csv


In [18]:
# Sanity check: eyeball the coverage table and the first rows of the panel.
display(coverage.head(10), prices_close.head())

Unnamed: 0,symbol,rows,start,end
0,SBIN,241,2024-10-10,2025-09-26
1,HDFCBANK,241,2024-10-10,2025-09-26
2,BAJFINANCE,241,2024-10-10,2025-09-26
3,NEWGEN,241,2024-10-10,2025-09-26
4,IRCTC,241,2024-10-10,2025-09-26
5,AEROFLEX,241,2024-10-10,2025-09-26


Unnamed: 0_level_0,SBIN,HDFCBANK,BAJFINANCE,NEWGEN,IRCTC,AEROFLEX
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-10-10,797.1,1662.4,7319.7,1314.05,882.65,196.18
2024-10-11,799.75,1651.0,7302.0,1373.3,889.2,191.12
2024-10-14,805.15,1688.1,7208.8,1350.85,885.0,206.2
2024-10-15,804.65,1684.1,7016.9,1296.0,895.3,208.55
2024-10-16,805.45,1699.8,6956.35,1269.55,892.6,203.44


In [19]:
# --- 6) Index CSV: merge an index series (e.g. NIFTY50) into the close panel ---

In [20]:
# Folder layout of the run to analyse (same root / run-date scheme as the
# fetch stage earlier in the notebook).
ROOT = Path(r"C:\Users\quantbase\Desktop\quant")
RUN_DATE = "26-09-2025"  # set this to the run you want to analyse
RUN_DIR = ROOT.joinpath(RUN_DATE)
DATA_INT, DATA_RAW, FIG_DIR = (
    RUN_DIR.joinpath(leaf) for leaf in ("data_int", "data_raw", "figures")
)

In [21]:
# Merge an index series into the saved close panel; `info` is the summary
# printed below (panel path, output paths, chosen index name, resulting shape).
# Called through the `qio` module object rather than the bare name bound by
# `from quantlib.io import load_index_and_merge`: after `importlib.reload(qio)`
# the bare name still refers to the pre-reload function, so edits to io.py
# would silently be ignored by a call through it.
info = qio.load_index_and_merge(
    DATA_INT, DATA_RAW,
    prefer=["NIFTY50", "NIFTYBANK"]   # optional preference order
    # panel_candidates=["prices_close_anchor_leftjoin.pkl"],  # optional override
)

print(info)

{'panel_path': 'C:\\Users\\quantbase\\Desktop\\quant\\26-09-2025\\data_int\\prices_close_anchor_leftjoin.pkl', 'out_pkl': 'C:\\Users\\quantbase\\Desktop\\quant\\26-09-2025\\data_int\\prices_close_anchor_leftjoin_with_index.pkl', 'out_csv': 'C:\\Users\\quantbase\\Desktop\\quant\\26-09-2025\\data_int\\prices_close_anchor_leftjoin_with_index.csv', 'index_name': 'NIFTY50', 'shape': (241, 7)}


In [22]:
# Run the merge again with NIFTY50 as the only preferred index; `info` is overwritten.
# NOTE(review): the printed result is identical to the previous cell's (same
# output paths, index_name NIFTY50, shape (241, 7)), so this cell looks
# redundant — confirm whether it can be removed.
info = qio.load_index_and_merge(DATA_INT, DATA_RAW, prefer=["NIFTY50"])
print(info)

{'panel_path': 'C:\\Users\\quantbase\\Desktop\\quant\\26-09-2025\\data_int\\prices_close_anchor_leftjoin.pkl', 'out_pkl': 'C:\\Users\\quantbase\\Desktop\\quant\\26-09-2025\\data_int\\prices_close_anchor_leftjoin_with_index.pkl', 'out_csv': 'C:\\Users\\quantbase\\Desktop\\quant\\26-09-2025\\data_int\\prices_close_anchor_leftjoin_with_index.csv', 'index_name': 'NIFTY50', 'shape': (241, 7)}


In [23]:
# Load the merged panel from the pickle path reported in info["out_pkl"] and
# show its last rows.
# NOTE(review): in the saved output the NIFTY50 value for 2025-09-26 is blank —
# presumably the index CSV ends one session earlier than the stock data;
# confirm before using that row.
panel = pd.read_pickle(info["out_pkl"])
panel.tail()

Unnamed: 0_level_0,SBIN,HDFCBANK,BAJFINANCE,NEWGEN,IRCTC,AEROFLEX,NIFTY50
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2025-09-22,855.25,964.2,1006.5,898.1,727.1,192.39,25202.35
2025-09-23,870.6,957.2,1025.5,884.2,722.6,192.74,25169.5
2025-09-24,866.2,951.05,1029.75,875.5,715.45,185.81,25056.9
2025-09-25,861.15,949.85,1012.75,909.45,714.9,185.49,24890.85
2025-09-26,856.95,945.05,985.1,888.6,703.1,179.81,
