In [1]:
# Backend logic for BSE raw JSON dump, importable in a notebook
import json
import random
import time
from datetime import datetime, timedelta
from pathlib import Path

import requests
from dateutil.parser import parse as dtparse

# Headers sent with every announcements request.
HEADERS = dict(
    [
        ("User-Agent", "Mozilla/5.0"),
        ("Origin", "https://www.bseindia.com"),
        ("Referer", "https://www.bseindia.com/"),
    ]
)

# Announcements endpoint template; fill in with
# URL.format(page=..., from_=..., to_=..., scrip=...).
URL = "https://api.bseindia.com/BseIndiaAPI/api/AnnSubCategoryGetData/w?" + "&".join(
    [
        "pageno={page}",
        "strCat=-1",
        "strPrevDate={from_}",
        "strScrip={scrip}",
        "strSearch=P",
        "strToDate={to_}",
        "strType=C",
        "subcategory=-1",
    ]
)


def year_chunks(start_date, end_date):
    """
    Split [start_date, end_date] into consecutive windows of at most 365 days.

    Yields (window_start, window_end) pairs with both ends inclusive. The
    windows are fixed 365-day spans rather than calendar years, and the last
    one is clipped to end_date. Nothing is yielded when start_date is after
    end_date.
    """
    one_day = timedelta(days=1)
    full_span = timedelta(days=365)

    window_start = start_date
    while window_start <= end_date:
        window_end = window_start + full_span - one_day
        # Clip the final window so it never runs past the requested end.
        if window_end > end_date:
            window_end = end_date
        yield window_start, window_end
        window_start = window_end + one_day


def fetch_chunk(session, scrip, d_from, d_to):
    """
    Download every result page for one scrip code and date window.

    Pages are requested in order starting at 1, with the dates sent as
    YYYYMMDD. Paging stops at the first response whose "Table" entry is
    missing or empty; that response is not kept.

    Returns the list of decoded JSON payloads, one per non-empty page.
    Any HTTP error status is raised by ``raise_for_status``.
    """
    date_from = d_from.strftime("%Y%m%d")
    date_to = d_to.strftime("%Y%m%d")

    pages = []
    page_no = 1
    while True:
        reply = session.get(
            URL.format(page=page_no, from_=date_from, to_=date_to, scrip=scrip),
            headers=HEADERS,
            timeout=20,
        )
        reply.raise_for_status()
        body = reply.json()
        if body.get("Table"):
            pages.append(body)
            page_no += 1
            continue
        # Empty or absent table: there are no further pages.
        return pages

def dump_raw_payloads(payloads, outfile):
    """
    Append each payload to ``outfile`` as an indented JSON document.

    Every document is written with 2-space indentation, non-ASCII characters
    kept as-is, and is followed by a single newline. The file is opened in
    append mode (UTF-8), so it is created even when ``payloads`` is empty.
    """
    with outfile.open("a", encoding="utf-8") as sink:
        for document in payloads:
            json.dump(document, sink, indent=2, ensure_ascii=False)
            sink.write("\n")


def run(map_str, start_str, end_str=None, output_dir_name="output"):
    """
    Programmatic entrypoint: download raw announcement payloads per ISIN.

      - map_str: comma-separated "ISIN=scripCode" pairs
      - start_str: start date, e.g. YYYY-MM-DD (parsed with dateutil)
      - end_str: end date, same format (defaults to today if None/empty)
      - output_dir_name: directory to write JSON files into; it is created,
        including any missing parent directories, if it does not exist

    For each pair, "<ISIN>.json" in the output directory is replaced with the
    payloads of every ~1-year chunk between the two dates. A random 1-14 s
    pause follows each chunk.

    Returns a list of log strings, one per fetched chunk.
    Raises ValueError if the start date is after the end date, or if a
    mapping entry has no "=" or has an empty ISIN / scrip code.
    """
    # Parse dates
    start_date = dtparse(start_str).date()
    end_date = dtparse(end_str).date() if end_str else datetime.now().date()
    if start_date > end_date:
        raise ValueError("start date must be <= end date")

    # Parse the mapping string
    mapping = {}
    for pair in map_str.split(","):
        if "=" not in pair:
            raise ValueError(f"Invalid mapping entry: '{pair}'")
        isin, scrip = (part.strip() for part in pair.split("=", 1))
        # An empty ISIN would write to ".json"; an empty scrip code would
        # send an unfiltered query. Reject both up front.
        if not isin or not scrip:
            raise ValueError(f"Invalid mapping entry: '{pair}'")
        mapping[isin] = scrip

    # Prepare output directory
    output_dir = Path(output_dir_name)
    output_dir.mkdir(parents=True, exist_ok=True)
    logs = []

    # The session is used as a context manager so its pooled connections
    # are released even if a request raises part-way through.
    with requests.Session() as session:
        # Fetch and dump data in year-long chunks per ISIN
        for isin, scrip in mapping.items():
            outfile = output_dir / f"{isin}.json"
            # dump_raw_payloads appends, so drop any file from a previous run
            if outfile.exists():
                outfile.unlink()
            for chunk_start, chunk_end in year_chunks(start_date, end_date):
                payloads = fetch_chunk(session, scrip, chunk_start, chunk_end)
                dump_raw_payloads(payloads, outfile)
                entry = (
                    f"{isin} ({scrip}) {chunk_start:%Y-%m-%d}–{chunk_end:%Y-%m-%d} "
                    f"→ {outfile.name}: {len(payloads)} pages"
                )
                print(entry)
                logs.append(entry)
                # Randomised pause between chunks to space out requests
                time.sleep(random.uniform(1, 14))

    print(f"Finished; raw dumps are in ./{output_dir_name}/")
    return logs

In [2]:
import requests
import re
import random
from time import sleep


def fetch_bse_code(isins):
    """
    Look up the BSE scrip code for one ISIN via the PeerSmartSearch endpoint.

    Parameters
    ----------
    isins : str
        A single ISIN. (The plural parameter name is kept for backward
        compatibility; the value is used as one search term.)

    Returns
    -------
    str or None
        The first 5-6 digit code found in a ``liclick('<code>'`` fragment of
        the response text, or None when the response has no such fragment.

    Raises
    ------
    Whatever ``raise_for_status`` raises for an HTTP error status.
    """
    # Build the URL from the argument itself, never from a notebook-level
    # variable that merely happens to be called `isin`.
    url = f"https://api.bseindia.com/BseIndiaAPI/api/PeerSmartSearch/w?Type=SS&text={isins}"
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Referer": "https://www.bseindia.com/",
        "User-Agent": "Mozilla/5.0"
    }
    resp = requests.get(url, headers=headers, timeout=5)
    resp.raise_for_status()
    # The code is scraped from the returned markup rather than parsed as JSON.
    m = re.search(r"liclick\('(\d{5,6})'", resp.text)
    return m.group(1) if m else None



In [15]:
# ISINs to resolve and download (a single entry at the moment).
nifty50_isins = ["INE361B01024"]

In [16]:
# Resolve every ISIN to its BSE scrip code, pausing 1-3 s between lookups.
# NOTE(review): fetch_bse_code returns None when it finds no code, and that
# None is stored in `mapping` as-is.
mapping = {}
for isin in nifty50_isins:
    code = fetch_bse_code(isin)
    mapping[isin] = code
    print(f"{isin} → {code}")
    sleep(random.randint(1, 3))

# now build your list of "{isin}={scrip}" strings


INE361B01024 → 532488


In [17]:
# Build the comma-separated "ISIN=scripCode" string that run() expects.
# ISINs whose lookup produced no code (None/empty) are reported and left out,
# so the literal text "None" is never passed on as a scrip code.
unresolved_isins = [isin for isin, scrip in mapping.items() if not scrip]
if unresolved_isins:
    print(f"Skipping ISINs without a BSE code: {', '.join(unresolved_isins)}")
mapping_list = [f"{isin}={scrip}" for isin, scrip in mapping.items() if scrip]
mapping_str = ",".join(mapping_list)

In [18]:

# Date range for the download, as YYYY-MM-DD strings.
# end_str may be set to None to default to today.
start_str, end_str = "2014-01-01", "2025-01-01"


In [19]:
# Show the mapping string before it is handed to run().
print(mapping_str)

INE361B01024=532488


In [20]:

# Download the raw dumps; `log` keeps the per-chunk log lines run() returns.
log = run(map_str=mapping_str, start_str=start_str, end_str=end_str)


INE361B01024 (532488) 2014-01-01–2014-12-31 → INE361B01024.json: 1 pages
INE361B01024 (532488) 2015-01-01–2015-12-31 → INE361B01024.json: 1 pages
INE361B01024 (532488) 2016-01-01–2016-12-30 → INE361B01024.json: 1 pages
INE361B01024 (532488) 2016-12-31–2017-12-30 → INE361B01024.json: 1 pages
INE361B01024 (532488) 2017-12-31–2018-12-30 → INE361B01024.json: 1 pages
INE361B01024 (532488) 2018-12-31–2019-12-30 → INE361B01024.json: 2 pages
INE361B01024 (532488) 2019-12-31–2020-12-29 → INE361B01024.json: 2 pages
INE361B01024 (532488) 2020-12-30–2021-12-29 → INE361B01024.json: 1 pages
INE361B01024 (532488) 2021-12-30–2022-12-29 → INE361B01024.json: 2 pages
INE361B01024 (532488) 2022-12-30–2023-12-29 → INE361B01024.json: 2 pages
INE361B01024 (532488) 2023-12-30–2024-12-28 → INE361B01024.json: 2 pages
INE361B01024 (532488) 2024-12-29–2025-01-01 → INE361B01024.json: 1 pages
Finished; raw dumps are in ./output/


In [25]:
import json
from json import JSONDecoder
import pandas as pd
from pathlib import Path

def parse_multiple_json_objects(text):
    """
    Iterate over the JSON values concatenated in ``text``.

    Values may be separated (and surrounded) by any amount of whitespace.
    Each one is decoded in turn and yielded in the order it appears; a
    decoding error from ``raw_decode`` propagates to the caller.
    """
    decoder = JSONDecoder()
    total = len(text)
    pos = 0
    while True:
        # Step over whitespace/newlines in front of the next value.
        while pos < total and text[pos].isspace():
            pos += 1
        if pos >= total:
            return
        # raw_decode reports where the value ended; resume from there.
        value, pos = decoder.raw_decode(text, pos)
        yield value

def CSVHandler(input_dir="output", output_dir="csv"):
    """
    Convert raw JSON dump files into one CSV per input file.

    For every ``*.json`` file directly inside ``input_dir``, the rows of each
    top-level object's "Table" list are collected and the HEADLINE, NEWS_DT
    and SLONGNAME fields are written, preceded by an ISIN column taken from
    the file name, to ``<output_dir>/<file stem>.csv``.

    Parameters
    ----------
    input_dir : str or Path
        Directory holding the JSON dumps (default "output").
    output_dir : str or Path
        Directory for the CSV files (default "csv"); created, including any
        missing parent directories, if it does not exist.

    Returns
    -------
    None. Progress is reported with ``print``.
    """
    input_dir = Path(input_dir)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Sorted so files are processed and reported in a stable order.
    for json_file in sorted(input_dir.glob("*.json")):
        # Read the entire file as a single string
        text = json_file.read_text(encoding="utf-8")

        # Collect all rows from every JSON object's "Table"
        rows = []
        for obj in parse_multiple_json_objects(text):
            # Skip top-level values that are not objects, and objects whose
            # "Table" is missing or not a list.
            table = obj.get("Table") if isinstance(obj, dict) else None
            if isinstance(table, list):
                rows.extend(table)

        # Build the dataframe with the desired columns
        df = pd.DataFrame(rows, columns=["HEADLINE", "NEWS_DT", "SLONGNAME"])
        df.insert(0, "ISIN", json_file.stem)

        # Write out to <output_dir>/<json_filename>.csv
        csv_path = output_dir / f"{json_file.stem}.csv"
        df.to_csv(csv_path, index=False)
        print(f"Wrote {csv_path}")

    print("All JSON files processed and converted to CSV.")


In [27]:
# Convert every JSON dump under output/ into a CSV under csv/.
CSVHandler()

Wrote csv/INE721A01047.csv
Wrote csv/INE238A01034.csv
Wrote csv/INE066A01021.csv
Wrote csv/INE397D01024.csv
Wrote csv/INE585B01010.csv
Wrote csv/INE062A01020.csv
Wrote csv/INE263A01024.csv
Wrote csv/INE101A01026.csv
Wrote csv/INE009A01021.csv
Wrote csv/INE095A01012.csv
Wrote csv/INE192A01025.csv
Wrote csv/INE795G01014.csv
Wrote csv/INE849A01020.csv
Wrote csv/INE742F01042.csv
Wrote csv/INE437A01024.csv
Wrote csv/INE059A01026.csv
Wrote csv/INE002A01018.csv
Wrote csv/INE752E01010.csv
Wrote csv/INE030A01027.csv
Wrote csv/INE213A01029.csv
Wrote csv/INE423A01024.csv
Wrote csv/INE075A01022.csv
Wrote csv/INE019A01038.csv
Wrote csv/INE044A01036.csv
Wrote csv/INE038A01020.csv
Wrote csv/INE733E01010.csv
Wrote csv/INE361B01024.csv
Wrote csv/INE467B01029.csv
Wrote csv/INE239A01024.csv
Wrote csv/INE155A01022.csv
Wrote csv/INE047A01021.csv
Wrote csv/INE154A01025.csv
Wrote csv/INE918I01026.csv
Wrote csv/INE481G01011.csv
Wrote csv/INE280A01028.csv
Wrote csv/INE040A01034.csv
Wrote csv/INE021A01026.csv
W