In [44]:
import pandas as pd
# Read BSE_CODES.csv and keep the "BSE Exchange Code" column as a plain list;
# the scraping cell below iterates over `codes`.
csv_path = r"BSE_CODES.csv"
df = pd.read_csv(csv_path)

codes = df.loc[:, "BSE Exchange Code"].tolist()

# Quick sanity check of the first ten codes.
print(codes[0:10])

[533179, 532466, 543398, 532755, 507685, 532540, 544028, 532400, 532281, 542651]


In [None]:
import requests, time
import pandas as pd
import pprint

# BSE endpoint queried once per scrip code; rows come back under the "Table" key.
url = "https://api.bseindia.com/BseIndiaAPI/api/AnnSubCategoryGetData/w"

# Query parameters shared by every request; "strScrip" is added per code.
# NOTE(review): strPrevDate / strToDate look like YYYYMMDD date bounds — confirm.
params_template = {
    "pageno": 1,
    "strCat": "Result",
    "strPrevDate": "20250922",
    "strSearch": "P",
    "strToDate": "20251222",
    "strType": "C",
    "subcategory": -1
}

# Browser-like headers sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/120.0.0.0 Safari/537.36",
    "Referer": "https://www.bseindia.com/",
    "Origin": "https://www.bseindia.com",
    "Accept": "application/json, text/javascript, */*; q=0.01",
}

# Prefix that turns the attachment file name returned by the API into a full URL.
attachment_base_url = "https://www.bseindia.com/xml-data/corpfiling/AttachHis/"

session = requests.Session()
all_dfs = []   # one DataFrame per scrip that returned rows

for code in codes:
    params = params_template.copy()
    params["strScrip"] = str(code)

    try:
        # NOTE(review): verify=False disables TLS certificate verification; it is
        # kept from the original cell. The timeout stops one hung connection
        # from stalling the whole run.
        response = session.get(url, params=params, headers=headers, verify=False, timeout=30)
        print("Scrip", code, "-> Status:", response.status_code)

        if response.status_code == 200:
            data = response.json()
            table = data.get("Table") if isinstance(data, dict) else None
            if table:
                # Use a dedicated name so the notebook-level `df` is not overwritten.
                announcements = pd.DataFrame(table)
                if "ATTACHMENTNAME" in announcements.columns:
                    names = announcements["ATTACHMENTNAME"]
                    # Only rows that actually carry a file name get a URL;
                    # otherwise the result would be ".../AttachHis/None".
                    has_attachment = names.notna() & names.astype(str).str.strip().ne("")
                    announcements["ATTACHMENTNAME"] = (
                        attachment_base_url + names.astype(str)
                    ).where(has_attachment)
                all_dfs.append(announcements)
        else:
            print("❌ Request failed for", code)
    except (requests.RequestException, ValueError) as exc:
        # A network error or a non-JSON body for one scrip must not discard
        # everything collected so far.
        print("❌ Request failed for", code, "->", exc)

    time.sleep(3)  # pause between scrips

session.close()

# Concatenate all results into one DataFrame
if all_dfs:
    final_df = pd.concat(all_dfs, ignore_index=True)
    final_df.to_csv("all_announcements.csv", index=False)
    print("✅ Saved", len(final_df), "rows to all_announcements.csv")
else:
    print("⚠️ No data collected")

In [None]:
path = r"C:\Users\kaustubh.keny\Downloads\reportTable (2).xlsx"

import pandas as pd
import re

df = pd.read_excel(path)

# Keep rows from position 2 onward and every column except the first.
# .copy() makes `content` an independent frame, so adding a column below does
# not write into a slice of `df` (SettingWithCopyWarning).
content = df.iloc[2:, 1:].copy()

# The label of the first column is used as the source of the RTO tag.
category_class = df.columns[0]

# Remove ALL whitespace from the label. The original passed re.IGNORECASE as
# the fourth positional argument, which is `count`, not `flags`; since
# re.IGNORECASE == 2 only the first two whitespace runs were removed.
rto = re.sub(r"\s+", "", str(category_class))

# A scalar is broadcast to every row.
content["RTO"] = rto
content.head(15)

In [None]:
xls_path = r"C:\Users\kaustubh.keny\Downloads\allsbe.xls"
xlsx_path = r"C:\Users\kaustubh.keny\Downloads\allsbe.xlsx"

# Copy every sheet of the .xls workbook into a new .xlsx workbook.
# Both workbooks are opened as context managers so the source file handle is
# released as well (the original never closed the ExcelFile).
with pd.ExcelFile(xls_path) as xls:
    with pd.ExcelWriter(xlsx_path, engine="openpyxl") as writer:
        for sheet in xls.sheet_names:
            print(f"Converting: {sheet}")
            df = pd.read_excel(xls, sheet_name=sheet)
            # Drop completely empty rows/columns to compact
            df = df.dropna(axis=0, how="all").dropna(axis=1, how="all")
            df.to_excel(writer, sheet_name=sheet, index=False)



AMFI DISTRIBUTOR DATA

In [None]:
import pandas as pd
import requests
import os
import time
import warnings
from urllib3.exceptions import InsecureRequestWarning
from datetime import datetime

# The requests in this cell are made with verify=False; silence only the
# InsecureRequestWarning that triggers and leave every other warning visible.
warnings.simplefilter("ignore", InsecureRequestWarning)

def fetch_distributor_data(output_path, type_, cities, page_size=50):
    """Download AMFI distributor listings for every (type, city) pair into one CSV.

    For each combination the first page is requested, the page count is read
    from ``meta.pageCount`` of that response, and the remaining pages are then
    fetched. Every record gets a ``Type`` field holding the distributor type it
    was requested with.

    Args:
        output_path: Existing directory in which the CSV is written.
        type_: Iterable of values sent as the ``strOpt`` query parameter.
        cities: Sequence of values sent as the ``city`` query parameter.
        page_size: Value sent as ``pageSize`` (default 50).

    Returns:
        None. Writes ``amfi_dist_<YYYYmmdd_HHMMSS>.csv`` into ``output_path``.

    Raises:
        requests.HTTPError: If any request returns an error status. Nothing is
            saved in that case.
    """
    base_url = "https://www.amfiindia.com/api/distributor-agent"
    throttle = 2  # seconds to pause after EVERY request, including first pages
    request_timeout = 60  # seconds; stops a hung connection from blocking forever
    all_records = []

    def fetch_page(params):
        """Request one page, collect its tagged records, pause, return the payload."""
        # NOTE(review): verify=False disables TLS certificate verification; kept
        # from the original implementation.
        response = requests.get(base_url, params=params, verify=False, timeout=request_timeout)
        response.raise_for_status()
        payload = response.json()
        for rec in payload.get("data") or []:
            rec["Type"] = params["strOpt"]
            all_records.append(rec)
        time.sleep(throttle)
        return payload

    print(f"Pulling Data for total cities: {len(cities)}.")

    for corp in type_:
        for city in cities:
            print(f"Fetching data for city: {city}, type: {corp}...")

            params = {
                "strOpt": corp,
                "city": city,
                "search": "",
                "page": 1,
                "pageSize": page_size
            }

            # The first page also tells us how many pages exist.
            first_page = fetch_page(params)
            page_count = int((first_page.get("meta") or {}).get("pageCount") or 1)
            print(f"Total pages for {city} ({corp}): {page_count}")

            # Loop through remaining pages
            for page in range(2, page_count + 1):
                params["page"] = page
                fetch_page(params)
                print(f"Data fetched for page {page}/{page_count}")

    # Convert to DataFrame
    df = pd.DataFrame(all_records)

    # Save to single CSV with timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = os.path.join(output_path, f"amfi_dist_{timestamp}.csv")
    df.to_csv(output_file, index=False)
    print(f"Saved {len(df)} rows → {output_file}")

if __name__ == "__main__":
    # Destination directory for the timestamped CSV; created if missing.
    output_folder = r"C:\Users\kaustubh.keny\Downloads\AMFI_Distributor_Data"
    os.makedirs(output_folder, exist_ok=True)

    type_ = ["Individual", "Corporate"]

    # The city list is the first column of cities.csv.
    path = r"cities.csv"
    df = pd.read_csv(path)
    cities = df.iloc[:, 0].tolist()

    # Only one slice of the city list is pulled per run.
    # do 5000 to 6000
    fetch_distributor_data(
        output_path=output_folder,
        type_=type_,
        cities=cities[5250:5500],
    )
    

AMFI TER DATA

In [None]:
import requests
import time
import csv
from datetime import datetime

def fetch_data(config):
    """GET ``config["url"]`` with ``config["payload"]`` as the query string.

    Returns the decoded JSON body. Raises ``requests.HTTPError`` when the
    response has an error status. TLS verification is disabled for the call.
    """
    endpoint, query = config["url"], config["payload"]
    reply = requests.get(endpoint, params=query, verify=False)
    reply.raise_for_status()
    return reply.json()

def save_to_csv(data, filename):
    """Write the records under ``data["data"]`` to ``filename`` as CSV.

    Args:
        data: Mapping expected to hold a list of dict records under "data".
        filename: Path of the CSV file to create or overwrite.

    Nothing is written (a message is printed instead) when the "data" key is
    missing or its value is empty.

    The header is the union of the keys of all records, in order of first
    appearance. A record that lacks a column gets an empty cell. Taking the
    header from the first record only made ``csv.DictWriter`` raise
    ``ValueError`` as soon as a later record carried an extra key.
    """
    if "data" not in data:
        print("No 'data' field found in response.")
        return

    records = data["data"]
    if not records:
        print("No records found.")
        return

    # dict.fromkeys de-duplicates while preserving first-seen order.
    headers = list(dict.fromkeys(key for record in records for key in record))

    with open(filename, mode="w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=headers)
        writer.writeheader()
        writer.writerows(records)

    print(f"Data saved to {filename}")

def main():
    """Fetch the payload described by ``CONFIG`` and save it as a CSV.

    The output file is named ``<filename>_<date>.csv`` using the "filename"
    and payload "date" entries of ``CONFIG``.
    """
    print("Fetching data from AMFI API...")
    data = fetch_data(CONFIG)

    time.sleep(CONFIG["throttle_seconds"])

    # Keys use single quotes: reusing the outer double quote inside an f-string
    # replacement field is a SyntaxError before Python 3.12.
    save_to_csv(data, f"{CONFIG['filename']}_{CONFIG['payload']['date']}.csv")


# ---------------- CONFIG ----------------
# Settings read by main(): the endpoint, its query parameters, the pause
# applied after the request, and the prefix of the output file name.
CONFIG = dict(
    url="https://www.amfiindia.com/api/tracking-difference",
    payload=dict(MF_ID="all", date="01-May-2022"),
    throttle_seconds=2,    # delay
    filename="tracking_error",
)

if __name__ == "__main__":
    main()

In [None]:
import requests, time
import pandas as pd
from datetime import datetime, timedelta

def fetch_tracking_error(from_date: str, to_date: str, mf_id: str = "all", output_file: str = "tracking_error.xlsx", throttle_seconds: float = 5):
    """
    Fetch AMFI tracking error data for each day in range and save to one sheet.

    One request is made per calendar day from ``from_date`` to ``to_date``
    (both inclusive). Each day's rows get a ``date`` and an ``MF_ID`` column
    and all days are concatenated into a single Excel sheet. A day whose
    request or parsing fails is reported and skipped.

    Args:
        from_date (str): Start date in format 'dd-mon-yyyy' (e.g. '25-nov-2025')
        to_date (str): End date in format 'dd-mon-yyyy'
        mf_id (str): Mutual Fund ID parameter (default 'all')
        output_file (str): Output Excel file
        throttle_seconds (float): Pause after every request, successful or
            not (default 5)

    Raises:
        ValueError: If either date does not match the expected format.
    """
    # Parse dates
    start = datetime.strptime(from_date, "%d-%b-%Y")
    end = datetime.strptime(to_date, "%d-%b-%Y")

    all_data = []
    failed_dates = []

    # day-loop
    current = start
    while current <= end:
        strdt = current.strftime("%d-%b-%Y").lower()
        url = f"https://www.amfiindia.com/api/tracking-error-data?MF_ID={mf_id}&strdt={strdt}"
        print(f"Fetching {url} ...")

        try:
            # NOTE(review): verify=False disables TLS certificate verification;
            # kept from the original. The timeout stops a hung connection from
            # blocking the whole date range.
            resp = requests.get(url, verify=False, timeout=60)
            resp.raise_for_status()
            data = resp.json()  # API returns JSON

            df = pd.DataFrame(data)
            df["date"] = strdt
            df["MF_ID"] = mf_id

            all_data.append(df)
        except Exception as e:
            # Best effort: one bad day must not abort the whole range.
            failed_dates.append(strdt)
            print(f"Failed for {strdt}: {e}")
        finally:
            # Throttle after failures too; previously a failing day went
            # straight on to the next request without any pause.
            time.sleep(throttle_seconds)

        current += timedelta(days=1)

    if failed_dates:
        print(f"{len(failed_dates)} day(s) could not be fetched: {', '.join(failed_dates)}")

    if all_data:
        final_df = pd.concat(all_data, ignore_index=True)
        final_df.to_excel(output_file, index=False)
        print(f"Saved {len(final_df)} rows to {output_file}")
    else:
        print("No data fetched.")

# Example usage: every day from 01-jun-2022 through 31-dec-2022, all funds.
fetch_tracking_error(
    from_date="01-jun-2022",
    to_date="31-dec-2022",
    mf_id="all",
    output_file="tracking_error_22.xlsx",
)

In [None]:
import pandas as pd
import requests
import os
import time


def fetch_ter_data(output_path, months):
    """Download the AMFI TER listing for each month and save one CSV per month.

    For every month the first page is requested, the page count is read from
    ``meta.pageCount`` of that response, and the remaining pages are fetched.
    The combined records are written to ``AMFI_TER_<month>.csv``.

    Args:
        output_path: Existing directory in which the CSV files are written.
        months: Iterable of values sent as the ``Month`` query parameter.

    Returns:
        None.

    Raises:
        requests.HTTPError: If any request returns an error status. Months
            already completed stay on disk; the current month is not saved.
    """
    base_url = "https://www.amfiindia.com/api/populate-te-rdata-revised"
    throttle = 1  # seconds to pause after EVERY request, including first pages
    request_timeout = 60  # seconds; stops a hung connection from blocking forever

    def fetch_page(params):
        """Request one page, pause for the throttle, and return the decoded payload."""
        # NOTE(review): verify=False disables TLS certificate verification; kept
        # from the original implementation.
        response = requests.get(base_url, params=params, verify=False, timeout=request_timeout)
        response.raise_for_status()
        payload = response.json()
        time.sleep(throttle)
        return payload

    for month in months:
        print(f"Fetching data for {month}...")

        params = {
            "MF_ID": "All",
            "Month": month,
            "strCat": -1,
            "strType": -1,
            "page": 1,
            "pageSize": 100
        }

        # The first page also tells us how many pages exist.
        first_page = fetch_page(params)
        all_records = list(first_page.get("data") or [])

        page_count = int((first_page.get("meta") or {}).get("pageCount") or 1)
        print(f"Total pages for {month}: {page_count}")

        # Loop through remaining pages
        for page in range(2, page_count + 1):
            params["page"] = page
            all_records.extend(fetch_page(params).get("data") or [])
            print(f"Run completed for {page}/{page_count}.")

        df = pd.DataFrame(all_records)

        output_file = os.path.join(output_path, f"AMFI_TER_{month}.csv")
        df.to_csv(output_file, index=False)
        print(f"Saved {len(df)} rows for {month} → {output_file}")

if __name__ == "__main__":
    # Months to download in this run.
    # Earlier batch kept for reference:
    # "07-2019", "08-2019", "09-2019", "10-2019", "11-2019", "12-2019"
    months = ["10-2021", "11-2021", "12-2021"]

    # Destination directory for the per-month CSV files; created if missing.
    output_folder = r"C:\Users\kaustubh.keny\Downloads\AMFI_TER_Data"
    os.makedirs(output_folder, exist_ok=True)

    fetch_ter_data(output_path=output_folder, months=months)


AMFI FUND PERFORMANCE

In [None]:
import requests
import pandas as pd
from datetime import datetime

# Endpoint that receives one JSON POST per maturity type / category / sub-category.
url = "https://www.amfiindia.com/gateway/pollingsebi/api/amfi/fundperformance"

headers = {
    "Content-Type": "application/json",
    "User-Agent": "Mozilla/5.0",
    "Referer": "https://www.amfiindia.com/"
}


# Sent as "reportDate" and also used (dashes removed) in the output file name.
report_date = "30-Sep-2025"


# category id -> sub-category ids requested for it.
# NOTE(review): every list holds exactly two ids and only those two are
# queried. If the pairs were meant as inclusive ranges (e.g. 1..12), the ids
# in between are never requested — confirm against the API.
category_map = {
    1: [1, 12],
    2: [13, 29],
    3: [30, 40],
    4: [36, 37],
    5: [38, 39]
}

results = []

for maturity_type in [1, 2]:  # Open Ended, Close Ended
    for category, subcategories in category_map.items():
        for subcat in subcategories:
            label = f"M{maturity_type} C{category} S{subcat}"
            payload = {
                "maturityType": maturity_type,
                "category": category,
                "subCategory": subcat,
                "mfid": 0,
                "reportDate": report_date
            }

            try:
                # NOTE(review): verify=False disables TLS certificate
                # verification; kept from the original cell.
                response = requests.post(url, json=payload, headers=headers, verify=False, timeout=60)
            except requests.RequestException as exc:
                # One failed combination must not discard the results so far.
                print(f"❌ Failed: {label} → {exc}")
                continue

            if response.status_code != 200:
                print(f"❌ Failed: {label} → {response.status_code}")
                continue

            try:
                data = response.json()
            except ValueError as exc:
                print(f"❌ Failed: {label} → invalid JSON ({exc})")
                continue

            # .get() instead of indexing: a payload without these keys is
            # reported as "no data" rather than raising KeyError.
            rows = data.get("data") if isinstance(data, dict) else None
            if rows and data.get("validationStatus") == "SUCCESS":
                df = pd.DataFrame(rows)
                df["maturityType"] = maturity_type
                df["category"] = category
                df["subCategory"] = subcat
                results.append(df)
                print(f"✅ Fetched: {label}")
            else:
                print(f"⚠️ No data: {label}")

# Combine all results and save to Excel
if results:
    final_df = pd.concat(results, ignore_index=True)
    filename = f"AMFI_FundPerformance_{report_date.replace('-', '')}.xlsx"
    final_df.to_excel(filename, index=False)
    print(f"\n📁 Data saved to {filename}")
else:
    print("\n🚫 No data retrieved.")

In [None]:
import requests
import pandas as pd
import time
 
BASE_URL = "https://api.mospi.gov.in/api/plfs/getData"


def fetch_plfs_all(params: dict, output_file: str = "plfs_data.csv", sleep_sec: float = 1):
    """Download every page of a PLFS query and save the rows to one CSV.

    The first page is requested to read ``meta_data.totalPages``; the remaining
    pages are then fetched one by one. A page that fails is reported and
    skipped, so the output may be incomplete; the skipped page numbers are
    listed at the end.

    Args:
        params: Query parameters for the API. The dict is not modified.
        output_file: Path of the CSV file to write.
        sleep_sec: Pause in seconds after each page from page 2 onward.

    Raises:
        requests.HTTPError: If the first request returns an error status.
        Exception: If the first response has no truthy "statusCode".
    """
    # Work on a copy so the caller's dict does not gain a "page" key.
    query = dict(params)
    all_data = []

    # Start with page 1 to get metadata
    query["page"] = 1
    # NOTE(review): verify=False disables TLS certificate verification; kept
    # from the original, and now applied to every page (see the loop below).
    resp = requests.get(BASE_URL, params=query, verify=False, timeout=60)
    resp.raise_for_status()
    first = resp.json()

    if not first.get("statusCode"):
        raise Exception("API returned error: " + str(first))

    meta = first.get("meta_data") or {}
    total_pages = int(meta.get("totalPages") or 1)
    print(f"Total pages: {total_pages}")

    if "data" in first:
        all_data.extend(first["data"])

    failed_pages = []
    for page in range(2, total_pages + 1):
        query["page"] = page
        print(f"Fetching page {page}/{total_pages} ...")
        try:
            # Same TLS setting as page 1. The original omitted verify=False
            # here, so where page 1 only worked because verification was off,
            # every later page failed and was silently dropped.
            resp = requests.get(BASE_URL, params=query, verify=False, timeout=60)
            resp.raise_for_status()
            result = resp.json()
            if "data" in result:
                all_data.extend(result["data"])
        except Exception as e:
            failed_pages.append(page)
            print(f"Failed for page {page}: {e}")
        # The only delay is the caller-controlled one; a hard-coded extra
        # 3-second sleep used to make `sleep_sec` ineffective.
        time.sleep(sleep_sec)

    if failed_pages:
        print(f"WARNING: {len(failed_pages)} page(s) missing from the output: {failed_pages}")

    if all_data:
        df = pd.DataFrame(all_data)
        df.to_csv(output_file, index=False)
        print(f"Saved {len(df)} rows to {output_file}")
    else:
        print("No data fetched.")
 
if __name__ == "__main__":
    # Query sent to the PLFS endpoint; multi-valued filters are passed as
    # comma-separated strings.
    payload = dict(
        indicator_code=8,
        limit=20,
        year="2017-18,2018-19,2019-20",
        age_code="1,2,3,4",
        education_code="1,2,3,4,5,6,7,8,9,10",
        gender_code="1,2,3",
        religion_code="1,2,3,4,5",
        sector_code="1,2,3",
        social_category_code="1,2,3,4,5",
        state_code="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,99",
        weekly_status_code="1,2",
    )

    fetch_plfs_all(payload, output_file="Average_Gross_2020-24.csv", sleep_sec=0.5)