In [2]:
import pandas as pd

In [3]:
import requests
from datetime import datetime, timedelta
import json

In [4]:
# Root of the site; endpoint paths are appended directly to this string,
# so the trailing slash must be kept.
BASE_API_URL = 'https://www.nseindia.com/'


In [5]:
# Browser-like request headers passed explicitly to every request in this notebook.
# NOTE(review): 'br' is advertised in Accept-Encoding; presumably this needs a
# Brotli decoder installed alongside requests/urllib3 -- confirm, otherwise a
# Brotli-compressed reply may not be decoded.
headers = {
    'User-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0',
    'Accept-Language': 'en,gu;q=0.9,hi;q=0.8',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept': '*/*',
}

In [6]:
# Query-string parameters for the report listing request.
# Dates are written day-month-year (DD-MM-YYYY); the same two values are
# reused later to name the output folder.
params = {
    'index': 'equities',
    'from_date': '01-03-2023',
    'to_date': '29-02-2024',
}

In [7]:
# Open one HTTP session and request the site root first. The cookies set by
# that response are captured so they can be passed explicitly to the later
# API and file requests.
# NOTE(review): presumably the API rejects requests made without these
# cookies -- confirm. The status of this priming request is not checked.
session = requests.Session()
session_response = session.get(BASE_API_URL,headers=headers, timeout=5)
cookies = dict(session_response.cookies)


In [8]:
# Fetch the listing of filed reports for the date window in `params`.
# The endpoint path is spelled exactly as the service expects it (the recorded
# run against this URL succeeded), so do not "correct" the spelling.
api_url = BASE_API_URL + 'api/corporate-bussiness-sustainabilitiy'

# Every other request in this notebook sets a timeout; without one this call
# can block the kernel indefinitely. The limit is deliberately generous.
response = session.get(api_url, headers=headers, cookies=cookies, params=params, timeout=30)
print("XBRL Reports URL: ",response.url)

# Stays an empty dict on any failure so later cells can still run.
json_data= {}
if response.status_code == 200:
    try:
        json_data = response.json()
    except ValueError as e:
        # A 200 reply whose body is not JSON would otherwise raise and abort
        # the cell; the decode errors raised by requests derive from ValueError.
        print("Failed to parse reports: response body is not valid JSON:", e)
    else:
        print("Reports downloaded successfully!")
else:
    print("Failed to download reports. Status code:", response.status_code)

XBRL Reports URL:  https://www.nseindia.com/api/corporate-bussiness-sustainabilitiy?index=equities&from_date=01-03-2023&to_date=29-02-2024
Reports downloaded successfully!


In [9]:
# Build the filings table from the 'data' entry of the API reply.
# When that entry is absent (e.g. the request failed and json_data is empty),
# fall back to an empty frame so the cells below still run.
if 'data' not in json_data:
    df = pd.DataFrame()
else:
    data = json_data['data']
    df = pd.DataFrame(data)

df.head()

Unnamed: 0,symbol,companyName,fyFrom,fyTo,attachmentFile,xbrlFile,submissionDate,revisionDate
0,CASTROLIND,Castrol India Limited,2023,2023,https://nsearchives.nseindia.com/corporate/CAS...,https://nsearchives.nseindia.com/corporate/xbr...,29-Feb-2024,-
1,ASAL,Automotive Stampings and Assemblies Limited,2022,2023,https://nsearchives.nseindia.com/corporate/ASA...,https://nsearchives.nseindia.com/corporate/xbr...,23-Jan-2024,-
2,SIEMENS,Siemens Limited,2022,2023,https://nsearchives.nseindia.com/corporate/SIE...,https://nsearchives.nseindia.com/corporate/xbr...,18-Jan-2024,-
3,SAIL,Steel Authority of India Limited,2022,2023,https://nsearchives.nseindia.com/corporate/SAI...,https://nsearchives.nseindia.com/corporate/xbr...,17-Jan-2024,-
4,RGL,Renaissance Global Limited,2022,2023,https://nsearchives.nseindia.com/corporate/RJL...,https://nsearchives.nseindia.com/corporate/xbr...,12-Jan-2024,-


In [10]:
import os

def create_folder_if_not_exists(folder_path):
    """Ensure that the directory ``folder_path`` exists.

    Missing parent directories are created as well. A message is printed only
    when the directory was not already present.

    Args:
        folder_path: Path of the directory to create.

    Raises:
        FileExistsError: If ``folder_path`` exists but is not a directory.
            Previously this case was silently treated as "already exists".
    """
    if os.path.isdir(folder_path):
        return
    # exist_ok=True removes the check-then-create race: if something else
    # creates the directory between the check above and this call, makedirs
    # no longer fails.
    os.makedirs(folder_path, exist_ok=True)
    print(f"Folder '{folder_path}' created.")

In [11]:
# Top-level directory (relative to the working directory) for all downloads.
OUTPUT_FOLDER = "XBRL_Reports"
create_folder_if_not_exists(OUTPUT_FOLDER)

# Each query window gets its own sub-folder named "<from_date>~<to_date>",
# using the same values that were sent in the request `params`.
folder_name = f"{params['from_date']}~{params['to_date']}"
folder_path = os.path.join(OUTPUT_FOLDER, folder_name)
create_folder_if_not_exists(folder_path)

In [12]:
# Show the column names; the download helpers read `xbrlFile`, `symbol`,
# `fyFrom` and `fyTo` from each row.
df.columns

Index(['symbol', 'companyName', 'fyFrom', 'fyTo', 'attachmentFile', 'xbrlFile',
       'submissionDate', 'revisionDate'],
      dtype='object')

In [13]:
import threading

In [14]:
# Show the directory the downloaded files will be written to.
folder_path

'XBRL_Reports/01-03-2023~29-02-2024'

In [15]:
def download_file(url, symbol, from_to, folder_path, session, headers, cookies, timeout=5):
    """Download one report and save it as ``<symbol>_<from>_<to>_BRSR.xml``.

    Failures are reported with ``print`` rather than raised, so one bad
    download does not stop the others; the outcome is also returned so the
    caller can tell what happened.

    Args:
        url: Address of the file to fetch.
        symbol: Company symbol, used as the first part of the file name.
        from_to: Two-item sequence; items 0 and 1 become the second and third
            parts of the file name.
        folder_path: Existing directory the file is written into.
        session: Object with a ``get(url, headers=..., cookies=..., timeout=...)``
            method returning a response with ``status_code`` and ``content``.
        headers: Headers passed through to ``session.get``.
        cookies: Cookies passed through to ``session.get``.
        timeout: Timeout in seconds passed to ``session.get`` (default 5, the
            value that used to be hard-coded).

    Returns:
        True if the file was written, False if the request returned a
        non-200 status or any exception occurred.
    """
    filename = os.path.join(folder_path, f"{symbol}_{from_to[0]}_{from_to[1]}_BRSR.xml")
    try:
        response = session.get(url, headers=headers, cookies=cookies, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to download: {filename}, Status code: {response.status_code}")
            return False
        with open(filename, 'wb') as f:
            f.write(response.content)
    except Exception as e:
        # Deliberately broad: this runs as a best-effort worker, so network
        # and filesystem errors alike are reported instead of propagated.
        print(f"Failed to download: {filename}, Error: {e}")
        return False
    print(f"Downloaded: {filename}")
    return True

In [20]:
def download_batch(data, folder_path):
    """Download every report in ``data`` concurrently, one thread per row.

    Each row must expose ``xbrlFile``, ``symbol``, ``fyFrom`` and ``fyTo``.
    The module-level ``session``, ``headers`` and ``cookies`` are passed to
    every ``download_file`` call. Returns only after all threads have finished.

    Args:
        data: DataFrame holding the rows to download.
        folder_path: Directory the files are written into.
    """
    workers = []
    for record in data.itertuples(index=False):
        worker = threading.Thread(
            target=download_file,
            args=(
                record.xbrlFile,
                record.symbol,
                (record.fyFrom, record.fyTo),
                folder_path,
                session,
                headers,
                cookies,
            ),
        )
        workers.append(worker)
        worker.start()

    # Block until the whole batch is done before returning.
    for worker in workers:
        worker.join()

In [17]:
def download_in_batches(data_df, folder_path, batch_size):
    """Download the reports in ``data_df`` in consecutive slices of rows.

    Each slice of at most ``batch_size`` rows is handed to ``download_batch``;
    the next slice starts only after that call returns.

    Args:
        data_df: DataFrame of reports, sliced positionally with ``.iloc``.
        folder_path: Directory passed through to ``download_batch``.
        batch_size: Maximum number of rows per slice; must be positive.

    Raises:
        ValueError: If ``batch_size`` is zero or negative. Zero used to fail
            with ZeroDivisionError and a negative value silently did nothing.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    total_rows = len(data_df)
    for start_index in range(0, total_rows, batch_size):
        # .iloc clamps the end of the slice, so the final (shorter) batch
        # needs no special handling.
        batch_data = data_df.iloc[start_index:start_index + batch_size]
        download_batch(batch_data, folder_path)

In [None]:
# Restrict this run to the first MAX_REPORTS rows of the listing and fetch
# them BATCH_SIZE at a time (each batch uses one thread per row).
MAX_REPORTS = 103
BATCH_SIZE = 5

small_df = df.iloc[:MAX_REPORTS]
download_in_batches(small_df, folder_path, BATCH_SIZE)

### Scratch: manual single-file download checks

In [18]:
# Spot-check: request a single report file directly and show the HTTP status.
probe_url = 'https://nsearchives.nseindia.com/corporate/xbrl/BRSR_1033465_23012024115722_WEB.xml'
res = session.get(probe_url, headers=headers, cookies=cookies, timeout=5)
res.status_code

200

In [19]:
# Spot-check: run the download helper once, outside the threaded batch path.
x_url = "https://nsearchives.nseindia.com/corporate/xbrl/BRSR_1033465_23012024115722_WEB.xml"
x_symbol = "ASAL"
ft = (2022, 2023)
download_file(
    url=x_url,
    symbol=x_symbol,
    from_to=ft,
    folder_path=folder_path,
    session=session,
    headers=headers,
    cookies=cookies,
)

XBRL_Reports/01-03-2023~29-02-2024/ASAL_2022_2023_BRSR.xml
https://nsearchives.nseindia.com/corporate/xbrl/BRSR_1033465_23012024115722_WEB.xml
Downloaded: XBRL_Reports/01-03-2023~29-02-2024/ASAL_2022_2023_BRSR.xml
