In [39]:
import os
import urllib
import zipfile
import requests
from pathlib import Path
from datetime import datetime, timedelta, date
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.sql import text as sa_text
import time
import warnings
warnings.filterwarnings('ignore')

# database configuration
HOST = "DESKTOP-0H6SQRG"
DB = "nse"
DRIVER = "ODBC+Driver+11+for+SQL+Server"


ERROR_TABLE_NAME = "bse_error_dates"
HISTORY_TABLE = "bse_history"
BHAVCOPY_FOLDER = "bse_bhavcopies"

CREATE_ERROR_TABLE_QUERY = f"""IF NOT EXISTS (SELECT * FROM sysobjects WHERE name='{ERROR_TABLE_NAME}' and xtype='U')
                                create table {ERROR_TABLE_NAME}(id int primary key identity(1,1), [date] date not null,
                                timestamp datetime default current_timestamp)"""

CREATE_BSE_HISTORY_TABLE_QUERY = f"""IF NOT EXISTS (SELECT * FROM sysobjects WHERE name='{HISTORY_TABLE}' and xtype='U')
                                CREATE TABLE {HISTORY_TABLE}(
                                [id] [bigint] primary key identity(1,1),
                                [Date] [date],
                                [Symbol] [varchar](max),
                                [Open] [float] NULL,
                                [High] [float] NULL,
                                [Low] [float] NULL,
                                [Close] [float] NULL,
                                [Last] [float] NULL,
                                [Prev Close] [float] NULL,
                                [Volume] [bigint] NULL,
                                [NO_TRADES] [bigint] NULL,
                                [SC_CODE] [bigint],
                                [SC_GROUP] [varchar](max),
                                [SC_TYPE] [varchar](max),
                                [NET_TURNOV] [float] NULL,
                                [ISIN_CODE] [varchar](max) NULL,
                                timestamp datetime default current_timestamp)"""

REQUIRED_COLUMNS = ['Date', 'SC_CODE', 'Symbol', 'SC_GROUP', 'SC_TYPE', 'Open', 'High', 'Low', 'Close', 'Last', 
                    'Prev Close', 'NO_TRADES', 'Volume', 'NET_TURNOV', 'ISIN_CODE']

column_mappings = {
    "date": "Date", "symbol": "Symbol", "pclose": "Prev Close", "open": "Open", "high": "High", "low":"Low", "close": "Close",
    "vwap": "VWAP", "volume": "Volume", "trades": "Trades", "dvolume": "Deliverable Volume", "pdeliverble":"%Deliverble",
    "series": "Series", "last": "Last"}

engine = create_engine(f'mssql+pyodbc://{HOST}/{DB}?trusted_connection=yes&driver={DRIVER}')
realpath_bhavcopy = os.path.join(os.getcwd(), BHAVCOPY_FOLDER)
if not os.path.exists(realpath_bhavcopy):
    os.mkdir(realpath_bhavcopy)

def execute_sql(query, commit=False):
    with engine.connect() as conn:
        result = conn.execute(query)
        if commit:
            conn.commit()
    return result

    
execute_sql(CREATE_ERROR_TABLE_QUERY, commit=True)
execute_sql(CREATE_BSE_HISTORY_TABLE_QUERY, commit=True)


def convert_strto_datetime(date_time):
    try:
        datetime_str = datetime.strptime(date_time, '%d-%b-%Y')
    except ValueError:
        datetime_str = datetime.strptime(date_time, '%d-%m-%Y')
    return datetime_str

def if_exists(tablename, date=None, df=None):
    DUPLICATE_CHECK_QUERY = """select * from {TABLENAME} where {DATE_COL}='{DATE}' 
                                and {SYMBOL_COL}='{SYMBOL}'"""
    SINGLE_CHECK_QUERY = "select * from {TABLENAME} where {DATE_COL}='{DATE}'"
    result = None
    if date:
        result = execute_sql(SINGLE_CHECK_QUERY.format(TABLENAME=tablename, DATE_COL=column_mappings["date"],
                                                           DATE=date)).fetchone()
        
    elif df is not None:
        rand_rec = df.sample()
        rand_rec = rand_rec.to_dict("records")[0]
        result = execute_sql(DUPLICATE_CHECK_QUERY.format(TABLENAME=tablename, 
                                                           DATE_COL=column_mappings["date"],
                                                           SYMBOL_COL=column_mappings["symbol"], 
                                                           DATE=rand_rec[column_mappings["date"]],
                                                           SYMBOL=rand_rec[column_mappings["symbol"]])).fetchone()
    if result:
        return True
    return False
        

def download(download_url, fpath):
    headers = {
      'authority': 'www.bseindia.com',
      'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
      'accept-language': 'en-US,en;q=0.9',
      'referer': 'https://www.bseindia.com/markets/marketinfo/BhavCopy.aspx',
      'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 \
                      (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36 Edg/106.0.1370.42'
    }
    with open(fpath, "wb") as fp:
        r = requests.get(download_url, stream=True, verify=False, headers=headers)
        if r.ok:
            fp.write(r.content)
    return True

def full_bhavcopy_bse(date, folder):
    filename = f'EQ_ISINCODE_{"%02d"%date.day}{"%02d"%date.month}{date.strftime("%y")}.ZIP' 
    file_path = os.path.join(folder, filename)
    csv_path = file_path.replace(".ZIP", ".csv")
    download_url = f'https://www.bseindia.com/download/BhavCopy/Equity/{filename}'
    if not os.path.exists(csv_path):
        if download(download_url, file_path):
            try:
                with zipfile.ZipFile(file_path, "r") as compressed_file:
                    compressed_file.extractall(Path(file_path).parent)
#                 print(f"downloaded data for {date}")
                os.remove(file_path)
                return csv_path
            except Exception as e:
                print(e)
        return None
    return csv_path

def clean_df(filepath):
    df = pd.read_csv(filepath)
    df = df.rename(columns={"SC_NAME": column_mappings["symbol"], "OPEN": column_mappings["open"], "HIGH":column_mappings["high"], 
         "LOW":column_mappings["low"], "CLOSE":column_mappings["close"], "NO_OF_SHRS":column_mappings["volume"],
        "PREVCLOSE":column_mappings["pclose"], "TRADING_DATE":column_mappings["date"], "LAST": column_mappings["last"]})
    return df[REQUIRED_COLUMNS]

def save_bc(date):
    filepath = full_bhavcopy_bse(date, BHAVCOPY_FOLDER)
    df = clean_df(filepath)
    df.to_sql(HISTORY_TABLE, engine, index=False, if_exists="append")
    print(f"Downloaded data for {date.strftime('%d-%b-%Y')}")
    return df

ModuleNotFoundError: No module named 'pyodbc'

## download data from existing table based on last date

In [2]:
last_date = execute_sql(f"select top(1){column_mappings['date']} from {HISTORY_TABLE} order by {column_mappings['date']} DESC").fetchone()
print(f"last date found in database is {last_date[column_mappings['date']].strftime('%d-%b-%Y')}")
next_date = last_date[column_mappings['date']]
yesterday = date.today()-timedelta(days=1)
while True:
    try:
        if yesterday < next_date:
            break
        next_date = next_date+timedelta(days=1)
        if next_date.weekday() < 5:
    #         print(next_date.strftime('%d-%b-%Y'))
            try:
                if if_exists(HISTORY_TABLE, next_date):
                    print(f"Data already exists for {next_date.strftime('%d-%b-%Y')} in {HISTORY_TABLE} table")
                    continue
                df = save_bc(next_date)
            except Exception as e:
                print(f"error occured on {next_date.strftime('%d-%b-%Y')}")
                execute_sql(f"insert into {ERROR_TABLE_NAME}([date]) values('{next_date}')", commit=True)
                continue
        time.sleep(1)
    except KeyboardInterrupt:
        print("Download interrupted..")
        break

last date found in database is 12-May-2023
Downloaded data for 15-May-2023
Downloaded data for 16-May-2023
Downloaded data for 17-May-2023
Downloaded data for 18-May-2023
Downloaded data for 19-May-2023
Downloaded data for 22-May-2023
Downloaded data for 23-May-2023
Downloaded data for 24-May-2023
Downloaded data for 25-May-2023
Downloaded data for 26-May-2023
Downloaded data for 29-May-2023
Downloaded data for 30-May-2023
Downloaded data for 31-May-2023
Downloaded data for 01-Jun-2023
Downloaded data for 02-Jun-2023
Downloaded data for 05-Jun-2023
Downloaded data for 06-Jun-2023
Downloaded data for 07-Jun-2023
Downloaded data for 08-Jun-2023
Downloaded data for 09-Jun-2023
Downloaded data for 12-Jun-2023
Downloaded data for 13-Jun-2023
Downloaded data for 14-Jun-2023
Downloaded data for 15-Jun-2023
Downloaded data for 16-Jun-2023
Downloaded data for 19-Jun-2023
Downloaded data for 20-Jun-2023
Downloaded data for 21-Jun-2023
Downloaded data for 22-Jun-2023
Downloaded data for 23-Jun-20

## Download data for the date in error table

In [3]:
err_dates = execute_sql(f"select [date] as {column_mappings['date']} from {ERROR_TABLE_NAME}").fetchall()
for d in err_dates:
    try:
#         print(d[column_mappings["date"]].strftime('%d-%b-%Y'))
        save_bc(d[column_mappings["date"]])
        execute_sql(f"delete {ERROR_TABLE_NAME} where date='{d[column_mappings['date']]}'")
    except Exception as e:
        print(e)
    time.sleep(1)
        

File is not a zip file
Invalid file path or buffer object type: <class 'NoneType'>
File is not a zip file
Invalid file path or buffer object type: <class 'NoneType'>
File is not a zip file
Invalid file path or buffer object type: <class 'NoneType'>
File is not a zip file
Invalid file path or buffer object type: <class 'NoneType'>
File is not a zip file
Invalid file path or buffer object type: <class 'NoneType'>
File is not a zip file
Invalid file path or buffer object type: <class 'NoneType'>
File is not a zip file
Invalid file path or buffer object type: <class 'NoneType'>
File is not a zip file
Invalid file path or buffer object type: <class 'NoneType'>
File is not a zip file
Invalid file path or buffer object type: <class 'NoneType'>
File is not a zip file
Invalid file path or buffer object type: <class 'NoneType'>
File is not a zip file
Invalid file path or buffer object type: <class 'NoneType'>
File is not a zip file
Invalid file path or buffer object type: <class 'NoneType'>
File

## Download data for a Single day

In [13]:
download_date = "03-01-2022"                #dd/mm/yyyy

dt = convert_strto_datetime(download_date)
if dt.weekday() > 4:
    print(f"Sorry its a weekend.. {download_date}")
elif if_exists(HISTORY_TABLE, dt):
    print(f"Data already exists for {download_date} in {HISTORY_TABLE} table")
else:
    df = save_bc(dt)

Downloaded data for 03-Jan-2022
