In [None]:
from nsepython import *
import datetime
import pandas as pd

import glob

In [None]:
symbol = "PNB"  # Select the symbol whose data you want to download, following the stock-selection instructions

## Collect Options Data

In [None]:
# Request parameters for the derivative history download.
instrumentType = "options"  # instrument type passed to derivative_history
optionType = "CE"           # option type passed to derivative_history
series = "EQ"               # series code; not referenced by the cells visible in this notebook

In [None]:
# Equity data for the selected symbol; the download loop reads its
# 'Date&Time' and 'Close' columns to centre the strike-price range.
# The path follows `symbol` so that changing the symbol cannot silently keep
# using PNB prices (assumes the file is named <symbol>.csv, as PNB.csv is).
equity = pd.read_csv(f"../data/{symbol}.csv")

In [None]:
# Expiry dates for `symbol` over the requested range, as returned by
# nsepython's expiry_history; later cells parse the entries as '%d-%b-%Y'.
expiry_list = expiry_history(
    symbol,
    start_date="01-01-2012",
    end_date="01-08-2024",
)

In [None]:
# I'm removing these entries as they appear to be weekly expiries mistakenly
# categorized as monthly expiries. This misclassification hinders data
# collection, as the current package and other data sources provided only
# support historical option data on a daily basis.
remove_list = [
    "19-Sep-2019", "03-Oct-2019", "10-Oct-2019", "17-Oct-2019", "24-Oct-2019",
    "07-Nov-2019", "14-Nov-2019", "21-Nov-2019", "20-Apr-2023", "04-May-2023",
    "11-May-2023", "18-May-2023", "01-Jun-2023", "07-Sep-2023", "14-Sep-2023",
    "21-Sep-2023", "05-Oct-2023", "12-Oct-2023", "19-Oct-2023",
]

for i in remove_list:
    try:
        expiry_list.remove(i)
    except ValueError:
        # list.remove raises ValueError when the date is not in expiry_list;
        # report it and carry on. Any other exception is a real error and is
        # no longer swallowed.
        print("Error in removing", i)

In [None]:
path = "../data/CSV/"
# Options Data is from 01-Jan-2015 to 25-Apr-2024

# Columns of the fetched option history that are dropped before the per-date
# CSV files are written.
columns_to_drop = ['_id', 'FH_INSTRUMENT', 'FH_SYMBOL', 'FH_MARKET_TYPE', 'FH_UNDERLYING_VALUE', 'TIMESTAMP']

# Latest date present in the equity data ('Date&Time' is stored as
# '%Y-%m-%d'); it bounds the forward price search so that the search cannot
# loop forever when no later equity row exists.
last_equity_date = datetime.datetime.strptime(str(equity['Date&Time'].max()), '%Y-%m-%d')

# Walk the expiries from index 149 down to 36; expiry_list[i - 1] is the
# expiry that precedes the one being downloaded.
for i in range(149, 35, -1):
    continues_empty = 0

    # Getting Start and End Date for a selected expiry: the window runs from
    # the day after the previous expiry to the day after this expiry.
    window_start = datetime.datetime.strptime(expiry_list[i - 1], '%d-%b-%Y') + datetime.timedelta(days=1)
    window_end = datetime.datetime.strptime(expiry_list[i], '%d-%b-%Y') + datetime.timedelta(days=1)
    start_date = window_start.strftime('%d-%m-%Y')
    end_date = window_end.strftime('%d-%m-%Y')
    expiry_date = expiry_list[i]

    # Step 1: Get the stock price of the start date to decide the range of strike prices.
    # The equity dates are '%Y-%m-%d', so the lookup must use that format (the
    # '%d-%m-%Y' start_date string can never match). If the start date has no
    # row (non-trading day), move forward one day at a time.
    price = []
    temp_date = window_start
    while temp_date <= last_equity_date:
        price = list(equity[equity['Date&Time'] == temp_date.strftime('%Y-%m-%d')]['Close'])
        if price:
            break
        temp_date += datetime.timedelta(days=1)
    if not price:
        print("No equity close on or after", start_date, "- skipping expiry", expiry_date)
        continue

    # Fixing the range of strike prices to check: +200/-200 around the price
    # (rounded to a multiple of 40) is an approximation; the range could still
    # be decreased.
    price = int(price[0] / 40) * 40
    start_price = max(int((price - 200) / 40) * 40, 0)  # strikes cannot be negative
    end_price = int((price + 200) / 40) * 40
    strike_price_wise = {}

    # Step 2: Iterate over each strike price with a step of 5. The step of 5
    # was decided after checking certain strike prices for different expiry dates.
    for j in range(start_price, end_price, 5):
        x = derivative_history(symbol, start_date, end_date, instrumentType, expiry_date, j, optionType)
        if not x.empty:
            continues_empty = 0  # Reset the run of empty strikes
            strike_price_wise[j] = x
            continue

        # Empty responses are only counted once the strike is above the price;
        # stop after more than 10 consecutive empty strikes, as the remaining
        # higher strikes are expected to be empty as well.
        if j > price:
            continues_empty += 1
        if continues_empty > 10:
            break

    # Step 3: Convert the strike_price_wise dictionary to date-wise CSVs so the
    # model can look at the option chain of a given date.
    if not strike_price_wise:
        continue

    unique_dates = []
    for strike_df in strike_price_wise.values():
        unique_dates += list(strike_df['FH_TIMESTAMP'].unique())
    unique_dates = list(set(unique_dates))

    # For each unique date, build one DataFrame holding the rows of every
    # strike price for that date (in strike order).
    datewise_consolidated_dfs = {}
    for date in unique_dates:
        temp_data = [
            strike_df[strike_df['FH_TIMESTAMP'] == date]
            for strike_df in strike_price_wise.values()
        ]
        consolidated_df = pd.concat(temp_data, ignore_index=True)
        consolidated_df.drop(columns=columns_to_drop, inplace=True)
        datewise_consolidated_dfs[date] = consolidated_df

    # Files are written only after every date of this expiry has been
    # consolidated; each file is named after its FH_TIMESTAMP value.
    for chain_date, chain_df in datewise_consolidated_dfs.items():
        chain_df.to_csv(f"{path}{chain_date}.csv", index=False)

### Exception Handling: Updating URL in `derivative_history`

In case of an error with the `derivative_history` function, perform the following steps to ensure correct data fetching:

1. **Identify the Function**: Navigate to the definition of the `derivative_history` function in the source code.
2. **Backup Function**: Locate the `derivative_history_virgin` function within the same source code file.
3. **Update the URL**: Modify the `nsefetch_url` variable by changing the base URL to:
   ```python
   "https://www.nseindia.com/api/historical/foCPV?&from="
   ```


# Verify Option Data Collected

In [None]:
# Paths of every per-date option-chain CSV written by the download step.
csv_pattern = "../data/CSV/*.csv"
files = glob.glob(csv_pattern)

In [None]:
import os

# Keep only the file name of each path. os.path.basename replaces the former
# fixed slice at character 12, which silently depended on the exact length of
# the "../data/CSV/" prefix.
files = [os.path.basename(file) for file in files]

In [None]:
# Drop everything from the first '.' onwards, leaving the date part of the name.
files = [name.partition('.')[0] for name in files]

In [None]:
# Parse the '%d-%b-%Y' file names into datetime objects so they sort chronologically.
files = list(map(lambda stamp: datetime.datetime.strptime(stamp, '%d-%b-%Y'), files))

In [None]:
# Order the dates chronologically, then turn them back into '%d-%b-%Y' strings.
files = [moment.strftime('%d-%b-%Y') for moment in sorted(files)]

In [None]:
# Equity data for the selected symbol; its 'Date&Time' column supplies the
# dates that are checked against the collected option files. The path follows
# `symbol` so the check always targets the stock that was downloaded
# (assumes the file is named <symbol>.csv, as PNB.csv is).
df = pd.read_csv(f"../data/{symbol}.csv")

In [None]:
# Print every equity date in rows 745..3049 that has no option-chain CSV.
# A set gives O(1) membership tests instead of scanning the list per row, and
# the positional slice is clamped to the data length, so a shorter CSV no
# longer raises KeyError.
collected_dates = set(files)
for x in df['Date&Time'].iloc[745:3050]:
    # Equity dates are '%Y-%m-%d'; the file names use '%d-%b-%Y'.
    y = datetime.datetime.strptime(x, '%Y-%m-%d').strftime('%d-%b-%Y')
    if y not in collected_dates:
        print(y)