In [None]:
import os, zipfile, glob


In [None]:
# Root folder that the extraction loop searches recursively for .zip archives.
dirName = "C:/Personal/Projects/NSEIndexOptionsData/temp/banknifty/"

In [None]:
# Unpack every .zip found anywhere under dirName into the folder that contains it,
# then delete the archive so only the extracted files remain.
for filename in glob.iglob(f'{dirName}/**/*.zip', recursive=True):
    print(filename)
    # The context manager closes the archive even when extraction raises. A handle
    # left open would keep the file locked for the lifetime of the kernel (and on
    # Windows would make the os.remove below, or any retry, fail).
    with zipfile.ZipFile(filename) as zip_ref:
        zip_ref.extractall(os.path.dirname(filename))
    # Only reached when extraction succeeded, so a failed archive is never deleted.
    os.remove(filename)

In [None]:
import pandas as pd
import datetime
import glob
import re

In [None]:
# Column layout (and order) shared by every frame this notebook writes to parquet.
columns = [
    'Ticker',
    'Date/Time',
    'Open',
    'High',
    'Low',
    'Close',
    'Volume',
    'Open Interest',
]

# dtypes handed to pd.read_csv: the ticker is a string, every price/volume/OI
# column is float64. 'Date/Time' is left out because it is parsed separately.
dtypes = {
    'Ticker': 'string',
    **{name: 'float64' for name in columns if name not in ('Ticker', 'Date/Time')},
}

def dateparse(d,t):
    """Combine a 'YYYY/MM/DD' date string and an 'HH:MM' time string into a datetime.

    Raises ValueError when the combined text does not match '%Y/%m/%d %H:%M'.
    """
    stamp = " ".join((d, t))
    return datetime.datetime.strptime(stamp, '%Y/%m/%d %H:%M')

def dateParseSpot(d,t):
    """Combine an 'MM/DD/YYYY' date string and an 'HH:MM:SS' time string into a datetime.

    Raises ValueError when the combined text does not match '%m/%d/%Y %H:%M:%S'.
    """
    stamp = " ".join((d, t))
    return datetime.datetime.strptime(stamp, '%m/%d/%Y %H:%M:%S')

# Prepare spot data parquet

In [None]:
# Build one de-duplicated spot frame from every CSV in temp/Spot and write it to
# 2022/spot.parquet. The result stays available as `finalDf` for later cells.

# Frames are collected here and concatenated once after the loop; re-concatenating,
# re-sorting and re-deduplicating the growing frame per file is quadratic.
spotFrames = []

for file in glob.glob('C:/Personal/Projects/NSEIndexOptionsData/temp/Spot/*.csv'):
    # skiprows=1 drops the first line of each file (presumably a header row - confirm).
    # Date and Time are read as plain strings and combined below, because
    # read_csv's `date_parser` / nested `parse_dates` options are deprecated.
    df = pd.read_csv(file,
                     skiprows=1,
                     header=None,
                     names=['Ticker', 'Date', 'Time', 'Open', 'High',
                            'Low', 'Close', 'Volume', 'Open Interest'],
                     dtype={**dtypes, 'Date': str, 'Time': str})

    # Vectorised parse with the same strict format the per-row strptime used.
    df['Date/Time'] = pd.to_datetime(df['Date'] + ' ' + df['Time'],
                                     format='%Y/%m/%d %H:%M')

    # Rename index symbols. regex=False pins a literal match for '.NSEBANK'
    # (the default for `regex` differs between pandas versions and '.' is a regex
    # wildcard); the remaining symbols are whole-value replacements, as before.
    df['Ticker'] = (df['Ticker']
                    .str.replace('.NSEBANK', 'BANKNIFTY', regex=False)
                    .replace({'.CNX100': 'CNX100',
                              '.CNXIT': 'CNXIT',
                              '.NSEI': 'NIFTY'}))

    spotFrames.append(df[columns])

if spotFrames:
    # The multi-column sort is stable, so keep='first' still prefers the row from
    # the earliest file for any duplicated (Ticker, Date/Time) pair.
    finalDf = (pd.concat(spotFrames, ignore_index=True)
               .sort_values(['Ticker', 'Date/Time'])
               .drop_duplicates(subset=['Ticker', 'Date/Time'], keep='first'))
else:
    # No CSVs found: keep the original behaviour of writing an empty frame.
    finalDf = pd.DataFrame(columns=columns)

finalDf.to_parquet('C:/Personal/Projects/NSEIndexOptionsData/2022/spot.parquet',
                   index=False)


In [None]:
# Write only the rows whose timestamp falls in June (month == 6) to a separate parquet file.
(
    finalDf
    .loc[finalDf['Date/Time'].dt.month == 6]
    .to_parquet(
        'C:/Personal/Projects/PyAlgoMate/pyalgomate/backtesting/data/2023/spot-june.parquet',
        index=False,
    )
)

# Prepare expiry data

In [None]:

def shortForm(month):
    """Return the three-letter upper-case abbreviation of a full English month name.

    Only the twelve capitalised names ('January' ... 'December') are accepted;
    anything else raises KeyError.
    """
    fullNames = (
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December',
    )
    abbreviations = {name: name[:3].upper() for name in fullNames}
    return abbreviations[month]

# Two-digit year appended to every expiry label, e.g. '06' + 'JAN' + '22'.
year = '22'

# Dry run: walk temp/nifty/<Month>/<Expiry dd...> and print each expiry with its
# Weekly/Monthly classification, one blank line between months.
for monthlyDir in glob.glob('C:/Personal/Projects/NSEIndexOptionsData/temp/nifty/**'):
    for weeklyDir in glob.glob(monthlyDir + '/**'):
        # Raw string: '\d' in a normal string literal is an invalid escape sequence
        # (DeprecationWarning, SyntaxWarning on Python 3.12+). The pattern is unchanged.
        if match := re.search(r'Expiry (\d\d).*', weeklyDir, re.IGNORECASE):
            # The parent folder name is the full month name, e.g. 'January'.
            month = shortForm(os.path.basename(os.path.dirname(weeklyDir)))
            expiry = match.group(1) + month + year
            expiryDate = datetime.datetime.strptime(expiry, "%d%b%y")
            # An expiry is "monthly" when one week later is already the next month,
            # i.e. it is the last expiry of its calendar month.
            monthly = expiryDate.month != (expiryDate + datetime.timedelta(days=7)).month
            print(expiry + ' ' + ('Monthly' if monthly else 'Weekly'))
    print()

In [None]:
# Ticker rewrite rules. Both patterns capture (underlying, strike, P|C) from a ticker
# ending in PE/CE; pattern1 additionally requires a literal 'WK' between underlying
# and strike and is used for weekly expiries, pattern2 for monthly ones.
pattern1 = r"^([a-zA-Z]+)WK(\d+)(P|C)E$"
pattern2 = r"^([a-zA-Z]+)(\d+)(P|C)E$"
# Rewrites a match as <underlying>Expiry<P|C><strike>; the literal 'Expiry'
# placeholder is then swapped for the actual expiry label (e.g. 27JAN22).
replace1 = r"\1Expiry\3\2"

# Read the spot data once; it does not change between months.
allSpotDf = pd.read_parquet('C:/Personal/Projects/NSEIndexOptionsData/2022/spot.parquet')

# One output parquet per month folder: all option bars of that month's expiries
# plus the spot bars covering the same time range.
for monthlyDir in glob.glob('C:/Personal/Projects/NSEIndexOptionsData/temp/nifty/**'):
    # Collected per month and concatenated once; concatenating, sorting and
    # de-duplicating the growing frame after every CSV is quadratic.
    optionFrames = []
    for weeklyDir in glob.glob(monthlyDir + '/**'):
        # Raw string avoids the invalid '\d' escape warning; the pattern is unchanged.
        if match := re.search(r'Expiry (\d\d).*', weeklyDir, re.IGNORECASE):
            month = shortForm(os.path.basename(os.path.dirname(weeklyDir)))
            expiry = match.group(1) + month + year
            expiryDate = datetime.datetime.strptime(expiry, "%d%b%y")
            # Monthly = last expiry of its calendar month (a week later is next month).
            monthly = expiryDate.month != (expiryDate + datetime.timedelta(days=7)).month
            print('Processing ' + expiry + ' ' + ('Monthly' if monthly else 'Weekly'))

            tickerPattern = pattern2 if monthly else pattern1

            for file in glob.glob(weeklyDir + '/*.csv'):
                # Date and Time are read as strings and combined below, because
                # read_csv's `date_parser` / nested `parse_dates` are deprecated.
                df = pd.read_csv(file,
                                 header=None,
                                 names=['Ticker', 'Date', 'Time', 'Open', 'High',
                                        'Low', 'Close', 'Volume', 'Open Interest'],
                                 dtype={**dtypes, 'Date': str, 'Time': str})

                # Vectorised parse with the same strict format as before.
                df['Date/Time'] = pd.to_datetime(df['Date'] + ' ' + df['Time'],
                                                 format='%Y/%m/%d %H:%M')

                df['Ticker'] = (df['Ticker']
                                .str.replace(tickerPattern, replace1, regex=True)
                                .str.replace('Expiry', expiry, regex=False))

                optionFrames.append(df[columns])

    if optionFrames:
        finalDf = pd.concat(optionFrames, ignore_index=True)
    else:
        # Month folder without any expiry CSVs: fall back to an empty frame.
        finalDf = pd.DataFrame(columns=columns)

    minDateTime = finalDf['Date/Time'].min()
    maxDateTime = finalDf['Date/Time'].max()

    # Keep only the spot bars inside the time span covered by this month's options.
    spotDf = allSpotDf[(allSpotDf['Date/Time'] >= minDateTime) & (allSpotDf['Date/Time'] <= maxDateTime)]

    # A single stable sort + keep='first' gives the same winner per
    # (Ticker, Date/Time) as the incremental version: earliest option file first,
    # spot rows last.
    finalDf = (pd.concat([finalDf, spotDf], ignore_index=True)
               .sort_values(['Ticker', 'Date/Time'])
               .drop_duplicates(subset=['Ticker', 'Date/Time'], keep='first'))

    # Folder name is the full month name ('January' -> '01'); this raises
    # ValueError if the folder is not named after a month.
    monthAsDigit = datetime.datetime.strptime(os.path.basename(monthlyDir), '%B').strftime('%m')

    finalDf.to_parquet(f'C:/Personal/Projects/NSEIndexOptionsData/2022/nifty/{monthAsDigit}.parquet', index=False)


In [None]:
# Load one generated BANKNIFTY parquet for inspection ('01' is presumably the
# January file, following the month-digit naming used when the files are written).
df = pd.read_parquet('C:/Personal/Projects/NSEIndexOptionsData/2022/banknifty/01.parquet')

In [None]:
# Show the rows dated 3 January 2022 that have a positive open interest.
df.loc[
    (df['Date/Time'].dt.date == datetime.date(2022, 1, 3))
    & (df['Open Interest'] > 0)
]

In [None]:
# for file in glob.glob('C:/Personal/Projects/NSEIndexOptionsData/temp/nifty/December/Expiry 15th December' + '/*.csv'):
#         pd.read_csv(file, header=None)