In [None]:
import pandas as pd
import datetime
import glob

In [None]:
# Column layout, in output order, of every frame this notebook writes to parquet.
columns = [
    'Ticker', 'Date/Time',
    'Open', 'High', 'Low', 'Close',
    'Volume', 'Open Interest',
]

# dtypes passed to the CSV reader: the ticker is text and every price/volume
# field is float64. 'Date/Time' has no entry here.
dtypes = {'Ticker': 'string'}
dtypes.update({name: 'float64'
               for name in columns
               if name not in ('Ticker', 'Date/Time')})

def dateparse(d,t):
    """Parse a date string and a time string into a single ``datetime``.

    Args:
        d: Date text laid out as ``%Y/%m/%d`` (e.g. ``"2023/02/23"``).
        t: Time text laid out as ``%H:%M`` (e.g. ``"09:15"``).

    Returns:
        The ``datetime.datetime`` parsed from ``"<d> <t>"``.

    Raises:
        ValueError: If the joined text does not match ``%Y/%m/%d %H:%M``.
    """
    stamp = " ".join((d, t))
    return datetime.datetime.strptime(stamp, '%Y/%m/%d %H:%M')

def dateParseSpot(d,t):
    """Parse a month-first date string and a time-with-seconds string.

    Args:
        d: Date text laid out as ``%m/%d/%Y`` (e.g. ``"02/23/2023"``).
        t: Time text laid out as ``%H:%M:%S`` (e.g. ``"09:15:59"``).

    Returns:
        The ``datetime.datetime`` parsed from ``"<d> <t>"``.

    Raises:
        ValueError: If the joined text does not match ``%m/%d/%Y %H:%M:%S``.
    """
    stamp = " ".join((d, t))
    return datetime.datetime.strptime(stamp, '%m/%d/%Y %H:%M:%S')

# Prepare spot data parquet

In [None]:
# Build the spot parquet: read every CSV matched by the glob, normalise the
# tickers, merge the files, keep one row per (Ticker, Date/Time) and save.
spotFrames = []

# glob order is unspecified; sorting makes "first occurrence wins" reproducible.
for file in sorted(glob.glob('/mnt/c/Personal/Projects/PyAlgoMate/pyalgomate/backtesting/data/2023/Data/2023/NSE Indices-February 2023/*.csv')):
    # The first row of each file is skipped and our own column names are applied.
    # Date and Time are read as plain text and combined below; this avoids the
    # deprecated `date_parser` / dict-style `parse_dates` options of read_csv
    # and parses the whole column at once instead of row by row.
    df = pd.read_csv(file,
                     skiprows=1,
                     header=None,
                     names=['Ticker', 'Date', 'Time', 'Open', 'High',
                            'Low', 'Close', 'Volume', 'Open Interest'],
                     dtype={**dtypes, 'Date': str, 'Time': str})

    df['Date/Time'] = pd.to_datetime(df['Date'] + ' ' + df['Time'],
                                     format='%m/%d/%Y %H:%M:%S')

    # '.NSEBANK' is replaced wherever it occurs inside a ticker. regex=False
    # keeps the leading dot literal regardless of the pandas default for `regex`.
    # The remaining symbols are whole-value replacements (Series.replace), as
    # in the original chain.
    df['Ticker'] = (df['Ticker']
                    .str.replace('.NSEBANK', 'BANKNIFTY', regex=False)
                    .replace({'.CNX100': 'CNX100',
                              '.CNXIT': 'CNXIT',
                              '.NSEI': 'NIFTY'}))

    df = df[columns]
    spotFrames.append(df)

# Concatenate once instead of growing (and re-sorting) the frame per file.
# With no matching files an empty frame is written, as before.
if spotFrames:
    finalDf = pd.concat(spotFrames, ignore_index=True)
else:
    finalDf = pd.DataFrame(columns=columns)

# De-duplicate in file/row order first so the surviving row does not depend on
# how the sort orders equal keys, then sort for the output.
finalDf = (finalDf
           .drop_duplicates(subset=['Ticker', 'Date/Time'], keep='first')
           .sort_values(['Ticker', 'Date/Time'])
           .reset_index(drop=True))

finalDf.to_parquet('/mnt/c/Personal/Projects/PyAlgoMate/pyalgomate/backtesting/data/2023/spot-feb23.parquet',
                   index=False)


# Prepare expiry data

In [None]:
# Settings for one expiry: the expiry tag inserted into tickers, whether the
# source tickers carry the 'WK' marker, and the CSV files to read.
expiry = '23FEB23'
weekly = False
directory = '/mnt/c/Personal/Projects/PyAlgoMate/pyalgomate/backtesting/data/2023/Nifty Options/February/Expiry 23rd February/*.csv'

# define regex patterns to match and replace
# pattern1: <letters>WK<digits>(P|C)E    pattern2: <letters><digits>(P|C)E
# replace1 reorders the groups to <letters>Expiry(P|C)<digits>; the literal
# 'Expiry' placeholder is then swapped for the `expiry` tag.
pattern1 = r'^([a-zA-Z]+)WK(\d+)(P|C)E$'
pattern2 = r'^([a-zA-Z]+)(\d+)(P|C)E$'
replace1 = r'\1Expiry\3\2'

tickerPattern = pattern1 if weekly else pattern2
expiryFrames = []

# glob order is unspecified; sorting makes "first occurrence wins" reproducible.
for file in sorted(glob.glob(directory)):
    # Files are read without a header row. Date and Time are read as plain
    # text and combined below; this avoids the deprecated `date_parser` /
    # dict-style `parse_dates` options of read_csv and parses the whole
    # column at once instead of row by row.
    df = pd.read_csv(file,
                     header=None,
                     names=['Ticker', 'Date', 'Time', 'Open', 'High',
                            'Low', 'Close', 'Volume', 'Open Interest'],
                     dtype={**dtypes, 'Date': str, 'Time': str})

    df['Date/Time'] = pd.to_datetime(df['Date'] + ' ' + df['Time'],
                                     format='%Y/%m/%d %H:%M')

    # regex=True is required for the pattern and back-references to be
    # interpreted: when `regex` defaults to False the pattern is matched as
    # literal text and no ticker is rewritten. The placeholder swap is a plain
    # literal replacement.
    df['Ticker'] = (df['Ticker']
                    .str.replace(tickerPattern, replace1, regex=True)
                    .str.replace('Expiry', expiry, regex=False))

    df = df[columns]
    expiryFrames.append(df)

# Concatenate once instead of growing (and re-sorting) the frame per file.
# With no matching files an empty frame is written, as before.
if expiryFrames:
    finalDf = pd.concat(expiryFrames, ignore_index=True)
else:
    finalDf = pd.DataFrame(columns=columns)

# De-duplicate in file/row order first so the surviving row does not depend on
# how the sort orders equal keys, then sort for the output.
finalDf = (finalDf
           .drop_duplicates(subset=['Ticker', 'Date/Time'], keep='first')
           .sort_values(['Ticker', 'Date/Time'])
           .reset_index(drop=True))

finalDf.to_parquet(f'/mnt/c/Personal/Projects/PyAlgoMate/pyalgomate/backtesting/data/2023/nifty-{expiry}.parquet',
                   index=False)


In [None]:
# Path of the parquet file that the following cells load and inspect.
parquetFile = '/mnt/c/Personal/Projects/PyAlgoMate/pyalgomate/backtesting/data/2023/Banknifty Options/February/09FEB23.parquet'

In [None]:
# Load the file named by `parquetFile` into a DataFrame for inspection.
parquetDf = pd.read_parquet(parquetFile)

In [None]:
# Print pandas' summary of the loaded frame.
parquetDf.info()

In [None]:
# Display the last rows of the loaded frame (tail() with its default row count).
parquetDf.tail()

# Combine Weekly and Monthly files into one file

In [None]:
# Merge every parquet file matched by the glob into one frame, keep one row
# per (Ticker, Date/Time) and write the result as a single parquet file.
#
# NOTE(review): the glob below contains no wildcard, so it matches at most one
# file, and it names a 'nifty-' file while the output is 'banknifty-01.parquet'.
# Confirm both paths are the intended ones before running.
combinedFrames = [
    pd.read_parquet(file)
    # glob order is unspecified; sorting makes "first occurrence wins" reproducible.
    for file in sorted(glob.glob('/mnt/c/Personal/Projects/PyAlgoMate/pyalgomate/backtesting/data/2023/nifty-0dad.parquet'))
]

# Concatenate once instead of growing (and re-sorting) the frame per file.
# With no matching files an empty frame is written, as before.
if combinedFrames:
    finalDf = pd.concat(combinedFrames, ignore_index=True)
else:
    finalDf = pd.DataFrame(columns=columns)

# De-duplicate in file/row order first so the surviving row does not depend on
# how the sort orders equal keys, then sort once for the output. The original
# repeated this sort and de-duplication a second time before writing.
finalDf = (finalDf
           .drop_duplicates(subset=['Ticker', 'Date/Time'], keep='first')
           .sort_values(['Ticker', 'Date/Time'])
           .reset_index(drop=True))

finalDf.to_parquet(
    '/mnt/c/Personal/Projects/PyAlgoMate/pyalgomate/backtesting/data/2023/banknifty-01.parquet', index=False)


In [133]:
# In-place fix-up of an already written file: load it, rewrite every 'NSEI'
# occurrence in the tickers as 'NIFTY', and save it back over the same path.
niftyParquet = '/mnt/c/Personal/Projects/PyAlgoMate/pyalgomate/backtesting/data/2023/nifty-01.parquet'

df = pd.read_parquet(niftyParquet)
df = df.assign(Ticker=df['Ticker'].str.replace('NSEI', 'NIFTY'))
df.to_parquet(niftyParquet, index=False)
