In [None]:
import pandas as pd
import datetime
import re
import pyalgomate.utils as utils
import os, zipfile, glob
from pyalgomate.utils.expiry import UnderlyingIndex

In [None]:
def getTicker(symbol):
    """Convert a raw option symbol to ``<UNDERLYING><DDMMMYY><C|P><STRIKE>``.

    Two input layouts are recognised:

    * Monthly: ``<UNDERLYING><YY><MMM><STRIKE><C|P>E``. The expiry date is
      obtained from ``utils.getNearestMonthlyExpiryDate`` for the first day
      of the contract month.
    * Weekly: ``<UNDERLYING><YY><M><DD><STRIKE><C|P>E`` where ``M`` is a single
      digit, or ``O``/``N``/``D`` for months 10/11/12.

    Args:
        symbol: Raw symbol string.

    Returns:
        The converted, upper-cased ticker. ``symbol`` is returned unchanged
        when it matches neither layout, or when it is a monthly contract on an
        underlying other than BANKNIFTY, NIFTY, FINNIFTY or MIDCPNIFTY.

    Raises:
        ValueError: if the month abbreviation or the weekly year/month/day
            digits do not form a valid date.
    """
    # Symbol prefix -> attribute name on UnderlyingIndex. Resolved lazily with
    # getattr so that only recognised monthly symbols touch the enum.
    underlyingAttrs = {
        'BANKNIFTY': 'BANKNIFTY',
        'NIFTY': 'NIFTY',
        'FINNIFTY': 'FINNIFTY',
        'MIDCPNIFTY': 'MIDCAPNIFTY',
    }

    m = re.match(r"([A-Z\:]+)(\d{2})([A-Z]{3})(\d+)([CP])E", symbol)

    if m is not None:
        underlying = m.group(1)
        attr = underlyingAttrs.get(underlying)
        if attr is None:
            # Previously this fell through with `underlyingIndex` unassigned and
            # crashed with UnboundLocalError; pass the symbol through instead,
            # like any other symbol this function cannot convert.
            return symbol

        month = datetime.datetime.strptime(m.group(3), '%b').month
        year = int(m.group(2)) + 2000
        underlyingIndex = getattr(UnderlyingIndex, attr)

        expiry = utils.getNearestMonthlyExpiryDate(
            datetime.date(year, month, 1), underlyingIndex)

        return f'{underlying}{expiry.strftime("%d%b%y")}{m.group(5)}{int(m.group(4))}'.upper()

    m = re.match(r"([A-Z\:]+)(\d{2})(\d|[OND])(\d{2})(\d+)([CP])E", symbol)

    if m is None:
        return symbol

    # Weekly symbols encode October/November/December as a single letter.
    monthCode = m.group(3)
    month = {'O': 10, 'N': 11, 'D': 12}.get(monthCode)
    if month is None:
        month = int(monthCode)

    year = int(m.group(2)) + 2000
    day = int(m.group(4))
    expiry = datetime.date(year, month, day)

    return f'{m.group(1)}{expiry.strftime("%d%b%y")}{m.group(6)}{int(m.group(5))}'.upper()

# Spot

In [None]:
dirName = "../PyAlgoMate-Strategies/data/temp/"

# Extract every .zip found (recursively) under dirName into the directory that
# contains it, then delete the archive.
for filename in glob.iglob(f'{dirName}/**/*.zip', recursive=True):
    print(filename)
    # The context manager closes the archive even if extraction raises; the
    # archive is only removed after a successful extraction.
    with zipfile.ZipFile(filename) as zip_ref:
        zip_ref.extractall(os.path.dirname(filename))
    os.remove(filename)

In [None]:
columns = ['Ticker', 'Date/Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Open Interest']

# Index names as they appear in the CSV files -> short names written to the output.
spotTickers = {
    'NIFTY 50': 'NIFTY',
    'NIFTY BANK': 'BANKNIFTY',
    'NIFTY FIN SERVICE': 'FINNIFTY',
    'NIFTY MID SELECT': 'MIDCPNIFTY',
}
columnTypes = {
    'Ticker': str,
    'Open': float,
    'High': float,
    'Low': float,
    'Close': float,
    'Volume': float,
    'Open Interest': float,
}

# Collect the per-file frames and concatenate once at the end. Concatenating
# onto an (initially empty) frame inside the loop re-copies all accumulated
# rows on every iteration and depends on how pandas treats empty frames when
# inferring result dtypes.
frames = []
for file in glob.glob(f'{dirName}/**/*.csv', recursive=True):
    print(file)
    df = pd.read_csv(file, header=None, names=columns)
    # Parse the offset-aware timestamps, then drop the timezone (wall-clock time is kept).
    df['Date/Time'] = pd.to_datetime(df['Date/Time'], format='%Y-%m-%dT%H:%M:%S%z').dt.tz_localize(None)
    df = df.astype(columnTypes)

    # Keep only the four index rows and rename them; .copy() avoids assigning
    # into a filtered view of df.
    df = df[df['Ticker'].isin(list(spotTickers))].copy()
    df['Ticker'] = df['Ticker'].map(spotTickers)

    frames.append(df)
    # The CSV is consumed: it is deleted once its rows have been collected.
    os.remove(file)

# With no input files, fall back to an empty frame carrying the expected columns.
finalDf = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame(columns=columns)

finalDf.to_parquet('../PyAlgoMate-Strategies/data/2023/spot-10.parquet')

# BANKNIFTY

In [None]:
# Extract every .zip found (recursively) under dirName next to itself, then delete it.
for filename in glob.iglob(f'{dirName}/**/*.zip', recursive=True):
    print(filename)
    # The context manager closes the archive even if extraction raises.
    with zipfile.ZipFile(filename) as zip_ref:
        zip_ref.extractall(os.path.dirname(filename))
    os.remove(filename)

columns = ['Ticker', 'Date/Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Open Interest']
columnTypes = {
    'Ticker': str,
    'Open': float,
    'High': float,
    'Low': float,
    'Close': float,
    'Volume': float,
    'Open Interest': float,
}

# Collect the per-file frames and concatenate once at the end instead of
# growing a DataFrame (seeded with an empty frame) inside the loop.
frames = []
for file in glob.glob(f'{dirName}/**/*.parquet', recursive=True):
    print(file)
    df = pd.read_parquet(file)
    # Parse the timestamps and drop the timezone (wall-clock time is kept).
    df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%dT%H:%M:%S%z').dt.tz_localize(None)
    df['Symbol'] = df['Symbol'].apply(getTicker)
    df = df.rename(columns={
        'Symbol': 'Ticker',
        'Date': 'Date/Time'
    })
    df = df.astype(columnTypes)

    frames.append(df)
    # The input file is consumed: it is deleted once its rows have been collected.
    os.remove(file)

if frames:
    finalDf = pd.concat(frames, ignore_index=True)
    # Keep the canonical columns first, followed by any extra input columns.
    finalDf = finalDf[columns + [c for c in finalDf.columns if c not in columns]]
else:
    # No input files: keep an empty frame carrying the expected columns.
    finalDf = pd.DataFrame(columns=columns)

finalDf = finalDf.sort_values(['Ticker', 'Date/Time']).drop_duplicates(subset=['Ticker', 'Date/Time'], keep='first')

finalDf[finalDf.Ticker.str.startswith('BANKNIFTY')].to_parquet('../PyAlgoMate-Strategies/data/2023/banknifty-10.parquet')

# NIFTY

In [None]:
# Extract every .zip found (recursively) under dirName next to itself, then delete it.
for filename in glob.iglob(f'{dirName}/**/*.zip', recursive=True):
    print(filename)
    # The context manager closes the archive even if extraction raises.
    with zipfile.ZipFile(filename) as zip_ref:
        zip_ref.extractall(os.path.dirname(filename))
    os.remove(filename)

columns = ['Ticker', 'Date/Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Open Interest']
columnTypes = {
    'Ticker': str,
    'Open': float,
    'High': float,
    'Low': float,
    'Close': float,
    'Volume': float,
    'Open Interest': float,
}

# Collect the per-file frames and concatenate once at the end instead of
# growing a DataFrame (seeded with an empty frame) inside the loop.
frames = []
for file in glob.glob(f'{dirName}/**/*.parquet', recursive=True):
    print(file)
    df = pd.read_parquet(file)
    # Parse the timestamps and drop the timezone (wall-clock time is kept).
    df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%dT%H:%M:%S%z').dt.tz_localize(None)
    df['Symbol'] = df['Symbol'].apply(getTicker)
    df = df.rename(columns={
        'Symbol': 'Ticker',
        'Date': 'Date/Time'
    })
    df = df.astype(columnTypes)

    frames.append(df)
    # The input file is consumed: it is deleted once its rows have been collected.
    os.remove(file)

if frames:
    finalDf = pd.concat(frames, ignore_index=True)
    # Keep the canonical columns first, followed by any extra input columns.
    finalDf = finalDf[columns + [c for c in finalDf.columns if c not in columns]]
else:
    # No input files: keep an empty frame carrying the expected columns.
    finalDf = pd.DataFrame(columns=columns)

finalDf = finalDf.sort_values(['Ticker', 'Date/Time']).drop_duplicates(subset=['Ticker', 'Date/Time'], keep='first')

finalDf[finalDf.Ticker.str.startswith('NIFTY')].to_parquet('../PyAlgoMate-Strategies/data/2023/nifty-10.parquet')

# FINNIFTY

In [None]:
# Extract every .zip found (recursively) under dirName next to itself, then delete it.
for filename in glob.iglob(f'{dirName}/**/*.zip', recursive=True):
    print(filename)
    # The context manager closes the archive even if extraction raises.
    with zipfile.ZipFile(filename) as zip_ref:
        zip_ref.extractall(os.path.dirname(filename))
    os.remove(filename)

columns = ['Ticker', 'Date/Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Open Interest']
columnTypes = {
    'Ticker': str,
    'Open': float,
    'High': float,
    'Low': float,
    'Close': float,
    'Volume': float,
    'Open Interest': float,
}

# Collect the per-file frames and concatenate once at the end instead of
# growing a DataFrame (seeded with an empty frame) inside the loop.
frames = []
for file in glob.glob(f'{dirName}/**/*.parquet', recursive=True):
    print(file)
    df = pd.read_parquet(file)
    # Parse the timestamps and drop the timezone (wall-clock time is kept).
    df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%dT%H:%M:%S%z').dt.tz_localize(None)
    df['Symbol'] = df['Symbol'].apply(getTicker)
    df = df.rename(columns={
        'Symbol': 'Ticker',
        'Date': 'Date/Time'
    })
    df = df.astype(columnTypes)

    frames.append(df)
    # The input file is consumed: it is deleted once its rows have been collected.
    os.remove(file)

if frames:
    finalDf = pd.concat(frames, ignore_index=True)
    # Keep the canonical columns first, followed by any extra input columns.
    finalDf = finalDf[columns + [c for c in finalDf.columns if c not in columns]]
else:
    # No input files: keep an empty frame carrying the expected columns.
    finalDf = pd.DataFrame(columns=columns)

finalDf = finalDf.sort_values(['Ticker', 'Date/Time']).drop_duplicates(subset=['Ticker', 'Date/Time'], keep='first')

finalDf[finalDf.Ticker.str.startswith('FINNIFTY')].to_parquet('../PyAlgoMate-Strategies/data/2023/finnifty-10.parquet')

# MIDCPNIFTY

In [None]:
# Extract every .zip found (recursively) under dirName next to itself, then delete it.
for filename in glob.iglob(f'{dirName}/**/*.zip', recursive=True):
    print(filename)
    # The context manager closes the archive even if extraction raises.
    with zipfile.ZipFile(filename) as zip_ref:
        zip_ref.extractall(os.path.dirname(filename))
    os.remove(filename)

columns = ['Ticker', 'Date/Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Open Interest']
columnTypes = {
    'Ticker': str,
    'Open': float,
    'High': float,
    'Low': float,
    'Close': float,
    'Volume': float,
    'Open Interest': float,
}

# Collect the per-file frames and concatenate once at the end instead of
# growing a DataFrame (seeded with an empty frame) inside the loop.
frames = []
for file in glob.glob(f'{dirName}/**/*.parquet', recursive=True):
    print(file)
    df = pd.read_parquet(file)
    # Parse the timestamps and drop the timezone (wall-clock time is kept).
    df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%dT%H:%M:%S%z').dt.tz_localize(None)
    df['Symbol'] = df['Symbol'].apply(getTicker)
    df = df.rename(columns={
        'Symbol': 'Ticker',
        'Date': 'Date/Time'
    })
    df = df.astype(columnTypes)

    frames.append(df)
    # The input file is consumed: it is deleted once its rows have been collected.
    os.remove(file)

if frames:
    finalDf = pd.concat(frames, ignore_index=True)
    # Keep the canonical columns first, followed by any extra input columns.
    finalDf = finalDf[columns + [c for c in finalDf.columns if c not in columns]]
else:
    # No input files: keep an empty frame carrying the expected columns.
    finalDf = pd.DataFrame(columns=columns)

finalDf = finalDf.sort_values(['Ticker', 'Date/Time']).drop_duplicates(subset=['Ticker', 'Date/Time'], keep='first')

finalDf[finalDf.Ticker.str.startswith('MIDCPNIFTY')].to_parquet('../PyAlgoMate-Strategies/data/2023/midcpnifty-10.parquet')

# Merge

In [33]:
# Load the per-index intermediate files produced by the sections above.
# The Spot section writes 'spot-10.parquet' (same '-10' suffix as the option
# files), so that is the file read here; the previous 'spot.parquet' path is
# not written anywhere in this notebook.
spot = pd.read_parquet('../PyAlgoMate-Strategies/data/2023/spot-10.parquet')
banknifty = pd.read_parquet('../PyAlgoMate-Strategies/data/2023/banknifty-10.parquet')
nifty = pd.read_parquet('../PyAlgoMate-Strategies/data/2023/nifty-10.parquet')
finnifty = pd.read_parquet('../PyAlgoMate-Strategies/data/2023/finnifty-10.parquet')
midcpnifty = pd.read_parquet('../PyAlgoMate-Strategies/data/2023/midcpnifty-10.parquet')

In [34]:
# Append the BANKNIFTY spot rows to the BANKNIFTY option rows, order by
# ticker and timestamp, keep the first row of each (Ticker, Date/Time) pair,
# and write the merged file.
(
    pd.concat(
        [banknifty, spot.loc[spot['Ticker'] == 'BANKNIFTY']],
        ignore_index=True,
    )
    .sort_values(by=['Ticker', 'Date/Time'])
    .drop_duplicates(subset=['Ticker', 'Date/Time'], keep='first')
    .to_parquet('../PyAlgoMate-Strategies/data/2023/banknifty/10.parquet')
)

In [35]:
# Append the NIFTY spot rows to the NIFTY option rows, order by ticker and
# timestamp, keep the first row of each (Ticker, Date/Time) pair, and write
# the merged file.
(
    pd.concat(
        [nifty, spot.loc[spot['Ticker'] == 'NIFTY']],
        ignore_index=True,
    )
    .sort_values(by=['Ticker', 'Date/Time'])
    .drop_duplicates(subset=['Ticker', 'Date/Time'], keep='first')
    .to_parquet('../PyAlgoMate-Strategies/data/2023/nifty/10.parquet')
)

In [36]:
# Append the FINNIFTY spot rows to the FINNIFTY option rows, order by ticker
# and timestamp, keep the first row of each (Ticker, Date/Time) pair, and
# write the merged file.
(
    pd.concat(
        [finnifty, spot.loc[spot['Ticker'] == 'FINNIFTY']],
        ignore_index=True,
    )
    .sort_values(by=['Ticker', 'Date/Time'])
    .drop_duplicates(subset=['Ticker', 'Date/Time'], keep='first')
    .to_parquet('../PyAlgoMate-Strategies/data/2023/finnifty/10.parquet')
)

In [37]:
# Append the MIDCPNIFTY spot rows to the MIDCPNIFTY option rows, order by
# ticker and timestamp, keep the first row of each (Ticker, Date/Time) pair,
# and write the merged file.
(
    pd.concat(
        [midcpnifty, spot.loc[spot['Ticker'] == 'MIDCPNIFTY']],
        ignore_index=True,
    )
    .sort_values(by=['Ticker', 'Date/Time'])
    .drop_duplicates(subset=['Ticker', 'Date/Time'], keep='first')
    .to_parquet('../PyAlgoMate-Strategies/data/2023/midcpnifty/10.parquet')
)

In [45]:
# Sanity check: show the first five rows of the merged MIDCPNIFTY file.
pd.read_parquet(
    '../PyAlgoMate-Strategies/data/2023/midcpnifty/10.parquet'
).head(n=5)

Unnamed: 0,Ticker,Date/Time,Open,High,Low,Close,Volume,Open Interest
737071,MIDCPNIFTY,2023-10-03 09:15:00,9112.9,9113.3,9062.55,9066.85,0.0,0.0
737072,MIDCPNIFTY,2023-10-03 09:16:00,9065.25,9068.05,9055.1,9058.4,0.0,0.0
737073,MIDCPNIFTY,2023-10-03 09:17:00,9057.15,9068.6,9055.9,9068.6,0.0,0.0
737074,MIDCPNIFTY,2023-10-03 09:18:00,9067.65,9072.95,9060.15,9061.35,0.0,0.0
737075,MIDCPNIFTY,2023-10-03 09:19:00,9062.05,9074.5,9062.05,9074.5,0.0,0.0
