In [3]:
import os
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt

pd.set_option("display.max_columns", None)

In [4]:
df = pd.read_csv(os.path.join("Data", "1_NIFTY500_5y.csv"), parse_dates=True, index_col=[0])

In [5]:
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits,Symbol
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018-11-09 00:00:00+05:30,20364.949219,20830.000000,20000.050781,20398.650391,19615.875000,873.0,0.0,0.0,3MINDIA.NS
2018-11-12 00:00:00+05:30,20650.000000,20655.750000,20380.000000,20464.300781,19679.005859,1134.0,0.0,0.0,3MINDIA.NS
2018-11-13 00:00:00+05:30,20469.949219,20522.750000,20300.000000,20439.300781,19654.966797,1585.0,0.0,0.0,3MINDIA.NS
2018-11-14 00:00:00+05:30,20453.800781,20900.000000,20453.800781,20748.199219,19952.011719,1002.0,0.0,0.0,3MINDIA.NS
2018-11-15 00:00:00+05:30,20889.900391,20899.900391,20500.000000,20608.650391,19817.818359,1593.0,0.0,0.0,3MINDIA.NS
...,...,...,...,...,...,...,...,...,...
2023-11-01 00:00:00+05:30,1961.000000,1986.949951,1926.000000,1980.400024,1980.400024,57973.0,0.0,0.0,ECLERX.NS
2023-11-02 00:00:00+05:30,1989.000000,2002.650024,1941.000000,1976.800049,1976.800049,52914.0,0.0,0.0,ECLERX.NS
2023-11-03 00:00:00+05:30,1978.550049,2005.550049,1960.050049,1966.900024,1966.900024,17424.0,0.0,0.0,ECLERX.NS
2023-11-06 00:00:00+05:30,1968.000000,2063.850098,1963.000000,2043.099976,2043.099976,70924.0,0.0,0.0,ECLERX.NS


In [6]:
df[df['Symbol'] == '3MINDIA.NS']

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits,Symbol
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018-11-09 00:00:00+05:30,20364.949219,20830.000000,20000.050781,20398.650391,19615.875000,873.0,0.0,0.0,3MINDIA.NS
2018-11-12 00:00:00+05:30,20650.000000,20655.750000,20380.000000,20464.300781,19679.005859,1134.0,0.0,0.0,3MINDIA.NS
2018-11-13 00:00:00+05:30,20469.949219,20522.750000,20300.000000,20439.300781,19654.966797,1585.0,0.0,0.0,3MINDIA.NS
2018-11-14 00:00:00+05:30,20453.800781,20900.000000,20453.800781,20748.199219,19952.011719,1002.0,0.0,0.0,3MINDIA.NS
2018-11-15 00:00:00+05:30,20889.900391,20899.900391,20500.000000,20608.650391,19817.818359,1593.0,0.0,0.0,3MINDIA.NS
...,...,...,...,...,...,...,...,...,...
2023-11-01 00:00:00+05:30,29829.099609,30500.050781,29590.500000,30320.900391,30320.900391,2663.0,0.0,0.0,3MINDIA.NS
2023-11-02 00:00:00+05:30,30320.900391,31450.000000,30320.900391,31325.250000,31325.250000,3070.0,0.0,0.0,3MINDIA.NS
2023-11-03 00:00:00+05:30,31329.000000,31450.000000,31059.900391,31289.099609,31289.099609,1210.0,0.0,0.0,3MINDIA.NS
2023-11-06 00:00:00+05:30,31310.000000,31661.949219,31310.000000,31450.750000,31450.750000,1258.0,0.0,0.0,3MINDIA.NS


## utils.py

In [7]:
def getPriceDifference(df: pd.DataFrame, prd: int):
    
    """
    Calculates price difference for past 1 row and past 5 rows 
    (i.e It will be price difference between past day and pass week if data is having 1-day interval)
    
    Params:
    df: pd.DataFrame
        Dataset in form of dataframe
    pdr: int
        Period to calculate returns
    
    Returns:
    Tuple of pd.Series
    """
    
    # shifted_open_1 = df["Open"].shift(-1)
    # shifted_open_prd = df["Open"].shift(-prd)
    
    # calc_fr_1_rw = ((df["High"] - shifted_open_1) / shifted_open_1) * 100
    # calc_fr_prd_rw = ((df["High"] - shifted_open_prd) / shifted_open_prd) * 100
    
    shifted_high_1 = df["High"].shift(-1)
    shifted_high_prd = df["High"].shift(-prd)
    
    # BullBro
    calc_fr_1_rw = ((shifted_high_1 - df["Low"]) / df["Low"]) * 100
    calc_fr_prd_rw = ((shifted_high_prd - df["Low"]) / df["Low"]) * 100
    
    # ShortStan
    # calc_fr_1_rw = ((df["Low"] - shifted_high_1) / shifted_high_1) * 100
    # calc_fr_prd_rw = ((df["Low"] - shifted_high_prd) / shifted_high_1) * 100

    return calc_fr_1_rw, calc_fr_prd_rw

# raw_df["calc_fr_1_rw"], raw_df["calc_fr_5_rw"] = getPriceDifference(raw_df)

In [8]:
temp = df[df['Symbol'] == "3MINDIA.NS"]
temp['1d'], temp['5d'] = getPriceDifference(temp, 4)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['1d'], temp['5d'] = getPriceDifference(temp, 4)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['1d'], temp['5d'] = getPriceDifference(temp, 4)


In [12]:
temp.tail(10)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits,Symbol,1d,5d
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-10-25 00:00:00+05:30,29615.0,29899.550781,28920.849609,29307.099609,29307.099609,2330.0,0.0,0.0,3MINDIA.NS,2.002016,3.281714
2023-10-26 00:00:00+05:30,29307.099609,29499.849609,28870.0,29170.150391,29170.150391,3270.0,0.0,0.0,3MINDIA.NS,3.910458,5.646175
2023-10-27 00:00:00+05:30,29101.5,29998.949219,29101.449219,29775.400391,29775.400391,2905.0,0.0,0.0,3MINDIA.NS,2.689044,8.070219
2023-10-30 00:00:00+05:30,29884.0,29884.0,29500.0,29597.599609,29597.599609,805.0,0.0,0.0,3MINDIA.NS,1.254065,6.610169
2023-10-31 00:00:00+05:30,29650.050781,29869.949219,29500.0,29762.699219,29762.699219,1045.0,0.0,0.0,3MINDIA.NS,3.390003,7.328641
2023-11-01 00:00:00+05:30,29829.099609,30500.050781,29590.5,30320.900391,30320.900391,2663.0,0.0,0.0,3MINDIA.NS,6.284111,7.04618
2023-11-02 00:00:00+05:30,30320.900391,31450.0,30320.900391,31325.25,31325.25,3070.0,0.0,0.0,3MINDIA.NS,3.723833,
2023-11-03 00:00:00+05:30,31329.0,31450.0,31059.900391,31289.099609,31289.099609,1210.0,0.0,0.0,3MINDIA.NS,1.938348,
2023-11-06 00:00:00+05:30,31310.0,31661.949219,31310.0,31450.75,31450.75,1258.0,0.0,0.0,3MINDIA.NS,1.167359,
2023-11-08 00:00:00+05:30,31294.699219,31675.5,31239.150391,31573.900391,31573.900391,491.0,0.0,0.0,3MINDIA.NS,,


In [13]:
from copy import deepcopy as dc

def prepare_dataframe_for_lstm(df, n_steps):
    df = dc(df)
    
    # df.set_index('Date', inplace=True)
    
    for i in range(1, n_steps+1):
        df[f'Open(t-{i})'] = df['Open'].shift(i)
        df[f'High(t-{i})'] = df['High'].shift(i)
        df[f'Low(t-{i})'] = df['Low'].shift(i)
        df[f'Close(t-{i})'] = df['Close'].shift(i)
        
    df.dropna(inplace=True)
    
    return df

# lookback = 4
# shifted_df = prepare_dataframe_for_lstm(data, lookback)
# shifted_df

In [15]:
sym_lst = df['Symbol'].unique()
df_grpby = df.groupby('Symbol')
targets = pd.DataFrame()

# adding returns based on number of trading days
for grp in df_grpby:
    # swing, positional, business cylic
    # _, grp[1]['1w'] = getPriceDifference(grp[1], 5)
    # _, grp[1]['2w'] = getPriceDifference(grp[1], 10)
    # _, grp[1]['3w'] = getPriceDifference(grp[1], 15)
    # _, grp[1]['1m'] = getPriceDifference(grp[1], 20)
    # _, grp[1]['5w'] = getPriceDifference(grp[1], 25)
    # _, grp[1]['6w'] = getPriceDifference(grp[1], 30)
    # _, grp[1]['7w'] = getPriceDifference(grp[1], 35)
    # _, grp[1]['2m'] = getPriceDifference(grp[1], 40)
    # _, grp[1]['9w'] = getPriceDifference(grp[1], 45)
    # _, grp[1]['10w'] = getPriceDifference(grp[1], 50)
    # _, grp[1]['11w'] = getPriceDifference(grp[1], 55)
    # _, grp[1]['3m'] = getPriceDifference(grp[1], 60)
    
    # # half-yearly
    # _, grp[1]['6m'] = getPriceDifference(grp[1], 125)
    
    # # annual rotators
    # _, grp[1]['1y'] = getPriceDifference(grp[1], 252)
    
    # # long term
    # _, grp[1]['3y'] = getPriceDifference(grp[1], 756)
    # _, grp[1]['5y'] = getPriceDifference(grp[1], 1260)

    grp[1]["calc_fr_1_rw"], grp[1]["calc_fr_5_rw"] = getPriceDifference(grp[1], 4)
    
    targets = targets.append(grp[1])


  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.

  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.

  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.

  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.

  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.

  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.

  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.

  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.

  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.

  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.

  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.

  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.

  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.

  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])
  targets = targets.append(grp[1])


In [16]:
targets.shape

(570593, 11)

In [17]:
targets

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits,Symbol,calc_fr_1_rw,calc_fr_5_rw
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2018-11-09 00:00:00+05:30,20364.949219,20830.000000,20000.050781,20398.650391,19615.875000,873.0,0.0,0.0,3MINDIA.NS,3.278488,4.499237
2018-11-12 00:00:00+05:30,20650.000000,20655.750000,20380.000000,20464.300781,19679.005859,1134.0,0.0,0.0,3MINDIA.NS,0.700442,2.011038
2018-11-13 00:00:00+05:30,20469.949219,20522.750000,20300.000000,20439.300781,19654.966797,1585.0,0.0,0.0,3MINDIA.NS,2.955665,1.969953
2018-11-14 00:00:00+05:30,20453.800781,20900.000000,20453.800781,20748.199219,19952.011719,1002.0,0.0,0.0,3MINDIA.NS,2.181011,0.224897
2018-11-15 00:00:00+05:30,20889.900391,20899.900391,20500.000000,20608.650391,19817.818359,1593.0,0.0,0.0,3MINDIA.NS,1.413901,1.170246
...,...,...,...,...,...,...,...,...,...,...,...
2023-11-01 00:00:00+05:30,1532.050049,1539.000000,1506.099976,1523.050049,1523.050049,12890.0,0.0,0.0,ZYDUSWELL.NS,3.578781,2.204374
2023-11-02 00:00:00+05:30,1530.000000,1560.000000,1522.000000,1547.849976,1547.849976,14789.0,0.0,0.0,ZYDUSWELL.NS,2.595269,
2023-11-03 00:00:00+05:30,1548.849976,1561.500000,1541.000000,1543.250000,1543.250000,23836.0,0.0,0.0,ZYDUSWELL.NS,3.046720,
2023-11-06 00:00:00+05:30,1540.000000,1587.949951,1512.400024,1555.599976,1555.599976,63393.0,0.0,0.0,ZYDUSWELL.NS,1.778632,


In [19]:
targets['rolling_return'] = np.where(targets['calc_fr_5_rw'] >= 10, 1, 0)
# targets['rolling_return'] = np.where(targets['calc_fr_5_rw'] <= -3, 1, 0)
print((targets[targets['rolling_return'] == 1].shape[0] / targets.shape[0]) * 100)

11.697830152139966


In [13]:
# getIndicators.py

def getRSI(df: pd.DataFrame, timeperiod: int = 14, bands: tuple = (30, 70)) -> pd.DataFrame:
    """
    MOMENTUM INDICATOR
    Calculates relative strength index indicator with features

    Params:
    df = pd.DataFrame
        Dataset in form of dataframe
    timeperiod: int, default = 14 rows
        Time-periods in rows to consider while calculating RSI values
    bands: int, default = (30, 70)
        Bands representing rsi values to acknowledge oversold and overbought signals respectively
      
    Returns:
    pd.DataFrame of rsi, oversold, overbought & price difference in percentage
    """
    
    df_fn = pd.DataFrame(columns = ["rsi" + str(timeperiod) + "_RSI", "oversold_RSI", "overbought_RSI", "div_RSI", "conv_RSI", 
                                    "div_1_price_diff_values_RSI", "conv_1_price_diff_values_RSI",
                                    "div_5_price_diff_values_RSI", "conv_5_price_diff_values_RSI"],
                         index = df.index)
    
    rsi = pd.Series(ta.RSI(df["Close"], timeperiod = timeperiod))
    oversold = pd.Series(np.where(rsi <= bands[0], 1, 0)).values # upward momentum
    overbought = pd.Series(np.where(rsi >= bands[1], 1, 0)).values # downward momentum
    
    shifted_rsi = rsi.shift(1)
    div_RSI = pd.Series(np.where((rsi > bands[1]) & (shifted_rsi <=  bands[1]), 1, 0))
    conv_RSI = pd.Series(np.where((rsi < bands[0]) & (shifted_rsi >=  bands[0]), 1, 0))
    shifted_div_RSI = div_RSI.shift(1).values
    shifted_conv_RSI = conv_RSI.shift(1).values
    
    div_1_price_diff_values_RSI = pd.Series(np.where(shifted_div_RSI == 1, df["calc_fr_1_rw"], 0)).values
    conv_1_price_diff_values_RSI = pd.Series(np.where(shifted_conv_RSI == 1, df["calc_fr_1_rw"], 0)).values
    div_5_price_diff_values_RSI = pd.Series(np.where(shifted_div_RSI == 1, df["calc_fr_5_rw"], 0)).values
    conv_5_price_diff_values_RSI = pd.Series(np.where(shifted_conv_RSI == 1, df["calc_fr_5_rw"], 0)).values
    
    asgn_list = [rsi, oversold, overbought, div_RSI.values, conv_RSI.values,
                 div_1_price_diff_values_RSI, conv_1_price_diff_values_RSI,
                 div_5_price_diff_values_RSI, conv_5_price_diff_values_RSI]
    
    for cols, vals in zip(list(df_fn.columns), asgn_list):
        df_fn[cols] = vals
    
    return df_fn

def getSMA(df: pd.DataFrame, timeperiod: int = 7, other_sma: int = 21) -> pd.DataFrame:
    """
    TREND INDICATOR
    Calculates simple moving average indicator with features

    Params:
    df = pd.DataFrame
        Dataset in form of dataframe
    timeperiod: int, default = 7 rows
        Time-periods in rows to consider while calculating SMA values
    other_sma: int, default = 21 rows
        Other SMA to calculate percentage price difference
    
    Returns:
    pd.DataFrame with calculated features
    """
    
    df_fn = pd.DataFrame(columns = [str(timeperiod) + "_SMA", str(other_sma) + "_SMA", "div_SMA", "conv_SMA", 
                                    "div_1_price_diff_values_SMA", "conv_1_price_diff_values_SMA",
                                    "div_5_price_diff_values_SMA", "conv_5_price_diff_values_SMA",
                                    "ratio_" + str(timeperiod) + "_SMA", "ratio_" + str(other_sma) + "_SMA",
                                    "std_" + str(timeperiod) + "_SMA", "std_" + str(other_sma) + "_SMA",
                                    "trend_" + str(timeperiod) + "_" + str(other_sma) + "_SMA"],
                         index = df.index)
    
    sma1 = pd.Series(ta.SMA(df["Close"], timeperiod = timeperiod))
    sma2 = pd.Series(ta.SMA(df["Close"], timeperiod = other_sma))
    
    shifted_sma1 = sma1.shift(1)
    shifted_sma2 = sma2.shift(1)
    div_SMA = pd.Series(np.where((sma1 < sma2) & (shifted_sma1 > shifted_sma2), 1, 0))
    conv_SMA = pd.Series(np.where((sma1 > sma2) & (shifted_sma1 < shifted_sma2), 1, 0))
    shifted_div_SMA = div_SMA.shift(1).values
    shifted_conv_SMA = conv_SMA.shift(1).values
    
    div_1_price_diff_values_SMA = pd.Series(np.where(shifted_div_SMA == 1, df["calc_fr_1_rw"], 0)).values
    conv_1_price_diff_values_SMA = pd.Series(np.where(shifted_conv_SMA == 1, df["calc_fr_1_rw"], 0)).values
    div_5_price_diff_values_SMA = pd.Series(np.where(shifted_div_SMA == 1, df["calc_fr_5_rw"], 0)).values
    conv_5_price_diff_values_SMA = pd.Series(np.where(shifted_conv_SMA == 1, df["calc_fr_5_rw"], 0)).values
    
    sma1_ratio = df["Close"] / sma1
    sma2_ratio = df["Close"] / sma2
    std1 = sma1.rolling(5).std()
    std2 = sma2.rolling(5).std()
    trend = pd.Series(np.where(sma1 > sma2, 1, 0)).values
    
    asgn_list = [sma1, sma2, div_SMA.values, conv_SMA.values,
                 div_1_price_diff_values_SMA, conv_1_price_diff_values_SMA,
                 div_5_price_diff_values_SMA, conv_5_price_diff_values_SMA,
                 sma1_ratio, sma2_ratio,
                 std1, std2,
                 trend]
    
    for cols, vals in zip(list(df_fn.columns), asgn_list):
        df_fn[cols] = vals
        
    return df_fn

def getEMA(df: pd.DataFrame, timeperiod: int = 7, other_ema: int = 21) -> pd.DataFrame:
    """
    TREND INDICATOR
    Calculates exponential moving average indicator with features

    Params:
    df = pd.DataFrame
        Dataset in form of dataframe
    timeperiod: int, default = 7 rows
        Time-periods in rows to consider while calculating EMA values
    other_ema: int, default = 21 rows
        Other EMA to calculate percentage price difference
    
    Returns:
    pd.DataFrame with calculated features
    """
    
    df_fn = pd.DataFrame(columns = [str(timeperiod) + "_EMA", str(other_ema) + "_EMA", "div_EMA", "conv_EMA", 
                                    "div_1_price_diff_values_EMA", "conv_1_price_diff_values_EMA",
                                    "div_5_price_diff_values_EMA", "conv_5_price_diff_values_EMA",
                                    "ratio_" + str(timeperiod) + "_EMA", "ratio_" + str(other_ema) + "_EMA",
                                    "std_" + str(timeperiod) + "_EMA", "std_" + str(other_ema) + "_EMA",
                                    "trend_" + str(timeperiod) + str(other_ema) + "_EMA"],
                         index = df.index)
    
    ema1 = pd.Series(ta.EMA(df["Close"], timeperiod = timeperiod))
    ema2 = pd.Series(ta.EMA(df["Close"], timeperiod = other_ema))
    
    shifted_ema1 = ema1.shift(1)
    shifted_ema2 = ema2.shift(1)
    div_EMA = pd.Series(np.where((ema1 < ema2) & (shifted_ema1 > shifted_ema2), 1, 0))
    conv_EMA = pd.Series(np.where((ema1 > ema2) & (shifted_ema1 < shifted_ema2), 1, 0))
    shifted_div_EMA = div_EMA.shift(1).values
    shifted_conv_EMA = conv_EMA.shift(1).values
    
    div_1_price_diff_values_EMA = pd.Series(np.where(shifted_div_EMA == 1, df["calc_fr_1_rw"], 0)).values
    conv_1_price_diff_values_EMA = pd.Series(np.where(shifted_conv_EMA == 1, df["calc_fr_1_rw"], 0)).values
    div_5_price_diff_values_EMA = pd.Series(np.where(shifted_div_EMA == 1, df["calc_fr_5_rw"], 0)).values
    conv_5_price_diff_values_EMA = pd.Series(np.where(shifted_conv_EMA == 1, df["calc_fr_5_rw"], 0)).values
    
    ema1_ratio = df["Close"] / ema1
    ema2_ratio = df["Close"] / ema2
    std1 = ema1.rolling(5).std()
    std2 = ema2.rolling(5).std()
    trend = pd.Series(np.where(ema1 > ema2, 1, 0)).values
    
    asgn_list = [ema1, ema2, div_EMA.values, conv_EMA.values,
                 div_1_price_diff_values_EMA, conv_1_price_diff_values_EMA,
                 div_5_price_diff_values_EMA, conv_5_price_diff_values_EMA,
                 ema1_ratio, ema2_ratio,
                 std1, std2,
                 trend]
    
    for cols, vals in zip(list(df_fn.columns), asgn_list):
        df_fn[cols] = vals
        
    return df_fn

def getDEMA(df: pd.DataFrame, timeperiod: int = 7, other_dema: int = 21) -> pd.DataFrame:
    """
    TREND INDICATOR
    Calculates double exponential moving average indicator with features

    Params:
    df = pd.DataFrame
        Dataset in form of dataframe
    timeperiod: int, default = 7 rows
        Time-periods in rows to consider while calculating DEMA values
    other_dema: int, default = 21 rows
        Other DEMA to calculate percentage price difference
    
    Returns:
    pd.DataFrame with calculated features
    """
    
    df_fn = pd.DataFrame(columns = [str(timeperiod) + "_DEMA", str(other_dema) + "_DEMA", "div_DEMA", "conv_DEMA", 
                                    "div_1_price_diff_values_DEMA", "conv_1_price_diff_values_DEMA",
                                    "div_5_price_diff_values_DEMA", "conv_5_price_diff_values_DEMA",
                                    "ratio_" + str(timeperiod) + "_DEMA", "ratio_" + str(other_dema) + "_DEMA",
                                    "std_" + str(timeperiod) + "_DEMA", "std_" + str(other_dema) + "_DEMA",
                                    "trend" + str(timeperiod) + str(other_dema) + "_DEMA"],
                         index = df.index)
    
    dema1 = pd.Series(ta.DEMA(df["Close"], timeperiod = timeperiod))
    dema2 = pd.Series(ta.DEMA(df["Close"], timeperiod = other_dema))
    
    shifted_dema1 = dema1.shift(1)
    shifted_dema2 = dema2.shift(1)
    div_DEMA = pd.Series(np.where((dema1 < dema2) & (shifted_dema1 > shifted_dema2), 1, 0))
    conv_DEMA = pd.Series(np.where((dema1 > dema2) & (shifted_dema1 < shifted_dema2), 1, 0))
    shifted_div_DEMA = div_DEMA.shift(1).values
    shifted_conv_DEMA = conv_DEMA.shift(1).values
    
    div_1_price_diff_values_DEMA = pd.Series(np.where(shifted_div_DEMA == 1, df["calc_fr_1_rw"], 0)).values
    conv_1_price_diff_values_DEMA = pd.Series(np.where(shifted_conv_DEMA == 1, df["calc_fr_1_rw"], 0)).values
    div_5_price_diff_values_DEMA = pd.Series(np.where(shifted_div_DEMA == 1, df["calc_fr_5_rw"], 0)).values
    conv_5_price_diff_values_DEMA = pd.Series(np.where(shifted_conv_DEMA == 1, df["calc_fr_5_rw"], 0)).values
    
    dema1_ratio = df["Close"] / dema1
    dema2_ratio = df["Close"] / dema2
    std1 = dema1.rolling(5).std()
    std2 = dema2.rolling(5).std()
    trend = pd.Series(np.where(dema1 > dema2, 1, 0)).values
    
    asgn_list = [dema1, dema2, div_DEMA.values, conv_DEMA.values,
                 div_1_price_diff_values_DEMA, conv_1_price_diff_values_DEMA,
                 div_5_price_diff_values_DEMA, conv_5_price_diff_values_DEMA,
                 dema1_ratio, dema2_ratio,
                 std1, std2,
                 trend]
    
    for cols, vals in zip(list(df_fn.columns), asgn_list):
        df_fn[cols] = vals
        
    return df_fn

def getBBANDS(df: pd.DataFrame, sma: int = 20) -> pd.DataFrame:
    """
    MOMENTUM INDICATOR
    Calculates bollinger bands ranging bands indicator with features
    
    Params:
    df = pd.DataFrame
        Dataset in form of dataframe
    sma = int, default = 20 rows
        Time-periods to look back while calculating SMA values
        
    Returns:
    pd.DataFrame with calculated features
    """
    
    df_fn = pd.DataFrame(columns = ["lower_BBANDS", "upper_BBANDS", "pos_momentum_BBANDS", "neg_momentum_BBANDS",
                                    "positive_momentum_1_price_diff_BBANDS", "negative_momentum_1_price_diff_BBANDS",
                                    "positive_momentum_5_price_diff_BBANDS", "negative_momentum_5_price_diff_BBANDS"],
                         index = df.index)
    
    sma_val = df["Close"].rolling(sma).mean()
    lower = sma_val - 2 * (df["Close"].rolling(sma).std())
    upper = sma_val + 2 * (df["Close"].rolling(sma).std())
    
    pos_momentum = pd.Series(np.where(df["Close"] < lower, 1, 0))
    neg_momentum = pd.Series(np.where(df["Close"] > upper, 1, 0))
    shifted_pos_momentum = pos_momentum.shift(1).values
    shifted_neg_momentum = neg_momentum.shift(1).values
    
    positive_momentum_1_price_diff_BBANDS = pd.Series(np.where(shifted_pos_momentum == 1, df["calc_fr_1_rw"], 0)).values
    negative_momentum_1_price_diff_BBANDS = pd.Series(np.where(shifted_neg_momentum == 1, df["calc_fr_1_rw"], 0)).values
    positive_momentum_5_price_diff_BBANDS = pd.Series(np.where(shifted_pos_momentum == 1, df["calc_fr_5_rw"], 0)).values
    negative_momentum_5_price_diff_BBANDS = pd.Series(np.where(shifted_neg_momentum == 1, df["calc_fr_5_rw"], 0)).values
    
    asgn_list = [lower, upper, pos_momentum.values, neg_momentum.values,
                 positive_momentum_1_price_diff_BBANDS, negative_momentum_1_price_diff_BBANDS,
                 positive_momentum_5_price_diff_BBANDS, negative_momentum_5_price_diff_BBANDS]
    
    for cols, vals in zip(list(df_fn.columns), asgn_list):
        df_fn[cols] = vals
    
    return df_fn

def getCCI(df: pd.DataFrame, timeperiod: int = 14) -> pd.DataFrame:
    """
    TREND INDICATOR
    Calculates commodity channel index indicator with features
    
    Params:
    df = pd.DataFrame
        Dataset in form of dataframe
    timeperiod: int, default = 14 rows
        Time-periods in rows to consider while calculating CCI values
        
    Returns:
    pd.DataFrame with calculated features
    """
    
    df_fn = pd.DataFrame(columns = [str(timeperiod) + "_CCI", "div_CCI", "conv_CCI",
                                    "div_1_price_diff_values_CCI", "conv_1_price_diff_values_CCI",
                                    "div_5_price_diff_values_CCI", "conv_5_price_diff_values_CCI",
                                    "trend_CCI"],
                         index = df.index)
    
    cci = pd.Series(ta.CCI(df["High"], df["Low"], df["Close"], timeperiod = timeperiod))
    
    shifted_cci = cci.shift(1).values
    div_CCI = pd.Series(np.where((shifted_cci > 100) & (cci < 100), 1, 0))
    conv_CCI = pd.Series(np.where((shifted_cci < -100) & (cci > -100), 1, 0))
    shifted_div_CCI = div_CCI.shift(1).values
    shifted_conv_CCI = conv_CCI.shift(1).values

    div_1_price_diff_values_CCI = pd.Series(np.where(shifted_div_CCI == 1, df["calc_fr_1_rw"], 0)).values
    conv_1_price_diff_values_CCI = pd.Series(np.where(shifted_conv_CCI == 1, df["calc_fr_1_rw"], 0)).values
    div_5_price_diff_values_CCI = pd.Series(np.where(shifted_div_CCI == 1, df["calc_fr_5_rw"], 0)).values
    conv_5_price_diff_values_CCI = pd.Series(np.where(shifted_conv_CCI == 1, df["calc_fr_5_rw"], 0)).values
    
    trend = cci.diff().values
    
    asgn_list = [cci, div_CCI.values, conv_CCI.values,
                 div_1_price_diff_values_CCI, conv_1_price_diff_values_CCI,
                 div_5_price_diff_values_CCI, conv_5_price_diff_values_CCI,
                 trend]
    
    for cols, vals in zip(list(df_fn.columns), asgn_list):
        df_fn[cols] = vals
    
    return df_fn

def getMACD(df: pd.DataFrame, fast_ema: int = 12, slow_ema: int = 26, signal_period: int = 9) -> pd.DataFrame:
    """
    MOMENTUM INDICATOR
    Calcualtes moving average convergence and divergence with features
    
    Params:
    df = pd.DataFrame
        Dataset in form of dataframe
    fast_ema: int, default = 12 rows
        Fast EMA lookback period to calculate
    slow_ema: int, default = 26 rows
        Slow EMA lookback period to calculate
    signal_period: int, default = 9 rows
        EMA lookback period of macd line
    
    Returns:
    pd.DataFrame with calculated features
    """
    
    df_fn = pd.DataFrame(columns = ["macd_MACD", "signal_MACD", "hist_MACD", "diff_MACD", "div_MACD", "conv_MACD",
                                    "div_1_price_diff_values_MACD", "conv_1_price_diff_values_MACD",
                                    "div_5_price_diff_values_MACD", "conv_5_price_diff_values_MACD",],
                         index = df.index)
    
    macd, signal, hist = ta.MACD(df["Close"], fastperiod = fast_ema, slowperiod = slow_ema,
                                 signalperiod = signal_period)
    
    diff = abs(macd - signal)
    
    shifted_macd = macd.shift(1)
    shifted_signal = signal.shift(1)
    div_MACD = pd.Series(np.where((macd < signal) & (shifted_macd > shifted_signal), 1, 0))
    conv_MACD = pd.Series(np.where((macd > signal) & (shifted_macd < shifted_signal), 1, 0))
    shifted_div_MACD = div_MACD.shift(1).values
    shifted_conv_MACD = conv_MACD.shift(1).values
    
    div_1_price_diff_values_MACD = pd.Series(np.where(shifted_div_MACD == 1, df["calc_fr_1_rw"], 0)).values
    conv_1_price_diff_values_MACD = pd.Series(np.where(shifted_conv_MACD == 1, df["calc_fr_1_rw"], 0)).values
    div_5_price_diff_values_MACD = pd.Series(np.where(shifted_div_MACD == 1, df["calc_fr_5_rw"], 0)).values
    conv_5_price_diff_values_MACD = pd.Series(np.where(shifted_conv_MACD == 1, df["calc_fr_5_rw"], 0)).values
    
    asgn_list = [macd, signal, hist, diff, div_MACD.values, conv_MACD.values,
                div_1_price_diff_values_MACD, conv_1_price_diff_values_MACD,
                div_5_price_diff_values_MACD, conv_5_price_diff_values_MACD]
    
    for cols, vals in zip(list(df_fn.columns), asgn_list):
        df_fn[cols] = vals
    
    return df_fn

def getROC(df: pd.DataFrame, timeperiod: int = 10) -> pd.DataFrame:
    """
    MOMENTUM INDICATOR
    Calculates rate of change indicator with features
    
    Params:
    df = pd.DataFrame
        Dataset in form of dataframe
    timeperiod: int, default = 10 rows
        Lookback rows to calculate ROC values
        
    Returns:
    pd.DataFrame with calculated features
    """
    
    df_fn = pd.DataFrame(columns = [str(timeperiod) + "_ROC", "div_ROC", "trend_ROC"],
                         index = df.index)
    
    roc = pd.Series(ta.ROC(df["Close"], timeperiod = timeperiod))
    
    div_ROC = df["Close"] - roc
    
    shifted_roc = roc.shift(1)
    trend = np.sign(roc - shifted_roc)
    
    asgn_list = [roc, div_ROC, trend]
    
    for cols, vals in zip(list(df_fn.columns), asgn_list):
        df_fn[cols] = vals
    
    return df_fn

def getSTOCH(df: pd.DataFrame, sma_period: int = 14, sma_period_for_k: int = 3, bands: tuple = (20, 80)) -> pd.DataFrame:
    """
    MOMENTUM INDICATOR
    Adds STOCH indicator based on index
    
    Params:
    df = pd.DataFrame
        Dataset in form of dataframe
    sma_period: int, default = 14 rows
        Lookback rows to calculate SMA values
    sma_period_for_k: int, default = 3 rows
        Lookback period to calculate moving average of k-line
    bands: tuple, default = (20, 80)
        Threshold range to consider oversold and overbought
        
    Returns:
    pd.DataFrame with calculated features
    """
    
    df_fn = pd.DataFrame(columns = ["%K_STOCH", "%D_STOCH", "%KCross_STOCH", "%KDiv_STOCH", "%DDiv_STOCH",
                                    "%KOversold_STOCH", "%KOverbought_STOCH", "%DOversold_STOCH", "%DOverbought_STOCH",
                                    "%KRoc_STOCH", "%DRoc_STOCH"],
                         index = df.index)
    
    sma = ta.SMA(df["Close"], timeperiod = sma_period)
    high = sma.max()
    low = sma.min()
    
    k = 100 * ((df["Close"] - low) / (high - low))
    d = k.rolling(window = sma_period_for_k).mean()

    shifted_k = k.shift(1)
    shifted_d = k.shift(1)
    pos_k_cross = pd.Series(np.where((k > d) & (shifted_k <= shifted_d), 1, 0)).values
    neg_k_cross = pd.Series(np.where((k < d) & (shifted_k >= shifted_d), -1, 0)).values
    
    shifted_close = df["Close"].shift(1)
    pos_k_div = pd.Series(np.where((k < df["Close"]) & (shifted_k >= shifted_close), 1, 0)).values
    neg_k_div = pd.Series(np.where((k > df["Close"]) & (shifted_k <= shifted_close), -1, 0)).values
    pos_d_div = pd.Series(np.where((d < df["Close"]) & (shifted_d >= shifted_close), 1, 0)).values
    neg_d_div = pd.Series(np.where((d > df["Close"]) & (shifted_d <= shifted_close), -1, 0)).values
    
    k_oversold = pd.Series(np.where(k < bands[0], 1, 0)).values
    k_overbought = pd.Series(np.where(k > bands[1], 1, 0)).values
    d_oversold = pd.Series(np.where(d < bands[0], 1, 0)).values
    d_overbought = pd.Series(np.where(d > bands[1], 1, 0)).values
    
    k_roc = k.pct_change()
    d_roc = d.pct_change()

    
    asgn_list = [k, d, pos_k_cross + neg_k_cross, pos_k_div + neg_k_div, pos_d_div + neg_d_div,
                k_oversold, k_overbought, d_oversold, d_overbought,
                k_roc, d_roc,
                ]
    
    for cols, vals in zip(list(df_fn.columns), asgn_list):
        df_fn[cols] = vals
    
    return df_fn

def getMFI(df: pd.DataFrame, timeperiod: int = 14, pct_change_period: int = 5, bands: tuple = (20, 80)) -> pd.DataFrame:
    """
    MOMENTUM INDICATOR
    Adds MFI indicator based on index
    
    Params:
    df = pd.DataFrame
        Dataset in form of dataframe
    timeperiod: int, default = 14 rows
        Lookback rows to calculate MFI values
    pct_change_period: int, default = 5 rows
        Rows to consider when calculating percentage change in MFI values
     bands: tuple, default = (20, 80)
        Threshold range to consider oversold and overbought
    
    Returns:
    pd.DataFrame with calculated features
    """
    
    df_fn = pd.DataFrame(columns = ["mfi" + str(timeperiod) + "_MFI", "typicalPrc_MFI", "moneyFlow_MFI", "posMoneyFlow_MFI", "negMoneyFlow_MFI",
                                   "moneyFlowRatio_MFI", "oversold_MFI", "overbought_MFI", "change_MFI", "div_MFI",
                                   "movingAverage_MFI", "crossOverAbove_MFI", "momentum_MFI"],
                         index = df.index)
    
    mfi = pd.Series(ta.MFI(df["High"], df["Low"], df["Close"], df["Volume"], timeperiod = timeperiod))
    typical_price = (df["High"] + df["Low"] + df["Close"]) / 3
    
    shifted_typical_price = typical_price.shift(1)
    money_flow = typical_price * df["Volume"]
    pos_money_flow = pd.Series(np.where(typical_price > shifted_typical_price, money_flow, 0))
    neg_money_flow = pd.Series(np.where(typical_price <= shifted_typical_price, money_flow, 0))
    money_flow_ratio = pos_money_flow.rolling(window = 14).sum().values / neg_money_flow.rolling(window = 14).sum().values
    
    oversold = pd.Series(np.where(mfi < bands[0], 1, 0)).values
    overbought = pd.Series(np.where(mfi > bands[1], 1, 0)).values
    change = mfi.pct_change(periods = pct_change_period)
    
    div = mfi - df["Close"]
    moving_avg = mfi.rolling(window = 5).mean()
    crossover_above = pd.Series(np.where(mfi > moving_avg, 1, 0)).values
    momentum = mfi.diff(periods = 5)
    
    asgn_list = [mfi, typical_price, money_flow, pos_money_flow.values, neg_money_flow.values,
                 money_flow_ratio, oversold, overbought, change, div,
                 moving_avg, crossover_above, momentum]
    
    for cols, vals in zip(list(df_fn.columns), asgn_list):
        df_fn[cols] = vals
    
    return df_fn

In [14]:
# addIndicators.py

def addRSI(df: pd.DataFrame, timeperiod: int = 14, bands: tuple = (30, 70)) -> None:
    """
    Adds RSI indicator based on index
    
    Params:
    df = pd.DataFrame
        Dataset in form of dataframe
    timeperiod: int, default = 14 rows
        Time-periods in rows to consider while calculating RSI values
    bands: int, default = (30, 70)
        Bands representing rsi values to acknowledge oversold and overbought signals respectively
    """
    
    # get RSI related values from RSI indicator
    df_fn = getRSI(df, timeperiod, bands)
    # fill RSI related values in our original dataframe    
    for column in list(df_fn.columns):
        df.loc[df.index.isin(df_fn.index), column] = df_fn[column].values
    
    # print("addRSI() finished ...")

def addSMA(df: pd.DataFrame, timeperiod: int = 7, other_sma: int = 21) -> None:
    """
    Adds SMA indicator based on index
    
    Params:
    df = pd.DataFrame
        Dataset in form of dataframe
    timeperiod: int, default = 7 rows
        Time-periods in rows to consider while calculating SMA values
    other_sma: int, default = 21 rows
        Other SMA to calculate percentage price difference
    """
    
    # get SMA related values from SMA indicator
    df_fn = getSMA(df, timeperiod, other_sma)
    # fill SMA related values in our original dataframe    
    for column in list(df_fn.columns):
        df.loc[df.index.isin(df_fn.index), column] = df_fn[column].values
        
    # print("addSMA() finished ... ")

def addEMA(df: pd.DataFrame, timeperiod: int = 7, other_ema: int = 21) -> None:
    """
    Adds EMA indicator based on index
    
    Params:
    df = pd.DataFrame
        Dataset in form of dataframe
    timeperiod: int, default = 7 rows
        Time-periods in rows to consider while calculating EMA values
    other_dema: int, default = 21 rows
        Other EMA to calculate percentage price difference
    """
    
    # get EMA related values from EMA indicator
    df_fn = getEMA(df, timeperiod, other_ema)
    # fill EMA related values in our original dataframe    
    for column in list(df_fn.columns):
        df.loc[df.index.isin(df_fn.index), column] = df_fn[column].values
        
    # print("addEMA() finished ... ")

def addDEMA(df: pd.DataFrame, timeperiod: int = 7, other_dema: int = 21) -> None:
    """
    Adds DEMA indicator based on index
    
    Params:
    df = pd.DataFrame
        Dataset in form of dataframe
    timeperiod: int, default = 7 rows
        Time-periods in rows to consider while calculating DEMA values
    other_dema: int, default = 21 rows
        Other DEMA to calculate percentage price difference
    """
    
    # get DEMA related values from DEMA indicator
    df_fn = getDEMA(df, timeperiod, other_dema)
    # fill DEMA related values in our original dataframe    
    for column in list(df_fn.columns):
        df.loc[df.index.isin(df_fn.index), column] = df_fn[column].values
        
    # print("addDEMA() finished ... ")

def addBBANDS(df: pd.DataFrame, sma: int = 20) -> None:
    """
    Adds BBANDS indicator based on index
    
    Params:
    df = pd.DataFrame
        Dataset in form of dataframe
    sma = int, default = 20 rows
        Time-periods to look back while calculating SMA values
    """
    
    # get BBANDS related values from BBANDS indicator
    df_fn = getBBANDS(df, sma)
    # fill BBANDS related values in our original dataframe    
    for column in list(df_fn.columns):
        df.loc[df.index.isin(df_fn.index), column] = df_fn[column].values
    
    # print("addBBANDS() finished ... ")

def addCCI(df: pd.DataFrame, timeperiod: int = 14) -> None:
    """
    Adds CCI indicator based on index
    
    Params:
    df = pd.DataFrame
        Dataset in form of dataframe
    timeperiod: int, default = 14 rows
        Time-periods in rows to consider while calculating CCI values
    """
    
    # get CCI related values from CCI indicator
    df_fn = getCCI(df, timeperiod)
    # fill CCI related values in our original dataframe
    for column in list(df_fn.columns):
        df.loc[df.index.isin(df_fn.index), column] = df_fn[column].values
    
    # print("addCCI() finished ... ")
    
def addMACD(df: pd.DataFrame, fast_ema: int = 12, slow_ema: int = 26, signal_period: int = 9) -> None:
    """
    Adds MACD indicator based on index
    
    Params:
    df = pd.DataFrame
        Dataset in form of dataframe
    fast_ema: int, default = 12 rows
        Fast EMA lookback period to calculate
    slow_ema: int, default = 26 rows
        Slow EMA lookback period to calculate
    signal_period: int, default = 9 rows
        EMA lookback period of macd line
    """
    
    # get MACD related values from MACD indicator
    df_fn = getMACD(df, fast_ema, slow_ema, signal_period)
    # fill MACD related values in our original dataframe    
    for column in list(df_fn.columns):
        df.loc[df.index.isin(df_fn.index), column] = df_fn[column].values
    
    # print("addMACD() finished ... ")

def addROC(df: pd.DataFrame, timeperiod: int = 10):
    """
    Adds ROC indicator based on index
    
    Params:
    df = pd.DataFrame
        Dataset in form of dataframe
    timeperiod: int, default = 10 rows
        Lookback rows to calculate ROC values
    """
    
    # get ROC related values from ROC indicator
    df_fn = getROC(df, timeperiod)
    # fill ROC related values in our original dataframe    
    for column in list(df_fn.columns):
        df.loc[df.index.isin(df_fn.index), column] = df_fn[column].values
    
    # print("addROC() finished ... ")

def addSTOCH(df: pd.DataFrame, sma_period: int = 14, sma_period_for_k: int = 3, bands: tuple = (20, 80)) -> None:
    """
    Adds STOCH indicator based on index
    
    Params:
    df = pd.DataFrame
        Dataset in form of dataframe
    sma_period: int, default = 14 rows
        Lookback rows to calculate SMA values
    sma_period_for_k: int, default = 3 rows
        Lookback period to calculate moving average of k-line
    bands: tuple, default = (20, 80)
        Threshold range to consider oversold and overbought
    """
    
    # get STOCH related values from STOCH indicator
    df_fn = getSTOCH(df, sma_period, sma_period_for_k, bands)
    # fill STOCH related values in our original dataframe    
    for column in list(df_fn.columns):
        df.loc[df.index.isin(df_fn.index), column] = df_fn[column].values
    
    # print("addSTOCH() finished ... ")

def addMFI(df: pd.DataFrame, timeperiod: int = 14, pct_change_period: int = 5, bands: tuple = (20, 80)) -> None:
    """
    Adds MFI indicator based on index
    
    Params:
    df = pd.DataFrame
        Dataset in form of dataframe
    timeperiod: int, default = 14 rows
        Lookback rows to calculate MFI values
    pct_change_period: int, default = 5 rows
        Rows to calculate percentage change in MFI values
     bands: tuple, default = (20, 80)
        Threshold range to consider oversold and overbought    
    """
    
    # get MFI related values from MFI indicator
    df_fn = getMFI(df, timeperiod)
    # fill MFI related values in our original dataframe    
    for column in list(df_fn.columns):
        df.loc[df.index.isin(df_fn.index), column] = df_fn[column].values
    
    # print("addMFI() finished ... ")

In [15]:
# indicators.py

def addAllIndicators(df):
    addRSI(df) # 9 features
    addSMA(df) # 11 features
    addEMA(df) # 11 features
    addDEMA(df) # 11 features
    addBBANDS(df) # 7 features
    addCCI(df) # 8 features
    addMACD(df) # 10 features
    addROC(df) # 3 features
    addMFI(df) # 13 features
    addSTOCH(df) # 13 features
    
    
    # TODO: Other technical indicators
    # addWILLR(df)
    # addWCLPRICE(df)
    # addOBV(df)

In [16]:
# train_targets = targets.loc[targets.index <= "2023-08-04"]
# test_targets = targets.loc[targets.index == "2023-08-07"]

In [25]:
# main.py
import time
grouped = targets.groupby('Symbol')
# initializing empty dataframe
final_df = pd.DataFrame()

# traversing all groups in grouped dataframe
start = time.time()
for group in grouped:
    # passing values at 1 index from tuple named group
    addAllIndicators(group[1])
    # final_df = final_df.append(prepare_dataframe_for_lstm(group[1], 4))
    final_df = final_df.append(group[1])
    
print(time.time() - start)    
    
final_df

  self.obj[key] = empty_value
  money_flow_ratio = pos_money_flow.rolling(window = 14).sum().values / neg_money_flow.rolling(window = 14).sum().values
  money_flow_ratio = pos_money_flow.rolling(window = 14).sum().values / neg_money_flow.rolling(window = 14).sum().values


78.75775241851807


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits,Symbol,calc_fr_1_rw,calc_fr_5_rw,rolling_return,rsi14_RSI,oversold_RSI,overbought_RSI,div_RSI,conv_RSI,div_1_price_diff_values_RSI,conv_1_price_diff_values_RSI,div_5_price_diff_values_RSI,conv_5_price_diff_values_RSI,7_SMA,21_SMA,div_SMA,conv_SMA,div_1_price_diff_values_SMA,conv_1_price_diff_values_SMA,div_5_price_diff_values_SMA,conv_5_price_diff_values_SMA,ratio_7_SMA,ratio_21_SMA,std_7_SMA,std_21_SMA,trend_7_21_SMA,7_EMA,21_EMA,div_EMA,conv_EMA,div_1_price_diff_values_EMA,conv_1_price_diff_values_EMA,div_5_price_diff_values_EMA,conv_5_price_diff_values_EMA,ratio_7_EMA,ratio_21_EMA,std_7_EMA,std_21_EMA,trend_721_EMA,7_DEMA,21_DEMA,div_DEMA,conv_DEMA,div_1_price_diff_values_DEMA,conv_1_price_diff_values_DEMA,div_5_price_diff_values_DEMA,conv_5_price_diff_values_DEMA,ratio_7_DEMA,ratio_21_DEMA,std_7_DEMA,std_21_DEMA,trend721_DEMA,lower_BBANDS,upper_BBANDS,pos_momentum_BBANDS,neg_momentum_BBANDS,positive_momentum_1_price_diff_BBANDS,negative_momentum_1_price_diff_BBANDS,positive_momentum_5_price_diff_BBANDS,negative_momentum_5_price_diff_BBANDS,14_CCI,div_CCI,conv_CCI,div_1_price_diff_values_CCI,conv_1_price_diff_values_CCI,div_5_price_diff_values_CCI,conv_5_price_diff_values_CCI,trend_CCI,macd_MACD,signal_MACD,hist_MACD,diff_MACD,div_MACD,conv_MACD,div_1_price_diff_values_MACD,conv_1_price_diff_values_MACD,div_5_price_diff_values_MACD,conv_5_price_diff_values_MACD,10_ROC,div_ROC,trend_ROC,mfi14_MFI,typicalPrc_MFI,moneyFlow_MFI,posMoneyFlow_MFI,negMoneyFlow_MFI,moneyFlowRatio_MFI,oversold_MFI,overbought_MFI,change_MFI,div_MFI,movingAverage_MFI,crossOverAbove_MFI,momentum_MFI,%K_STOCH,%D_STOCH,%KCross_STOCH,%KDiv_STOCH,%DDiv_STOCH,%KOversold_STOCH,%KOverbought_STOCH,%DOversold_STOCH,%DOverbought_STOCH,%KRoc_STOCH,%DRoc_STOCH
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1
2018-08-13 00:00:00+05:30,24550.050781,25400.000000,24400.000000,25031.750000,23986.070312,25457.0,0.0,0.0,3MINDIA.NS,4.096520,6.557377,0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,0.0,,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,0.0,,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,0.0,,,0.0,0.0,0.000000,0.000000,0.000000,0.0,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,24943.916667,6.349973e+08,0.000000e+00,0.000000e+00,,0.0,0.0,,,,0.0,,69.107163,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,
2018-08-14 00:00:00+05:30,25080.000000,25399.550781,24713.000000,24879.199219,23839.894531,6916.0,0.0,0.0,3MINDIA.NS,2.779305,5.207580,0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,0.0,,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,0.0,,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,0.0,,,0.0,0.0,0.000000,0.000000,0.000000,0.0,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,24997.250000,1.728810e+08,1.728810e+08,0.000000e+00,,0.0,0.0,,,,0.0,,67.705571,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.020281,
2018-08-16 00:00:00+05:30,24879.199219,25399.849609,24847.949219,25336.800781,24278.378906,1357.0,0.0,0.0,3MINDIA.NS,3.867522,4.582276,0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,0.0,,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,0.0,,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,0.0,,,0.0,0.0,0.000000,0.000000,0.000000,0.0,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,25194.866536,3.418943e+07,3.418943e+07,0.000000e+00,,0.0,0.0,,,,0.0,,71.909879,69.574205,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.062097,
2018-08-17 00:00:00+05:30,25550.000000,25808.949219,25101.000000,25223.449219,24169.761719,5070.0,0.0,0.0,3MINDIA.NS,3.581531,3.577547,0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,0.0,,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,0.0,,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,0.0,,,0.0,0.0,0.000000,0.000000,0.000000,0.0,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,25377.799479,1.286654e+08,1.286654e+08,0.000000e+00,,0.0,0.0,,,,0.0,,70.868439,70.161296,1.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.014483,0.008438
2018-08-20 00:00:00+05:30,25390.000000,26000.000000,25103.599609,25808.250000,24730.132812,14862.0,0.0,0.0,3MINDIA.NS,3.570602,3.075653,0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,0.0,,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,0.0,,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,0.0,,,0.0,0.0,0.000000,0.000000,0.000000,0.0,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,,0.0,0.0,0.0,0.000000,0.0,0.0,,,,,25637.283203,3.810213e+08,3.810213e+08,0.000000e+00,,0.0,0.0,,,,0.0,,76.241416,73.006578,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.075816,0.040553
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-07 00:00:00+05:30,1430.800049,1484.000000,1430.750000,1464.449951,1464.449951,396627.0,0.0,0.0,ZYDUSWELL.NS,6.933423,8.195003,0,47.835826,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1460.407122,1469.230940,0.0,0.0,0.0,0.000000,0.0,0.0,1.002768,0.996746,6.780865,2.655020,0.0,1458.550054,1468.944564,0.0,0.0,0.0,0.000000,0.0,0.0,1.004045,0.996940,9.251502,4.100561,0.0,1450.642956,1459.608814,0.0,0.0,0.0,0.000000,0.0,0.0,1.009518,1.003317,14.635152,7.034135,0.0,1436.953340,1501.731633,0.0,0.0,6.933423,0.000000,8.195003,0.0,-79.538938,0.0,1.0,0.0,0.000000,0.0,0.0,198.220627,-7.453568,-4.986966,-2.466602,2.466602,0.0,0.0,0.0,0.000000,0.0,0.0,-0.674856,1465.124807,1.0,58.954172,1459.733317,5.789696e+08,5.789696e+08,0.000000e+00,1.436301,0.0,0.0,-0.073012,-1405.495779,54.600276,1.0,-4.643374,25.777907,24.010592,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.119057,-0.012551
2023-08-08 00:00:00+05:30,1482.050049,1529.949951,1482.000000,1515.750000,1515.750000,363660.0,0.0,0.0,ZYDUSWELL.NS,7.213227,,0,60.355784,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1466.949986,1471.552368,0.0,0.0,0.0,0.000000,0.0,0.0,1.033266,1.030035,5.816068,2.185322,0.0,1472.850040,1473.199604,0.0,0.0,0.0,0.000000,0.0,0.0,1.029127,1.028883,8.516338,3.398786,0.0,1477.644707,1468.580776,0.0,1.0,0.0,0.000000,0.0,0.0,1.025788,1.032119,15.064798,6.059449,1.0,1432.976193,1509.913785,0.0,1.0,0.000000,0.000000,0.000000,0.0,171.987079,0.0,0.0,0.0,7.213227,0.0,,251.526017,-2.744245,-4.538422,1.794176,1.794176,0.0,1.0,0.0,0.000000,0.0,0.0,3.094712,1512.655288,1.0,70.694938,1509.233317,5.488478e+08,5.488478e+08,0.000000e+00,2.412380,0.0,0.0,0.105585,-1445.055062,55.950568,1.0,6.751457,29.952739,26.255343,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.161954,0.093490
2023-08-09 00:00:00+05:30,1532.000000,1588.900024,1514.650024,1559.250000,1559.250000,349883.0,0.0,0.0,ZYDUSWELL.NS,4.182483,,0,67.482693,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1479.807129,1475.626180,0.0,1.0,0.0,0.000000,0.0,0.0,1.053685,1.056670,7.391216,2.498761,1.0,1494.450030,1481.022367,0.0,1.0,0.0,0.000000,0.0,0.0,1.043360,1.052820,15.321550,4.842016,1.0,1514.246022,1483.935036,0.0,0.0,0.0,4.182483,0.0,,1.029720,1.050754,28.376403,9.971262,1.0,1420.887700,1530.817281,0.0,1.0,0.000000,4.182483,0.000000,,289.540187,0.0,0.0,0.0,0.000000,0.0,0.0,117.553108,4.446754,-2.741386,7.188140,7.188140,0.0,0.0,0.0,4.182483,0.0,,5.765644,1553.484356,1.0,75.989440,1554.266683,5.438115e+08,5.438115e+08,0.000000e+00,3.164834,0.0,0.0,0.245420,-1483.260560,58.945435,1.0,14.974335,33.492799,29.741148,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.118188,0.132766
2023-08-10 00:00:00+05:30,1560.000000,1578.000000,1522.250000,1533.000000,1533.000000,87253.0,0.0,0.0,ZYDUSWELL.NS,1.691575,,0,60.423544,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1487.414272,1478.573800,0.0,0.0,0.0,1.691575,0.0,,1.030648,1.036810,11.418127,3.968502,1.0,1504.087523,1485.747606,0.0,0.0,0.0,1.691575,0.0,,1.019223,1.031804,21.285171,7.431821,1.0,1526.162636,1492.691160,0.0,0.0,0.0,0.000000,0.0,0.0,1.004480,1.027004,36.841636,14.885806,1.0,1419.452309,1539.507676,0.0,0.0,0.000000,1.691575,0.000000,,175.169026,0.0,0.0,0.0,0.000000,0.0,0.0,-114.371161,7.936035,-0.605902,8.541937,8.541937,0.0,0.0,0.0,0.000000,0.0,0.0,2.896266,1530.103734,-1.0,73.327099,1544.416667,1.347550e+08,0.000000e+00,1.347550e+08,2.749124,0.0,0.0,0.357442,-1459.672901,62.807134,1.0,19.308498,31.356556,31.600698,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.063782,0.062524


In [18]:
# # main.py
# import time
# grouped = train_targets.groupby('Symbol')
# # initializing empty dataframe
# train_df = pd.DataFrame()

# # traversing all groups in grouped dataframe
# start = time.time()
# for group in grouped:
#     # passing values at 1 index from tuple named group
#     addAllIndicators(group[1])
#     train_df = train_df.append(prepare_dataframe_for_lstm(group[1], 4))
    
# print(time.time() - start)    
    
# train_df

In [19]:
# # main.py
# import time
# grouped = test_targets.groupby('Symbol')
# # initializing empty dataframe
# test_df = pd.DataFrame()

# # traversing all groups in grouped dataframe
# start = time.time()
# for group in grouped:
#     # passing values at 1 index from tuple named group
#     addAllIndicators(group[1])
#     test_df = test_df.append(prepare_dataframe_for_lstm(group[1], 4))
    
# print(time.time() - start)    
    
# test_df

In [26]:
final_df.to_csv(os.path.join('Data', '2_NIFTY500_5y_WithIndicator.csv'))

In [21]:
# train_df.to_csv(os.path.join('Data', '2_NIFTY500_5y_WithIndicator_Train.csv'))

In [24]:
targets

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits,Symbol,calc_fr_1_rw,calc_fr_5_rw,rolling_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2018-08-13 00:00:00+05:30,24550.050781,25400.000000,24400.000000,25031.750000,23986.070312,25457.0,0.0,0.0,3MINDIA.NS,4.096520,6.557377,0
2018-08-14 00:00:00+05:30,25080.000000,25399.550781,24713.000000,24879.199219,23839.894531,6916.0,0.0,0.0,3MINDIA.NS,2.779305,5.207580,0
2018-08-16 00:00:00+05:30,24879.199219,25399.849609,24847.949219,25336.800781,24278.378906,1357.0,0.0,0.0,3MINDIA.NS,3.867522,4.582276,0
2018-08-17 00:00:00+05:30,25550.000000,25808.949219,25101.000000,25223.449219,24169.761719,5070.0,0.0,0.0,3MINDIA.NS,3.581531,3.577547,0
2018-08-20 00:00:00+05:30,25390.000000,26000.000000,25103.599609,25808.250000,24730.132812,14862.0,0.0,0.0,3MINDIA.NS,3.570602,3.075653,0
...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-07 00:00:00+05:30,1430.800049,1484.000000,1430.750000,1464.449951,1464.449951,396627.0,0.0,0.0,ZYDUSWELL.NS,6.933423,8.195003,0
2023-08-08 00:00:00+05:30,1482.050049,1529.949951,1482.000000,1515.750000,1515.750000,363660.0,0.0,0.0,ZYDUSWELL.NS,7.213227,,0
2023-08-09 00:00:00+05:30,1532.000000,1588.900024,1514.650024,1559.250000,1559.250000,349883.0,0.0,0.0,ZYDUSWELL.NS,4.182483,,0
2023-08-10 00:00:00+05:30,1560.000000,1578.000000,1522.250000,1533.000000,1533.000000,87253.0,0.0,0.0,ZYDUSWELL.NS,1.691575,,0
