In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import pytse_client as tse
from pytse_client.download import download_financial_indexes

In [2]:
marketIndex = download_financial_indexes(symbols = 'شاخص كل', write_to_csv = True, include_jdate = True)
marketIndex = marketIndex['شاخص كل']
marketIndex.rename(columns = {'value': 'TEDPIX'}, inplace = True)
print(f'Shape of TEDPIX: {marketIndex.shape}')
marketIndex.head()

Shape of TEDPIX: (3328, 3)


Unnamed: 0,jdate,TEDPIX,date
0,1387-09-14,9248.9,2008-12-04
1,1387-09-15,9248.9,2008-12-05
2,1387-09-16,9178.3,2008-12-06
3,1387-09-17,9130.5,2008-12-07
4,1387-09-18,9089.2,2008-12-08


In [3]:
tse.download(symbols = ['فولاد'], write_to_csv = True, include_jdate = True)
folad = tse.Ticker(symbol = 'فولاد',  adjust = True)
foladHist = folad.history
foladHist['pct_change'] = foladHist['adjClose'].pct_change() * 100
print(f'Shape of Folad price history: {foladHist.shape}')

foladHist.head()

Shape of Folad price history: (3417, 11)


Unnamed: 0,date,open,high,low,adjClose,value,volume,count,yesterday,close,pct_change
0,2007-03-11,16.0,16.0,16.0,16.0,889437216900,468077431,7736,16.0,16.0,
1,2007-03-12,16.0,16.0,16.0,16.0,193879458000,100041000,9214,16.0,16.0,0.0
2,2007-03-13,16.0,16.0,16.0,16.0,249241504527,126270939,5862,16.0,16.0,0.0
3,2007-03-14,16.0,16.0,16.0,16.0,51666379451,26705128,1901,16.0,16.0,0.0
4,2007-03-17,16.0,16.0,16.0,16.0,28239006789,14877283,1514,16.0,16.0,0.0


In [4]:
foladClient = folad.client_types
print(f"Shape of Folad's client history: {foladClient.shape}")
foladClient.to_csv('foladClient.csv')
foladClient.head()

Shape of Folad's client history: (3032, 18)


Unnamed: 0,date,individual_buy_count,corporate_buy_count,individual_sell_count,corporate_sell_count,individual_buy_vol,corporate_buy_vol,individual_sell_vol,corporate_sell_vol,individual_buy_value,corporate_buy_value,individual_sell_value,corporate_sell_value,individual_buy_mean_price,individual_sell_mean_price,corporate_buy_mean_price,corporate_sell_mean_price,individual_ownership_change
0,20220914,1987,8,983,9,35291055,9763396,35645122,9409329,182495693610,50332186110,183986143760,48841735960,5171.160046,5161.607913,5155.192528,5190.777787,-354067.0
1,20220913,2337,8,958,20,55162895,22115512,28376476,48901931,285905053830,114743237140,147157944330,253490346640,5182.923301,5185.913301,5188.359968,5183.646974,26786419.0
2,20220912,3240,7,899,17,45888066,8109930,21720353,32277643,234063072560,41313078170,110825079310,164551071420,5100.739538,5102.360874,5094.134989,5097.989076,24167713.0
3,20220911,1371,7,867,13,27460629,10290000,20446098,17304531,138161463190,51693700000,102801001160,87054162030,5031.256319,5027.90318,5023.683188,5030.714905,7014531.0
4,20220910,1456,8,1076,13,29062906,21378562,27200089,23241379,146260987130,107612798860,137059128840,116814657150,5032.565812,5038.922073,5033.678077,5026.150004,1862817.0


In [5]:
foladHist.insert(1,'step', np.arange(1, len(foladHist) + 1))
foladHist = pd.merge(foladHist, marketIndex, how = 'left', on = 'date')

cols = ['date', 'jdate', 'step', 'TEDPIX',
         'open', 'close', 'high', 'low', 'adjClose', 
        'value', 'volume', 'count', 'yesterday']
foladHist = foladHist[cols]

foladHist.head()

Unnamed: 0,date,jdate,step,TEDPIX,open,close,high,low,adjClose,value,volume,count,yesterday
0,2007-03-11,,1,,16.0,16.0,16.0,16.0,16.0,889437216900,468077431,7736,16.0
1,2007-03-12,,2,,16.0,16.0,16.0,16.0,16.0,193879458000,100041000,9214,16.0
2,2007-03-13,,3,,16.0,16.0,16.0,16.0,16.0,249241504527,126270939,5862,16.0
3,2007-03-14,,4,,16.0,16.0,16.0,16.0,16.0,51666379451,26705128,1901,16.0
4,2007-03-17,,5,,16.0,16.0,16.0,16.0,16.0,28239006789,14877283,1514,16.0


In [6]:
foladClient['date'] = pd.to_datetime(foladClient['date'])
foladClient.iloc[:, 1: 13] = foladClient.iloc[:, 1: 13].astype('float')

foladClient['Enter_individual_money'] = foladClient['individual_buy_value'] - foladClient['individual_sell_value']
foladClient['Individual_buy_per_capita'] = foladClient['individual_buy_value'] / foladClient['individual_buy_count']
foladClient['Individual_sell_per_capita'] = foladClient['individual_sell_value'] / foladClient['individual_sell_count']
foladClient['Individual_buy_power_to_Individual_sell'] = foladClient['Individual_buy_per_capita'] / foladClient['Individual_sell_per_capita']

foladClient.head()

Unnamed: 0,date,individual_buy_count,corporate_buy_count,individual_sell_count,corporate_sell_count,individual_buy_vol,corporate_buy_vol,individual_sell_vol,corporate_sell_vol,individual_buy_value,...,corporate_sell_value,individual_buy_mean_price,individual_sell_mean_price,corporate_buy_mean_price,corporate_sell_mean_price,individual_ownership_change,Enter_individual_money,Individual_buy_per_capita,Individual_sell_per_capita,Individual_buy_power_to_Individual_sell
0,2022-09-14,1987.0,8.0,983.0,9.0,35291055.0,9763396.0,35645122.0,9409329.0,182495700000.0,...,48841740000.0,5171.160046,5161.607913,5155.192528,5190.777787,-354067.0,-1490450000.0,91844840.0,187168000.0,0.490708
1,2022-09-13,2337.0,8.0,958.0,20.0,55162895.0,22115512.0,28376476.0,48901931.0,285905100000.0,...,253490300000.0,5182.923301,5185.913301,5188.359968,5183.646974,26786419.0,138747100000.0,122338500.0,153609500.0,0.796425
2,2022-09-12,3240.0,7.0,899.0,17.0,45888066.0,8109930.0,21720353.0,32277643.0,234063100000.0,...,164551100000.0,5100.739538,5102.360874,5094.134989,5097.989076,24167713.0,123238000000.0,72241690.0,123276000.0,0.586016
3,2022-09-11,1371.0,7.0,867.0,13.0,27460629.0,10290000.0,20446098.0,17304531.0,138161500000.0,...,87054160000.0,5031.256319,5027.90318,5023.683188,5030.714905,7014531.0,35360460000.0,100774200.0,118570900.0,0.849907
4,2022-09-10,1456.0,8.0,1076.0,13.0,29062906.0,21378562.0,27200089.0,23241379.0,146261000000.0,...,116814700000.0,5032.565812,5038.922073,5033.678077,5026.150004,1862817.0,9201858000.0,100454000.0,127378400.0,0.788627


In [18]:
foladMerge = pd.merge(foladHist, foladClient, how = 'left', on = 'date')
foladMerge.columns

Index(['date', 'jdate', 'step', 'TEDPIX', 'open', 'close', 'high', 'low',
       'adjClose', 'value', 'volume', 'count', 'yesterday',
       'individual_buy_count', 'corporate_buy_count', 'individual_sell_count',
       'corporate_sell_count', 'individual_buy_vol', 'corporate_buy_vol',
       'individual_sell_vol', 'corporate_sell_vol', 'individual_buy_value',
       'corporate_buy_value', 'individual_sell_value', 'corporate_sell_value',
       'individual_buy_mean_price', 'individual_sell_mean_price',
       'corporate_buy_mean_price', 'corporate_sell_mean_price',
       'individual_ownership_change', 'Enter_individual_money',
       'Individual_buy_per_capita', 'Individual_sell_per_capita',
       'Individual_buy_power_to_Individual_sell'],
      dtype='object')

In [9]:
# Log is for better mapping to target prediction

def ret_based_func(new_data):
    df = new_data
    return_df = pd.DataFrame({\
                              'ret1d_log':(np.log(df.adjClose) - np.log(df.yesterday)), \
                              'ret3d_log':(np.log(df.adjClose) - np.log(df.yesterday)).rolling(3).sum(), \
                              'ret7d_log':(np.log(df.adjClose) - np.log(df.yesterday)).rolling(7).sum(), \
                              'ret14d_log':(np.log(df.adjClose) - np.log(df.yesterday)).rolling(14).sum(), \
                              'ret30d_log':(np.log(df.adjClose) - np.log(df.yesterday)).rolling(30).sum(), \
                              'ret60d_log':(np.log(df.adjClose) - np.log(df.yesterday)).rolling(60).sum(), \
                              'ret90d_log':(np.log(df.adjClose) - np.log(df.yesterday)).rolling(90).sum(), \
                              'ret120d_log':(np.log(df.adjClose) - np.log(df.yesterday)).rolling(120).sum(), \
                              'ret300d_log':(np.log(df.adjClose) - np.log(df.yesterday)).rolling(300).sum(), \
                              'lastclose_log':(np.log(df['adjClose']) - np.log(df['close'])), \
                              'buy_queue_locked' : np.logical_and(np.isclose(df['high'], df['low']), df['high'] > df['yesterday']), \
                              'sell_queue_locked' : np.logical_and(np.isclose(df['high'], df['low']), df['low'] < df['yesterday']), \
                              })
    return return_df

price_features = ret_based_func(foladMerge)
price_features

Unnamed: 0,ret1d_log,ret3d_log,ret7d_log,ret14d_log,ret30d_log,ret60d_log,ret90d_log,ret120d_log,ret300d_log,lastclose_log,buy_queue_locked,sell_queue_locked
0,0.000000,,,,,,,,,0.000000,False,False
1,0.000000,,,,,,,,,0.000000,False,False
2,0.000000,0.000000,,,,,,,,0.000000,False,False
3,0.000000,0.000000,,,,,,,,0.000000,False,False
4,0.000000,0.000000,,,,,,,,0.000000,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
3412,-0.005946,-0.009891,-0.027453,-0.074676,-0.053418,-0.109309,-0.110912,0.021908,0.268646,-0.001986,False,False
3413,0.000000,-0.005946,-0.021633,-0.054172,-0.032854,-0.089873,-0.130819,0.021908,0.298476,0.000000,False,False
3414,0.013821,0.007874,0.001963,-0.032790,-0.010921,-0.077687,-0.120133,0.035729,0.274953,-0.005865,False,False
3415,0.015565,0.029385,0.017527,-0.028546,0.010284,-0.062122,-0.056118,0.063767,0.238708,-0.007692,False,False


In [11]:
def prp_based_func(foladData):
    df = foladData
    df.index = [pd.Timestamp(item) for item in df['date']]
    return_df = pd.DataFrame({\
        'prp_high30d':df['adjClose']/df['high'].rolling('30d').max(), \
        'prp_high60d':df['adjClose']/df['high'].rolling('60d').max(), \
        'prp_high90d':df['adjClose']/df['high'].rolling('90d').max(), \
        'prp_high120d':df['adjClose']/df['high'].rolling('120d').max(), \
        'prp_high300d':df['adjClose']/df['high'].rolling('300d').max(), \
        'prp_low30d':df['adjClose']/df['low'].rolling('30d').min(), \
        'prp_low60d':df['adjClose']/df['low'].rolling('60d').min(), \
        'prp_low90d':df['adjClose']/df['low'].rolling('90d').min(), \
        'prp_low120d':df['adjClose']/df['low'].rolling('120d').min(), \
        'prp_low300d':df['adjClose']/df['low'].rolling('300d').min(), \
        'prp_value3d30d':df['value'].rolling('3d').mean()/df['value'].rolling('30d').mean(), \
        'prp_value5d60d':df['value'].rolling('5d').mean()/df['value'].rolling('60d').mean(), \
        'prp_value15d120d':df['value'].rolling('15d').mean()/df['value'].rolling('120d').mean(), \
        'prp_value30d200d':df['value'].rolling('30d').mean()/df['value'].rolling('200d').mean(), \
        })
    return return_df

prop_features = prp_based_func(foladMerge)
prop_features

Unnamed: 0,prp_high30d,prp_high60d,prp_high90d,prp_high120d,prp_high300d,prp_low30d,prp_low60d,prp_low90d,prp_low120d,prp_low300d,prp_value3d30d,prp_value5d60d,prp_value15d120d,prp_value30d200d
2007-03-11,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
2007-03-12,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
2007-03-13,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
2007-03-14,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.476596,1.000000,1.000000,1.000000
2007-03-17,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.099964,0.388384,1.000000,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-09-10,0.859829,0.859829,0.859829,0.831817,0.831817,1.008016,1.021112,1.021112,1.021112,1.178262,0.761712,0.486124,0.376277,0.470836
2022-09-11,0.859829,0.859829,0.859829,0.831817,0.831817,1.008016,1.021112,1.021112,1.021112,1.178262,0.679600,0.508841,0.370173,0.462695
2022-09-12,0.871795,0.871795,0.871795,0.843393,0.843393,1.022044,1.035323,1.035323,1.035323,1.194659,0.803278,0.561589,0.375486,0.425143
2022-09-13,0.900870,0.885470,0.885470,0.856623,0.856623,1.038076,1.051563,1.051563,1.051563,1.213399,0.960099,0.657079,0.407252,0.429865


In [12]:
def wght_based_feature(foladData):
    print('weight based features')
    
    def value20d_based_func(foladData):
        return_df = pd.DataFrame({\
            'value_20d':foladData['value'].rolling(20).mean(), \
            })
        return return_df
    
    df_value20_data = pd.concat([foladData[['date','value']], value20d_based_func(foladData)], axis=1)
    
    def wght_based_func(stock_date_group):
        return_df = pd.DataFrame({\
            'value_weight':foladData['value']/foladData['value'].sum(), \
            'value_weight20d':foladData['value_20d']/foladData['value_20d'].sum(), \
            })
        return return_df

    df_wght_based = value20d_based_func(foladData)


    df_feature = pd.concat([df_value20_data, df_wght_based], axis=1)
    return df_feature

wght_data = wght_based_feature(foladMerge)
wght_data

weight based features


Unnamed: 0,date,value,value_20d,value_20d.1
2007-03-11,2007-03-11,889437216900,,
2007-03-12,2007-03-12,193879458000,,
2007-03-13,2007-03-13,249241504527,,
2007-03-14,2007-03-14,51666379451,,
2007-03-17,2007-03-17,28239006789,,
...,...,...,...,...
2022-09-10,2022-09-10,253873785990,3.332936e+11,3.332936e+11
2022-09-11,2022-09-11,189855163190,2.995558e+11,2.995558e+11
2022-09-12,2022-09-12,275376150730,2.956202e+11,2.956202e+11
2022-09-13,2022-09-13,400648290970,2.958984e+11,2.958984e+11


In [25]:
def indv_nonindv_based_feature(new_data, wght_data, prp_data):
    print('individual and non-individual features')
    features = pd.DataFrame([])

    # per capita map function
    def pcap_based_func(new_data):
        #initial_index = stock_group.index
        df = new_data
        #df.index = [pd.Timestamp(item) for item in df['date']]
        return_df = pd.DataFrame({\
            'Individual_buy_per_capita':np.where(df['individual_buy_count'] <= 0, \
                0.0, df['individual_buy_value']/df['individual_buy_count']), \
            'Individual_sell_per_capita':np.where(df['individual_sell_count'] <= 0, \
                0.0, df['individual_sell_value']/df['individual_sell_count']), \
            'indv_net_count_prp7d30':(df['individual_buy_count']-df['individual_sell_count']).rolling(7).sum()/\
                (df['individual_buy_count']+df['individual_sell_count']).rolling(30).sum(), \
            'indv_net_count_prp7d120':(df['individual_buy_count']-df['individual_sell_count']).rolling(7).sum()/\
                (df['individual_buy_count']+df['individual_sell_count']).rolling(120).sum(), \
            'indv_net_value':(df['individual_buy_value']-df['individual_sell_value']), \
            'indv_relnet_value_d14':((df['individual_buy_value']-df['individual_sell_value'])/\
                (df['individual_buy_value']+df['individual_sell_value'])).rolling(14).sum(), \
            'indv_relnet_value_d30':((df['individual_buy_value']-df['individual_sell_value'])/\
                (df['individual_buy_value']+df['individual_sell_value'])).rolling(30).sum(), \
            'indv_relnet_value_d60':((df['individual_buy_value']-df['individual_sell_value'])/\
                (df['individual_buy_value']+df['individual_sell_value'])).rolling(60).sum(), \
            'indv_relnet_value_d90':((df['individual_buy_value']-df['individual_sell_value'])/\
                (df['individual_buy_value']+df['individual_sell_value'])).rolling(90).sum(), \
            'indv_relnet_value_d120':((df['individual_buy_value']-df['individual_sell_value'])/\
                (df['individual_buy_value']+df['individual_sell_value'])).rolling(120).sum(), \
            'indv_relnet_value_d300':((df['individual_buy_value']-df['individual_sell_value'])/\
                (df['individual_buy_value']+df['individual_sell_value'])).rolling(300).sum(), \
            'indv_relnet5d10d_value_d10':((df['individual_buy_value'].rolling(5).mean()-df['individual_sell_value'].rolling(10).mean())/\
                (df['individual_buy_value']+df['individual_sell_value'])).rolling(10).mean(), \
            })
        #return_df.index = initial_index
        return return_df
    
    pcap_feature_data = pcap_based_func(new_data)
    #pcap_feature_data = stock_groups.apply(pcap_based_func).reset_index(drop=True)

    # stock percapita dataframe 
    stock_percapita = pd.concat([new_data[[ 'date', 'individual_buy_value', 'individual_sell_value']], \
        pcap_feature_data, wght_data, prp_data], axis=1)
    
    # per capita map function
    #stock_percapita_date_groups = stock_percapita.groupby('date')
    def mrkt_pcap_based_func(stock_date_group):
        initial_index = stock_date_group.index
        df = stock_date_group
        one_vec = np.ones([len(df)])
        return_df = pd.DataFrame({\
            'mrkt_indv_buy_pcap':(df['Individual_buy_per_capita']*df['value_weight20d']).sum()*one_vec, \
            'mrkt_indv_sell_pcap':(df['Individual_sell_per_capita']*df['value_weight20d']).sum()*one_vec, \
            'mrkt_indv_net_value':((df['individual_buy_value'] - df['Individual_sell_value'])*df['value_weight20d']).sum()*one_vec, \
            'mrkt_indv_absnet_value':(np.abs(df['individual_buy_value'] - df['individual_sell_value'])*df['value_weight20d']).sum()*one_vec, \
            'mrkt_prp_high30d':df['prp_high30d'].mean()*one_vec,\
            'mrkt_prp_high60d':df['prp_high60d'].mean()*one_vec,\
            'mrkt_prp_high90d':df['prp_high90d'].mean()*one_vec,\
            'mrkt_prp_high120d':df['prp_high120d'].mean()*one_vec,\
            'mrkt_prp_high300d':df['prp_high300d'].mean()*one_vec,\
            'mrkt_prp_low30d':df['prp_low30d'].mean()*one_vec,\
            'mrkt_prp_low60d':df['prp_low60d'].mean()*one_vec,\
            'mrkt_prp_low90d':df['prp_low90d'].mean()*one_vec,\
            'mrkt_prp_low120d':df['prp_low120d'].mean()*one_vec,\
            'mrkt_prp_low300d':df['prp_low300d'].mean()*one_vec\
            })
        return_df.index = initial_index
        return return_df
    
    #moh
    #mrkt_pcap_feature_data = pcap_based_func(new_data)
    #mrkt_pcap_feature_data = pd.concat([df_value20_data, df_wght_based], axis=1)

    #mrkt_pcap_feature_data = stock_percapita_date_groups.apply(mrkt_pcap_based_func).reset_index(drop=True)
    mrkt_pcap_feature_data = stock_percapita.apply(mrkt_pcap_based_func).reset_index(drop=True)
    
    # stock percapita dataframe 
    stock_percapita = pd.concat([stock_percapita, mrkt_pcap_feature_data], axis=1)

    # market rolling base
    #stock_percapita_groups = stock_percapita.groupby('stock_name')
    def mrkt_rolling_based_func(stock_percapita):
        #initial_index = stock_percapita_group.index
        df = stock_percapita
        df.index = [pd.Timestamp(item) for item in df['date']]
        return_df = pd.DataFrame({\
        'mrkt_indv_absnet_value3d': df['mrkt_indv_absnet_value'].rolling(3).sum(),\
        })
        #return_df.index = initial_index
        return return_df
    
    
    mrkt_rolling_data = stock_percapita.apply(mrkt_rolling_based_func).reset_index(drop=True)

    prc_mrkt_data = pd.concat([stock_percapita,mrkt_rolling_data],axis=1)

    # per capita map function
    # TODO: for daily powers => make them bolder!/ for others: use mean instead of sum
    #prc_mrkt_data_groups = prc_mrkt_data.groupby('stock_name')
    def relpcap_based_func(prc_mrkt_data):
        #initial_index = prc_mrkt_data_group.index
        df = prc_mrkt_data
        df.index = [pd.Timestamp(item) for item in df['date']]
        return_df = pd.DataFrame({\
            'indv_power':((df['Individual_buy_per_capita'] - df['Individual_sell_per_capita'])\
                /(df['Individual_buy_per_capita'] + df['Individual_sell_per_capita'])), \
            'indv_power3d':(df['Individual_buy_per_capita'] - df['Individual_sell_per_capita']).rolling(3).sum()\
                /(df['Individual_buy_per_capita'] + df['Individual_sell_per_capita']).rolling(3).mean(), \
            'indv_power5d':(df['Individual_buy_per_capita'] - df['Individual_sell_per_capita']).rolling(5).sum()\
                /(df['Individual_buy_per_capita'] + df['Individual_sell_per_capita']).rolling(5).mean(), \
            'indv_power7d':(df['Individual_buy_per_capita'] - df['Individual_sell_per_capita']).rolling(7).sum()\
                /(df['Individual_buy_per_capita'] + df['Individual_sell_per_capita']).rolling(7).mean(), \
            'indv_power14d':(df['Individual_buy_per_capita'] - df['Individual_sell_per_capita']).rolling(14).sum()\
                /(df['Individual_buy_per_capita'] + df['Individual_sell_per_capita']).rolling(14).mean(), \
            'indv_power30d':(df['Individual_buy_per_capita'] - df['Individual_sell_per_capita']).rolling(30).sum()\
                /(df['Individual_buy_per_capita'] + df['Individual_sell_per_capita']).rolling(30).mean(), \
            'indv_power5d25':(df['Individual_buy_per_capita'] - df['Individual_sell_per_capita']).rolling(5).sum()\
                /(df['Individual_buy_per_capita'] - df['Individual_sell_per_capita']).rolling(25).sum(),\
            'indv_buy_pcap_prpd7d30':df['Individual_buy_per_capita'].rolling(7).mean()/df['Individual_buy_per_capita'].rolling(30).mean(), \
            'indv_buy_pcap_prpd14d60':df['Individual_buy_per_capita'].rolling(14).mean()/df['Individual_buy_per_capita'].rolling(60).mean(), \
            'indv_buy_pcap_prpd30d120':df['Individual_buy_per_capita'].rolling(30).mean()/df['Individual_buy_per_capita'].rolling(120).mean(), \
            'indv_sell_pcap_prpd7d30':df['Individual_buy_per_capita'].rolling(7).mean()/df['indv_sell_pcap'].rolling(30).mean(), \
            'indv_sell_pcap_prpd14d60':df['Individual_buy_per_capita'].rolling(14).mean()/df['indv_sell_pcap'].rolling(60).mean(), \
            'indv_sell_pcap_prpd30d120':df['Individual_sell_per_capita'].rolling(30).mean()/df['Individual_sell_per_capita'].rolling(120).mean(), \
            'indv_buy_pcap_prp_mrkt':df['Individual_buy_per_capita']/df['mrkt_indv_buy_pcap'], \
            'indv_sell_pcap_prp_mrkt':df['Individual_sell_per_capita']/df['mrkt_indv_sell_pcap'], \
            'indv_net_dev_mrkt':(df['indv_net_value'] - df['mrkt_indv_net_value'])/df['mrkt_indv_absnet_value'], \
            'indv_net_dev_mrkt3d':((df['indv_net_value'] - df['mrkt_indv_net_value']).rolling(3).sum())/df['mrkt_indv_absnet_value3d'], \
            'prp_high30d_dev_mrkt':df['prp_high30d'] - df['mrkt_prp_high30d'], \
            'prp_high60d_dev_mrkt':df['prp_high60d'] - df['mrkt_prp_high60d'], \
            'prp_high90d_dev_mrkt':df['prp_high90d'] - df['mrkt_prp_high90d'], \
            'prp_high120d_dev_mrkt':df['prp_high120d'] - df['mrkt_prp_high120d'], \
            'prp_high300d_dev_mrkt':df['prp_high300d'] - df['mrkt_prp_high300d'], \
            'prp_low30d_dev_mrkt':df['prp_low30d'] - df['mrkt_prp_low30d'], \
            'prp_low60d_dev_mrkt':df['prp_low60d'] - df['mrkt_prp_low60d'], \
            'prp_low90d_dev_mrkt':df['prp_low90d'] - df['mrkt_prp_low90d'], \
            'prp_low120d_dev_mrkt':df['prp_low120d'] - df['mrkt_prp_low120d'], \
            'prp_low300d_dev_mrkt':df['prp_low300d'] - df['mrkt_prp_low300d'], \
            })
        #return_df.index = initial_index
        return return_df
    
    relpcap_feature_data = prc_mrkt_data.apply(relpcap_based_func).reset_index(drop=True)

    relpcap_feature = pd.concat([stock_percapita,relpcap_feature_data],axis=1)
    #relpcap_feature_groups = relpcap_feature_.groupby('stock_name')

    def power_count_features(relpcap_feature):
        #initial_index = relpcap_feature_group.index
        df = relpcap_feature
        df.index = [pd.Timestamp(item) for item in df['date']]

        count_indices = np.where(df['indv_power']>=0)
        temp = np.zeros(len(df['indv_power']),)
        temp[count_indices,] = 1
        df['count_indices'] = temp
        count_5d = df['count_indices'].rolling(5).sum()
        count_30d = df['count_indices'].rolling(30).sum()

        return_df = pd.DataFrame({\
            'ind_power_count5d30' : count_5d/count_30d} )
        return_df.index = initial_index
        return return_df

    power_count_data = relpcap_feature.apply(power_count_features).reset_index(drop=True)
  
    # concate the results
    features = pd.concat([pcap_feature_data, mrkt_pcap_feature_data, mrkt_rolling_data, relpcap_feature_data, power_count_data], \
        axis=1)

    return features

In [26]:
indv_nonindv_based_feature(foladMerge, wght_data, prop_features)

individual and non-individual features


KeyError: 'Individual_buy_per_capita'