In [1]:
import pandas as pd
import numpy as np
import talib as ta

In [2]:
# Read the feather file and key every row by (timestamp, Asset_ID).
# set_index is chained so no in-place mutation of an intermediate frame is needed.
df_train = pd.read_feather('./data'+'/new_data.ftr').set_index(["timestamp", 'Asset_ID'])
df_train

Unnamed: 0_level_0,Unnamed: 1_level_0,Count,Open,High,Low,Close,Volume,VWAP,Target,Weight,lr_15,Mkt_lrt_15,Crypto_Index
timestamp,Asset_ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1514764860,2,40.0,2376.580000,2399.500000,2357.140000,2374.590000,1.923301e+01,2373.116392,-0.004218,0.058657,,0.000000,2571.477256
1514764860,0,5.0,8.530000,8.530000,8.530000,8.530000,7.838000e+01,8.530000,-0.014399,0.105286,,0.000000,2571.477256
1514764860,1,229.0,13835.194000,14013.800000,13666.110000,13850.176000,3.155006e+01,13827.062093,-0.014643,0.165850,,0.000000,2571.477256
1514764860,5,32.0,7.659600,7.659600,7.656700,7.657600,6.626713e+03,7.657713,-0.013922,0.033911,,0.000000,2571.477256
1514764860,7,5.0,25.920000,25.920000,25.874000,25.877000,1.210873e+02,25.891363,-0.008264,0.050867,,0.000000,2571.477256
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1641772800,9,218.0,130.842571,130.940000,130.478000,130.707429,1.011042e+03,130.734076,,0.058657,-0.004696,-0.003263,7532.261935
1641772800,10,20.0,2122.416667,2124.010000,2116.950000,2119.743333,2.084755e+00,2120.570582,,0.026874,-0.003462,-0.003263,7532.261935
1641772800,13,118.0,0.066109,0.066130,0.066058,0.066082,1.265238e+06,0.066086,,0.043830,-0.003493,-0.003263,7532.261935
1641772800,12,112.0,0.261055,0.261185,0.260474,0.260682,1.187581e+05,0.260913,,0.050867,-0.003568,-0.003263,7532.261935


`lr_mkt_resid` is a weighted feature, similar to `Target`, that removes the market signal from each individual asset's return.

$$R^{a}(t)=\log P^{a}_t - \log P^{a}_{t-15}$$

$$M(t)=\frac{\sum_{a} w^{a} R^{a}(t)}{\sum_{a} w^{a}}$$

$$\beta^{a}=\frac{\left\langle M \cdot R^{a}\right\rangle_{60}}{\left\langle M^{2}\right\rangle_{60}}$$

$$lrmktresid^{a}(t)=R^{a}(t)-\beta^{a} M(t)$$

where the bracket $\langle \cdot \rangle_{60}$ represents the rolling average over time (60-minute windows).

In [3]:
def beta(df, window=60):
    """Rolling market beta of the asset's 15-step log return.

    Computes ``<M * R>_window / <M * M>_window`` where ``M`` is the
    ``Mkt_lrt_15`` column, ``R`` is the ``lr_15`` column and ``<.>_window``
    is a trailing rolling mean over ``window`` rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Mkt_lrt_15`` and ``lr_15``.
    window : int, default 60
        Number of rows in the trailing rolling mean.

    Returns
    -------
    pandas.Series
        Beta per row, on ``df``'s index. Rows whose window is incomplete or
        contains a NaN are NaN; a window whose market term is all zero
        yields +/-inf or NaN (division by zero is not cleaned up here).
    """
    market = df['Mkt_lrt_15']
    asset = df['lr_15']
    # Native Series arithmetic replaces ta.MULT: it is the same element-wise
    # product, but also works for non-float64 columns and for all-NaN input,
    # both of which TA-Lib rejects with an exception.
    cross_moment = (market * asset).rolling(window).mean()
    market_moment = (market * market).rolling(window).mean()
    return cross_moment / market_moment

def lr_mkt_resid(df):
    """Asset log return with the beta-scaled market return removed.

    Computes ``lr_15 - beta * Mkt_lrt_15`` row by row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``lr_15``, ``beta`` and ``Mkt_lrt_15``.

    Returns
    -------
    pandas.Series
        Residual return on ``df``'s index; NaN wherever any input is NaN.
    """
    # Bracket access avoids any clash between a column name and a DataFrame
    # attribute, and plain operators replace ta.SUB / ta.MULT, which only
    # accept float64 arrays.
    return df['lr_15'] - df['beta'] * df['Mkt_lrt_15']

def rolling_volatility(df, window=60):
    """Trailing rolling standard deviation of ``df`` over ``window`` rows.

    Uses pandas' default rolling settings, so rows with an incomplete
    window are NaN.
    """
    trailing = df.rolling(window)
    return trailing.std()

def lrtn_index(crypto_index):
    """Log returns of the index level over 5 and 15 rows.

    Returns a two-column DataFrame (``lrtn_index_5``, ``lrtn_index_15``)
    on the same index as ``crypto_index``; the first 5 / 15 rows of the
    respective column are NaN.
    """
    log_level = np.log(crypto_index)
    five_step = log_level.diff(5).rename('lrtn_index_5')
    fifteen_step = log_level.diff(15).rename('lrtn_index_15')
    return pd.concat([five_step, fifteen_step], axis=1)

def lr_mkt_resid_sma(lr_mkt_resid, window=5):
    """Simple moving average of the market-residual return.

    Parameters
    ----------
    lr_mkt_resid : pandas.Series or array-like of float
        Residual returns in row order.
    window : int, default 5
        Number of trailing rows averaged.

    Returns
    -------
    pandas.Series or numpy.ndarray
        Same kind as the input (a Series keeps its index). A value is NaN
        when its window is incomplete or contains a NaN.
    """
    is_series = isinstance(lr_mkt_resid, pd.Series)
    values = lr_mkt_resid if is_series else pd.Series(np.asarray(lr_mkt_resid, dtype=float))
    # pandas' rolling mean is used instead of ta.SMA: TA-Lib carries an
    # interior NaN forward to the end of the output, so a single gap would
    # blank the feature for every later row. Here the average recovers as
    # soon as the NaN has left the window.
    smoothed = values.rolling(window).mean()
    return smoothed if is_series else smoothed.to_numpy()
#############################################
def lambda_by_Asset(x):
    """Append the rolling features to the rows of a single asset.

    Meant to be applied to each ``Asset_ID`` group. All windows are
    positional (row-based), so the rows are assumed to be in chronological
    order -- TODO confirm against the upstream data preparation.

    Parameters
    ----------
    x : pandas.DataFrame
        Rows of one asset. Must contain the columns ``Volume``, ``lr_15``,
        ``Mkt_lrt_15`` and ``Crypto_Index``.

    Returns
    -------
    pandas.DataFrame
        ``x``'s columns followed by ``vol_sum_15``, ``std_lr_15``,
        ``std_Mkt_lrt_15``, ``std_Crypto_Index``, ``lrtn_index_5``,
        ``lrtn_index_15``, ``beta``, ``lr_mkt_resid`` and
        ``lr_mkt_resid_sma``, on the same index as ``x``.
    """
    # Trailing 15-row volume sum. A pandas rolling sum is used instead of
    # ta.SMA(...) * 15 because TA-Lib carries an interior NaN forward to the
    # end of the output; here only windows that contain the NaN are affected.
    vol_sum_15 = x['Volume'].rolling(15).sum().rename('vol_sum_15')

    # Rolling standard deviations (default window of rolling_volatility).
    stds = rolling_volatility(x[['lr_15', 'Mkt_lrt_15', 'Crypto_Index']])
    stds.columns = ['std_lr_15', 'std_Mkt_lrt_15', 'std_Crypto_Index']

    # 5- and 15-row log returns of the index level.
    lrtn_index_5_15 = lrtn_index(x['Crypto_Index'])

    # Beta is NaN during warm-up and +/-inf (or NaN) when the market term is
    # zero over the whole window; 0 makes the residual fall back to lr_15.
    b = beta(x).rename("beta").replace([np.nan, np.inf, -np.inf], 0)

    # Every piece was derived from x and shares its index, so a column-wise
    # concat attaches them directly. This replaces a left merge on the
    # (timestamp, Asset_ID) index levels, which performed a full hash join
    # and would multiply rows if a key were ever duplicated.
    features = pd.concat([x, vol_sum_15, stds, lrtn_index_5_15, b], axis=1)

    features['lr_mkt_resid'] = lr_mkt_resid(features)
    features['lr_mkt_resid_sma'] = lr_mkt_resid_sma(features['lr_mkt_resid'])
    return features

In [4]:
# Add the rolling features independently for every asset.

# Columns produced by lambda_by_Asset. This notebook writes its result back to
# the file it loads from, so on a re-run the loaded frame already carries
# them; they are dropped first so the features are recomputed from the base
# columns instead of colliding with the stale copies.
GENERATED_FEATURES = [
    'vol_sum_15', 'std_lr_15', 'std_Mkt_lrt_15', 'std_Crypto_Index',
    'lrtn_index_5', 'lrtn_index_15', 'beta', 'lr_mkt_resid', 'lr_mkt_resid_sma',
]

# group_keys=False keeps the (timestamp, Asset_ID) index unchanged; newer
# pandas versions would otherwise prepend a second Asset_ID level, which makes
# a later reset_index() fail.
df_train = (
    df_train
    .drop(columns=GENERATED_FEATURES, errors='ignore')
    .groupby('Asset_ID', group_keys=False)
    .apply(lambda_by_Asset)
)
df_train.iloc[-14:,]


Unnamed: 0_level_0,Unnamed: 1_level_0,Count,Open,High,Low,Close,Volume,VWAP,Target,Weight,lr_15,...,Crypto_Index,vol_sum_15,std_lr_15,std_Mkt_lrt_15,std_Crypto_Index,lrtn_index_5,lrtn_index_15,beta,lr_mkt_resid,lr_mkt_resid_sma
timestamp,Asset_ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1641772800,3,351.0,1.170103,1.170729,1.167,1.167863,169223.5,1.168722,,0.107797,-0.00519,...,7532.261935,1410050.0,0.003322,0.002552,20.825669,-0.001497,-0.002382,1.345836,-0.000798,-0.001446
1641772800,2,106.0,376.696667,376.95,376.0,376.48,144.318,376.560393,,0.058657,-0.002764,...,7532.261935,729.5325,0.002486,0.002552,20.825669,-0.001497,-0.002382,0.941773,0.000309,-0.000432
1641772800,0,288.0,438.722333,438.8,437.7,437.990333,401.6819,438.278705,,0.105286,-0.002281,...,7532.261935,5152.49,0.002149,0.002552,20.825669,-0.001497,-0.002382,0.753021,0.000177,0.00026
1641772800,1,2111.0,41860.168571,41877.7,41766.7,41823.085714,89.82209,41830.240059,,0.16585,-0.002324,...,7532.261935,530.9589,0.002688,0.002552,20.825669,-0.001497,-0.002382,1.083834,0.001212,0.00027
1641772800,4,197.0,0.150498,0.1506,0.1502,0.150307,1145598.0,0.150394,,0.086971,-0.0035,...,7532.261935,4875971.0,0.0022,0.002552,20.825669,-0.001497,-0.002382,0.789767,-0.000923,-0.00091
1641772800,5,88.0,2.79448,2.8,2.788562,2.78964,24113.53,2.792239,,0.033911,-0.005577,...,7532.261935,171680.0,0.002556,0.002552,20.825669,-0.001497,-0.002382,0.891223,-0.002668,-0.002464
1641772800,7,75.0,29.73178,29.751,29.6982,29.7151,1063.943,29.721728,,0.050867,-0.003969,...,7532.261935,5021.665,0.002289,0.002552,20.825669,-0.001497,-0.002382,0.728955,-0.001591,-0.001314
1641772800,6,2059.0,3151.504286,3152.15,3143.79,3147.158571,578.6272,3148.128143,,0.144188,-0.00311,...,7532.261935,5335.395,0.002401,0.002552,20.825669,-0.001497,-0.002382,0.972044,6.2e-05,-0.000256
1641772800,8,29.0,1.034633,1.075,0.9578,1.0344,8585.877,1.034341,,0.026874,-0.001899,...,7532.261935,53506.08,0.025131,0.002552,20.825669,-0.001497,-0.002382,2.351759,0.005774,0.014838
1641772800,9,218.0,130.842571,130.94,130.478,130.707429,1011.042,130.734076,,0.058657,-0.004696,...,7532.261935,5065.137,0.002495,0.002552,20.825669,-0.001497,-0.002382,0.963412,-0.001552,-0.001613


In [5]:
# Move the index levels back into ordinary columns before writing the feather
# file. NOTE(review): this is the same path the notebook loads from, so the
# input file is overwritten.
(
    df_train
    .reset_index()
    .to_feather('./data/new_data.ftr')
)