In [1]:
import pandas as pd
import numpy as np
import talib as ta

In [9]:
# Load the training frame, keeping only the columns used in this notebook.
# The pre-computed `beta` / `lr_mkt_resid` columns are suffixed with `_60m` so
# they can sit next to versions computed over other window lengths, and rows
# are keyed by (timestamp, Asset_ID). Built as one chain (no `inplace=True`)
# so the cell never leaves a half-transformed frame behind.
df_train = (
    pd.read_feather(
        './data/new_data.ftr',
        columns=['timestamp', 'Asset_ID', 'Count', 'Open', 'High', 'Low', 'Close',
                 'Volume', 'Target', 'Weight', 'lr_15', 'Mkt_lrt_15',
                 'Crypto_Index', 'beta', 'lr_mkt_resid'],
    )
    .rename(columns={'beta': 'beta_60m', 'lr_mkt_resid': 'lr_mkt_resid_60m'})
    .set_index(['timestamp', 'Asset_ID'])
)
df_train

Unnamed: 0_level_0,Unnamed: 1_level_0,Count,Open,High,Low,Close,Volume,Target,Weight,lr_15,Mkt_lrt_15,Crypto_Index,beta_60m,lr_mkt_resid_60m
timestamp,Asset_ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1514764860,2,40.0,2376.580000,2399.500000,2357.140000,2374.590000,1.923301e+01,-0.004218,0.058657,,0.000000,2571.477256,0.000000,
1514764860,0,5.0,8.530000,8.530000,8.530000,8.530000,7.838000e+01,-0.014399,0.105286,,0.000000,2571.477256,0.000000,
1514764860,1,229.0,13835.194000,14013.800000,13666.110000,13850.176000,3.155006e+01,-0.014643,0.165850,,0.000000,2571.477256,0.000000,
1514764860,5,32.0,7.659600,7.659600,7.656700,7.657600,6.626713e+03,-0.013922,0.033911,,0.000000,2571.477256,0.000000,
1514764860,7,5.0,25.920000,25.920000,25.874000,25.877000,1.210873e+02,-0.008264,0.050867,,0.000000,2571.477256,0.000000,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1641772800,9,218.0,130.842571,130.940000,130.478000,130.707429,1.011042e+03,,0.058657,-0.004696,-0.003263,7532.261935,0.963412,-0.001552
1641772800,10,20.0,2122.416667,2124.010000,2116.950000,2119.743333,2.084755e+00,,0.026874,-0.003462,-0.003263,7532.261935,1.507092,0.001455
1641772800,13,118.0,0.066109,0.066130,0.066058,0.066082,1.265238e+06,,0.043830,-0.003493,-0.003263,7532.261935,0.772527,-0.000973
1641772800,12,112.0,0.261055,0.261185,0.260474,0.260682,1.187581e+05,,0.050867,-0.003568,-0.003263,7532.261935,0.938556,-0.000505


In [10]:
# List the data columns after the rename; `timestamp` and `Asset_ID` are now
# index levels, so they do not appear here.
df_train.columns

Index(['Count', 'Open', 'High', 'Low', 'Close', 'Volume', 'Target', 'Weight',
       'lr_15', 'Mkt_lrt_15', 'Crypto_Index', 'beta_60m', 'lr_mkt_resid_60m'],
      dtype='object')

`lr_mkt_resid` is a market-residualized log return, similar in spirit to `Target`: it removes the weighted market signal from each individual asset's return.

$$R^{a}(t)=\log P^{a}_t - \log P^{a}_{t-15}$$

$$M(t)=\frac{\sum_{a} w^{a} R^{a}(t)}{\sum_{a} w^{a}}$$

$$\beta^{a}=\frac{\left\langle M \cdot R^{a}\right\rangle_{60}}{\left\langle M^{2}\right\rangle_{60}}$$

$$lrmktresid^{a}(t)=R^{a}(t)-\beta^{a} M(t)$$

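where the bracket $\langle \cdot \rangle_{60}$ denotes the rolling average over time (60-minute windows). The `beta_60m` and `lr_mkt_resid_60m` columns loaded above use this 60-minute window; the functions below recompute the same quantities over a 2-day window (`60*24*2` one-minute rows).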

In [13]:
def beta(df, window=60*24*2):
    """Rolling beta of an asset's return against the market return.

    Computes ``<M * R>_window / <M * M>_window`` where ``M`` is
    ``df.Mkt_lrt_15``, ``R`` is ``df.lr_15`` and ``<.>_window`` is a trailing
    rolling mean over ``window`` rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``Mkt_lrt_15`` and ``lr_15`` columns. It is expected to hold
        the rows of a single asset in time order, since the rolling window
        runs over consecutive rows.
    window : int, default 60*24*2
        Number of rows per rolling window.

    Returns
    -------
    pandas.Series
        One beta per row, on the same index as ``df``. Rows whose window is
        incomplete or contains NaN are NaN; a zero denominator yields
        NaN or +/-inf, which the caller is responsible for cleaning.
    """
    market = df.Mkt_lrt_15
    # Plain pandas arithmetic instead of ta.MULT: the product is elementwise
    # either way, but pandas accepts any numeric dtype (TA-Lib insists on
    # float64) and needs no third-party call.
    cross_moment = (market * df.lr_15).rolling(window).mean()
    market_moment = (market * market).rolling(window).mean()
    return cross_moment / market_moment

def lr_mkt_resid(df):
    """Return the market-residual log return ``lr_15 - beta_2d * Mkt_lrt_15``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``lr_15``, ``beta_2d`` and ``Mkt_lrt_15`` columns.

    Returns
    -------
    pandas.Series
        Residual per row, on the same index as ``df``; NaN wherever any of
        the three inputs is NaN.
    """
    # Native operators replace ta.SUB / ta.MULT: same elementwise result,
    # no float64-only restriction and no third-party call.
    return df.lr_15 - df.beta_2d * df.Mkt_lrt_15

# def rolling_volatility(df, window=60):
#     return df.rolling(window).std()

# def lrtn_index(crypto_index):
#     lrtn_index_5 = np.log(crypto_index).diff(5).rename('lrtn_index_5')
#     lrtn_index_15 = np.log(crypto_index).diff(15).rename('lrtn_index_15')
#     return pd.concat([lrtn_index_5,lrtn_index_15],axis=1)

# def lr_mkt_resid_sma(lr_mkt_resid):
#     return ta.SMA(lr_mkt_resid,5)
#############################################
def lambda_by_Asset(x):
    """Add the ``beta_2d`` and ``lr_mkt_resid_2d`` columns to one asset's rows.

    Parameters
    ----------
    x : pandas.DataFrame
        Rows of a single asset, indexed by (timestamp, Asset_ID), with the
        ``lr_15`` and ``Mkt_lrt_15`` columns that ``beta`` and
        ``lr_mkt_resid`` read.

    Returns
    -------
    pandas.DataFrame
        A new frame on the same index as ``x`` with the two columns appended;
        ``x`` itself is not modified.
    """
    # Rolling beta over the default (2-day) window. Undefined values -- NaN
    # from incomplete windows, NaN/+-inf from a zero denominator -- are set
    # to 0, which makes the residual below equal the raw return on those rows.
    beta_2d = beta(x).replace([np.nan, np.inf, -np.inf], 0)
    # beta() returns a Series on x's own index, so attach it directly.
    # A key-based merge is unnecessary here, would multiply rows if a
    # (timestamp, Asset_ID) key ever repeated, and would emit
    # beta_2d_x / beta_2d_y when the column already exists.
    x = x.assign(beta_2d=beta_2d)
    x['lr_mkt_resid_2d'] = lr_mkt_resid(x)
    return x

In [23]:
# Spot check: rolling beta (default window) for the rows of Asset_ID == 0 only.
asset_ids = df_train.index.get_level_values('Asset_ID')
beta(df_train.loc[asset_ids == 0]).rename("beta_2d")

timestamp   Asset_ID
1514764860  0                NaN
1514764920  0                NaN
1514764980  0                NaN
1514765040  0                NaN
1514765100  0                NaN
                          ...   
1641772560  0           0.892210
1641772620  0           0.892218
1641772680  0           0.892261
1641772740  0           0.892208
1641772800  0           0.892242
Name: beta_2d, Length: 2102319, dtype: float64

In [14]:
#######################################add features by asset_id
df_train = df_train.groupby('Asset_ID').apply(lambda x: lambda_by_Asset(x))
df_train.iloc[-14:,]


Unnamed: 0_level_0,Unnamed: 1_level_0,Count,Open,High,Low,Close,Volume,Target,Weight,lr_15,Mkt_lrt_15,Crypto_Index,beta_60m,lr_mkt_resid_60m,beta_2d,lr_mkt_resid_2d
timestamp,Asset_ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1641772800,3,351.0,1.170103,1.170729,1.167,1.167863,169223.5,,0.107797,-0.00519,-0.003263,7532.261935,1.345836,-0.000798,1.282272,-0.001006
1641772800,2,106.0,376.696667,376.95,376.0,376.48,144.318,,0.058657,-0.002764,-0.003263,7532.261935,0.941773,0.000309,0.89846,0.000168
1641772800,0,288.0,438.722333,438.8,437.7,437.990333,401.6819,,0.105286,-0.002281,-0.003263,7532.261935,0.753021,0.000177,0.892242,0.000631
1641772800,1,2111.0,41860.168571,41877.7,41766.7,41823.085714,89.82209,,0.16585,-0.002324,-0.003263,7532.261935,1.083834,0.001212,0.717869,1.8e-05
1641772800,4,197.0,0.150498,0.1506,0.1502,0.150307,1145598.0,,0.086971,-0.0035,-0.003263,7532.261935,0.789767,-0.000923,0.953432,-0.000389
1641772800,5,88.0,2.79448,2.8,2.788562,2.78964,24113.53,,0.033911,-0.005577,-0.003263,7532.261935,0.891223,-0.002668,0.966015,-0.002424
1641772800,7,75.0,29.73178,29.751,29.6982,29.7151,1063.943,,0.050867,-0.003969,-0.003263,7532.261935,0.728955,-0.001591,0.992595,-0.00073
1641772800,6,2059.0,3151.504286,3152.15,3143.79,3147.158571,578.6272,,0.144188,-0.00311,-0.003263,7532.261935,0.972044,6.2e-05,0.986896,0.00011
1641772800,8,29.0,1.034633,1.075,0.9578,1.0344,8585.877,,0.026874,-0.001899,-0.003263,7532.261935,2.351759,0.005774,2.725428,0.006994
1641772800,9,218.0,130.842571,130.94,130.478,130.707429,1011.042,,0.058657,-0.004696,-0.003263,7532.261935,0.963412,-0.001552,0.997569,-0.001441


In [15]:
# Persist the enriched frame. The (timestamp, Asset_ID) index levels are moved
# back into ordinary columns first, since to_feather expects a default index.
df_train.reset_index().to_feather('./data/new_data2.ftr')