In [1]:
import math
import os

import numpy as np
import pandas as pd
import empyrical as empy
from sklearn.metrics import (
    precision_score, accuracy_score, confusion_matrix, 
    mean_squared_error, recall_score, f1_score
)
import itertools

import warnings
warnings.filterwarnings("ignore")


In [2]:
def predict_freq(pred, freq='M'):
    """Hold the first prediction of each ``freq`` period across that whole period.

    Parameters
    ----------
    pred : pandas.Series
        Predictions indexed by timestamps (``pd.Grouper(freq=...)`` is applied
        to the index).
    freq : str, default 'M'
        Pandas offset alias that defines the holding period.

    Returns
    -------
    pandas.Series
        Series re-indexed onto the original timestamps of ``pred``; each entry
        carries the first non-null prediction of its period, forward-filled
        until the next period that has one.
    """
    stamps = pred.index.to_series()

    # First timestamp present in each period, and the first non-null
    # prediction of that same period.
    period_first_stamp = stamps.groupby(pd.Grouper(freq=freq)).first()
    period_first_pred = pred.groupby(pd.Grouper(freq=freq)).first()

    # Label each period by its first observed timestamp rather than the bin label.
    period_first_pred.index = period_first_stamp

    # Periods without a value are dropped, then the kept values are spread
    # forward over the original timestamps.
    kept = period_first_pred.dropna()
    expanded = kept.reindex(stamps)
    return expanded.ffill()

def predict_cont_n(vote, n_quit, n_back, rb_lookback=True):
    """Smooth a vote series so the state only flips after a run of equal votes.

    The state becomes 0 once the rolling sum over the last ``n_quit`` votes is
    0, and becomes 1 once the rolling sum over the last ``n_back`` votes equals
    ``n_back``. For 0/1 votes this means "``n_quit`` consecutive zeros" and
    "``n_back`` consecutive ones". Between those events the previous state is
    carried forward.

    Parameters
    ----------
    vote : pandas.Series
        Vote series. The ``rb_lookback=False`` path groups by
        ``pd.Grouper(freq='M')``, so it needs a datetime-like index.
        NOTE(review): that path re-attaches group results positionally via
        ``.values``, so it presumably expects a chronologically sorted index.
    n_quit : int
        Rolling window length used for the switch to 0.
    n_back : int
        Rolling window length used for the switch to 1.
    rb_lookback : bool, default True
        If True, the rolling windows run over the whole series. If False, the
        rule is applied independently inside each ``'M'`` group, so windows
        never span a group boundary.

    Returns
    -------
    pandas.Series
        Smoothed state on ``vote.index`` (float dtype on the lookback path).
        An empty input gives an empty series instead of raising ``IndexError``.
    """
    # Guard: ``.iloc[0]`` below would raise IndexError on an empty series.
    if vote.empty:
        return pd.Series(np.nan, vote.index)

    if not rb_lookback:
        v_tmp = vote.groupby(pd.Grouper(freq='M')).apply(
            lambda x: predict_cont_n(x, n_quit, n_back)
        ).values
        return pd.Series(v_tmp, index=vote.index)

    vote_c = pd.Series(np.nan, vote.index)

    vres_quit = vote.rolling(n_quit).sum()
    vres_back = vote.rolling(n_back).sum()

    # Mark only the timestamps where a switch condition is met; everything
    # else stays NaN and is filled from the previous state below.
    vote_c[vres_quit == 0] = 0
    vote_c[vres_back == n_back] = 1

    # Seed the first state with the raw vote when no rule fired on it.
    if np.isnan(vote_c.iloc[0]):
        vote_c.iloc[0] = vote.iloc[0]
    return vote_c.ffill()

In [160]:
# Input CSV of predictions, the run tag used in derived names, and the assets
# that take part in the vote.
vote_down_buy_ratio_csv = './result/2022/202205v0_till202208/pp10/vote-pp10-voteAll_SD-2022-Aug-24/trans_preds.csv'
name = '202205v0_till202208_voteAll_SD'
vote_assets = ['GLD', 'TLT', 'TIP']

# Output path: same location as the input, with the file stem swapped for a tagged one.
save_file = vote_down_buy_ratio_csv.replace('trans_preds', '3voteContNdays_{}'.format(name))
save_file

'./result/2022/202205v0_till202208/pp10/vote-pp10-voteAll_SD-2022-Aug-24/3voteContNdays_202205v0_till202208_voteAll_SD.csv'

In [161]:
# Long-format predictions -> wide table: one row per trading date, one column
# per symbol, holding `proba_1` (pivot_table's default aggregation is the mean).
dftmp = pd.read_csv(vote_down_buy_ratio_csv)
proba_df = dftmp.pivot_table(index='Trading Date', columns='Symbol', values='proba_1')
proba_df.index = pd.to_datetime(proba_df.index)
proba_df

Symbol,DBC,EEM,EWJ,GLD,IYR,LQD,QQQ,SPY,TIP,TLT,VGK,VTI,VYM
Trading Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2008-01-02,0.807802,0.776699,1.000000,0.365611,1.000000,0.000000,0.806413,1.000000,1.000000,0.666296,1.000000,0.62231,0.043478
2008-01-03,0.807802,0.776699,1.000000,0.365611,1.000000,0.409398,0.562989,0.802625,1.000000,0.668731,1.000000,0.62231,0.043478
2008-01-04,0.807802,0.776699,1.000000,0.365611,1.000000,0.409398,0.562989,1.000000,1.000000,0.333704,1.000000,0.62231,0.043478
2008-01-07,0.807802,0.872476,1.000000,0.365611,1.000000,0.000000,0.562989,1.000000,0.573880,0.333704,1.000000,0.62231,0.043478
2008-01-08,0.807802,0.776699,1.000000,0.365611,1.000000,0.000000,0.562989,1.000000,0.573880,0.331269,1.000000,0.62231,0.043478
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-07-29,0.605282,0.781052,0.306351,0.433265,0.650442,0.000000,1.000000,0.904148,0.110339,0.236970,0.605730,1.00000,0.499701
2022-08-01,0.000000,0.795912,0.000000,0.271517,0.695288,1.000000,1.000000,1.000000,0.129835,0.555974,0.721329,1.00000,0.540090
2022-08-02,0.000000,1.000000,0.000000,0.271517,0.695288,1.000000,1.000000,1.000000,0.129835,0.483874,0.721329,1.00000,0.776332
2022-08-03,0.000000,1.000000,0.000000,0.271517,0.695288,1.000000,1.000000,1.000000,0.129835,0.779113,1.000000,1.00000,1.000000


In [162]:
# Binarise the probabilities: 1 where proba_1 >= 0.5, otherwise 0.
preds_df = proba_df.ge(0.5).astype(int)
preds_df

Symbol,DBC,EEM,EWJ,GLD,IYR,LQD,QQQ,SPY,TIP,TLT,VGK,VTI,VYM
Trading Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2008-01-02,1,1,1,0,1,0,1,1,1,1,1,1,0
2008-01-03,1,1,1,0,1,0,1,1,1,1,1,1,0
2008-01-04,1,1,1,0,1,0,1,1,1,0,1,1,0
2008-01-07,1,1,1,0,1,0,1,1,1,0,1,1,0
2008-01-08,1,1,1,0,1,0,1,1,1,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-07-29,1,1,0,0,1,0,1,1,0,0,1,1,0
2022-08-01,0,1,0,0,1,1,1,1,0,1,1,1,1
2022-08-02,0,1,0,0,1,1,1,1,0,0,1,1,1
2022-08-03,0,1,0,0,1,1,1,1,0,1,1,1,1


In [163]:
# Keep only the columns of the assets that take part in the vote.
preds_vote_assets_df = preds_df.loc[:, vote_assets]
preds_vote_assets_df

Symbol,GLD,TLT,TIP
Trading Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2008-01-02,0,1,1
2008-01-03,0,1,1
2008-01-04,0,0,1
2008-01-07,0,0,1
2008-01-08,0,0,1
...,...,...,...
2022-07-29,0,0,0
2022-08-01,0,1,0
2022-08-02,0,0,0
2022-08-03,0,1,0


In [164]:
# Row-wise share of voting assets that predict 1, then a vote: 1 when at
# least half of them agree.
vote_down_proba_ser = preds_vote_assets_df.mean(axis=1)
vote_down_preds_ser = vote_down_proba_ser.ge(0.5).astype(int).rename(name)
vote_down_preds_ser

Trading Date
2008-01-02    1
2008-01-03    1
2008-01-04    0
2008-01-07    0
2008-01-08    0
             ..
2022-07-29    0
2022-08-01    0
2022-08-02    0
2022-08-03    0
2022-08-04    0
Name: 202205v0_till202208_voteAll_SD, Length: 3674, dtype: int32

In [165]:
# Single-column frame of the combined vote, with columns sorted by label and a
# datetime index.
vote_df = vote_down_preds_ser.to_frame()
# `axis` must be passed by keyword: pandas 2.0 made it keyword-only, so the
# former `sort_index(1)` raises TypeError there.
vote_df = vote_df.sort_index(axis=1)
vote_df.index = pd.to_datetime(vote_df.index)

In [166]:
# Holding frequencies and run lengths to sweep over.
freq_list = ['W', 'M']
cont_list = [1, 2, 3, 4]
# Every ordered pair of run lengths (cartesian product of cont_list with itself).
cont_pair = [(first, second) for first in cont_list for second in cont_list]
cont_pair

[(1, 1),
 (1, 2),
 (1, 3),
 (1, 4),
 (2, 1),
 (2, 2),
 (2, 3),
 (2, 4),
 (3, 1),
 (3, 2),
 (3, 3),
 (3, 4),
 (4, 1),
 (4, 2),
 (4, 3),
 (4, 4)]

In [167]:
# Build every smoothed variant of the vote and collect them column-wise.
vote_freq_cont_list = []

# Variants that hold the first vote of each period (one per frequency).
for f in freq_list:
    v = vote_df.apply(lambda x: predict_freq(x, freq=f))
    v.columns = v.columns.map(lambda x: x+'___freq{}'.format(f))
    vote_freq_cont_list.append(v)

# Variants that flip only after a run of equal votes, for every
# (n_quit, n_back) pair, both with and without looking back across months.
for n_quit, n_back in cont_pair:

    name_tmp = "___cont{}".format(n_quit) if n_quit==n_back else "___cont{}quit_cont{}back".format(n_quit,n_back)

    for rb_lookback, rb_suffix in ((True, '_rb_lookback'), (False, '_rb_follow')):
        v = vote_df.apply(lambda x: predict_cont_n(x, n_quit, n_back, rb_lookback=rb_lookback))
        v.columns = v.columns.map(lambda x: x+name_tmp+rb_suffix)
        vote_freq_cont_list.append(v)

# `axis` must be passed by keyword: pandas 2.0 accepts only `objs`
# positionally, so the former `pd.concat(..., 1)` raises TypeError there.
vote_freq_cont_df = pd.concat(vote_freq_cont_list, axis=1)
vote_freq_cont_df

Unnamed: 0_level_0,202205v0_till202208_voteAll_SD___freqW,202205v0_till202208_voteAll_SD___freqM,202205v0_till202208_voteAll_SD___cont1_rb_lookback,202205v0_till202208_voteAll_SD___cont1_rb_follow,202205v0_till202208_voteAll_SD___cont1quit_cont2back_rb_lookback,202205v0_till202208_voteAll_SD___cont1quit_cont2back_rb_follow,202205v0_till202208_voteAll_SD___cont1quit_cont3back_rb_lookback,202205v0_till202208_voteAll_SD___cont1quit_cont3back_rb_follow,202205v0_till202208_voteAll_SD___cont1quit_cont4back_rb_lookback,202205v0_till202208_voteAll_SD___cont1quit_cont4back_rb_follow,...,202205v0_till202208_voteAll_SD___cont3quit_cont4back_rb_lookback,202205v0_till202208_voteAll_SD___cont3quit_cont4back_rb_follow,202205v0_till202208_voteAll_SD___cont4quit_cont1back_rb_lookback,202205v0_till202208_voteAll_SD___cont4quit_cont1back_rb_follow,202205v0_till202208_voteAll_SD___cont4quit_cont2back_rb_lookback,202205v0_till202208_voteAll_SD___cont4quit_cont2back_rb_follow,202205v0_till202208_voteAll_SD___cont4quit_cont3back_rb_lookback,202205v0_till202208_voteAll_SD___cont4quit_cont3back_rb_follow,202205v0_till202208_voteAll_SD___cont4_rb_lookback,202205v0_till202208_voteAll_SD___cont4_rb_follow
Trading Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2008-01-02,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2008-01-03,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2008-01-04,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2008-01-07,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2008-01-08,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-07-29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-08-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-08-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-08-03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
# NOTE(review): this rebinds `save_file`, replacing whatever value the name
# held from an earlier cell; later cells read/write this path instead.
save_file = './result/2022/portfolioY/vote_down_buy_ratio_yv_ppv_x37asset_v1.csv'
save_file

'./result/2022/portfolioY/vote_down_buy_ratio_yv_ppv_x37asset_v1.csv'

In [7]:
# Load the vote table from `save_file` when it exists, otherwise persist the
# in-memory one.
# NOTE(review): when the file exists, the `vote_freq_cont_df` computed earlier
# is replaced by the file's content; confirm that is intended.
if os.path.exists(save_file):
    try:
        vote_freq_cont_df = pd.read_csv(save_file, index_col='Trading Date')
    except (KeyError, ValueError):
        # No 'Trading Date' column (read_csv rejects the unknown index name):
        # fall back to using the first column as the index.
        vote_freq_cont_df = pd.read_csv(save_file, index_col=0)
    vote_freq_cont_df.index = pd.to_datetime(vote_freq_cont_df.index)
else:
    # Create the target directory first so the write does not fail on a fresh checkout.
    save_dir = os.path.dirname(save_file)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    vote_freq_cont_df.to_csv(save_file)
vote_freq_cont_df

Unnamed: 0,trans_preds_202205v0_till202208___freqW___vote_down_buy_0,y_3030xxz___pp_05___x_GLD___freqW___vote_down_buy_0,y_3030xxz___pp_05___x_TIP___freqW___vote_down_buy_0,y_3030xxz___pp_05___x_TLT___freqW___vote_down_buy_0,"y_3030xxz___pp_05___x_avgproba(GLD,LQD,QQQ,TIP,TLT,VGK,VYM)___freqW___vote_down_buy_0","y_3030xxz___pp_05___x_avgproba(GLD,TIP,TLT)___freqW___vote_down_buy_0","y_3030xxz___pp_05___x_avgproba(GLD,TIP,TLT,combine_3assets,combine_7assets)___freqW___vote_down_buy_0","y_3030xxz___pp_05___x_avgproba(LQD,TIP,VTI)___freqW___vote_down_buy_0","y_3030xxz___pp_05___x_avgproba(combine_3assets,combine_7assets)___freqW___vote_down_buy_0",y_3030xxz___pp_05___x_combine_3assets___freqW___vote_down_buy_0,...,y_3030xxz___pp_avgproba___x_GLD___cont4_rb_follow___vote_down_buy_0.3,y_3030xxz___pp_avgproba___x_TIP___cont4_rb_follow___vote_down_buy_0.3,y_3030xxz___pp_avgproba___x_TLT___cont4_rb_follow___vote_down_buy_0.3,"y_3030xxz___pp_avgproba___x_avgproba(GLD,LQD,QQQ,TIP,TLT,VGK,VYM)___cont4_rb_follow___vote_down_buy_0.3","y_3030xxz___pp_avgproba___x_avgproba(GLD,TIP,TLT)___cont4_rb_follow___vote_down_buy_0.3","y_3030xxz___pp_avgproba___x_avgproba(GLD,TIP,TLT,combine_3assets,combine_7assets)___cont4_rb_follow___vote_down_buy_0.3","y_3030xxz___pp_avgproba___x_avgproba(LQD,TIP,VTI)___cont4_rb_follow___vote_down_buy_0.3","y_3030xxz___pp_avgproba___x_avgproba(combine_3assets,combine_7assets)___cont4_rb_follow___vote_down_buy_0.3",y_3030xxz___pp_avgproba___x_combine_3assets___cont4_rb_follow___vote_down_buy_0.3,y_3030xxz___pp_avgproba___x_combine_7assets___cont4_rb_follow___vote_down_buy_0.3
2008-01-02,1,1,1,1,1,1,1,1,1,1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2008-01-03,1,1,1,1,1,1,1,1,1,1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2008-01-04,1,1,1,1,1,1,1,1,1,1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2008-01-07,0,1,1,0,1,1,1,1,1,1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2008-01-08,0,1,1,0,1,1,1,1,1,1,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-03-25,1,1,1,1,1,1,1,1,0,0,...,1.0,1.0,1.0,1.0,1.0,0.3,1.0,0.3,0.3,0.3
2022-03-28,1,1,1,1,1,1,1,1,0,0,...,1.0,1.0,1.0,1.0,1.0,0.3,1.0,0.3,0.3,0.3
2022-03-29,1,1,1,1,1,1,1,1,0,0,...,1.0,1.0,1.0,1.0,1.0,0.3,1.0,0.3,0.3,0.3
2022-03-30,1,1,1,1,1,1,1,1,0,0,...,1.0,1.0,1.0,1.0,1.0,0.3,1.0,0.3,0.3,0.3


In [8]:
# Keep only the columns whose name ends with the '___vote_down_buy_0' suffix,
# then strip that text from the column names.
# NOTE(review): this overwrites `vote_freq_cont_df`; re-running the cell on its
# own output selects no columns because the suffix is already gone.
suffix = '___vote_down_buy_0'
has_suffix = vote_freq_cont_df.columns.str.endswith(suffix)
vote_freq_cont_df = vote_freq_cont_df.loc[:, has_suffix]
vote_freq_cont_df.columns = [col.replace(suffix, "") for col in vote_freq_cont_df.columns]
vote_freq_cont_df

Unnamed: 0,trans_preds_202205v0_till202208___freqW,y_3030xxz___pp_05___x_GLD___freqW,y_3030xxz___pp_05___x_TIP___freqW,y_3030xxz___pp_05___x_TLT___freqW,"y_3030xxz___pp_05___x_avgproba(GLD,LQD,QQQ,TIP,TLT,VGK,VYM)___freqW","y_3030xxz___pp_05___x_avgproba(GLD,TIP,TLT)___freqW","y_3030xxz___pp_05___x_avgproba(GLD,TIP,TLT,combine_3assets,combine_7assets)___freqW","y_3030xxz___pp_05___x_avgproba(LQD,TIP,VTI)___freqW","y_3030xxz___pp_05___x_avgproba(combine_3assets,combine_7assets)___freqW",y_3030xxz___pp_05___x_combine_3assets___freqW,...,y_3030xxz___pp_avgproba___x_GLD___cont4_rb_follow,y_3030xxz___pp_avgproba___x_TIP___cont4_rb_follow,y_3030xxz___pp_avgproba___x_TLT___cont4_rb_follow,"y_3030xxz___pp_avgproba___x_avgproba(GLD,LQD,QQQ,TIP,TLT,VGK,VYM)___cont4_rb_follow","y_3030xxz___pp_avgproba___x_avgproba(GLD,TIP,TLT)___cont4_rb_follow","y_3030xxz___pp_avgproba___x_avgproba(GLD,TIP,TLT,combine_3assets,combine_7assets)___cont4_rb_follow","y_3030xxz___pp_avgproba___x_avgproba(LQD,TIP,VTI)___cont4_rb_follow","y_3030xxz___pp_avgproba___x_avgproba(combine_3assets,combine_7assets)___cont4_rb_follow",y_3030xxz___pp_avgproba___x_combine_3assets___cont4_rb_follow,y_3030xxz___pp_avgproba___x_combine_7assets___cont4_rb_follow
2008-01-02,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
2008-01-03,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
2008-01-04,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
2008-01-07,0,1,1,0,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
2008-01-08,0,1,1,0,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-03-25,1,1,1,1,1,1,1,1,0,0,...,1,1,1,1,1,0,1,0,0,0
2022-03-28,1,1,1,1,1,1,1,1,0,0,...,1,1,1,1,1,0,1,0,0,0
2022-03-29,1,1,1,1,1,1,1,1,0,0,...,1,1,1,1,1,0,1,0,0,0
2022-03-30,1,1,1,1,1,1,1,1,0,0,...,1,1,1,1,1,0,1,0,0,0


In [9]:
# Baseline column of ones ("always 1"), built from the first vote column so it
# shares the same index.
first_vote_col = vote_freq_cont_df.iloc[:, 0]
no_vote = (first_vote_col * 0 + 1).rename('no_3vote')
vote_all_kinds = pd.concat([no_vote, vote_freq_cont_df], axis=1)

# Column sums, and the same sums relative to the baseline column's sum.
buy_sum = vote_all_kinds.sum().rename('buy_sum')
buy_ratio = buy_sum.div(buy_sum['no_3vote']).rename('buy_ratio')

In [10]:
# Previous-row values; the first row is compared with itself so it counts as no flip.
vote_shift = vote_all_kinds.shift()
vote_shift.iloc[0] = vote_all_kinds.iloc[0]

# Absolute row-to-row change per column, summed and scaled by the baseline sum.
flip = vote_all_kinds.sub(vote_shift).abs()
flip_sum = flip.sum().rename('flip_sum')
flip_ratio = flip_sum.div(buy_sum['no_3vote']).rename('flip_ratio')

In [11]:
# One row per vote variant, one column per summary statistic.
flip_parts = [buy_sum, buy_ratio, flip_sum, flip_ratio]
flip_df = pd.concat(flip_parts, axis=1)
flip_df

Unnamed: 0,buy_sum,buy_ratio,flip_sum,flip_ratio
no_3vote,3588,1.000000,0.0,0.000000
trans_preds_202205v0_till202208___freqW,2572,0.716834,150.0,0.041806
y_3030xxz___pp_05___x_GLD___freqW,2756,0.768116,150.0,0.041806
y_3030xxz___pp_05___x_TIP___freqW,2927,0.815775,138.0,0.038462
y_3030xxz___pp_05___x_TLT___freqW,2623,0.731048,168.0,0.046823
...,...,...,...,...
"y_3030xxz___pp_avgproba___x_avgproba(GLD,TIP,TLT,combine_3assets,combine_7assets)___cont4_rb_follow",3236,0.901895,75.0,0.020903
"y_3030xxz___pp_avgproba___x_avgproba(LQD,TIP,VTI)___cont4_rb_follow",3238,0.902453,74.0,0.020624
"y_3030xxz___pp_avgproba___x_avgproba(combine_3assets,combine_7assets)___cont4_rb_follow",3018,0.841137,67.0,0.018673
y_3030xxz___pp_avgproba___x_combine_3assets___cont4_rb_follow,2876,0.801561,89.0,0.024805


In [12]:
def read_kpi2ret(result_excel_name, start=None, end=None):
    """Read the 'Balance' column of a result workbook and turn it into returns.

    Parameters
    ----------
    result_excel_name : str
        Path of the Excel file; its 'History' sheet is read with the first
        column as index.
    start, end : optional
        When not None, the returns are sliced with ``ret[start:]`` and
        ``ret[:end]`` respectively.

    Returns
    -------
    pandas.Series
        Percentage changes of the balance series, indexed by datetime. The
        value 1e9 is prepended before differencing, so the first return is
        measured against 1e9.
    """
    history = pd.read_excel(result_excel_name, index_col=0, sheet_name='History')
    history.index = pd.to_datetime(history.index)

    # Prepend the 1e9 baseline so the first balance row also gets a return.
    seeded_balance = pd.concat([pd.Series(1e9), history['Balance']])
    returns = seeded_balance.pct_change().ffill().dropna()

    # The baseline row is gone after dropna; only timestamps remain in the index.
    returns.index = pd.to_datetime(returns.index)

    if start is not None:
        returns = returns[start:]
    if end is not None:
        returns = returns[:end]
    return returns


def cal_metric(y_true, y_pred):
    """Compute binary classification metrics of ``y_pred`` against ``y_true``.

    Parameters
    ----------
    y_true : pandas.Series or pandas.DataFrame
        Ground-truth labels. When ``y_pred`` is a DataFrame and ``y_true`` is
        one too, columns are matched by name; a Series ``y_true`` is reused for
        every prediction column.
    y_pred : pandas.Series or pandas.DataFrame
        Predicted labels.

    Returns
    -------
    tuple
        ``(precision, accuracy, recall, recall0, f1_score, f0_score)``, where
        ``recall0`` / ``f0_score`` are recall / F1 computed with ``pos_label=0``.
        For a Series ``y_pred`` the six entries are scalars. For a DataFrame
        ``y_pred`` they are float Series indexed by ``y_pred.columns`` and named
        'precision', 'accuracy', 'recall', 'recall0', 'f1_score', 'f0_score'.

    Notes
    -----
    ``y_true`` is aligned to ``y_pred.index`` and rows with a missing value on
    either side are dropped before scoring. If a scoring call raises, the
    error is printed and both metrics of that pair fall back to 0.
    """
    if isinstance(y_pred, pd.DataFrame):
        metric_names = ('precision', 'accuracy', 'recall', 'recall0', 'f1_score', 'f0_score')
        # Explicit float dtype: an empty ``pd.Series(index=...)`` defaults to
        # object dtype on pandas >= 2.0, which would leak into the results.
        metric_sers = tuple(
            pd.Series(index=y_pred.columns, name=metric_name, dtype=float)
            for metric_name in metric_names
        )
        for col in y_pred.columns:
            if isinstance(y_true, pd.DataFrame):
                tmp_y_true = y_true[col]
            else:
                tmp_y_true = y_true.copy()
            col_scores = cal_metric(tmp_y_true, y_pred[col])
            for metric_ser, score in zip(metric_sers, col_scores):
                metric_ser.loc[col] = score
        return metric_sers

    # Align truth to the prediction index and keep only rows present in both.
    y_true = y_true.reindex(y_pred.index)
    tmp = pd.concat([y_true, y_pred], axis=1).dropna()
    y_true = tmp.iloc[:, 0]
    y_pred = tmp.iloc[:, -1]

    try:
        prec = precision_score(y_true=y_true, y_pred=y_pred)
        acccc = accuracy_score(y_true=y_true, y_pred=y_pred)
    except Exception as e:
        print(e)
        prec = 0
        acccc = 0

    try:
        recall = recall_score(y_true=y_true, y_pred=y_pred)
        recall0 = recall_score(y_true=y_true, y_pred=y_pred, pos_label=0)
    except Exception as e:
        print(e)
        recall = 0
        recall0 = 0

    try:
        f1score = f1_score(y_true=y_true, y_pred=y_pred)
        f0score = f1_score(y_true=y_true, y_pred=y_pred, pos_label=0)
    except Exception as e:
        print(e)
        f1score = 0
        f0score = 0

    return prec, acccc, recall, recall0, f1score, f0score


In [13]:
# Directory of per-client result workbooks, plus the date window for returns.
base_no3vote = './result/2022/MPT_kpi/1.5.1b/'
mpt_v = '151b'
start = '2008'
end = '2022-07'

# Map client tag -> return series. The tag is the last '_'-separated token of
# the file name without its extension.
client_ret = {}
# os.listdir returns entries in arbitrary order; sorting keeps the dict order
# (and the column order of everything derived from it) reproducible.
for result_excel_name in sorted(os.listdir(base_no3vote)):
    if not result_excel_name.endswith('.xlsx'):
        continue
    client = os.path.splitext(result_excel_name)[0].split('_')[-1]
    ret = read_kpi2ret(os.path.join(base_no3vote, result_excel_name), start, end)
    ret.name = client
    client_ret[client] = ret

In [14]:
# Score every vote variant against each client's "return was non-negative" label.
metrics4portfolio = {}
for client, client_returns in client_ret.items():
    portfolioY = client_returns.ge(0).astype(int)
    metric_sers = cal_metric(portfolioY, vote_all_kinds)
    prec_ser, acccc_ser, recall_ser, recall0_ser, f1score_ser, f0score_ser = metric_sers
    # Column order of the per-client table: accuracy first, then the rest.
    ordered_sers = [acccc_ser, prec_ser, recall_ser, recall0_ser, f1score_ser, f0score_ser]
    metrics4portfolio[client] = pd.concat(ordered_sers, axis=1)

# Side-by-side tables, with the client tag as the outer column level.
metrics4portfolio_df = pd.concat(metrics4portfolio.values(), keys=metrics4portfolio.keys(), axis=1)
metrics4portfolio_df

Unnamed: 0_level_0,agg,agg,agg,agg,agg,agg,con,con,con,con,...,ficon,ficon,ficon,ficon,mod,mod,mod,mod,mod,mod
Unnamed: 0_level_1,accuracy,precision,recall,recall0,f1_score,f0_score,accuracy,precision,recall,recall0,...,recall,recall0,f1_score,f0_score,accuracy,precision,recall,recall0,f1_score,f0_score
no_3vote,0.560479,0.560479,1.000000,0.000000,0.718343,0.000000,0.559643,0.559643,1.000000,0.000000,...,1.000000,0.000000,0.711901,0.000000,0.557414,0.557414,1.0000,0.000000,0.715820,0.000000
trans_preds_202205v0_till202208___freqW,0.524805,0.559487,0.715564,0.281547,0.627973,0.342460,0.528428,0.561431,0.719124,0.286076,...,0.718608,0.285358,0.625686,0.349485,0.527871,0.559487,0.7195,0.286524,0.629484,0.349462
y_3030xxz___pp_05___x_GLD___freqW,0.543757,0.567852,0.778220,0.244769,0.656597,0.320465,0.542363,0.566401,0.777390,0.243671,...,0.778618,0.244860,0.651614,0.322528,0.542363,0.564949,0.7785,0.244962,0.654752,0.321488
y_3030xxz___pp_05___x_TIP___freqW,0.545151,0.564742,0.821979,0.192137,0.669502,0.270777,0.546544,0.565084,0.823705,0.194304,...,0.821987,0.191900,0.663951,0.271845,0.546544,0.563717,0.8250,0.195844,0.669779,0.276567
y_3030xxz___pp_05___x_TLT___freqW,0.528986,0.561189,0.731974,0.270133,0.635304,0.335169,0.534838,0.564621,0.737550,0.277215,...,0.735250,0.274143,0.633087,0.342412,0.527035,0.557758,0.7315,0.269521,0.632922,0.335292
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"y_3030xxz___pp_avgproba___x_avgproba(GLD,TIP,TLT,combine_3assets,combine_7assets)___cont4_rb_follow",0.562709,0.568294,0.914470,0.114141,0.700972,0.186625,0.560201,0.566440,0.912849,0.112025,...,0.913263,0.112150,0.694003,0.183955,0.560201,0.565204,0.9145,0.113980,0.698625,0.186598
"y_3030xxz___pp_avgproba___x_avgproba(LQD,TIP,VTI)___cont4_rb_follow",0.561037,0.567326,0.913476,0.111604,0.699943,0.182667,0.561315,0.567017,0.914343,0.112658,...,0.911246,0.108411,0.692205,0.178005,0.558528,0.564237,0.9135,0.111461,0.697595,0.182663
"y_3030xxz___pp_avgproba___x_avgproba(combine_3assets,combine_7assets)___cont4_rb_follow",0.547101,0.563950,0.846345,0.165504,0.676874,0.243130,0.545708,0.562624,0.845618,0.164557,...,0.845688,0.164486,0.670666,0.242759,0.545151,0.560968,0.8465,0.165617,0.674771,0.243744
y_3030xxz___pp_avgproba___x_combine_3assets___cont4_rb_follow,0.545987,0.566412,0.810045,0.209258,0.666667,0.288336,0.545708,0.565716,0.810259,0.209494,...,0.807363,0.205607,0.658983,0.284851,0.545151,0.563978,0.8110,0.210327,0.665299,0.290435


In [16]:
# Output path for the metrics table. The commented line below is an earlier
# variant that derived the path from the input CSV path and `mpt_v`.
# metrics_file = vote_down_buy_ratio_csv.replace('trans_preds', 'metrics-3voteContNdays_vs_'+mpt_v)
metrics_file = './result/2022/portfolioY/metrics-3voteContNdays_yv_ppv_x37asset_vs_151b.csv'
metrics_file

'./result/2022/portfolioY/metrics-3voteContNdays_yv_ppv_x37asset_vs_151b.csv'

In [17]:
# Persist the per-client metrics table; an existing file at `metrics_file` is overwritten.
metrics4portfolio_df.to_csv(metrics_file)