# 真槍實戰選基金

In [1]:
import numpy as np
import pandas as pd
from pycaret.regression import *
import statsmodels.api as sm
from statsmodels.regression.rolling import RollingOLS

In [2]:
# Base directories used below for the price CSVs, the ML feature CSVs
# and the saved PyCaret models.
path = "/Users/alex_chiang/Documents/Fin_tech/AI基金/classified/"
MLpath = "/Users/alex_chiang/Documents/Fin_tech/AI基金/data_ML/"
MODELpath = '/Users/alex_chiang/Documents/Fin_tech/AI基金/ML_result/pyCaret_model/'

# Per-fund table indexed by the parsed 'Datetime' column.
jp_equity = pd.read_csv(
    path + 'jp_equity.csv',
    index_col='Datetime',
    parse_dates=True,
)

# Previously saved PyCaret model for the same data set.
model_jp_equity = load_model(MODELpath + 'ML_jp_equity_model')

# Feature table keyed by (Datetime, fund_id), sorted on the index.
ML_jp_equity = pd.read_csv(
    MLpath + 'ML_jp_equity.csv',
    index_col=['Datetime', 'fund_id'],
    parse_dates=True,
).sort_index(level=0)

Transformation Pipeline and Model Successfully Loaded


In [3]:
def _fund_features(prices, mkt_ret, exog, window):
    """Compute the rolling feature table for a single fund.

    Args:
        prices: Series of the fund's values, indexed by date.
        mkt_ret: Series of the rolling mean of the cross-sectional average
            return (same date index as the fund's return series).
        exog: ``mkt_ret`` with a constant column prepended, used as the
            regressor matrix for the rolling OLS.
        window: Rolling window length, in rows.

    Returns:
        DataFrame with one column per feature (``X_mean`` ... ``X_beta``),
        restricted to rows where every feature is available.
    """
    ret = prices.pct_change().dropna()
    # Downside returns: gains are replaced by 0, losses are kept as-is.
    neg_ret = ret.clip(upper=0)

    # x1: mean return
    x_mean = ret.rolling(window).mean().rename('X_mean')
    # x2: standard deviation of returns
    x_std = ret.rolling(window).std().rename('X_std')
    # x3: standard deviation of the downside returns
    x_neg_std = neg_ret.rolling(window).std().rename('X_neg_std')
    # x4: skewness of returns
    x_skew = ret.rolling(window).skew().rename('X_skew')
    # x5: kurtosis of returns
    x_kurt = ret.rolling(window).kurt().rename('X_kurt')

    # x6: relative change of the mean return versus the previous row
    prev_mean = x_mean.shift(1)
    x_return_growth = ((x_mean - prev_mean) / prev_mean.abs()).rename('X_return_growth')

    # x7: fund mean return relative to the market mean return
    x_fund_to_market = (x_mean / mkt_ret).rename('X_fund_to_market_return')
    # x8: Sharpe-style ratio (mean / std)
    x_sharpe = (x_mean / x_std).rename('X_sharpe_ratio')
    # x9: Sortino-style ratio (mean / downside std)
    x_sortino = (x_mean / x_neg_std).rename('X_sortino_ratio')

    # x10: drawdown of the rolling cumulative log return from its rolling max
    log_ret = np.log(prices / prices.shift(1)).dropna()
    roll_cumsum = log_ret.rolling(window, min_periods=1).sum()
    roll_max = roll_cumsum.rolling(window, min_periods=1).max()
    x_max_drawdown = (roll_cumsum - roll_max).abs().rename('X_max_drawdown')

    # x11: Calmar-style ratio; a zero drawdown yields +/-inf, which is
    # treated as missing and filled from neighbouring rows.
    x_calmar = (
        (x_mean / x_max_drawdown)
        .replace([np.inf, -np.inf], np.nan)
        .ffill()
        .bfill()
        .rename('X_calmar_ratio')
    )

    # x12, x13: intercept and slope of the rolling regression of the fund's
    # mean return on the market mean return. Fit once and reuse the params.
    params = RollingOLS(endog=x_mean, exog=exog, window=window).fit().params
    x_alpha = params.iloc[:, 0].rename('X_alpha')
    x_beta = params.iloc[:, 1].rename('X_beta')

    return pd.concat(
        [
            x_mean, x_std, x_neg_std,
            x_skew, x_kurt, x_return_growth,
            x_fund_to_market, x_sharpe,
            x_sortino, x_max_drawdown,
            x_calmar, x_alpha, x_beta,
        ],
        axis=1,
    ).dropna()


def invest_funds(df, df_XY, model, *, top_n=5):
    """Rank funds by model prediction and return the best ones.

    The last 242 rows of ``df`` are used to build rolling (240-row) features
    for every column (fund); the features are scored with
    ``predict_model(model, ...)`` and the funds with the largest predicted
    ``'Label'`` are returned.

    Args:
        df: DataFrame with one column per fund, indexed by date.
        df_XY: Unused. Kept so existing callers that pass it keep working.
        model: Fitted model/pipeline accepted by ``predict_model``.
        top_n: Number of funds to return (default 5, the original behaviour).

    Returns:
        Index of the ``top_n`` fund ids with the highest predicted label.

    Raises:
        ValueError: If no fund has a complete feature row.
    """
    window = 240
    # window rows + 1 lost to pct_change + 1 lost to the shift in x6.
    lookback = 242

    df = df.iloc[-lookback:]
    fund_ids = df.columns

    # Market return: cross-sectional mean of fund returns, then rolling mean.
    mkt_ret = df.pct_change().mean(axis=1).to_frame().dropna()
    mkt_ret = mkt_ret.rolling(window).mean()
    # Loop-invariant regressor matrix for the rolling OLS.
    exog = sm.add_constant(mkt_ret)

    frames = []
    for fund in fund_ids:
        features = _fund_features(df[fund], mkt_ret[0], exog, window)
        features['fund_id'] = fund
        frames.append(features)

    df_ML = pd.concat(frames, axis=0) if frames else pd.DataFrame()
    if df_ML.empty:
        raise ValueError('no fund has a complete feature row')

    df_ML = df_ML.set_index('fund_id', append=True).sort_index(level=0)

    # Score the features. Predictions come back in the row order of df_ML,
    # so label them with df_ML's own fund_id level: sort_index() above also
    # orders the fund_id level, which need not match the column order of df,
    # and funds without a complete row are absent altogether.
    predicted = predict_model(model, data=df_ML)['Label']
    scored_ids = df_ML.index.get_level_values('fund_id').rename(fund_ids.name)
    df_predict = pd.DataFrame({'Label': predicted.to_numpy()}, index=scored_ids)
    return df_predict.nlargest(top_n, 'Label').index

In [4]:
# jp_equity: run the fund selection on the jp_equity table with its loaded
# model. The call is left as a bare expression so the returned Index of
# selected fund ids is shown as the cell output.
invest_funds(df = jp_equity, df_XY = ML_jp_equity, model = model_jp_equity)

Index(['L84', '71Q', '866', 'F71', '777'], dtype='object')