## Latent Factor Analysis - Factor Style Rotation

In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import NMF
import pyodbc
import pickle
import os
os.chdir('T:\\index\\95_곽용하\\운용\\코드\\')
import mf_3 as mf

# DATABASE
conn_quant = pyodbc.connect('driver={SQL Server};server=46.2.90.172;database=quant;uid=index;pwd=samsung@00')
conn_wisefn = pyodbc.connect('driver={SQL Server};server=46.2.90.172;database=wisefn;uid=index;pwd=samsung@00')

### Data

In [3]:
open_path = 'T:\\index\\999_quant\\run_kyh\\sector_data\\'

with open(open_path+'daily_factor_fwdrtn.pkl', 'rb') as f:
    df00 = pickle.load(f)

In [17]:
# 0~1 사이의 값으로 변환
df00_mm = df00.copy()
minmax_scaler = MinMaxScaler()
df00_mm = minmax_scaler.fit_transform(df00_mm.transpose())
df00_mm = df00_mm.transpose()

SRM = df00_mm.copy()



#### Step 1a: Matrix Factorization - Decompose SRM into DCM and SCM

In [28]:
num_latent_factors = 4  # Number of latent factors
nmf_model = NMF(n_components=num_latent_factors, init='random', random_state=42)
DCM = nmf_model.fit_transform(SRM)
SCM = nmf_model.components_

In [29]:
df_dcm = pd.DataFrame(DCM, index=df00.index, columns=['RGM_'+ str(x+1) for x in range(num_latent_factors)])
df_scm = pd.DataFrame(SCM, index=['RGM_'+ str(x+1) for x in range(num_latent_factors)], columns=df00.columns)

In [30]:
df_dcm

Unnamed: 0_level_0,RGM_1,RGM_2,RGM_3,RGM_4
TRD_DT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2006-01-31,1.536252,0.347715,0.523386,0.396093
2006-02-01,0.924354,0.000000,0.655369,0.569640
2006-02-02,1.463394,0.104572,0.887854,0.435596
2006-02-03,0.000000,0.973757,0.299522,0.551607
2006-02-06,0.774339,0.058031,0.663504,0.664878
...,...,...,...,...
2024-06-03,0.641350,0.093445,0.840144,0.000000
2024-06-04,0.166418,0.000000,0.804718,0.098428
2024-06-05,0.000000,0.817224,0.239687,0.184330
2024-06-07,1.207265,1.112663,0.111941,0.429067


In [31]:
df_scm

FactorGroup_sub,Growth,Market,Price Momentum,Price-reversal,Profitability,Sentiment,Stability,Value
RGM_1,0.0,0.688222,0.0,0.0,0.200128,0.024476,0.081186,0.0
RGM_2,0.539491,0.0,0.900481,0.0,0.493471,0.620535,0.119505,0.0
RGM_3,0.532812,0.0,0.0,1.132657,0.295494,0.0,0.047362,0.115622
RGM_4,0.0,0.116003,0.16061,0.053805,0.0,0.525065,1.098436,1.383704


#### Step 2a: Time Series Modeling - Predict future values of latent features

In [32]:

# For simplicity, we use ARIMA to predict future values of each latent factor

def predict_future_latent_features(DCM, num_future_periods=1):
    from statsmodels.tsa.arima.model import ARIMA
    future_DCM = np.zeros((num_future_periods, DCM.shape[1]))
    for k in range(DCM.shape[1]):
        model = ARIMA(DCM[:, k], order=(1, 1, 1))  # ARIMA model order can be tuned
        fitted_model = model.fit()
        future_values = fitted_model.forecast(steps=num_future_periods)
        future_DCM[:, k] = future_values
    return future_DCM

In [33]:
num_future_periods = 1
future_DCM = predict_future_latent_features(DCM, num_future_periods)

In [34]:
future_DCM

array([[0.655511  , 0.47943971, 0.44608087, 0.33674052]])

#### Step 3a: Estimate future returns based on the predicted market regime

In [35]:

future_SRM = np.dot(future_DCM, SCM)

print("Predicted future strategy returns (SRM):")
print(future_SRM)

Predicted future strategy returns (SRM):
[[0.49633035 0.49020011 0.4858099  0.52337488 0.49958987 0.49036425
  0.5015288  0.517526  ]]


### Step 4b: Incorporating Market Features (Optional step if market features data is available)

In [None]:
# We can extend this model to include market features as described in the paper

In [36]:
path_daily = 'T:\\index\\999_quant\\__data_pkl\\kgh\\tmp_daily\\'
with open(path_daily +'economy_all.pkl', 'rb') as f:
    econ = pickle.load(f) #76개의 데이터 종류

econ_pick = econ[['bnd_10y', 'bnd_1y', 'bnd_20y','bnd_3y','bnd_5y','cbnd_3y','cbnd_3y_jnk','cd_91d', #금리
                    'usd','euro','yen', #통화
                    ]] #경제성장

# 변화율
econ_pick_chg = econ_pick.pct_change(1).dropna()
econ_pick_chg = econ_pick_chg.reset_index().rename(columns={'date':'TRD_DT'})
econ_pick_chg['TRD_DT'] = pd.to_datetime(econ_pick_chg['TRD_DT'])
econ_pick_chg = econ_pick_chg[econ_pick_chg['TRD_DT'].isin(df00.index)].reset_index(drop=True).set_index('TRD_DT')

In [38]:
econ_pick_chg_1 = econ_pick_chg.copy()
econ_pick_chg_1['ls_sprd_1'] = econ_pick_chg_1.bnd_20y - econ_pick_chg_1.bnd_3y
econ_pick_chg_1['ls_sprd_2'] = econ_pick_chg_1.bnd_10y - econ_pick_chg_1.bnd_3y
econ_pick_chg_1['ls_sprd_3'] = econ_pick_chg_1.bnd_3y - econ_pick_chg_1.bnd_1y
econ_pick_chg_1['crdt_sprd_1'] = econ_pick_chg_1.cbnd_3y_jnk - econ_pick_chg_1.bnd_3y
econ_pick_chg_1['crdt_sprd_2'] = econ_pick_chg_1.cbnd_3y - econ_pick_chg_1.bnd_3y
econ_pick_chg_1['euro_dollar'] = econ_pick_chg_1.euro - econ_pick_chg_1.usd

econ_pick_chg_1 = econ_pick_chg_1[['ls_sprd_1','ls_sprd_2','ls_sprd_3','crdt_sprd_1','crdt_sprd_2','cd_91d','euro_dollar','usd']]

In [45]:
econ_pick_chg_1

code,ls_sprd_1,ls_sprd_2,ls_sprd_3,crdt_sprd_1,crdt_sprd_2,cd_91d,euro_dollar,usd
TRD_DT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2006-01-31,0.000289,0.000194,-0.002028,0.000922,0.002028,0.004808,-0.010025,0.003306
2006-02-01,-0.006098,-0.004932,-0.006098,-0.001654,-0.001283,0.004785,0.004499,-0.002472
2006-02-02,-0.000281,-0.000186,0.002045,-0.003161,-0.000186,0.002381,-0.006821,-0.007743
2006-02-03,-0.003162,-0.002783,0.005856,-0.004617,-0.004638,0.002375,0.002259,0.008948
2006-02-06,-0.001204,-0.001484,-0.004040,-0.000404,0.000350,0.004739,-0.004478,0.000928
...,...,...,...,...,...,...,...,...
2024-06-03,-0.011034,-0.007642,0.000651,0.003218,0.000564,0.000000,0.001714,0.003705
2024-06-04,-0.003124,-0.004741,-0.004848,0.008311,0.001653,-0.002770,0.005283,-0.002895
2024-06-05,0.005029,0.001747,-0.009111,0.008953,0.003038,0.000000,-0.002556,-0.003267
2024-06-07,-0.000815,-0.002058,-0.006582,0.007327,0.001780,0.000000,0.001105,-0.001165


In [44]:
MFM = econ_pick_chg_1.values

In [46]:
# Additional matrix to relate market features to latent factors

def relate_market_features_to_latent_factors(MFM, DCM):
    from sklearn.linear_model import LinearRegression
    FCM = np.zeros((num_latent_factors, len(econ_pick_chg_1.columns)))
    for f in range(len(econ_pick_chg_1.columns)):
        reg = LinearRegression().fit(DCM, MFM[:, f])
        FCM[:, f] = reg.coef_
    return FCM

In [47]:
FCM = relate_market_features_to_latent_factors(MFM, DCM)

In [48]:
FCM

array([[ 4.42151140e-04,  3.79806791e-04, -1.37138195e-04,
         7.46789712e-04,  6.28076478e-05, -2.93028287e-04,
        -2.86689235e-04, -2.38103564e-04],
       [ 3.44485191e-04,  4.20097205e-05, -3.37020892e-04,
        -5.07364435e-03, -4.28762924e-06,  2.98803411e-04,
        -7.66437436e-04,  9.45402822e-04],
       [ 4.57070358e-06, -1.34469618e-04,  2.85133026e-04,
        -5.21686414e-03, -3.27711624e-04,  7.17308229e-04,
        -6.50772734e-04, -1.45883273e-05],
       [-5.26876114e-04, -8.47054196e-04,  1.76214281e-03,
        -8.46475070e-03, -1.96117728e-03,  1.46226876e-03,
        -6.21502892e-04,  8.05484382e-04]])

In [49]:
# Predict future market features (this part is illustrative)
future_MFM = np.random.rand(num_future_periods, len(econ_pick_chg_1.columns))  # Simulated future market features

In [50]:
future_MFM

array([[0.08935001, 0.03572677, 0.33203118, 0.68380204, 0.02637344,
        0.4918585 , 0.34677862, 0.0159194 ]])

In [51]:
# Adjust future DCM based on future market features
def adjust_future_dcm_based_on_market_features(future_MFM, FCM, future_DCM):
    for f in range(len(econ_pick_chg_1.columns)):
        future_DCM[:, :] += future_MFM[:, f].reshape(-1, 1) @ FCM[:, f].reshape(1, -1)
    return future_DCM

adjusted_future_DCM = adjust_future_dcm_based_on_market_features(future_MFM, FCM, future_DCM)
adjusted_future_SRM = np.dot(adjusted_future_DCM, SCM)

print("Adjusted predicted future strategy returns (SRM):")
print(adjusted_future_SRM)

Adjusted predicted future strategy returns (SRM):
[[0.49257007 0.48982903 0.48174713 0.51931145 0.49684933 0.48557565
  0.49566562 0.51047421]]
