In [1]:
from imputation import core_utils, core_imputation_model
import numpy as np
from tqdm.notebook import tqdm

# Data Loading

This method loads the data from the corresponding `data_path`. This would be the feather file shared on Google Drive; it is too large to host on GitHub. It returns the characteristic percentile ranks as a numpy array of shape T x N x C (where T is the number of dates, N the number of stocks, and C the number of characteristics), along with the raw characteristics, the characteristic names, the dates, the returns, and the permnos.

In [2]:
# Relative path to the feather file that holds the characteristics/returns panel.
data_path = "data/raw_chars_returns_df_yearly_fb_monthly_avg.fthr"

# Load the panel. The loader returns six objects, unpacked here in order.
# NOTE(review): start_date looks like a YYYYMMDD-style integer -- confirm
# against core_utils.get_data_panel.
(
    percentile_rank_chars,
    raw_chars,
    chars,
    date_vals,
    returns,
    permnos,
) = core_utils.get_data_panel(
    path=data_path,
    computstat_data_present_filter=True,
    start_date=19770000,
)

  0%|          | 0/528 [00:00<?, ?it/s]

In [3]:
# Bind the CHAR_GROUPINGS constant exported by core_utils to a notebook-level name.
char_groupings = core_utils.CHAR_GROUPINGS

The two methods you will be interested in are:
- `core_imputation_model.fit_factors_and_loadings`
- `core_imputation_model.impute_chars`

The first generates the factors and loadings. 

The second runs the regressions to potentially combine time-series information with cross-sectional information and perform the imputation.

The examples below correspond to local and global fits; the parameters are documented in the function definitions.

# Local Fit

In [5]:
# Fit the factors and loadings, using every available date as a training month.
T = percentile_rank_chars.shape[0]
(
    gamma_ts,
    lmbda,
    return_mask,
    missing_counts,
) = core_imputation_model.fit_factors_and_loadings(
    percentile_rank_chars,
    returns,
    min_chars=10,
    K=6,
    num_months_train=T,
    time_varying_lambdas=False,
    adaptive_reg=True,
    min_reg=-3,
    max_reg=-1,
)

# Run the imputation once per mode. Each result is deleted immediately after
# its call returns, before the next call starts.
# NOTE(review): mode strings presumably mean BW = backward time-series and
# XS = cross-sectional -- confirm against impute_chars.

# 'BW-XS' mode, using the fitted factors.
local_bw = core_imputation_model.impute_chars(
    'BW-XS',
    gamma_ts,
    percentile_rank_chars,
    num_months_train=T,
    window_size=1,
)
del local_bw

# 'XS' mode, using the fitted factors.
local_xs = core_imputation_model.impute_chars(
    'XS',
    gamma_ts,
    percentile_rank_chars,
    num_months_train=T,
    window_size=1,
)
del local_xs

# 'BW' mode; None is passed in place of the fitted factors.
local_ts = core_imputation_model.impute_chars(
    'BW',
    None,
    percentile_rank_chars,
    num_months_train=T,
    window_size=1,
)
del local_ts

  0%|          | 0/528 [00:00<?, ?it/s]

(22351, 45)


  0%|          | 0/527 [00:00<?, ?it/s]

7


  0%|          | 0/45 [00:00<?, ?it/s]

6


  0%|          | 0/45 [00:00<?, ?it/s]

(22351, 45)


  0%|          | 0/527 [00:00<?, ?it/s]

1


  0%|          | 0/45 [00:00<?, ?it/s]

# Global Fit

In [6]:

# Fit the factors and loadings with time_varying_lambdas enabled, using every
# available date as a training month.
T = percentile_rank_chars.shape[0]
(
    gamma_ts,
    lmbda,
    return_mask,
    missing_counts,
) = core_imputation_model.fit_factors_and_loadings(
    percentile_rank_chars,
    returns,
    min_chars=10,
    K=6,
    num_months_train=T,
    time_varying_lambdas=True,
    adaptive_reg=True,
    min_reg=-3,
    max_reg=-1,
)

# Run the imputation once per mode. Each result is deleted immediately after
# its call returns, before the next call starts.
# NOTE(review): mode strings presumably mean FW = forward, BW = backward and
# XS = cross-sectional -- confirm against impute_chars.

# 'FWBW-XS' mode, no window size given.
global_fwbw = core_imputation_model.impute_chars(
    'FWBW-XS',
    gamma_ts,
    percentile_rank_chars,
    num_months_train=T,
    window_size=None,
)
del global_fwbw

# 'FW-XS' mode, no window size given.
global_fw = core_imputation_model.impute_chars(
    'FW-XS',
    gamma_ts,
    percentile_rank_chars,
    num_months_train=T,
    window_size=None,
)
del global_fw

# 'BW-XS' mode, no window size given.
global_bw = core_imputation_model.impute_chars(
    'BW-XS',
    gamma_ts,
    percentile_rank_chars,
    num_months_train=T,
    window_size=None,
)
del global_bw

# 'XS' mode; unlike the calls above, this one passes window_size=1.
global_xs = core_imputation_model.impute_chars(
    'XS',
    gamma_ts,
    percentile_rank_chars,
    num_months_train=T,
    window_size=1,
)
del global_xs

  0%|          | 0/528 [00:00<?, ?it/s]

  0%|          | 0/527 [00:00<?, ?it/s]

(22351, 45)


  0%|          | 0/527 [00:00<?, ?it/s]

  0%|          | 0/312172636 [00:00<?, ?it/s]

8


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/527 [00:00<?, ?it/s]

7


  0%|          | 0/45 [00:00<?, ?it/s]

(22351, 45)


  0%|          | 0/527 [00:00<?, ?it/s]

7


  0%|          | 0/45 [00:00<?, ?it/s]

6


  0%|          | 0/45 [00:00<?, ?it/s]