In [None]:
from Models.framework_utils import *
from Models.diagnostic_utils import *

# Set up API access. The credential pair returned by get_numerapi_config() is
# passed straight to get_napi_and_models(), which yields `napi` and `modelnameids`.
# NOTE(review): presumably `napi` is the API client and `modelnameids` maps model
# names to ids -- confirm in Models.framework_utils.
public_id, secret_key = get_numerapi_config()
napi, modelnameids = get_napi_and_models(public_id, secret_key)

# Dataset directory is <cwd>/train_datasets/<ds_version>; because it is built
# from os.getcwd(), the kernel's working directory decides where data is read from.
ds_version = "v4.2"
dataset_loc = os.path.join(os.getcwd(), 'train_datasets', ds_version)
# NOTE(review): judging by its name, this helper refreshes the local files for
# `ds_version` and returns the current round -- confirm in Models.framework_utils.
currentRound = get_update_training_data(napi, dataset_loc, ds_version)

In [None]:
np.random.seed(42)
print("# Loading data... ", end='')

# Resolve every input file up front so the expected layout of `dataset_loc`
# is visible in one place.
train_path = os.path.join(dataset_loc, 'train_int8.parquet')
validation_path = os.path.join(dataset_loc, 'validation_int8.parquet')
features_path = os.path.join(dataset_loc, "features.json")
benchmark_path = os.path.join(dataset_loc, 'validation_benchmark_models.parquet')

# Training split: `train` is indexed by column name further down (features and
# 'target'); per the original author's note, TI holds the era indices.
# `targets` receives the extra value returned when return_target_names=True.
train, TI, targets = processData(train_path, return_target_names=True)

# Validation split: same loader, without the target-name list.
validation, VI = processData(validation_path, return_target_names=False)

# Feature metadata; only the named feature groups are used by later cells.
with open(features_path, "r") as f:
    feature_metadata = json.load(f)
feature_sets = feature_metadata['feature_sets']

# Benchmark-model predictions on validation, used later as a comparison series.
BVP = pd.read_parquet(benchmark_path, engine="fastparquet")

gc.collect()
print("done")

In [None]:
import lightgbm

# Hyperparameters for this run. The trailing comment on each line carries the
# alternative value noted by the original author
# (NOTE(review): these look like the larger/slower configuration -- confirm).
params = dict(
    n_estimators=200,        # 2000
    learning_rate=0.1,       # 0.01
    max_depth=4,             # 5
    num_leaves=2 ** 4 - 1,   # 2 ** 5 - 1
    colsample_bytree=0.10,   # 0.1
)

lgbm_model = lightgbm.LGBMRegressor(**params)

# Fit on every column listed under 'all_features' against the 'target' column.
# Kept as the cell's last expression so the fitted estimator is still displayed.
lgbm_model.fit(train[feature_sets['all_features']], train['target'])

In [None]:
# Validation predictions: score the validation frame on the 'all_features'
# columns, i.e. the same column list that is passed to lgbm_model.fit().
VP = lgbm_model.predict(validation[feature_sets['all_features']])

In [None]:
# Trim the validation era indices before scoring (original author's intent):
#   * drop the first 5 eras to prevent lookahead bias from the training data;
#   * drop the last 5 eras because they are unresolved.
# NOTE(review): the slice removes 5 *entries* from each end of VI, which only
# equals 5 eras if VI holds one entry per era -- confirm against processData.
VId = VI[5:-5]

# Diagnostics on the validation predictions, with the benchmark series passed
# in as the comparison under the label 'example_preds'.
validation_diagnostics, validation_corrs = run_diagnostics(
    VP,
    validation['target'],
    validation[feature_sets['fnc_v3_features']],
    VId,
    featexp=True,
    fnc=True,
    print_output=True,
    graph_corrs=True,
    compare={'example_preds': BVP['v42_teager_plus_cyrus']},
)

In [None]:
# Persist the trained booster and the feature list it was fitted on, side by
# side, under <cwd>/Models/Modeldata.
model_dir = os.path.join(os.getcwd(), 'Models', 'Modeldata')

# Create the output directory first: neither save call below creates missing
# parent directories, so without this the cell fails on a fresh checkout
# after the expensive training step has already run.
os.makedirs(model_dir, exist_ok=True)

# Underlying LightGBM booster, written with its own save_model().
lgbm_model.booster_.save_model(os.path.join(model_dir, 'lgbm_model.txt'))
# Feature names in the same order as the columns passed to fit().
np.save(os.path.join(model_dir, 'lgbm_model_features.npy'), feature_sets['all_features'])