# National Model 5

- Equifax, TU, Experian National model 
- Feature selection on PenFed data + Org data model (`clientmodel4a`)
- Updated time split on Equifax
    - Training: 2016-04-01 to 2020-12-31
    - Valid: 2021-01-01 to 2021-03-31
    - Test: 2021-04-01 to 2021-12-31

The notebook covers all three steps for creating a new national model: feature selection, validation model, and fold-in model. Depending on the applied use case, not every step may be necessary (e.g., if the feature selection does not change compared to the previous iteration, step 1 may be skipped).

In [1]:
import pandas as pd
import numpy as np

import model_engine
model_engine.__version__

'v1.13.1'

In [2]:
from model_engine.power.post_sale import NationalModelBuilder
from model_engine.io.loaders import load_json

import json




In [3]:
import os
import numba as nb
# Thread budget for numba. Honour CPU_LIMIT when it is set and parseable,
# otherwise fall back to the previously hard-coded value of 20.
# os.getenv returns a string (or None), so it must be converted before it is
# handed to numba, which only accepts a positive integer.
DEFAULT_N_JOBS = 20
_cpu_limit_raw = os.getenv('CPU_LIMIT')
try:
    # int(float(...)) tolerates fractional limits such as "3.5".
    n_jobs = int(float(_cpu_limit_raw)) if _cpu_limit_raw else DEFAULT_N_JOBS
except ValueError:
    n_jobs = DEFAULT_N_JOBS

# set_num_threads rejects values outside [1, NUMBA_NUM_THREADS]; clamp so the
# cell also runs on machines with fewer cores than requested.
n_jobs = max(1, min(n_jobs, nb.config.NUMBA_NUM_THREADS))
print(n_jobs)
nb.set_num_threads(n_jobs)

20


In [4]:
# Reload imported modules automatically before each cell execution, so edits
# to local packages are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [5]:
# Identifiers describing this run: client, project and bureau.
input_client_name, input_project_name, input_bureau_name = (
    'penfed',
    'autoloan',
    'normalized',
)

# model_type options: 'standard_model', 'mega_model',
# 'standard_model_member_data', 'standard_model_ln_data'.
# An empty string or None defaults to 'standard_model'.
model_type = 'standard_model_member_data'

print(input_client_name, input_project_name, input_bureau_name, model_type)


penfed autoloan normalized standard_model_member_data


## Create a national model given pre-selected features

In [6]:
# Id of the client's feature-selection model; its output directory supplies
# the keep_features.json used for the national model below.

feature_selection_model_id = 'clientmodel4a'

In [7]:
# Output directory of the client feature-selection model.
feature_selection_model_output_path = '/'.join([
    '/d/shared/silver_projects_v2/penfed/autoindirectv1/modeling',
    feature_selection_model_id,
])
feature_selection_model_output_path

'/d/shared/silver_projects_v2/penfed/autoindirectv1/modeling/clientmodel4a'

In [8]:
# Load the features kept by the client feature-selection model.
# A context manager closes the file handle instead of leaking it.
keep_features_path = os.path.join(feature_selection_model_output_path, 'keep_features.json')
with open(keep_features_path) as keep_features_file:
    keep_features = json.load(keep_features_file)

# Remove the 'client_data' group before the dict is reused for the national
# model. The default makes the cell safe to re-run when the key is already gone.
keep_features.pop('client_data', None)

[]

In [9]:
# Show which feature groups remain after removing 'client_data'.
keep_features.keys()

dict_keys(['trade', 'inq', 'collec', 'bnkr', 'member'])

In [10]:
# Persist the reduced feature set next to the original keep_features.json,
# under the name referenced by the national model configuration.
national_keep_features_path = os.path.join(
    feature_selection_model_output_path,
    'keep_features_national_model.json',
)
with open(national_keep_features_path, "w") as out_file:
    out_file.write(json.dumps(keep_features, indent=4))

In [11]:
# Display the full path of the national-model feature file written above.
os.path.join(feature_selection_model_output_path, 'keep_features_national_model.json')

'/d/shared/silver_projects_v2/penfed/autoindirectv1/modeling/clientmodel4a/keep_features_national_model.json'

In [13]:
# Id and output directory of the national model built in this notebook.
# NOTE(review): the notebook title says "National Model 5" but the id here is
# 'nationalmodel4'; the builder below runs with overwrite_model_output=True,
# so confirm this is the intended target directory.
national_model_id = 'nationalmodel4'
national_model_output_path = '/'.join([
    '/d/shared/silver_projects_v2/penfed/autoindirectv1/modeling/nationalmodels',
    national_model_id,
])
national_model_output_path

'/d/shared/silver_projects_v2/penfed/autoindirectv1/modeling/nationalmodels/nationalmodel4'

In [14]:
# Configuration passed to NationalModelBuilder.
#   - "lookalike": which bureau / account types / splits / tables to pull.
#   - "model_config": date windows per split, target, and feature inputs.
# NOTE(review): "model_type" is hard-coded to "standard_model" although the
# `model_type` variable set earlier is 'standard_model_member_data' and the
# 'member' table is requested below — confirm which value is intended.
config = {
    "model_type": "standard_model",
    "storage_location": "s3",
    "lookalike": {
        "bureau": "normalized",
        "accounts": [
            "auto"
        ],
        "splits": [
            "train",
            "valid",
            "test"
        ],
        "pull_names": None,
        "states": None,
        "max_row": None,
        "tables": ['trade', 'inq', 'bnkr', 'collec', 'target', 'app', 'member'],
    },
    "model_config": {
        # Each window's end_date equals the next window's start_date.
        # Presumably end_date is exclusive — TODO confirm.
        "data_split": {
            "train": {
                "start_date": "2016-04-01",
                "end_date": "2021-01-01"
            },
            "valid": {
                "start_date": "2021-01-01",
                "end_date": "2021-04-01"
            },
            "test": {
                "start_date": "2021-04-01",
                "end_date": "2022-01-01"
            }
        },
        "target": "final_DQ60_m24",
        # Same file that was written from `keep_features` earlier; built from
        # the shared output path so the two locations cannot drift apart.
        "base_features": os.path.join(
            feature_selection_model_output_path,
            'keep_features_national_model.json',
        ),
        # Optional, currently disabled:
        # "feature_filter_rules": "/home/lyt/CODEBASE/model-engine_penfed/model-engine/model_engine/assets/power/feature_rules/intuitive_features.json",
        "memory_efficient": True,
        "pipeline_factory": None,
        "bivariate_fe_instructions": [],
        "monotonic_constraints_list": [],
        "exclusion_list": [],
        "feature_definition_list": [],
        "key_factor_mapping_list": [],
        "fold_valid": True,
        "mandatory_features": []
    }
}

In [None]:
# Build the national model from `config` and write it to
# `national_model_output_path`.
# overwrite_model_output=True is passed, so check the output path before running.
nmb = NationalModelBuilder(configuration=config, model_output_path=national_model_output_path, overwrite_model_output=True)
nmb.run()

Attempting to create a national model with full data split here /d/shared/silver_projects_v2/penfed/autoindirectv1/modeling/nationalmodels/nationalmodel4/supporting_model
Sampling experian, national_1.3


In [None]:
# Feature importance of the client feature-selection model.
# Stored under its own name so the national-model frame loaded afterwards
# does not overwrite it and the two can be compared.
client_ftr_importance = pd.read_parquet(
    os.path.join(feature_selection_model_output_path, 'feature_importance.parquet')
)
client_ftr_importance.shape

In [None]:
# Feature importance produced by the national model run.
national_ftr_importance_path = os.path.join(
    national_model_output_path,
    'feature_importance.parquet',
)
ftr_importance = pd.read_parquet(national_ftr_importance_path)
ftr_importance.shape

In [None]:
# Score files in the national model output directory are named
# "<split>_<metric>.json"; collect AUC and KS for the train and test splits.
scoreFiles = [
    split + suffix
    for suffix in ['_auc.json', '_ks.json']
    for split in ['train', 'test']
]

# Map "<split>_<metric>" -> parsed JSON content.
# Each file is opened in a context manager so its handle is closed promptly.
scoreFiles_model = {}
for score_file in scoreFiles:
    name = score_file.split('.')[0]
    with open(os.path.join(national_model_output_path, score_file), 'rb') as score_fh:
        scoreFiles_model[name] = json.load(score_fh)

for name, score in scoreFiles_model.items():
    print(name, score)