In [2]:
# import all necessary references
import pandas as pd
import importlib.metadata
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import yaml
from pprint import pprint
from math import floor
from pathlib import PurePath, Path
from kaggle_hcdr.utils import get_dir_path
from sklearn.model_selection import train_test_split


# Print the versions of the main libraries used in this notebook.
# The matplotlib version is looked up through importlib.metadata; the other
# libraries are queried through their __version__ attribute.
print(

    f"""
Pandas version: {pd.__version__}
Matplotlib Pyplot version: {importlib.metadata.version("matplotlib")}
Seaborn version: {sns.__version__}
PyYAML version: {yaml.__version__}    
    """
)


Pandas version: 2.0.0
Matplotlib Pyplot version: 3.7.1
Seaborn version: 0.12.2
PyYAML version: 6.0    
    


In [3]:
# Pandas configuration
pd.set_option("display.max_rows", 150)
pd.set_option("display.max_columns", 150)
pd.set_option("display.max_colwidth", None)

# Plot configuration
%matplotlib widget

# Path configuration
current_directory = Path.cwd()
current_directory

PosixPath('/home/xlevb/projects/kaggle-home-credit-default-risk/notebooks/iteration_1')

In [4]:
# Resolve the project's data and configuration directories
data_directory = get_dir_path("data")
raw_data_directory = data_directory.joinpath("01_raw")

# Raw application CSV files
raw_application_test_file = raw_data_directory.joinpath("application_test.csv")
raw_application_train_file = raw_data_directory.joinpath("application_train.csv")

# Project parameters file
conf_directory = get_dir_path("conf")
project_parameters_file = conf_directory.joinpath("base", "parameters.yml")

In [5]:
# Load the project parameters from the YAML configuration file
with open(project_parameters_file, mode="r") as parameters_stream:
    project_parameters = yaml.safe_load(parameters_stream)

pprint(project_parameters)

{'dataset_variables_types': {'application_test': {'categorical_nominal': ['CODE_GENDER',
                                                                          'FLAG_WORK_PHONE',
                                                                          'FLAG_EMP_PHONE',
                                                                          'NAME_CONTRACT_TYPE',
                                                                          'FLAG_OWN_REALTY',
                                                                          'WEEKDAY_APPR_PROCESS_START',
                                                                          'FONDKAPREMONT_MODE',
                                                                          'FLAG_EMAIL',
                                                                          'WALLSMATERIAL_MODE',
                                                                          'FLAG_OWN_CAR',
                                                               

In [6]:
def _read_application_csv(csv_path):
    """Read one raw application CSV into a DataFrame indexed by ``SK_ID_CURR``.

    Both raw files are read with the same options: the pyarrow parsing
    engine, ``parse_dates=True`` and the ``unicode_escape`` encoding.
    ``dtype_backend`` is not passed, so the pandas default applies.
    """
    return pd.read_csv(
        filepath_or_buffer=csv_path,
        index_col="SK_ID_CURR",
        parse_dates=True,
        engine="pyarrow",
        encoding="unicode_escape",
    )


raw_application_train_df = _read_application_csv(raw_application_train_file)
raw_application_test_df = _read_application_csv(raw_application_test_file)

In [7]:
# Separate the target column(s) named in the project parameters from the
# input variables of the labelled (train) application data.
target_columns = project_parameters.get("target")
pre_feat_application_w_target = raw_application_train_df[target_columns].copy()
pre_feat_application_wo_target = raw_application_train_df.drop(columns=target_columns)

In [8]:
# Hold out 25% of the labelled applications. The fixed random_state makes the
# shuffled split repeatable.
# NOTE(review): no `stratify` argument is passed — confirm an unstratified
# split is intended for this target.
split_frames = train_test_split(
    pre_feat_application_wo_target,
    pre_feat_application_w_target,
    test_size=0.25,
    random_state=42,
    shuffle=True,
)
(
    pre_feat_application_wo_target_train,
    pre_feat_application_wo_target_test,
    pre_feat_application_w_target_train,
    pre_feat_application_w_target_test,
) = split_frames

In [9]:
# Report the shapes of the four frames produced by the train/test split
# (input variables and target, for the training and held-out parts).
print(f"""
Training dataset input variables: {pre_feat_application_wo_target_train.shape}
Training dataset output variables: {pre_feat_application_w_target_train.shape}
Test dataset input variables: {pre_feat_application_wo_target_test.shape}
Test dataset output variables: {pre_feat_application_w_target_test.shape}
""")


Training dataset input variables: (230633, 120)
Training dataset output variables: (230633, 1)
Test dataset input variables: (76878, 120)
Test dataset output variables: (76878, 1)



In [52]:
# Nominal categorical variables declared for application_train in the parameters
nominal_columns = project_parameters["dataset_variables_types"]["application_train"]["categorical_nominal"]

# Keep only the nominal variables that are stored with object dtype
string_nominal_columns = (
    pre_feat_application_wo_target_train[nominal_columns]
    .select_dtypes(include="object")
    .columns.to_list()
)

# ORGANIZATION_TYPE and OCCUPATION_TYPE are added to the list explicitly
categorical_columns_str = string_nominal_columns + ["ORGANIZATION_TYPE", "OCCUPATION_TYPE"]
categorical_columns_str

['CODE_GENDER',
 'NAME_CONTRACT_TYPE',
 'FLAG_OWN_REALTY',
 'WEEKDAY_APPR_PROCESS_START',
 'FONDKAPREMONT_MODE',
 'WALLSMATERIAL_MODE',
 'FLAG_OWN_CAR',
 'NAME_INCOME_TYPE',
 'HOUSETYPE_MODE',
 'NAME_HOUSING_TYPE',
 'NAME_EDUCATION_TYPE',
 'EMERGENCYSTATE_MODE',
 'NAME_FAMILY_STATUS',
 'NAME_TYPE_SUITE',
 'ORGANIZATION_TYPE',
 'OCCUPATION_TYPE']

In [53]:
# Transform categorical variables: one-hot encode the string-valued categorical
# columns of the training split into integer indicator columns.
feat_application_wo_target_train = pd.get_dummies(
    data=pre_feat_application_wo_target_train.copy(),
    columns=categorical_columns_str,
    dtype="int",
)

In [54]:
# Display the one-hot encoded training features for a visual check
feat_application_wo_target_train

Unnamed: 0_level_0,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,APARTMENTS_AVG,BASEMENTAREA_AVG,YEARS_BEGINEXPLUATATION_AVG,YEARS_BUILD_AVG,COMMONAREA_AVG,ELEVATORS_AVG,ENTRANCES_AVG,FLOORSMAX_AVG,FLOORSMIN_AVG,LANDAREA_AVG,LIVINGAPARTMENTS_AVG,LIVINGAREA_AVG,NONLIVINGAPARTMENTS_AVG,NONLIVINGAREA_AVG,APARTMENTS_MODE,BASEMENTAREA_MODE,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_MODE,COMMONAREA_MODE,ELEVATORS_MODE,ENTRANCES_MODE,FLOORSMAX_MODE,FLOORSMIN_MODE,LANDAREA_MODE,LIVINGAPARTMENTS_MODE,LIVINGAREA_MODE,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_MODE,APARTMENTS_MEDI,BASEMENTAREA_MEDI,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BUILD_MEDI,COMMONAREA_MEDI,ELEVATORS_MEDI,ENTRANCES_MEDI,FLOORSMAX_MEDI,FLOORSMIN_MEDI,LANDAREA_MEDI,LIVINGAPARTMENTS_MEDI,LIVINGAREA_MEDI,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAREA_MEDI,TOTALAREA_MODE,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,...,ORGANIZATION_TYPE_Bank,ORGANIZATION_TYPE_Business Entity Type 1,ORGANIZATION_TYPE_Business Entity Type 2,ORGANIZATION_TYPE_Business Entity Type 3,ORGANIZATION_TYPE_Cleaning,ORGANIZATION_TYPE_Construction,ORGANIZATION_TYPE_Culture,ORGANIZATION_TYPE_Electricity,ORGANIZATION_TYPE_Emergency,ORGANIZATION_TYPE_Government,ORGANIZATION_TYPE_Hotel,ORGANIZATION_TYPE_Housing,ORGANIZATION_TYPE_Industry: type 1,ORGANIZATION_TYPE_Industry: type 10,ORGANIZATION_TYPE_Industry: type 11,ORGANIZATION_TYPE_Industry: type 12,ORGANIZATION_TYPE_Industry: type 13,ORGANIZATION_TYPE_Industry: type 2,ORGANIZATION_TYPE_Industry: type 
3,ORGANIZATION_TYPE_Industry: type 4,ORGANIZATION_TYPE_Industry: type 5,ORGANIZATION_TYPE_Industry: type 6,ORGANIZATION_TYPE_Industry: type 7,ORGANIZATION_TYPE_Industry: type 8,ORGANIZATION_TYPE_Industry: type 9,ORGANIZATION_TYPE_Insurance,ORGANIZATION_TYPE_Kindergarten,ORGANIZATION_TYPE_Legal Services,ORGANIZATION_TYPE_Medicine,ORGANIZATION_TYPE_Military,ORGANIZATION_TYPE_Mobile,ORGANIZATION_TYPE_Other,ORGANIZATION_TYPE_Police,ORGANIZATION_TYPE_Postal,ORGANIZATION_TYPE_Realtor,ORGANIZATION_TYPE_Religion,ORGANIZATION_TYPE_Restaurant,ORGANIZATION_TYPE_School,ORGANIZATION_TYPE_Security,ORGANIZATION_TYPE_Security Ministries,ORGANIZATION_TYPE_Self-employed,ORGANIZATION_TYPE_Services,ORGANIZATION_TYPE_Telecom,ORGANIZATION_TYPE_Trade: type 1,ORGANIZATION_TYPE_Trade: type 2,ORGANIZATION_TYPE_Trade: type 3,ORGANIZATION_TYPE_Trade: type 4,ORGANIZATION_TYPE_Trade: type 5,ORGANIZATION_TYPE_Trade: type 6,ORGANIZATION_TYPE_Trade: type 7,ORGANIZATION_TYPE_Transport: type 1,ORGANIZATION_TYPE_Transport: type 2,ORGANIZATION_TYPE_Transport: type 3,ORGANIZATION_TYPE_Transport: type 4,ORGANIZATION_TYPE_University,ORGANIZATION_TYPE_XNA,OCCUPATION_TYPE_,OCCUPATION_TYPE_Accountants,OCCUPATION_TYPE_Cleaning staff,OCCUPATION_TYPE_Cooking staff,OCCUPATION_TYPE_Core staff,OCCUPATION_TYPE_Drivers,OCCUPATION_TYPE_HR staff,OCCUPATION_TYPE_High skill tech staff,OCCUPATION_TYPE_IT staff,OCCUPATION_TYPE_Laborers,OCCUPATION_TYPE_Low-skill Laborers,OCCUPATION_TYPE_Managers,OCCUPATION_TYPE_Medicine staff,OCCUPATION_TYPE_Private service staff,OCCUPATION_TYPE_Realty agents,OCCUPATION_TYPE_Sales staff,OCCUPATION_TYPE_Secretaries,OCCUPATION_TYPE_Security staff,OCCUPATION_TYPE_Waiters/barmen staff
SK_ID_CURR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1
400186,0,270000.0,539100.0,27652.5,450000.0,0.010147,-19802,-6322,-5503.0,-3336,,1,1,1,1,0,0,1.0,2,2,15,0,0,0,0,0,0,0.572197,0.575546,0.165407,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
173375,0,93960.0,157500.0,7875.0,157500.0,0.011703,-15032,-127,-7331.0,-4563,,1,1,1,1,0,0,1.0,2,2,13,1,1,0,1,1,0,,0.325363,0.411849,0.0722,0.0835,0.9781,,,0.00,0.1379,0.1667,,,,0.0669,,0.0000,0.0735,0.0867,0.9782,,,0.0000,0.1379,0.1667,,,,0.0697,,0.0000,0.0729,0.0835,0.9781,,,0.00,0.1379,0.1667,,,,0.0681,,0.0000,0.0566,0.0,0.0,...,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
119338,0,85500.0,156384.0,16155.0,135000.0,0.010556,-19673,-3969,-6836.0,-3189,,1,1,0,1,0,0,2.0,3,3,10,0,0,0,0,1,1,0.550870,0.437946,0.598926,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.0,0.0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
280147,0,99000.0,443088.0,24034.5,382500.0,0.028663,-15839,-4227,-8333.0,-4238,,1,1,1,1,1,0,2.0,2,2,20,0,0,0,0,0,0,0.569085,0.717100,0.720944,0.0165,0.0000,0.9811,,,0.00,0.0690,0.0417,,,,0.0000,,,0.0168,0.0000,0.9811,,,0.0000,0.0690,0.0417,,,,0.0000,,,0.0167,0.0000,0.9811,,,0.00,0.0690,0.0417,,,,0.0000,,,0.0121,0.0,0.0,...,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
161534,0,45000.0,99000.0,5683.5,99000.0,0.031329,-18660,-173,-1951.0,-1674,,1,1,1,1,1,0,1.0,2,2,9,0,0,0,0,0,0,,0.262258,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239009,0,121500.0,675000.0,32602.5,675000.0,0.006305,-14481,-1124,-3813.0,-4134,,1,1,0,1,0,0,1.0,3,3,5,0,0,0,0,0,0,,0.617180,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
399937,2,225000.0,509922.0,40419.0,472500.0,0.003069,-15492,-292,-1613.0,-4465,,1,1,0,1,1,0,4.0,3,3,12,0,0,0,0,0,0,0.455170,0.722398,0.370650,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.0,0.0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
253016,0,450000.0,1125000.0,33025.5,1125000.0,0.014520,-14309,-5929,-5038.0,-5123,1.0,1,1,0,1,0,1,1.0,2,2,8,0,0,0,0,0,0,0.305919,0.608427,0.554947,0.2227,0.1578,0.9856,0.8028,0.0531,0.24,0.2069,0.3333,0.3750,0.0632,0.1807,0.2292,0.0039,0.0046,0.2269,0.1638,0.9856,0.8105,0.0536,0.2417,0.2069,0.3333,0.3750,0.0647,0.1974,0.2388,0.0039,0.0048,0.2248,0.1578,0.9856,0.8054,0.0534,0.24,0.2069,0.3333,0.3750,0.0643,0.1838,0.2334,0.0039,0.0047,0.2375,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
270275,1,180000.0,270000.0,13500.0,270000.0,0.010006,-12706,-1868,-923.0,-4406,3.0,1,1,0,1,0,0,3.0,2,1,11,0,0,0,0,1,1,0.611373,0.597959,0.259468,0.2495,0.0098,0.9980,0.9728,0.0817,0.32,0.1379,0.5417,0.5833,0.0838,0.1967,0.2466,0.0309,0.1974,0.2542,0.0102,0.9980,0.9739,0.0825,0.3222,0.1379,0.5417,0.5833,0.0857,0.2149,0.2569,0.0311,0.2089,0.2519,0.0098,0.9980,0.9732,0.0823,0.32,0.1379,0.5417,0.5833,0.0853,0.2001,0.2510,0.0311,0.2015,0.2863,0.0,0.0,...,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0


In [55]:
# One-hot encode the held-out split with the same categorical columns, then
# align the result to the training feature columns.
# pd.get_dummies only creates indicator columns for the categories present in
# the frame it is given, so encoding the two splits independently can yield
# different (or differently ordered) columns whenever a category is missing
# from one of them. Reindexing on the training columns guarantees an identical
# feature layout: indicator columns absent from this split are added and
# filled with 0, and columns unknown to the training split are dropped.
feat_application_wo_target_test = pd.get_dummies(
    pre_feat_application_wo_target_test.copy(),
    columns=categorical_columns_str,
    dtype="int",
).reindex(columns=feat_application_wo_target_train.columns, fill_value=0)

In [56]:
# Display the one-hot encoded held-out features for a visual check
feat_application_wo_target_test

Unnamed: 0_level_0,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,APARTMENTS_AVG,BASEMENTAREA_AVG,YEARS_BEGINEXPLUATATION_AVG,YEARS_BUILD_AVG,COMMONAREA_AVG,ELEVATORS_AVG,ENTRANCES_AVG,FLOORSMAX_AVG,FLOORSMIN_AVG,LANDAREA_AVG,LIVINGAPARTMENTS_AVG,LIVINGAREA_AVG,NONLIVINGAPARTMENTS_AVG,NONLIVINGAREA_AVG,APARTMENTS_MODE,BASEMENTAREA_MODE,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_MODE,COMMONAREA_MODE,ELEVATORS_MODE,ENTRANCES_MODE,FLOORSMAX_MODE,FLOORSMIN_MODE,LANDAREA_MODE,LIVINGAPARTMENTS_MODE,LIVINGAREA_MODE,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_MODE,APARTMENTS_MEDI,BASEMENTAREA_MEDI,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BUILD_MEDI,COMMONAREA_MEDI,ELEVATORS_MEDI,ENTRANCES_MEDI,FLOORSMAX_MEDI,FLOORSMIN_MEDI,LANDAREA_MEDI,LIVINGAPARTMENTS_MEDI,LIVINGAREA_MEDI,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAREA_MEDI,TOTALAREA_MODE,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,...,ORGANIZATION_TYPE_Bank,ORGANIZATION_TYPE_Business Entity Type 1,ORGANIZATION_TYPE_Business Entity Type 2,ORGANIZATION_TYPE_Business Entity Type 3,ORGANIZATION_TYPE_Cleaning,ORGANIZATION_TYPE_Construction,ORGANIZATION_TYPE_Culture,ORGANIZATION_TYPE_Electricity,ORGANIZATION_TYPE_Emergency,ORGANIZATION_TYPE_Government,ORGANIZATION_TYPE_Hotel,ORGANIZATION_TYPE_Housing,ORGANIZATION_TYPE_Industry: type 1,ORGANIZATION_TYPE_Industry: type 10,ORGANIZATION_TYPE_Industry: type 11,ORGANIZATION_TYPE_Industry: type 12,ORGANIZATION_TYPE_Industry: type 13,ORGANIZATION_TYPE_Industry: type 2,ORGANIZATION_TYPE_Industry: type 
3,ORGANIZATION_TYPE_Industry: type 4,ORGANIZATION_TYPE_Industry: type 5,ORGANIZATION_TYPE_Industry: type 6,ORGANIZATION_TYPE_Industry: type 7,ORGANIZATION_TYPE_Industry: type 8,ORGANIZATION_TYPE_Industry: type 9,ORGANIZATION_TYPE_Insurance,ORGANIZATION_TYPE_Kindergarten,ORGANIZATION_TYPE_Legal Services,ORGANIZATION_TYPE_Medicine,ORGANIZATION_TYPE_Military,ORGANIZATION_TYPE_Mobile,ORGANIZATION_TYPE_Other,ORGANIZATION_TYPE_Police,ORGANIZATION_TYPE_Postal,ORGANIZATION_TYPE_Realtor,ORGANIZATION_TYPE_Religion,ORGANIZATION_TYPE_Restaurant,ORGANIZATION_TYPE_School,ORGANIZATION_TYPE_Security,ORGANIZATION_TYPE_Security Ministries,ORGANIZATION_TYPE_Self-employed,ORGANIZATION_TYPE_Services,ORGANIZATION_TYPE_Telecom,ORGANIZATION_TYPE_Trade: type 1,ORGANIZATION_TYPE_Trade: type 2,ORGANIZATION_TYPE_Trade: type 3,ORGANIZATION_TYPE_Trade: type 4,ORGANIZATION_TYPE_Trade: type 5,ORGANIZATION_TYPE_Trade: type 6,ORGANIZATION_TYPE_Trade: type 7,ORGANIZATION_TYPE_Transport: type 1,ORGANIZATION_TYPE_Transport: type 2,ORGANIZATION_TYPE_Transport: type 3,ORGANIZATION_TYPE_Transport: type 4,ORGANIZATION_TYPE_University,ORGANIZATION_TYPE_XNA,OCCUPATION_TYPE_,OCCUPATION_TYPE_Accountants,OCCUPATION_TYPE_Cleaning staff,OCCUPATION_TYPE_Cooking staff,OCCUPATION_TYPE_Core staff,OCCUPATION_TYPE_Drivers,OCCUPATION_TYPE_HR staff,OCCUPATION_TYPE_High skill tech staff,OCCUPATION_TYPE_IT staff,OCCUPATION_TYPE_Laborers,OCCUPATION_TYPE_Low-skill Laborers,OCCUPATION_TYPE_Managers,OCCUPATION_TYPE_Medicine staff,OCCUPATION_TYPE_Private service staff,OCCUPATION_TYPE_Realty agents,OCCUPATION_TYPE_Sales staff,OCCUPATION_TYPE_Secretaries,OCCUPATION_TYPE_Security staff,OCCUPATION_TYPE_Waiters/barmen staff
SK_ID_CURR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1
384575,2,207000.0,465457.5,52641.0,418500.0,0.009630,-13297,-762,-637.0,-4307,19.0,1,1,0,1,0,0,4.0,2,2,11,0,0,0,0,1,1,0.675878,0.604894,0.000527,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,...,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
214010,0,247500.0,1281712.5,48946.5,1179000.0,0.006852,-14778,-1141,-1610.0,-4546,11.0,1,1,0,1,0,1,1.0,3,3,10,0,0,0,0,0,0,0.430827,0.425351,0.712155,0.0753,0.0568,0.9970,0.9592,0.1326,0.08,0.0517,0.4167,0.2917,0.0735,0.0601,0.0844,0.0058,0.1118,0.0756,0.0566,0.9940,0.9216,0.0523,0.0806,0.0345,0.3333,0.0417,0.0445,0.0652,0.0857,0.0,0.0000,0.0760,0.0568,0.9970,0.9597,0.1335,0.08,0.0517,0.4167,0.2917,0.0748,0.0611,0.0859,0.0058,0.1142,0.0754,2.0,0.0,...,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
142232,0,202500.0,495000.0,39109.5,495000.0,0.035792,-17907,-639,-2507.0,-1461,4.0,1,1,1,1,0,0,2.0,2,2,16,0,0,0,0,0,0,0.527239,0.531760,0.207964,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.0,0.0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
389171,0,247500.0,254700.0,24939.0,225000.0,0.046220,-19626,-6982,-11167.0,-3158,,1,1,0,1,0,0,1.0,1,1,14,0,0,0,0,0,0,,0.693521,0.614414,0.1320,0.0645,0.9846,,,0.16,0.0690,0.6250,,,,0.1628,,0.0022,0.1345,0.0670,0.9846,,,0.1611,0.0690,0.6250,,,,0.1696,,0.0023,0.1332,0.0645,0.9846,,,0.16,0.0690,0.6250,,,,0.1657,,0.0022,0.1285,0.0,0.0,...,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
283617,0,112500.0,308133.0,15862.5,234000.0,0.018850,-20327,-1105,-7299.0,-494,,1,1,0,1,0,0,1.0,2,2,11,0,0,0,0,0,0,0.654882,0.560690,0.636376,0.0619,0.0553,0.9717,,,0.00,0.1724,0.1667,,0.0866,,0.0749,,0.0149,0.0630,0.0574,0.9717,,,0.0000,0.1724,0.1667,,0.0885,,0.0780,,0.0158,0.0625,0.0553,0.9717,,,0.00,0.1724,0.1667,,0.0881,,0.0762,,0.0152,0.0765,0.0,0.0,...,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
189952,1,112500.0,450000.0,23692.5,450000.0,0.014520,-11787,-2175,-1327.0,-4168,,1,1,0,1,0,0,3.0,2,2,9,0,0,0,0,0,0,,0.131586,0.782608,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,...,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
119823,0,157500.0,438084.0,32008.5,387000.0,0.002134,-9265,-347,-485.0,-882,,1,1,0,1,0,0,2.0,3,3,9,0,0,0,1,1,0,,0.254411,,,,0.9861,,,,0.1034,0.1250,,,,0.0548,,0.1232,,,0.9861,,,,0.1034,0.1250,,,,0.0571,,0.1305,,,0.9861,,,,0.1034,0.1250,,,,0.0557,,0.1258,0.0452,6.0,3.0,...,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
232927,1,180000.0,454500.0,33979.5,454500.0,0.026392,-14169,-1743,-1277.0,-1912,2.0,1,1,1,1,1,0,3.0,2,2,13,0,0,0,0,0,0,0.546667,0.786429,0.177704,0.0196,0.0214,0.9613,,,0.00,0.0690,0.1667,,,,0.0233,,0.0029,0.0200,0.0222,0.9613,,,0.0000,0.0690,0.1667,,,,0.0243,,0.0031,0.0198,0.0214,0.9613,,,0.00,0.0690,0.1667,,,,0.0238,,0.0030,0.0218,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
283277,0,270000.0,473760.0,50269.5,450000.0,0.028663,-17807,-341,-2746.0,-1348,,1,1,1,1,1,0,2.0,2,2,15,0,0,0,0,0,0,0.626375,0.457588,0.424130,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0


In [57]:
# Count missing values per column of the training features, largest first
missing_counts_train = feat_application_wo_target_train.isna().sum()
missing_counts_train.sort_values(ascending=False)

COMMONAREA_MEDI                         161235
COMMONAREA_MODE                         161235
COMMONAREA_AVG                          161235
NONLIVINGAPARTMENTS_AVG                 160189
NONLIVINGAPARTMENTS_MEDI                160189
                                         ...  
WALLSMATERIAL_MODE_Monolithic                0
WALLSMATERIAL_MODE_Others                    0
WALLSMATERIAL_MODE_Panel                     0
WALLSMATERIAL_MODE_Stone, brick              0
OCCUPATION_TYPE_Waiters/barmen staff         0
Length: 250, dtype: int64

In [58]:
# Count missing values per column of the held-out features, largest first
missing_counts_test = feat_application_wo_target_test.isna().sum()
missing_counts_test.sort_values(ascending=False)

COMMONAREA_AVG                          53630
COMMONAREA_MEDI                         53630
COMMONAREA_MODE                         53630
NONLIVINGAPARTMENTS_MEDI                53325
NONLIVINGAPARTMENTS_AVG                 53325
                                        ...  
AMT_INCOME_TOTAL                            0
WALLSMATERIAL_MODE_Monolithic               0
WALLSMATERIAL_MODE_Others                   0
WALLSMATERIAL_MODE_Panel                    0
OCCUPATION_TYPE_Waiters/barmen staff        0
Length: 250, dtype: int64

In [59]:
# Replace every missing value in the training features with 0.0
feat_application_wo_target_train = feat_application_wo_target_train.fillna(value=0.0)

In [60]:
# Replace every missing value in the held-out features with 0.0
feat_application_wo_target_test = feat_application_wo_target_test.fillna(value=0.0)

In [61]:
# List every column of the training features together with its dtype
feat_application_wo_target_train.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Index: 230633 entries, 400186 to 241394
Data columns (total 250 columns):
 #    Column                                             Dtype  
---   ------                                             -----  
 0    CNT_CHILDREN                                       int64  
 1    AMT_INCOME_TOTAL                                   float64
 2    AMT_CREDIT                                         float64
 3    AMT_ANNUITY                                        float64
 4    AMT_GOODS_PRICE                                    float64
 5    REGION_POPULATION_RELATIVE                         float64
 6    DAYS_BIRTH                                         int64  
 7    DAYS_EMPLOYED                                      int64  
 8    DAYS_REGISTRATION                                  float64
 9    DAYS_ID_PUBLISH                                    int64  
 10   OWN_CAR_AGE                                        float64
 11   FLAG_MOBIL                           

In [62]:
# Model-input data layer. Create the directory up front: the Parquet writes
# that use these paths fail if the target directory does not exist
# (e.g. on a fresh checkout). `exist_ok=True` makes re-running the cell safe.
data_model_input = data_directory.joinpath("05_model_input")
Path(data_model_input).mkdir(parents=True, exist_ok=True)

# Destination files for the encoded inputs ("wo_target") and the targets
# ("w_target") of the training and held-out splits.
# NOTE(review): only the first filename lacks the "_file" suffix. The names are
# kept unchanged because other code may read these exact paths — confirm
# before renaming.
application_train_wo_target_file = data_model_input.joinpath("application_train_wo_target.parquet")
application_test_wo_target_file = data_model_input.joinpath("application_test_wo_target_file.parquet")
application_train_w_target_file = data_model_input.joinpath("application_train_w_target_file.parquet")
application_test_w_target_file = data_model_input.joinpath("application_test_w_target_file.parquet")

In [63]:
# Persist the four split frames as Parquet files, each to its own destination
parquet_outputs = (
    (feat_application_wo_target_train, application_train_wo_target_file),
    (feat_application_wo_target_test, application_test_wo_target_file),
    (pre_feat_application_w_target_train, application_train_w_target_file),
    (pre_feat_application_w_target_test, application_test_w_target_file),
)
for split_frame, parquet_path in parquet_outputs:
    split_frame.to_parquet(parquet_path)

In [64]:
# Record the encoded training feature column names in the project parameters
feature_names = feat_application_wo_target_train.columns.to_list()
project_parameters["features"] = feature_names

In [65]:
# Write the updated parameters back to the YAML file (overwrites it in place).
# Serialize BEFORE opening the file: opening in "w" mode truncates the file
# immediately, so if yaml.safe_dump raised while streaming (e.g. on a value it
# cannot represent) the existing parameters file would be left empty or
# partially written. Dumping to a string first keeps the file intact on failure.
serialized_parameters = yaml.safe_dump(project_parameters)
with open(project_parameters_file, "w") as file:
    file.write(serialized_parameters)

In [66]:
# Read the parameters file back and display it to check what was written
with open(project_parameters_file, mode="r") as parameters_stream:
    project_parameters_updated = yaml.safe_load(parameters_stream)

project_parameters_updated

{'dataset_variables_types': {'application_test': {'categorical_nominal': ['CODE_GENDER',
    'FLAG_WORK_PHONE',
    'FLAG_EMP_PHONE',
    'NAME_CONTRACT_TYPE',
    'FLAG_OWN_REALTY',
    'WEEKDAY_APPR_PROCESS_START',
    'FONDKAPREMONT_MODE',
    'FLAG_EMAIL',
    'WALLSMATERIAL_MODE',
    'FLAG_OWN_CAR',
    'FLAG_MOBIL',
    'FLAG_PHONE',
    'NAME_INCOME_TYPE',
    'HOUSETYPE_MODE',
    'NAME_HOUSING_TYPE',
    'FLAG_CONT_MOBILE',
    'NAME_EDUCATION_TYPE',
    'EMERGENCYSTATE_MODE',
    'NAME_FAMILY_STATUS',
    'NAME_TYPE_SUITE'],
   'categorical_ordinal': ['REGION_RATING_CLIENT',
    'REGION_RATING_CLIENT_W_CITY'],
   'datetime': ['HOUR_APPR_PROCESS_START'],
   'numerical_continuous': ['EXT_SOURCE_3',
    'ENTRANCES_MODE',
    'BASEMENTAREA_MODE',
    'APARTMENTS_MEDI',
    'YEARS_BUILD_MEDI',
    'ENTRANCES_MEDI',
    'NONLIVINGAREA_MODE',
    'YEARS_BEGINEXPLUATATION_MODE',
    'ELEVATORS_MEDI',
    'YEARS_BEGINEXPLUATATION_MEDI',
    'AMT_INCOME_TOTAL',
    'NONLIVINGAPARTMENT