In [1]:
import time
import pandas as pd
import numpy as np
import multiprocessing as mp
import datetime as dt
from sklearn import preprocessing
import warnings
warnings.filterwarnings('ignore')

import logging
# Root logger: DEBUG-and-above records are appended to LOG_FILE, which is
# resolved against the current working directory.
LOG_FILE = 'f28101551_log.log'
logger = logging.getLogger()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Re-running this cell must not stack a second FileHandler on the root logger;
# otherwise every record is written to the file once per execution of the cell.
# Reuse the handler that already targets LOG_FILE when there is one.
fhandler = next(
    (h for h in logger.handlers
     if isinstance(h, logging.FileHandler) and h.baseFilename.endswith(LOG_FILE)),
    None,
)
if fhandler is None:
    fhandler = logging.FileHandler(filename=LOG_FILE, mode='a')
    logger.addHandler(fhandler)

# (Re)apply the format and level so edits to this cell take effect on re-run.
fhandler.setFormatter(formatter)
logger.setLevel(logging.DEBUG)

In [2]:
from fencoding import (save_to_csv,
                       reduce_mem_usage,
                       EmptyElim,
                       FEncoding,
                       OutlDetect,
                       FImputation
                       )

In [3]:
from pathlib import Path
# home-credit-default-risk tables
if not Path("/content/application_test.csv").is_file():
  ! wget https://www.dropbox.com/s/j9xwcj9ixki5t2l/home-credit-default-risk.zip?dl=0 -O data.zip
  ! unzip -q data.zip
# default-of-credit-card-clients-dataset
if not Path("/content/default_ucr.csv").is_file():
  ! wget https://www.dropbox.com/s/lj0d7qez18ea7dx/UCI_Credit_Card.csv?dl=0 -O default_ucr.csv

In [4]:
# Source tables, each paired with the identifier / target columns that are
# split off and kept separately before preprocessing.
_sources = [
    # home-credit-default-risk tables
    ('/content/application_train.csv', ['SK_ID_CURR', 'TARGET']),
    ('/content/application_test.csv', ['SK_ID_CURR']),
    ('/content/bureau.csv', ['SK_ID_CURR', 'SK_ID_BUREAU']),
    ('/content/bureau_balance.csv', ['SK_ID_BUREAU']),
    ('/content/POS_CASH_balance.csv', ['SK_ID_PREV', 'SK_ID_CURR']),
    ('/content/credit_card_balance.csv', ['SK_ID_PREV', 'SK_ID_CURR']),
    ('/content/previous_application.csv', ['SK_ID_PREV', 'SK_ID_CURR']),
    ('/content/installments_payments.csv', ['SK_ID_PREV', 'SK_ID_CURR']),

    # default-of-credit-card-clients-datasets
    ('/content/default_ucr.csv', ['ID', 'default.payment.next.month']),
]

_feature_frames, _key_names, _key_frames = [], [], []
for _csv_path, _key_cols in _sources:
    _raw = pd.read_csv(_csv_path)
    _key_names.append(_key_cols)
    # ID / target columns, kept aside in their own frame
    _key_frames.append(_raw[_key_cols])
    # the same table with those columns removed
    _feature_frames.append(_raw.drop(_key_cols, axis=1))

# NOTE: the name `dict` shadows the built-in; it is kept because the
# preprocessing loop further down indexes this mapping by that name.
dict = {
    'datasets': _feature_frames,
    'name_dropped_columns': _key_names,
    'dropped_columns': _key_frames,
}

In [5]:
# Step objects used by the preprocessing loop for every table.
emptyelim = EmptyElim(n_jobs=-1, chunks=None)
fencoding = FEncoding(n_jobs=1, chunks=None)

# Outlier handling. Alternatives noted by the original author for
# `outliers_detection_technique`: 'gaussian_approximation', 'quantiles'.
_outlier_settings = {
    'outliers_detection_technique': 'iqr_proximity_rule',
    'n_jobs': None,
    'chunks': None,
}
outldetect = OutlDetect(**_outlier_settings)

# Missing-value imputation. Alternatives noted by the original author:
#   - fill_with_value = 'zeros' (with model_type = 'tree-based'), or
#   - model_type = 'regression-based' together with fill_with_value = None.
_imputation_settings = {
    'model_type': 'tree-based',
    'fill_with_value': 'extreme_values',
    'n_jobs': None,
    'chunks': None,
}
fimputation = FImputation(**_imputation_settings)

In [None]:
# Push every table through the same preprocessing pipeline and save the result
# together with the ID / target columns that were split off earlier.
# NOTE(review): each table -- application_train and application_test included --
# is fitted independently, so encodings and retained columns can differ between
# them (the recorded output shows 'Working' encoded as 7 in train and 6 in test).
# Confirm this is intended before modelling on the saved files.
for N_D, (X, X_rest) in enumerate(zip(dict['datasets'], dict['dropped_columns'])):
    print('\n Number of dataset: {} ...'.format(N_D))
    display(X.head())  # preview of the table before processing

    # --- memory reduction and empty-column elimination ---
    X = reduce_mem_usage(X)
    X = emptyelim.fit_transform(X)

    # --- column typing and encoding ---
    f_dict = fencoding.initialize_types(X)
    print('\n f_dict:', f_dict)
    X = fencoding.date_replace(X)
    # 'OneHotEncoder' is the alternative `method` noted by the original author
    X = fencoding.encode_categor(X, method='OrdinalEncoder')
    X = fencoding.bucket_numerical(
        X,
        n_bins=5,
        columns_to_buck='all_numerical',
        drop_current=True,
    )
    X = fencoding.encode_time(X, drop_current=True)

    # --- outlier handling and imputation ---
    X = outldetect.fit_transform(X)
    X = fimputation.impute(X)

    # --- persist ---
    X = reduce_mem_usage(X)
    save_to_csv(X, rest_columns=X_rest, path=None)
    display(X.head())  # preview of the processed table


 Number of dataset: 0 ...


Unnamed: 0,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,NAME_TYPE_SUITE,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE,EXT_SOURCE_1,...,LIVINGAPARTMENTS_MEDI,LIVINGAREA_MEDI,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAREA_MEDI,FONDKAPREMONT_MODE,HOUSETYPE_MODE,TOTALAREA_MODE,WALLSMATERIAL_MODE,EMERGENCYSTATE_MODE,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,Cash loans,M,N,Y,0,202500.0,406597.5,24700.5,351000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.018801,-9461,-637,-3648.0,-2120,,1,1,0,1,1,0,Laborers,1.0,2,2,WEDNESDAY,10,0,0,0,0,0,0,Business Entity Type 3,0.083037,...,0.0205,0.0193,0.0,0.0,reg oper account,block of flats,0.0149,"Stone, brick",No,2.0,2.0,2.0,2.0,-1134.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
1,Cash loans,F,N,N,0,270000.0,1293502.5,35698.5,1129500.0,Family,State servant,Higher education,Married,House / apartment,0.003541,-16765,-1188,-1186.0,-291,,1,1,0,1,1,0,Core staff,2.0,1,1,MONDAY,11,0,0,0,0,0,0,School,0.311267,...,0.0787,0.0558,0.0039,0.01,reg oper account,block of flats,0.0714,Block,No,1.0,0.0,1.0,0.0,-828.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
2,Revolving loans,M,Y,Y,0,67500.0,135000.0,6750.0,135000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.010032,-19046,-225,-4260.0,-2531,26.0,1,1,1,1,1,0,Laborers,1.0,2,2,MONDAY,9,0,0,0,0,0,0,Government,,...,,,,,,,,,,0.0,0.0,0.0,0.0,-815.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
3,Cash loans,F,N,Y,0,135000.0,312682.5,29686.5,297000.0,Unaccompanied,Working,Secondary / secondary special,Civil marriage,House / apartment,0.008019,-19005,-3039,-9833.0,-2437,,1,1,0,1,0,0,Laborers,2.0,2,2,WEDNESDAY,17,0,0,0,0,0,0,Business Entity Type 3,,...,,,,,,,,,,2.0,0.0,2.0,0.0,-617.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,
4,Cash loans,M,N,Y,0,121500.0,513000.0,21865.5,513000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.028663,-19932,-3038,-4311.0,-3458,,1,1,0,1,0,0,Core staff,1.0,2,2,THURSDAY,11,0,0,0,0,1,1,Religion,,...,,,,,,,,,,0.0,0.0,0.0,0.0,-1106.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0



Memory usage of dataframe is 281.53 MB
Memory usage after optimization is: 90.91 MB
Memory usage decreased by 67.7%

 columns to drop: {}

 f_dict: {'categor_columns': ['NAME_CONTRACT_TYPE', 'CODE_GENDER', 'FLAG_OWN_CAR', 'FLAG_OWN_REALTY', 'CNT_CHILDREN', 'NAME_TYPE_SUITE', 'NAME_INCOME_TYPE', 'NAME_EDUCATION_TYPE', 'NAME_FAMILY_STATUS', 'NAME_HOUSING_TYPE', 'DAYS_EMPLOYED', 'FLAG_MOBIL', 'FLAG_EMP_PHONE', 'FLAG_WORK_PHONE', 'FLAG_CONT_MOBILE', 'FLAG_PHONE', 'FLAG_EMAIL', 'OCCUPATION_TYPE', 'REGION_RATING_CLIENT', 'REGION_RATING_CLIENT_W_CITY', 'WEEKDAY_APPR_PROCESS_START', 'HOUR_APPR_PROCESS_START', 'REG_REGION_NOT_LIVE_REGION', 'REG_REGION_NOT_WORK_REGION', 'LIVE_REGION_NOT_WORK_REGION', 'REG_CITY_NOT_LIVE_CITY', 'REG_CITY_NOT_WORK_CITY', 'LIVE_CITY_NOT_WORK_CITY', 'ORGANIZATION_TYPE', 'FONDKAPREMONT_MODE', 'HOUSETYPE_MODE', 'WALLSMATERIAL_MODE', 'EMERGENCYSTATE_MODE', 'FLAG_DOCUMENT_2', 'FLAG_DOCUMENT_3', 'FLAG_DOCUMENT_4', 'FLAG_DOCUMENT_5', 'FLAG_DOCUMENT_6', 'FLAG_DOCUMENT_7', 

Unnamed: 0,AMT_ANNUITY,AMT_GOODS_PRICE,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,NAME_TYPE_SUITE,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,DAYS_EMPLOYED,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE,FONDKAPREMONT_MODE,HOUSETYPE_MODE,WALLSMATERIAL_MODE,EMERGENCYSTATE_MODE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_INCOME_TOTAL_bucketed,AMT_CREDIT_bucketed
0,24704.0,351000.0,0,1,0,1,0,6,7,4,3,1,11195,1,1,0,1,1,0,16,1,1,6,12,0,0,0,0,0,0,45,2,0,5,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,1
1,35712.0,1129500.0,0,0,0,0,0,1,4,1,1,1,3238,1,1,0,1,1,0,11,0,0,1,17,0,0,0,0,0,0,33,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3
2,6752.0,135000.0,1,1,1,1,0,6,7,4,3,1,6112,1,1,1,1,1,0,16,1,1,1,16,0,0,0,0,0,0,3,3,2,6,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,29680.0,297000.0,0,0,0,1,0,6,7,4,0,1,7086,1,1,0,1,0,0,16,1,1,6,23,0,0,0,0,0,0,45,3,2,6,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1
4,21872.0,513000.0,0,1,0,1,0,6,7,4,3,1,7085,1,1,0,1,0,0,11,1,1,4,17,0,0,0,0,1,1,31,3,2,6,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1



 Number of dataset: 1 ...


Unnamed: 0,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,NAME_TYPE_SUITE,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE,EXT_SOURCE_1,...,LIVINGAPARTMENTS_MEDI,LIVINGAREA_MEDI,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAREA_MEDI,FONDKAPREMONT_MODE,HOUSETYPE_MODE,TOTALAREA_MODE,WALLSMATERIAL_MODE,EMERGENCYSTATE_MODE,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,Cash loans,F,N,Y,0,135000.0,568800.0,20560.5,450000.0,Unaccompanied,Working,Higher education,Married,House / apartment,0.01885,-19241,-2329,-5170.0,-812,,1,1,0,1,0,1,,2.0,2,2,TUESDAY,18,0,0,0,0,0,0,Kindergarten,0.752614,...,,0.0514,,,,block of flats,0.0392,"Stone, brick",No,0.0,0.0,0.0,0.0,-1740.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
1,Cash loans,M,N,Y,0,99000.0,222768.0,17370.0,180000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.035792,-18064,-4469,-9118.0,-1623,,1,1,0,1,0,0,Low-skill Laborers,2.0,2,2,FRIDAY,9,0,0,0,0,0,0,Self-employed,0.56499,...,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0
2,Cash loans,M,Y,Y,0,202500.0,663264.0,69777.0,630000.0,,Working,Higher education,Married,House / apartment,0.019101,-20038,-4458,-2175.0,-3503,5.0,1,1,0,1,0,0,Drivers,2.0,2,2,MONDAY,14,0,0,0,0,0,0,Transport: type 3,,...,,,,,,,,,,0.0,0.0,0.0,0.0,-856.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,1.0,4.0
3,Cash loans,F,N,Y,2,315000.0,1575000.0,49018.5,1575000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.026392,-13976,-1866,-2000.0,-4208,,1,1,0,1,1,0,Sales staff,4.0,2,2,WEDNESDAY,11,0,0,0,0,0,0,Business Entity Type 3,0.525734,...,0.2446,0.3739,0.0388,0.0817,reg oper account,block of flats,0.37,Panel,No,0.0,0.0,0.0,0.0,-1805.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0
4,Cash loans,M,Y,N,1,180000.0,625500.0,32067.0,625500.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.010032,-13040,-2191,-4000.0,-4262,16.0,1,1,1,1,0,0,,3.0,2,2,FRIDAY,5,0,0,0,0,1,1,Business Entity Type 3,0.202145,...,,,,,,,,,,0.0,0.0,0.0,0.0,-821.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,



Memory usage of dataframe is 44.63 MB
Memory usage after optimization is: 14.41 MB
Memory usage decreased by 67.7%

 columns to drop: {'FLAG_DOCUMENT_2': [0], 'FLAG_DOCUMENT_10': [0], 'FLAG_DOCUMENT_12': [0], 'FLAG_DOCUMENT_13': [0], 'FLAG_DOCUMENT_14': [0], 'FLAG_DOCUMENT_15': [0], 'FLAG_DOCUMENT_16': [0], 'FLAG_DOCUMENT_17': [0], 'FLAG_DOCUMENT_19': [0], 'FLAG_DOCUMENT_20': [0], 'FLAG_DOCUMENT_21': [0]}

 f_dict: {'categor_columns': ['NAME_CONTRACT_TYPE', 'CODE_GENDER', 'FLAG_OWN_CAR', 'FLAG_OWN_REALTY', 'CNT_CHILDREN', 'NAME_TYPE_SUITE', 'NAME_INCOME_TYPE', 'NAME_EDUCATION_TYPE', 'NAME_FAMILY_STATUS', 'NAME_HOUSING_TYPE', 'DAYS_EMPLOYED', 'FLAG_MOBIL', 'FLAG_EMP_PHONE', 'FLAG_WORK_PHONE', 'FLAG_CONT_MOBILE', 'FLAG_PHONE', 'FLAG_EMAIL', 'OCCUPATION_TYPE', 'REGION_RATING_CLIENT', 'REGION_RATING_CLIENT_W_CITY', 'WEEKDAY_APPR_PROCESS_START', 'HOUR_APPR_PROCESS_START', 'REG_REGION_NOT_LIVE_REGION', 'REG_REGION_NOT_WORK_REGION', 'LIVE_REGION_NOT_WORK_REGION', 'REG_CITY_NOT_LIVE_CITY', 'R

Unnamed: 0,AMT_ANNUITY,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,NAME_TYPE_SUITE,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,DAYS_EMPLOYED,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE,FONDKAPREMONT_MODE,HOUSETYPE_MODE,WALLSMATERIAL_MODE,EMERGENCYSTATE_MODE,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_11,FLAG_DOCUMENT_18,AMT_INCOME_TOTAL_bucketed,AMT_CREDIT_bucketed,AMT_GOODS_PRICE_bucketed
0,20560.5,0,0,0,1,0,6,6,1,1,1,1162,1,1,0,1,0,1,17,1,2,5,2,0,0,0,0,0,0,21,3,0,5,0,1,0,0,0,0,0,0,0,0,1,2,2
1,17370.0,0,1,0,1,0,6,6,4,1,1,3698,1,1,0,1,0,0,17,1,2,0,16,0,0,0,0,0,0,37,3,2,6,1,1,0,0,0,0,0,0,0,0,0,0,0
2,66516.75,0,1,1,1,0,6,6,1,1,1,3685,1,1,0,1,0,0,12,1,2,1,20,0,0,0,0,0,0,50,3,2,6,1,0,0,0,0,0,1,0,0,0,2,2,3
3,49018.5,0,0,0,1,4,6,6,4,1,1,604,1,1,0,1,1,0,6,1,2,6,17,0,0,0,0,0,0,45,2,0,4,0,1,0,0,0,0,0,0,0,0,3,3,3
4,32067.0,0,1,1,0,1,6,6,4,1,1,996,1,1,1,1,0,0,17,1,2,0,11,0,0,0,0,1,1,45,3,2,6,1,1,0,0,0,0,0,0,0,0,2,2,2



 Number of dataset: 2 ...


Unnamed: 0,CREDIT_ACTIVE,CREDIT_CURRENCY,DAYS_CREDIT,CREDIT_DAY_OVERDUE,DAYS_CREDIT_ENDDATE,DAYS_ENDDATE_FACT,AMT_CREDIT_MAX_OVERDUE,CNT_CREDIT_PROLONG,AMT_CREDIT_SUM,AMT_CREDIT_SUM_DEBT,AMT_CREDIT_SUM_LIMIT,AMT_CREDIT_SUM_OVERDUE,CREDIT_TYPE,DAYS_CREDIT_UPDATE,AMT_ANNUITY
0,Closed,currency 1,-497,0,-153.0,-153.0,,0,91323.0,0.0,,0.0,Consumer credit,-131,
1,Active,currency 1,-208,0,1075.0,,,0,225000.0,171342.0,,0.0,Credit card,-20,
2,Active,currency 1,-203,0,528.0,,,0,464323.5,,,0.0,Consumer credit,-16,
3,Active,currency 1,-203,0,,,,0,90000.0,,,0.0,Credit card,-16,
4,Active,currency 1,-629,0,1197.0,,77674.5,0,2700000.0,,,0.0,Consumer credit,-21,



Memory usage of dataframe is 196.43 MB
Memory usage after optimization is: 99.85 MB
Memory usage decreased by 49.2%

 columns to drop: {}

 f_dict: {'categor_columns': ['CREDIT_ACTIVE', 'CREDIT_CURRENCY', 'CNT_CREDIT_PROLONG', 'CREDIT_TYPE', 'DAYS_CREDIT_UPDATE'], 'numer_columns': ['AMT_CREDIT_MAX_OVERDUE', 'AMT_CREDIT_SUM', 'AMT_CREDIT_SUM_DEBT', 'AMT_CREDIT_SUM_LIMIT', 'AMT_CREDIT_SUM_OVERDUE', 'AMT_ANNUITY'], 'time_columns': []}

 No time columns in the dataset

 No time columns in the dataset

 col_outl_info (upper, lower) bounds: {'AMT_CREDIT_MAX_OVERDUE': (0.0, 0.0), 'AMT_CREDIT_SUM': (-344250.0, 710550.0), 'AMT_CREDIT_SUM_DEBT': (-60230.25, 100383.75), 'AMT_CREDIT_SUM_LIMIT': (0.0, 0.0), 'AMT_ANNUITY': (-20250.0, 33750.0)}

Memory usage of dataframe is 124.41 MB
Memory usage after optimization is: 34.38 MB
Memory usage decreased by 72.4%

 Successfully saved to trial_10282020-12:56.csv


Unnamed: 0,AMT_CREDIT_SUM,AMT_CREDIT_SUM_DEBT,AMT_ANNUITY,CREDIT_ACTIVE,CREDIT_CURRENCY,CNT_CREDIT_PROLONG,CREDIT_TYPE,DAYS_CREDIT_UPDATE,AMT_CREDIT_SUM_OVERDUE_bucketed,AMT_CREDIT_MAX_OVERDUE,AMT_CREDIT_SUM_LIMIT
0,91323.0,0.0,33760.0,2,0,0,8,2257,0,0.0,0.0
1,225000.0,100383.75,33760.0,0,0,0,9,128,0,0.0,0.0
2,464323.5,100383.75,33760.0,0,0,0,8,2613,0,0.0,0.0
3,90000.0,100383.75,33760.0,0,0,0,9,2613,0,0.0,0.0
4,710550.0,100383.75,33760.0,0,0,0,8,251,0,0.0,0.0



 Number of dataset: 3 ...


Unnamed: 0,MONTHS_BALANCE,STATUS
0,0,C
1,-1,C
2,-2,C
3,-3,C
4,-4,C



Memory usage of dataframe is 416.56 MB
Memory usage after optimization is: 234.32 MB
Memory usage decreased by 43.7%

 columns to drop: {}

 f_dict: {'categor_columns': ['MONTHS_BALANCE', 'STATUS'], 'numer_columns': [], 'time_columns': []}

 No time columns in the dataset

 No numerical columns in the dataset

 No time columns in the dataset

 No numerical columns in the dataset

 No numerical columns in the dataset
