In [61]:
# importing Required packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries above;
# consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', None)# To see all the columns of a dataframe
# Remove the row limit as well: displaying a whole frame will print every row,
# so prefer .head()/.sample() on large frames.
pd.set_option('display.max_rows', None)

In [62]:
# Function to reduce the memory usage of various Dataframes
def reduce_mem_usage(df, use_float16=True):
    """Downcast the columns of ``df`` in place and print the memory saved.

    For every column:

    * object (and pandas string-dtype) columns become ``category``;
    * signed / unsigned integer columns are cast to the narrowest
      signed / unsigned integer type whose range contains the column's
      min and max (limits are inclusive, so a max of exactly 127 still
      fits ``int8``);
    * float columns are cast to ``float16`` or ``float32`` when the
      column's min and max lie strictly inside that type's finite range,
      otherwise to ``float64`` (this is also what happens when min/max are
      NaN, e.g. for an all-NaN column);
    * every other dtype (bool, datetime, timedelta, categorical, nullable
      extension types, ...) is left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink. It is modified in place.
    use_float16 : bool, default True
        When True, floats whose range fits are stored as ``float16``.
        ``float16`` keeps only about three significant decimal digits, so
        pass False to stop at ``float32`` when that precision matters.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    # Candidate types, narrowest first; the first one that fits wins.
    signed_types = (np.int8, np.int16, np.int32, np.int64)
    unsigned_types = (np.uint8, np.uint16, np.uint32, np.uint64)
    float_types = (np.float16, np.float32) if use_float16 else (np.float32,)

    for col in df.columns:
        col_type = df[col].dtype

        # Text columns: store each distinct value once.
        if col_type == object or isinstance(col_type, pd.StringDtype):
            df[col] = df[col].astype('category')
            continue

        # Only plain numpy integer/float columns are downcast. Comparing the
        # min/max of bool, datetime or categorical columns with numeric limits
        # is either meaningless or raises, so those columns are skipped.
        if not isinstance(col_type, np.dtype) or col_type.kind not in 'iuf':
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if col_type.kind in 'iu':
            candidates = signed_types if col_type.kind == 'i' else unsigned_types
            for candidate in candidates:
                limits = np.iinfo(candidate)
                # Inclusive bounds: the limit values themselves are representable.
                if c_min >= limits.min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            # No candidate fits only when min/max are NaN (empty column);
            # the column is then left as it is.
        else:
            for candidate in float_types:
                limits = np.finfo(candidate)
                if c_min > limits.min and c_max < limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            else:
                df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    # Guard the percentage against a zero starting size.
    saved_pct = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
    print('Decreased by {:.1f}%'.format(saved_pct))

    return df


In [63]:
def import_data(file):
    """Read a CSV file into a DataFrame and shrink its memory footprint.

    Parameters
    ----------
    file : str or path-like or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed frame after ``reduce_mem_usage`` has downcast its columns
        (``reduce_mem_usage`` also prints the before/after memory report).
    """
    # parse_dates=True asks pandas to try parsing the index as dates.
    # keep_date_col is intentionally not passed: it only matters when
    # parse_dates combines several columns, which is not the case here, and
    # the keyword is deprecated in recent pandas releases.
    df = pd.read_csv(file, parse_dates=True)
    df = reduce_mem_usage(df)
    return df

In [64]:
# Load the cleaned training set; import_data reads the CSV and downcasts the
# column dtypes, printing the memory report shown below.
train = import_data('train_clean.csv')

Memory usage of dataframe is 362.48 MB
Memory usage after optimization is: 76.98 MB
Decreased by 78.8%


In [65]:
# Load the cleaned test set the same way (CSV read + dtype downcast with a
# printed memory report).
test = import_data('test_clean.csv')

Memory usage of dataframe is 120.83 MB
Memory usage after optimization is: 25.66 MB
Decreased by 78.8%


In [67]:
# Preview the first rows of the training frame as loaded, before any new features are added.
train.head()

Unnamed: 0,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,REGION_POPULATION_RELATIVE,YEARS_BIRTH,YEARS_EMPLOYED,YEARS_REGISTRATION,YEARS_ID_PUBLISH,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,EXT_SOURCE_2,EXT_SOURCE_3,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,YEARS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR,BU_YEARS_CREDIT,BU_CREDIT_DAY_OVERDUE,BU_YEARS_CREDIT_ENDDATE,BU_CNT_CREDIT_PROLONG,BU_AMT_CREDIT_SUM,BU_AMT_CREDIT_SUM_DEBT,BU_AMT_CREDIT_SUM_OVERDUE,BU_YEARS_CREDIT_UPDATE,BU_PREV_BU_LOAN_CNT,BU_CREDIT_ACTIVE_ACTIVE,BU_CREDIT_ACTIVE_CLOSED,BU_CREDIT_TYPE_CONSUMER CREDIT,BU_CREDIT_TYPE_CREDIT CARD,PREV_APP_AMT_ANNUITY,PREV_APP_AMT_APPLICATION,PREV_APP_AMT_CREDIT,PREV_APP_AMT_GOODS_PRICE,PREV_APP_HOUR_APPR_PROCESS_START,PREV_APP_NFLAG_LAST_APPL_IN_DAY,PREV_APP_YEARS_DECISION,PREV_APP_SELLERPLACE_AREA,PREV_APP_CNT_PAYMENT,PREV_APP_PREV_APP_CNT,PREV_APP_NAME_CONTRACT_TYPE_CASH LOANS,PREV_APP_NAME_CONTRACT_TYPE_CONSUMER LOANS,PREV_APP_NAME_CONTRACT_TYPE_REVOLVING 
LOANS,PREV_APP_WEEKDAY_APPR_PROCESS_START_FRIDAY,PREV_APP_WEEKDAY_APPR_PROCESS_START_MONDAY,PREV_APP_WEEKDAY_APPR_PROCESS_START_SATURDAY,PREV_APP_WEEKDAY_APPR_PROCESS_START_THURSDAY,PREV_APP_WEEKDAY_APPR_PROCESS_START_TUESDAY,PREV_APP_WEEKDAY_APPR_PROCESS_START_WEDNESDAY,PREV_APP_FLAG_LAST_APPL_PER_CONTRACT_Y,PREV_APP_NAME_CASH_LOAN_PURPOSE_XAP,PREV_APP_NAME_CASH_LOAN_PURPOSE_XNA,PREV_APP_NAME_CONTRACT_STATUS_APPROVED,PREV_APP_NAME_CONTRACT_STATUS_CANCELED,PREV_APP_NAME_CONTRACT_STATUS_REFUSED,PREV_APP_NAME_PAYMENT_TYPE_CASH THROUGH THE BANK,PREV_APP_NAME_PAYMENT_TYPE_XNA,PREV_APP_CODE_REJECT_REASON_HC,PREV_APP_CODE_REJECT_REASON_LIMIT,PREV_APP_CODE_REJECT_REASON_XAP,PREV_APP_NAME_CLIENT_TYPE_NEW,PREV_APP_NAME_CLIENT_TYPE_REFRESHED,PREV_APP_NAME_CLIENT_TYPE_REPEATER,PREV_APP_NAME_GOODS_CATEGORY_AUDIO/VIDEO,PREV_APP_NAME_GOODS_CATEGORY_COMPUTERS,PREV_APP_NAME_GOODS_CATEGORY_CONSUMER ELECTRONICS,PREV_APP_NAME_GOODS_CATEGORY_MOBILE,PREV_APP_NAME_GOODS_CATEGORY_XNA,PREV_APP_NAME_PORTFOLIO_CARDS,PREV_APP_NAME_PORTFOLIO_CASH,PREV_APP_NAME_PORTFOLIO_POS,PREV_APP_NAME_PORTFOLIO_XNA,PREV_APP_NAME_PRODUCT_TYPE_XNA,PREV_APP_NAME_PRODUCT_TYPE_X-SELL,PREV_APP_CHANNEL_TYPE_COUNTRY-WIDE,PREV_APP_CHANNEL_TYPE_CREDIT AND CASH OFFICES,PREV_APP_CHANNEL_TYPE_REGIONAL / LOCAL,PREV_APP_CHANNEL_TYPE_STONE,PREV_APP_NAME_SELLER_INDUSTRY_CONNECTIVITY,PREV_APP_NAME_SELLER_INDUSTRY_CONSUMER ELECTRONICS,PREV_APP_NAME_SELLER_INDUSTRY_XNA,PREV_APP_NAME_YIELD_GROUP_XNA,PREV_APP_NAME_YIELD_GROUP_HIGH,PREV_APP_NAME_YIELD_GROUP_LOW_NORMAL,PREV_APP_NAME_YIELD_GROUP_MIDDLE,PREV_APP_PRODUCT_COMBINATION_CARD STREET,PREV_APP_PRODUCT_COMBINATION_CARD X-SELL,PREV_APP_PRODUCT_COMBINATION_CASH,PREV_APP_PRODUCT_COMBINATION_CASH STREET: HIGH,PREV_APP_PRODUCT_COMBINATION_CASH X-SELL: HIGH,PREV_APP_PRODUCT_COMBINATION_CASH X-SELL: LOW,PREV_APP_PRODUCT_COMBINATION_CASH X-SELL: MIDDLE,PREV_APP_PRODUCT_COMBINATION_POS HOUSEHOLD WITH INTEREST,PREV_APP_PRODUCT_COMBINATION_POS HOUSEHOLD WITHOUT 
INTEREST,PREV_APP_PRODUCT_COMBINATION_POS INDUSTRY WITH INTEREST,PREV_APP_PRODUCT_COMBINATION_POS MOBILE WITH INTEREST,POS_CASH_MONTHS_BALANCE,POS_CASH_CNT_INSTALMENT,POS_CASH_CNT_INSTALMENT_FUTURE,POS_CASH_SK_DPD,POS_CASH_SK_DPD_DEF,POS_CASH_NAME_CONTRACT_STATUS_ACTIVE,POS_CASH_NAME_CONTRACT_STATUS_COMPLETED,POS_CASH_NAME_CONTRACT_STATUS_SIGNED,INSTALL_PAY_NUM_INSTALMENT_VERSION,INSTALL_PAY_NUM_INSTALMENT_NUMBER,INSTALL_PAY_YEARS_INSTALMENT,INSTALL_PAY_YEARS_ENTRY_PAYMENT,INSTALL_PAY_AMT_INSTALMENT,INSTALL_PAY_AMT_PAYMENT,TARGET,NAME_CONTRACT_TYPE_Cash loans,CODE_GENDER_F,FLAG_OWN_CAR_N,FLAG_OWN_REALTY_Y,NAME_TYPE_SUITE_Children,NAME_TYPE_SUITE_Family,"NAME_TYPE_SUITE_Spouse, partner",NAME_TYPE_SUITE_Unaccompanied,NAME_INCOME_TYPE_Commercial associate,NAME_INCOME_TYPE_Pensioner,NAME_INCOME_TYPE_State servant,NAME_INCOME_TYPE_Working,NAME_EDUCATION_TYPE_Higher education,NAME_EDUCATION_TYPE_Incomplete higher,NAME_EDUCATION_TYPE_Lower secondary,NAME_EDUCATION_TYPE_Secondary / secondary special,NAME_FAMILY_STATUS_Civil marriage,NAME_FAMILY_STATUS_Married,NAME_FAMILY_STATUS_Separated,NAME_FAMILY_STATUS_Single / not married,NAME_HOUSING_TYPE_House / apartment,NAME_HOUSING_TYPE_Municipal apartment,NAME_HOUSING_TYPE_Office apartment,NAME_HOUSING_TYPE_Rented apartment,NAME_HOUSING_TYPE_With parents,WEEKDAY_APPR_PROCESS_START_FRIDAY,WEEKDAY_APPR_PROCESS_START_MONDAY,WEEKDAY_APPR_PROCESS_START_SATURDAY,WEEKDAY_APPR_PROCESS_START_THURSDAY,WEEKDAY_APPR_PROCESS_START_TUESDAY,WEEKDAY_APPR_PROCESS_START_WEDNESDAY,ORGANIZATION_TYPE_Agriculture,ORGANIZATION_TYPE_Bank,ORGANIZATION_TYPE_Business_Entity,ORGANIZATION_TYPE_Construction,ORGANIZATION_TYPE_Government,ORGANIZATION_TYPE_Housing,ORGANIZATION_TYPE_Industry,ORGANIZATION_TYPE_Kindergarten,ORGANIZATION_TYPE_Medicine,ORGANIZATION_TYPE_Military,ORGANIZATION_TYPE_Police,ORGANIZATION_TYPE_Postal,ORGANIZATION_TYPE_Restaurant,ORGANIZATION_TYPE_School,ORGANIZATION_TYPE_Security,ORGANIZATION_TYPE_Security 
Ministries,ORGANIZATION_TYPE_Self-employed,ORGANIZATION_TYPE_Services,ORGANIZATION_TYPE_Trade,ORGANIZATION_TYPE_Transport,ORGANIZATION_TYPE_University,ORGANIZATION_TYPE_XNA
0,0,112500.0,545040.0,25407.0,450000.0,0.022797,24.28125,3.857422,8.296875,1.84375,1,1,1,1,1,0,2.0,2,2,17,0,0,0,0,1,1,0.443115,0.508301,0.0,0.0,0.0,0.0,1.282227,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,-1.274414,0.0,-0.163452,0.0,168255.0,0.0,0.0,-0.626465,3.0,0.333252,0.666504,1.0,0.0,9499.589844,85384.796875,88209.0,85384.796875,15.203125,1.0,-0.719727,53.799999,11.335938,5.0,0.0,1.0,0.0,0.0,0.0,0.600098,0.0,0.0,0.0,1.0,1.0,0.0,0.399902,0.0,0.0,1.0,0.0,0.0,0.0,0.399902,0.399902,0.0,0.600098,0.0,0.600098,0.0,0.399902,0.0,0.0,0.0,0.600098,0.399902,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.399902,0.0,0.600098,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.399902,-11.0,11.453125,6.546875,0.0,0.0,0.90918,0.090881,0.0,1.111328,5.0,-0.868652,-0.983398,10151.355469,10151.355469,0,1,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
1,0,247500.0,225000.0,13896.0,225000.0,0.028656,66.125,4.519531,20.40625,12.609375,1,0,0,1,0,0,1.0,2,2,9,0,0,0,0,0,0,0.680664,0.607422,2.0,1.0,2.0,0.0,4.484375,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,1.0,2.0,-1.213867,0.0,2.789062,0.0,900000.0,191412.0,0.0,-0.032867,1.0,1.0,0.0,1.0,0.0,13661.280273,100335.0,100335.0,150502.5,15.335938,1.0,-1.96582,1166.333374,18.0,3.0,0.333252,0.666504,0.0,0.0,0.333252,0.0,0.0,0.0,0.0,1.0,0.666504,0.333252,0.666504,0.333252,0.0,0.333252,0.333252,0.0,0.0,1.0,0.333252,0.333252,0.333252,0.333252,0.0,0.333252,0.0,0.333252,0.0,0.0,0.666504,0.333252,1.0,0.0,0.666504,0.333252,0.0,0.0,0.0,0.666504,0.333252,0.333252,0.0,0.0,0.333252,0.0,0.0,0.333252,0.0,0.0,0.0,0.0,0.333252,0.333252,0.0,0.0,-27.5,10.5625,5.5625,0.0,0.0,0.9375,0.0625,0.0,1.055664,4.667969,-2.267578,-2.289062,15835.860352,15564.720703,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,0,121500.0,263686.5,26208.0,238500.0,0.031342,61.0,4.519531,4.398438,13.226562,1,0,0,1,0,0,2.0,2,2,12,0,0,0,0,0,0,0.62207,0.812988,3.0,1.0,3.0,1.0,2.041016,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,-4.234375,0.0,-3.230469,0.0,135274.5,0.0,0.0,-3.087891,5.0,0.199951,0.799805,1.0,0.0,50049.734375,789000.0,848691.0,789000.0,12.0,1.0,-1.510742,-1.0,26.0,3.0,1.0,0.0,0.0,0.0,0.0,0.666504,0.0,0.0,0.333252,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.333252,0.0,0.666504,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.666504,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.333252,0.333252,0.333252,0.0,0.0,0.0,0.333252,0.0,0.333252,0.333252,0.0,0.0,0.0,0.0,-13.59375,28.921875,24.671875,0.0,0.0,0.925781,0.074097,0.0,1.345703,5.230469,-0.968262,-0.989746,75723.023438,78068.875,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3,0,135000.0,443088.0,30105.0,382500.0,0.018845,27.34375,5.320312,1.438477,0.282227,1,1,0,1,0,0,2.0,2,2,13,0,0,0,0,0,0,0.67334,0.51123,14.0,0.0,14.0,0.0,3.933594,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,-2.878906,0.0,-0.38501,0.0,196017.421875,44295.75,0.0,-1.320312,4.0,0.413086,0.583008,0.719238,0.199951,6007.274902,40759.5,39844.5,61139.25,14.664062,1.0,-1.523438,389.0,15.0,3.0,0.333252,0.666504,0.0,0.0,0.666504,0.0,0.0,0.333252,0.0,1.0,0.666504,0.333252,0.666504,0.333252,0.0,0.666504,0.333252,0.0,0.0,1.0,0.333252,0.333252,0.333252,0.333252,0.0,0.333252,0.0,0.333252,0.0,0.0,0.666504,0.333252,1.0,0.0,0.666504,0.333252,0.0,0.0,0.0,0.666504,0.333252,0.333252,0.333252,0.333252,0.0,0.0,0.0,0.333252,0.0,0.0,0.0,0.0,0.666504,0.0,0.0,0.0,-25.3125,14.304688,11.539062,0.0,0.0,0.922852,0.076904,0.0,1.0,3.539062,-1.817383,-1.859375,5898.866211,5545.412109,1,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
4,1,216000.0,640080.0,31261.5,450000.0,0.035797,23.484375,4.425781,9.0,2.632812,1,1,0,1,1,0,3.0,2,2,11,0,0,0,0,0,0,0.60498,0.463379,0.0,0.0,0.0,0.0,-0.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,-1.264648,0.0,1.140625,0.0,386504.09375,290834.09375,0.0,-0.324219,5.0,0.799805,0.199951,0.799805,0.199951,17686.619141,81000.0,95098.5,81000.0,14.0,1.0,-2.537109,35.0,6.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,-28.0,6.0,3.0,0.0,0.0,0.856934,0.142822,0.0,1.0,3.5,-2.246094,-2.263672,17681.152344,17681.152344,1,1,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [140]:
# (rows, columns) of the training frame.
# NOTE(review): this cell's execution count (140) is out of sequence with the
# cells around it (67, 69), so the output below presumably comes from a later
# run, after the feature cells had already changed the column set. Re-run the
# notebook top to bottom to confirm the shape at this point.
train.shape

(230633, 205)

### Let's define 3 new features which are better suited for our analysis
- `CREDIT_INCOME_RATIO = AMT_CREDIT / AMT_INCOME_TOTAL`
- `ANNUITY_INCOME_RATIO = AMT_ANNUITY / AMT_INCOME_TOTAL`
- `PAYOVER_TIME_YEARS = AMT_GOODS_PRICE / AMT_ANNUITY`


In [69]:
# Derive the three ratio features on the training set:
# credit and annuity relative to income, and goods price relative to annuity.
train['CREDIT_INCOME_RATIO'] = train['AMT_CREDIT'].div(train['AMT_INCOME_TOTAL'])
train['ANNUITY_INCOME_RATIO'] = train['AMT_ANNUITY'].div(train['AMT_INCOME_TOTAL'])
train['PAYOVER_TIME_YEARS'] = train['AMT_GOODS_PRICE'].div(train['AMT_ANNUITY'])


In [70]:
# Derive the same three ratio features on the test set:
# credit and annuity relative to income, and goods price relative to annuity.
test['CREDIT_INCOME_RATIO'] = test['AMT_CREDIT'].div(test['AMT_INCOME_TOTAL'])
test['ANNUITY_INCOME_RATIO'] = test['AMT_ANNUITY'].div(test['AMT_INCOME_TOTAL'])
test['PAYOVER_TIME_YEARS'] = test['AMT_GOODS_PRICE'].div(test['AMT_ANNUITY'])

In [71]:
# Drop the four raw amount columns from the training set (in place);
# the ratio features above were computed from them.
train.drop(
    ['AMT_INCOME_TOTAL', 'AMT_CREDIT', 'AMT_GOODS_PRICE', 'AMT_ANNUITY'],
    axis='columns',
    inplace=True,
)

In [72]:
# Drop the four raw amount columns from the test set (in place);
# the ratio features above were computed from them.
test.drop(
    ['AMT_INCOME_TOTAL', 'AMT_CREDIT', 'AMT_GOODS_PRICE', 'AMT_ANNUITY'],
    axis='columns',
    inplace=True,
)

In [181]:
# Preview the first rows of the training data after the feature changes;
# the three ratio columns appear last and the raw amount columns are gone.
train.head()

Unnamed: 0,CNT_CHILDREN,REGION_POPULATION_RELATIVE,YEARS_BIRTH,YEARS_EMPLOYED,YEARS_REGISTRATION,YEARS_ID_PUBLISH,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,EXT_SOURCE_2,EXT_SOURCE_3,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,YEARS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR,BU_YEARS_CREDIT,BU_CREDIT_DAY_OVERDUE,BU_YEARS_CREDIT_ENDDATE,BU_CNT_CREDIT_PROLONG,BU_AMT_CREDIT_SUM,BU_AMT_CREDIT_SUM_DEBT,BU_AMT_CREDIT_SUM_OVERDUE,BU_YEARS_CREDIT_UPDATE,BU_PREV_BU_LOAN_CNT,BU_CREDIT_ACTIVE_ACTIVE,BU_CREDIT_ACTIVE_CLOSED,BU_CREDIT_TYPE_CONSUMER CREDIT,BU_CREDIT_TYPE_CREDIT CARD,PREV_APP_AMT_ANNUITY,PREV_APP_AMT_APPLICATION,PREV_APP_AMT_CREDIT,PREV_APP_AMT_GOODS_PRICE,PREV_APP_HOUR_APPR_PROCESS_START,PREV_APP_NFLAG_LAST_APPL_IN_DAY,PREV_APP_YEARS_DECISION,PREV_APP_SELLERPLACE_AREA,PREV_APP_CNT_PAYMENT,PREV_APP_PREV_APP_CNT,PREV_APP_NAME_CONTRACT_TYPE_CASH LOANS,PREV_APP_NAME_CONTRACT_TYPE_CONSUMER LOANS,PREV_APP_NAME_CONTRACT_TYPE_REVOLVING 
LOANS,PREV_APP_WEEKDAY_APPR_PROCESS_START_FRIDAY,PREV_APP_WEEKDAY_APPR_PROCESS_START_MONDAY,PREV_APP_WEEKDAY_APPR_PROCESS_START_SATURDAY,PREV_APP_WEEKDAY_APPR_PROCESS_START_THURSDAY,PREV_APP_WEEKDAY_APPR_PROCESS_START_TUESDAY,PREV_APP_WEEKDAY_APPR_PROCESS_START_WEDNESDAY,PREV_APP_FLAG_LAST_APPL_PER_CONTRACT_Y,PREV_APP_NAME_CASH_LOAN_PURPOSE_XAP,PREV_APP_NAME_CASH_LOAN_PURPOSE_XNA,PREV_APP_NAME_CONTRACT_STATUS_APPROVED,PREV_APP_NAME_CONTRACT_STATUS_CANCELED,PREV_APP_NAME_CONTRACT_STATUS_REFUSED,PREV_APP_NAME_PAYMENT_TYPE_CASH THROUGH THE BANK,PREV_APP_NAME_PAYMENT_TYPE_XNA,PREV_APP_CODE_REJECT_REASON_HC,PREV_APP_CODE_REJECT_REASON_LIMIT,PREV_APP_CODE_REJECT_REASON_XAP,PREV_APP_NAME_CLIENT_TYPE_NEW,PREV_APP_NAME_CLIENT_TYPE_REFRESHED,PREV_APP_NAME_CLIENT_TYPE_REPEATER,PREV_APP_NAME_GOODS_CATEGORY_AUDIO/VIDEO,PREV_APP_NAME_GOODS_CATEGORY_COMPUTERS,PREV_APP_NAME_GOODS_CATEGORY_CONSUMER ELECTRONICS,PREV_APP_NAME_GOODS_CATEGORY_MOBILE,PREV_APP_NAME_GOODS_CATEGORY_XNA,PREV_APP_NAME_PORTFOLIO_CARDS,PREV_APP_NAME_PORTFOLIO_CASH,PREV_APP_NAME_PORTFOLIO_POS,PREV_APP_NAME_PORTFOLIO_XNA,PREV_APP_NAME_PRODUCT_TYPE_XNA,PREV_APP_NAME_PRODUCT_TYPE_X-SELL,PREV_APP_CHANNEL_TYPE_COUNTRY-WIDE,PREV_APP_CHANNEL_TYPE_CREDIT AND CASH OFFICES,PREV_APP_CHANNEL_TYPE_REGIONAL / LOCAL,PREV_APP_CHANNEL_TYPE_STONE,PREV_APP_NAME_SELLER_INDUSTRY_CONNECTIVITY,PREV_APP_NAME_SELLER_INDUSTRY_CONSUMER ELECTRONICS,PREV_APP_NAME_SELLER_INDUSTRY_XNA,PREV_APP_NAME_YIELD_GROUP_XNA,PREV_APP_NAME_YIELD_GROUP_HIGH,PREV_APP_NAME_YIELD_GROUP_LOW_NORMAL,PREV_APP_NAME_YIELD_GROUP_MIDDLE,PREV_APP_PRODUCT_COMBINATION_CARD STREET,PREV_APP_PRODUCT_COMBINATION_CARD X-SELL,PREV_APP_PRODUCT_COMBINATION_CASH,PREV_APP_PRODUCT_COMBINATION_CASH STREET: HIGH,PREV_APP_PRODUCT_COMBINATION_CASH X-SELL: HIGH,PREV_APP_PRODUCT_COMBINATION_CASH X-SELL: LOW,PREV_APP_PRODUCT_COMBINATION_CASH X-SELL: MIDDLE,PREV_APP_PRODUCT_COMBINATION_POS HOUSEHOLD WITH INTEREST,PREV_APP_PRODUCT_COMBINATION_POS HOUSEHOLD WITHOUT 
INTEREST,PREV_APP_PRODUCT_COMBINATION_POS INDUSTRY WITH INTEREST,PREV_APP_PRODUCT_COMBINATION_POS MOBILE WITH INTEREST,POS_CASH_MONTHS_BALANCE,POS_CASH_CNT_INSTALMENT,POS_CASH_CNT_INSTALMENT_FUTURE,POS_CASH_SK_DPD,POS_CASH_SK_DPD_DEF,POS_CASH_NAME_CONTRACT_STATUS_ACTIVE,POS_CASH_NAME_CONTRACT_STATUS_COMPLETED,POS_CASH_NAME_CONTRACT_STATUS_SIGNED,INSTALL_PAY_NUM_INSTALMENT_VERSION,INSTALL_PAY_NUM_INSTALMENT_NUMBER,INSTALL_PAY_YEARS_INSTALMENT,INSTALL_PAY_YEARS_ENTRY_PAYMENT,INSTALL_PAY_AMT_INSTALMENT,INSTALL_PAY_AMT_PAYMENT,TARGET,NAME_CONTRACT_TYPE_Cash loans,CODE_GENDER_F,FLAG_OWN_CAR_N,FLAG_OWN_REALTY_Y,NAME_TYPE_SUITE_Children,NAME_TYPE_SUITE_Family,"NAME_TYPE_SUITE_Spouse, partner",NAME_TYPE_SUITE_Unaccompanied,NAME_INCOME_TYPE_Commercial associate,NAME_INCOME_TYPE_Pensioner,NAME_INCOME_TYPE_State servant,NAME_INCOME_TYPE_Working,NAME_EDUCATION_TYPE_Higher education,NAME_EDUCATION_TYPE_Incomplete higher,NAME_EDUCATION_TYPE_Lower secondary,NAME_EDUCATION_TYPE_Secondary / secondary special,NAME_FAMILY_STATUS_Civil marriage,NAME_FAMILY_STATUS_Married,NAME_FAMILY_STATUS_Separated,NAME_FAMILY_STATUS_Single / not married,NAME_HOUSING_TYPE_House / apartment,NAME_HOUSING_TYPE_Municipal apartment,NAME_HOUSING_TYPE_Office apartment,NAME_HOUSING_TYPE_Rented apartment,NAME_HOUSING_TYPE_With parents,WEEKDAY_APPR_PROCESS_START_FRIDAY,WEEKDAY_APPR_PROCESS_START_MONDAY,WEEKDAY_APPR_PROCESS_START_SATURDAY,WEEKDAY_APPR_PROCESS_START_THURSDAY,WEEKDAY_APPR_PROCESS_START_TUESDAY,WEEKDAY_APPR_PROCESS_START_WEDNESDAY,ORGANIZATION_TYPE_Agriculture,ORGANIZATION_TYPE_Bank,ORGANIZATION_TYPE_Business_Entity,ORGANIZATION_TYPE_Construction,ORGANIZATION_TYPE_Government,ORGANIZATION_TYPE_Housing,ORGANIZATION_TYPE_Industry,ORGANIZATION_TYPE_Kindergarten,ORGANIZATION_TYPE_Medicine,ORGANIZATION_TYPE_Military,ORGANIZATION_TYPE_Police,ORGANIZATION_TYPE_Postal,ORGANIZATION_TYPE_Restaurant,ORGANIZATION_TYPE_School,ORGANIZATION_TYPE_Security,ORGANIZATION_TYPE_Security 
Ministries,ORGANIZATION_TYPE_Self-employed,ORGANIZATION_TYPE_Services,ORGANIZATION_TYPE_Trade,ORGANIZATION_TYPE_Transport,ORGANIZATION_TYPE_University,ORGANIZATION_TYPE_XNA,CREDIT_INCOME_RATIO,ANNUITY_INCOME_RATIO,PAYOVER_TIME_YEARS
0,0,0.022797,24.28125,3.857422,8.296875,1.84375,1,1,1,1,1,0,2,2,2,17,0,0,0,0,1,1,0.443115,0.508301,0,0,0,0,1.282227,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,-1.274414,0.0,-0.163452,0.0,168255.0,0.0,0.0,-0.626465,3,0.333252,0.666504,1.0,0.0,9499.589844,85384.796875,88209.0,85384.796875,15.203125,1.0,-0.719727,53.799999,11.335938,5,0.0,1.0,0.0,0.0,0.0,0.600098,0.0,0.0,0.0,1.0,1.0,0.0,0.399902,0.0,0.0,1.0,0.0,0.0,0.0,0.399902,0.399902,0.0,0.600098,0.0,0.600098,0.0,0.399902,0.0,0.0,0.0,0.600098,0.399902,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.399902,0.0,0.600098,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.399902,-11.0,11.453125,6.546875,0.0,0.0,0.90918,0.090881,0.0,1.111328,5.0,-0.868652,-0.983398,10151.355469,10151.355469,0,1,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,4.8448,0.22584,17.711655
1,0,0.028656,66.125,4.519531,20.40625,12.609375,1,0,0,1,0,0,1,2,2,9,0,0,0,0,0,0,0.680664,0.607422,2,1,2,0,4.484375,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,-1.213867,0.0,2.789062,0.0,900000.0,191412.0,0.0,-0.032867,1,1.0,0.0,1.0,0.0,13661.280273,100335.0,100335.0,150502.5,15.335938,1.0,-1.96582,1166.333374,18.0,3,0.333252,0.666504,0.0,0.0,0.333252,0.0,0.0,0.0,0.0,1.0,0.666504,0.333252,0.666504,0.333252,0.0,0.333252,0.333252,0.0,0.0,1.0,0.333252,0.333252,0.333252,0.333252,0.0,0.333252,0.0,0.333252,0.0,0.0,0.666504,0.333252,1.0,0.0,0.666504,0.333252,0.0,0.0,0.0,0.666504,0.333252,0.333252,0.0,0.0,0.333252,0.0,0.0,0.333252,0.0,0.0,0.0,0.0,0.333252,0.333252,0.0,0.0,-27.5,10.5625,5.5625,0.0,0.0,0.9375,0.0625,0.0,1.055664,4.667969,-2.267578,-2.289062,15835.860352,15564.720703,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0.909091,0.056145,16.19171
2,0,0.031342,61.0,4.519531,4.398438,13.226562,1,0,0,1,0,0,2,2,2,12,0,0,0,0,0,0,0.62207,0.812988,3,1,3,1,2.041016,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,-4.234375,0.0,-3.230469,0.0,135274.5,0.0,0.0,-3.087891,5,0.199951,0.799805,1.0,0.0,50049.734375,789000.0,848691.0,789000.0,12.0,1.0,-1.510742,-1.0,26.0,3,1.0,0.0,0.0,0.0,0.0,0.666504,0.0,0.0,0.333252,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.333252,0.0,0.666504,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.666504,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.333252,0.333252,0.333252,0.0,0.0,0.0,0.333252,0.0,0.333252,0.333252,0.0,0.0,0.0,0.0,-13.59375,28.921875,24.671875,0.0,0.0,0.925781,0.074097,0.0,1.345703,5.230469,-0.968262,-0.989746,75723.023438,78068.875,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2.170259,0.215704,9.100275
3,0,0.018845,27.34375,5.320312,1.438477,0.282227,1,1,0,1,0,0,2,2,2,13,0,0,0,0,0,0,0.67334,0.51123,14,0,14,0,3.933594,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,-2.878906,0.0,-0.38501,0.0,196017.421875,44295.75,0.0,-1.320312,4,0.413086,0.583008,0.719238,0.199951,6007.274902,40759.5,39844.5,61139.25,14.664062,1.0,-1.523438,389.0,15.0,3,0.333252,0.666504,0.0,0.0,0.666504,0.0,0.0,0.333252,0.0,1.0,0.666504,0.333252,0.666504,0.333252,0.0,0.666504,0.333252,0.0,0.0,1.0,0.333252,0.333252,0.333252,0.333252,0.0,0.333252,0.0,0.333252,0.0,0.0,0.666504,0.333252,1.0,0.0,0.666504,0.333252,0.0,0.0,0.0,0.666504,0.333252,0.333252,0.333252,0.333252,0.0,0.0,0.0,0.333252,0.0,0.0,0.0,0.0,0.666504,0.0,0.0,0.0,-25.3125,14.304688,11.539062,0.0,0.0,0.922852,0.076904,0.0,1.0,3.539062,-1.817383,-1.859375,5898.866211,5545.412109,1,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,3.282133,0.223,12.70553
4,1,0.035797,23.484375,4.425781,9.0,2.632812,1,1,0,1,1,0,3,2,2,11,0,0,0,0,0,0,0.60498,0.463379,0,0,0,0,-0.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.264648,0.0,1.140625,0.0,386504.09375,290834.09375,0.0,-0.324219,5,0.799805,0.199951,0.799805,0.199951,17686.619141,81000.0,95098.5,81000.0,14.0,1.0,-2.537109,35.0,6.0,1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,-28.0,6.0,3.0,0.0,0.0,0.856934,0.142822,0.0,1.0,3.5,-2.246094,-2.263672,17681.152344,17681.152344,1,1,1,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.963333,0.144729,14.394703


In [182]:
# (rows, columns) of the training data after adding the ratio features and dropping the raw amounts
train.shape

(230633, 205)

In [184]:
# Preview the first rows of the test data after the same feature changes
# (ratio columns appended last, raw amount columns removed).
test.head()

Unnamed: 0,CNT_CHILDREN,REGION_POPULATION_RELATIVE,YEARS_BIRTH,YEARS_EMPLOYED,YEARS_REGISTRATION,YEARS_ID_PUBLISH,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,EXT_SOURCE_2,EXT_SOURCE_3,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,YEARS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR,BU_YEARS_CREDIT,BU_CREDIT_DAY_OVERDUE,BU_YEARS_CREDIT_ENDDATE,BU_CNT_CREDIT_PROLONG,BU_AMT_CREDIT_SUM,BU_AMT_CREDIT_SUM_DEBT,BU_AMT_CREDIT_SUM_OVERDUE,BU_YEARS_CREDIT_UPDATE,BU_PREV_BU_LOAN_CNT,BU_CREDIT_ACTIVE_ACTIVE,BU_CREDIT_ACTIVE_CLOSED,BU_CREDIT_TYPE_CONSUMER CREDIT,BU_CREDIT_TYPE_CREDIT CARD,PREV_APP_AMT_ANNUITY,PREV_APP_AMT_APPLICATION,PREV_APP_AMT_CREDIT,PREV_APP_AMT_GOODS_PRICE,PREV_APP_HOUR_APPR_PROCESS_START,PREV_APP_NFLAG_LAST_APPL_IN_DAY,PREV_APP_YEARS_DECISION,PREV_APP_SELLERPLACE_AREA,PREV_APP_CNT_PAYMENT,PREV_APP_PREV_APP_CNT,PREV_APP_NAME_CONTRACT_TYPE_CASH LOANS,PREV_APP_NAME_CONTRACT_TYPE_CONSUMER LOANS,PREV_APP_NAME_CONTRACT_TYPE_REVOLVING 
LOANS,PREV_APP_WEEKDAY_APPR_PROCESS_START_FRIDAY,PREV_APP_WEEKDAY_APPR_PROCESS_START_MONDAY,PREV_APP_WEEKDAY_APPR_PROCESS_START_SATURDAY,PREV_APP_WEEKDAY_APPR_PROCESS_START_THURSDAY,PREV_APP_WEEKDAY_APPR_PROCESS_START_TUESDAY,PREV_APP_WEEKDAY_APPR_PROCESS_START_WEDNESDAY,PREV_APP_FLAG_LAST_APPL_PER_CONTRACT_Y,PREV_APP_NAME_CASH_LOAN_PURPOSE_XAP,PREV_APP_NAME_CASH_LOAN_PURPOSE_XNA,PREV_APP_NAME_CONTRACT_STATUS_APPROVED,PREV_APP_NAME_CONTRACT_STATUS_CANCELED,PREV_APP_NAME_CONTRACT_STATUS_REFUSED,PREV_APP_NAME_PAYMENT_TYPE_CASH THROUGH THE BANK,PREV_APP_NAME_PAYMENT_TYPE_XNA,PREV_APP_CODE_REJECT_REASON_HC,PREV_APP_CODE_REJECT_REASON_LIMIT,PREV_APP_CODE_REJECT_REASON_XAP,PREV_APP_NAME_CLIENT_TYPE_NEW,PREV_APP_NAME_CLIENT_TYPE_REFRESHED,PREV_APP_NAME_CLIENT_TYPE_REPEATER,PREV_APP_NAME_GOODS_CATEGORY_AUDIO/VIDEO,PREV_APP_NAME_GOODS_CATEGORY_COMPUTERS,PREV_APP_NAME_GOODS_CATEGORY_CONSUMER ELECTRONICS,PREV_APP_NAME_GOODS_CATEGORY_MOBILE,PREV_APP_NAME_GOODS_CATEGORY_XNA,PREV_APP_NAME_PORTFOLIO_CARDS,PREV_APP_NAME_PORTFOLIO_CASH,PREV_APP_NAME_PORTFOLIO_POS,PREV_APP_NAME_PORTFOLIO_XNA,PREV_APP_NAME_PRODUCT_TYPE_XNA,PREV_APP_NAME_PRODUCT_TYPE_X-SELL,PREV_APP_CHANNEL_TYPE_COUNTRY-WIDE,PREV_APP_CHANNEL_TYPE_CREDIT AND CASH OFFICES,PREV_APP_CHANNEL_TYPE_REGIONAL / LOCAL,PREV_APP_CHANNEL_TYPE_STONE,PREV_APP_NAME_SELLER_INDUSTRY_CONNECTIVITY,PREV_APP_NAME_SELLER_INDUSTRY_CONSUMER ELECTRONICS,PREV_APP_NAME_SELLER_INDUSTRY_XNA,PREV_APP_NAME_YIELD_GROUP_XNA,PREV_APP_NAME_YIELD_GROUP_HIGH,PREV_APP_NAME_YIELD_GROUP_LOW_NORMAL,PREV_APP_NAME_YIELD_GROUP_MIDDLE,PREV_APP_PRODUCT_COMBINATION_CARD STREET,PREV_APP_PRODUCT_COMBINATION_CARD X-SELL,PREV_APP_PRODUCT_COMBINATION_CASH,PREV_APP_PRODUCT_COMBINATION_CASH STREET: HIGH,PREV_APP_PRODUCT_COMBINATION_CASH X-SELL: HIGH,PREV_APP_PRODUCT_COMBINATION_CASH X-SELL: LOW,PREV_APP_PRODUCT_COMBINATION_CASH X-SELL: MIDDLE,PREV_APP_PRODUCT_COMBINATION_POS HOUSEHOLD WITH INTEREST,PREV_APP_PRODUCT_COMBINATION_POS HOUSEHOLD WITHOUT 
INTEREST,PREV_APP_PRODUCT_COMBINATION_POS INDUSTRY WITH INTEREST,PREV_APP_PRODUCT_COMBINATION_POS MOBILE WITH INTEREST,POS_CASH_MONTHS_BALANCE,POS_CASH_CNT_INSTALMENT,POS_CASH_CNT_INSTALMENT_FUTURE,POS_CASH_SK_DPD,POS_CASH_SK_DPD_DEF,POS_CASH_NAME_CONTRACT_STATUS_ACTIVE,POS_CASH_NAME_CONTRACT_STATUS_COMPLETED,POS_CASH_NAME_CONTRACT_STATUS_SIGNED,INSTALL_PAY_NUM_INSTALMENT_VERSION,INSTALL_PAY_NUM_INSTALMENT_NUMBER,INSTALL_PAY_YEARS_INSTALMENT,INSTALL_PAY_YEARS_ENTRY_PAYMENT,INSTALL_PAY_AMT_INSTALMENT,INSTALL_PAY_AMT_PAYMENT,TARGET,NAME_CONTRACT_TYPE_Cash loans,CODE_GENDER_F,FLAG_OWN_CAR_N,FLAG_OWN_REALTY_Y,NAME_TYPE_SUITE_Children,NAME_TYPE_SUITE_Family,"NAME_TYPE_SUITE_Spouse, partner",NAME_TYPE_SUITE_Unaccompanied,NAME_INCOME_TYPE_Commercial associate,NAME_INCOME_TYPE_Pensioner,NAME_INCOME_TYPE_State servant,NAME_INCOME_TYPE_Working,NAME_EDUCATION_TYPE_Higher education,NAME_EDUCATION_TYPE_Incomplete higher,NAME_EDUCATION_TYPE_Lower secondary,NAME_EDUCATION_TYPE_Secondary / secondary special,NAME_FAMILY_STATUS_Civil marriage,NAME_FAMILY_STATUS_Married,NAME_FAMILY_STATUS_Separated,NAME_FAMILY_STATUS_Single / not married,NAME_HOUSING_TYPE_House / apartment,NAME_HOUSING_TYPE_Municipal apartment,NAME_HOUSING_TYPE_Office apartment,NAME_HOUSING_TYPE_Rented apartment,NAME_HOUSING_TYPE_With parents,WEEKDAY_APPR_PROCESS_START_FRIDAY,WEEKDAY_APPR_PROCESS_START_MONDAY,WEEKDAY_APPR_PROCESS_START_SATURDAY,WEEKDAY_APPR_PROCESS_START_THURSDAY,WEEKDAY_APPR_PROCESS_START_TUESDAY,WEEKDAY_APPR_PROCESS_START_WEDNESDAY,ORGANIZATION_TYPE_Agriculture,ORGANIZATION_TYPE_Bank,ORGANIZATION_TYPE_Business_Entity,ORGANIZATION_TYPE_Construction,ORGANIZATION_TYPE_Government,ORGANIZATION_TYPE_Housing,ORGANIZATION_TYPE_Industry,ORGANIZATION_TYPE_Kindergarten,ORGANIZATION_TYPE_Medicine,ORGANIZATION_TYPE_Military,ORGANIZATION_TYPE_Police,ORGANIZATION_TYPE_Postal,ORGANIZATION_TYPE_Restaurant,ORGANIZATION_TYPE_School,ORGANIZATION_TYPE_Security,ORGANIZATION_TYPE_Security 
Ministries,ORGANIZATION_TYPE_Self-employed,ORGANIZATION_TYPE_Services,ORGANIZATION_TYPE_Trade,ORGANIZATION_TYPE_Transport,ORGANIZATION_TYPE_University,ORGANIZATION_TYPE_XNA,CREDIT_INCOME_RATIO,ANNUITY_INCOME_RATIO,PAYOVER_TIME_YEARS
0,1,0.022797,34.1875,0.750488,17.953125,7.472656,1,1,0,1,0,0,2.0,2,2,9,0,0,0,0,1,1,0.25,0.479492,7.0,0.0,7.0,0.0,-0.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,-6.558594,0.0,-3.9375,0.0,413628.75,0.0,0.0,-3.287109,4.0,0.0,1.0,0.75,0.25,11995.384766,105967.445312,117496.351562,128700.0,12.75,1.0,-2.162109,79.099998,12.0,4.0,0.317139,0.584473,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.682617,0.283203,0.748535,0.0,0.0,0.681152,0.312012,0.0,0.0,1.0,0.349609,0.0,0.549316,0.0,0.0,0.0,0.0,0.42041,0.0,0.111084,0.563477,0.0,0.714355,0.125,0.370117,0.29541,0.0,0.0,0.0,0.30249,0.365967,0.22876,0.142822,0.111084,0.255127,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-28.59375,12.0,6.949219,0.0,0.0,0.916504,0.076904,0.0,1.017578,6.046875,-2.179688,-2.210938,12582.792969,12233.078125,1,1,0,1,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3.762286,0.192914,14.810427
1,1,0.032562,47.4375,26.96875,9.640625,2.164062,1,1,0,1,0,1,3.0,1,1,12,0,0,0,0,0,0,0.709961,0.312256,0.0,0.0,0.0,0.0,5.15625,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,6.0,-0.644531,0.0,1.526367,0.0,330246.90625,216642.59375,0.0,-0.312744,5.0,0.600098,0.399902,0.600098,0.399902,13406.697266,108000.0,123415.203125,108000.0,11.796875,1.0,-4.195312,0.0,7.199219,5.0,0.799805,0.0,0.199951,0.199951,0.199951,0.0,0.199951,0.0,0.399902,1.0,0.199951,0.799805,1.0,0.0,0.0,0.799805,0.199951,0.0,0.0,1.0,0.199951,0.0,0.799805,0.0,0.0,0.0,0.0,1.0,0.199951,0.799805,0.0,0.0,0.0,0.600098,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.199951,0.399902,0.199951,0.199951,0.199951,0.0,0.0,0.199951,0.199951,0.199951,0.199951,0.0,0.0,0.0,0.0,-48.9375,9.898438,4.949219,0.0,0.0,0.899902,0.099976,0.0,0.390381,22.140625,-2.316406,-2.328125,10483.912109,9203.073242,0,1,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3.01776,0.16428,14.609203
2,0,0.010033,24.671875,0.912109,1.506836,4.59375,1,1,0,1,1,0,2.0,2,2,4,0,0,0,0,0,0,0.49292,0.51123,4.0,0.0,4.0,0.0,4.519531,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,-3.320312,0.0,-0.641113,0.0,62614.347656,0.0,0.0,-1.663086,3.0,0.666504,0.333252,0.333252,0.666504,11509.560547,104466.375,155791.125,139288.5,7.0,1.0,-4.226562,787.5,15.0,4.0,0.0,0.75,0.25,0.0,0.75,0.0,0.25,0.0,0.0,1.0,1.0,0.0,0.5,0.0,0.5,0.0,1.0,0.5,0.0,0.5,0.75,0.0,0.25,0.0,0.75,0.0,0.0,0.25,0.25,0.0,0.75,0.0,0.75,0.25,1.0,0.0,0.0,0.0,0.25,0.75,0.0,0.25,0.5,0.0,0.25,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.75,0.0,0.0,0.0,-46.5,15.9375,8.8125,0.0,0.0,0.9375,0.0625,0.0,0.195068,29.25,-2.21875,-2.228516,5405.318848,5405.318848,0,1,1,1,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,9.584,0.28136,28.433323
3,0,0.031342,36.8125,12.585938,19.09375,11.625,1,1,0,1,0,0,1.0,2,2,9,0,0,0,0,0,0,0.697266,0.51123,0.0,0.0,0.0,0.0,-0.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,-2.878906,0.0,-0.38501,0.0,196017.421875,44295.75,0.0,-1.320312,4.0,0.413086,0.583008,0.719238,0.199951,11443.149414,41518.285156,46004.144531,58125.601562,9.570312,1.0,-3.015625,7.571429,7.601562,7.0,0.714355,0.285645,0.0,0.0,0.0,0.571289,0.142822,0.0,0.0,1.0,0.285645,0.714355,0.714355,0.285645,0.0,0.714355,0.285645,0.0,0.0,1.0,0.285645,0.142822,0.571289,0.0,0.285645,0.0,0.0,0.714355,0.0,0.428467,0.285645,0.285645,0.571289,0.428467,0.0,0.714355,0.0,0.285645,0.0,0.285645,0.714355,0.285645,0.428467,0.142822,0.142822,0.0,0.0,0.285645,0.0,0.142822,0.142822,0.142822,0.285645,0.0,0.0,0.0,-46.875,7.0,3.583984,0.0,0.0,0.791504,0.166626,0.041656,1.111328,4.332031,-4.5,-4.527344,17744.042969,17744.042969,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.226333,0.120667,8.977901
4,0,0.030762,49.96875,1.755859,26.8125,4.847656,1,1,0,1,1,0,2.0,2,2,12,0,0,0,0,0,0,0.159668,0.531738,2.0,1.0,2.0,1.0,0.537109,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,-2.910156,0.0,-3.164062,0.0,98697.484375,34698.375,0.0,-1.728516,4.0,0.25,0.75,0.75,0.25,6116.52002,40719.75,42825.0,81439.5,14.335938,1.0,-0.814453,385.0,6.0,6.0,0.0,0.333252,0.666504,0.0,0.833496,0.0,0.0,0.166626,0.0,1.0,1.0,0.0,0.166626,0.5,0.333252,0.333252,0.666504,0.0,0.166626,0.666504,0.166626,0.0,0.833496,0.0,0.0,0.166626,0.0,0.666504,0.166626,0.0,0.333252,0.5,0.833496,0.166626,0.333252,0.666504,0.0,0.0,0.0,0.166626,0.666504,0.666504,0.0,0.0,0.333252,0.5,0.166626,0.0,0.0,0.0,0.0,0.0,0.166626,0.0,0.0,0.0,-12.0,12.0,6.0,0.0,0.0,0.922852,0.076904,0.0,1.0,6.5,-0.939941,-0.987793,6037.327637,6037.327637,1,1,0,1,1,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,4.4224,0.28792,13.892748


In [185]:
# Persist the cleaned, feature-engineered (still unbalanced) training frame as CSV.
# index=False leaves the DataFrame's row index out of the written file.
train.to_csv(path_or_buf='train_clean_FE.csv', index=False)

In [186]:
# Persist the cleaned, feature-engineered test frame as CSV.
# index=False leaves the DataFrame's row index out of the written file.
test.to_csv(path_or_buf='test_clean_FE.csv', index=False)