# Data Analysis for Home Credit Default Risk

In [302]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Directory holding the Home Credit Default Risk CSV files (a relative path,
# resolved against the working directory) and the name of each file in it.
DATA_DIR = '../data'
APP_TRAIN_FILENAME = 'application_train.csv'
APP_TEST_FILENAME = 'application_test.csv'
# Spelling fixed: the competition file is 'bureau.csv' (was 'bereau.csv').
BUREAU_FILENAME = 'bureau.csv'
BUREAU_BALANCE_FILENAME = 'bureau_balance.csv'
CREDIT_CARD_BALANCE_FILENAME = 'credit_card_balance.csv'
INSTALLMENTS_PAYMENTS_FILENAME = 'installments_payments.csv'
# Spelling fixed: the competition file is 'POS_CASH_balance.csv'
# (was 'POS_CACHE_balance.csv').
POS_CASH_BALANCE_FILENAME = 'POS_CASH_balance.csv'
PREVIOUS_APP_FILENAME = 'previous_application.csv'

# The original constant names are kept as aliases so that any cell written
# against them keeps working.
INSTALLMENTS_PAYMENTS = INSTALLMENTS_PAYMENTS_FILENAME
POS_CACHE_BALANCE_FILENAME = POS_CASH_BALANCE_FILENAME

## User Information

In [None]:
# Build the path to the training-applications CSV, read it into `user_df`,
# and print the frame's summary (info() prints and returns None).
APP_TRAIN_PATH = os.path.join(DATA_DIR, APP_TRAIN_FILENAME)
user_df = pd.read_csv(filepath_or_buffer=APP_TRAIN_PATH)
user_df.info()

In [None]:
user_df.head()

In [None]:
user_df.describe()

In [None]:
# Print, for every column of user_df, a banner with the column name followed
# by that column's frequency table.
for col, column_values in user_df.items():
    banner = '--------%s--------' % (col,)
    print(banner)
    print(column_values.value_counts())

In [None]:
# For every column of user_df print: a banner with the column name, whether
# the column has any missing values (with the number of missing entries),
# and the column's describe() summary.
for col in user_df.columns:
    column_values = user_df[col]
    # isnull() marks missing entries; summing the boolean mask counts them.
    # The original interpolated the whole isnull().value_counts() Series into
    # the label, which printed a multi-line table instead of a yes/no answer.
    n_missing = column_values.isnull().sum()
    print('--------%s--------' % (col,))
    print('contains NaN: %s (%d missing)' % (n_missing > 0, n_missing))
    print(column_values.describe())

- `TARGET` 正样本(违约用户)比例较低, 正负样本比例不均衡.
- `CODE_GENDER` 有缺失值, 应该如何处理?
- `CNT_CHILDREN` 孩子数目最高达到19, 是否应该当做异常值进行处理?

### NAME_CONTRACT_TYPE

In [None]:
user_df['NAME_CONTRACT_TYPE'].unique()

In [None]:
user_df['NAME_CONTRACT_TYPE'].describe()

In [None]:
user_df['NAME_CONTRACT_TYPE'].value_counts().plot(kind='bar')

### CODE_GENDER

In [None]:
user_df['CODE_GENDER'].unique()

In [None]:
user_df['CODE_GENDER'].value_counts()

In [None]:
user_df['CODE_GENDER'].value_counts().plot(kind='bar')

### FLAG_OWN_CAR

In [None]:
user_df['FLAG_OWN_CAR'].unique()

In [None]:
user_df['FLAG_OWN_CAR'].value_counts().plot(kind='bar')

### FLAG_OWN_REALTY

In [None]:
user_df['FLAG_OWN_REALTY'].unique()

In [None]:
user_df['FLAG_OWN_REALTY'].value_counts().plot(kind='bar')

### CNT_CHILDREN

In [None]:
user_df['CNT_CHILDREN'].unique()

In [None]:
user_df['CNT_CHILDREN'].describe()

In [None]:
(user_df['CNT_CHILDREN'] > 10).value_counts()

In [None]:
user_df['CNT_CHILDREN'].value_counts(sort=False)

In [None]:
# Bar chart (log-scaled y axis) of how many rows have each CNT_CHILDREN value.
# sort_index() orders the bars by the child count itself; with
# value_counts(sort=False) the bars were left in unsorted order, which made
# the numeric x axis non-monotonic.
user_df['CNT_CHILDREN'].value_counts().sort_index().plot(kind='bar', logy=True, use_index=True)

### AMT_INCOME_TOTAL

In [None]:
user_df['AMT_INCOME_TOTAL'].describe()

In [None]:
user_df['AMT_INCOME_TOTAL'].plot(kind='box', logy=True)

### AMT_CREDIT

In [None]:
user_df['AMT_CREDIT'].describe()

In [None]:
user_df['AMT_CREDIT'].plot(kind='box', logy=True)

### AMT_ANNUITY

In [None]:
user_df['AMT_ANNUITY'].describe()

In [None]:
user_df['AMT_ANNUITY'].isnull().value_counts()

In [None]:
user_df['AMT_ANNUITY'].plot(kind='box', logy=True)

### AMT_GOODS_PRICE

In [None]:
user_df['AMT_GOODS_PRICE'].describe()

In [None]:
user_df['AMT_GOODS_PRICE'].isnull().value_counts()

In [None]:
user_df['AMT_GOODS_PRICE'].plot(kind='box')

In [None]:
user_df['REGION_POPULATION_RELATIVE'].hist(bins=20)

### DAYS_BIRTH

In [None]:
user_df['DAYS_BIRTH'].describe()

In [None]:
user_df['DAYS_BIRTH'].isnull().value_counts()

In [None]:
user_df['DAYS_BIRTH'].hist(bins=100)

### DAYS_EMPLOYED

In [None]:
user_df['DAYS_EMPLOYED'].describe()

In [None]:
user_df['DAYS_EMPLOYED'].unique()

In [None]:
user_df['DAYS_EMPLOYED'].isnull().value_counts()

In [None]:
# Count the rows whose DAYS_EMPLOYED value is strictly greater than zero.
days_employed_positive = user_df['DAYS_EMPLOYED'] > 0
user_df.loc[days_employed_positive, 'DAYS_EMPLOYED'].count()

In [None]:
# Count the rows whose DAYS_EMPLOYED value is exactly 365243.
# NOTE(review): 365243 looks like a placeholder rather than a real day
# count - confirm against the dataset's column description.
days_employed_is_365243 = user_df['DAYS_EMPLOYED'] == 365243
user_df.loc[days_employed_is_365243, 'DAYS_EMPLOYED'].count()

In [None]:
user_df['DAYS_EMPLOYED'].hist(bins=100)

### DAYS_REGISTRATION

In [None]:
user_df['DAYS_REGISTRATION'].describe()

In [None]:
user_df['DAYS_REGISTRATION'].isnull().value_counts()

In [None]:
user_df['DAYS_REGISTRATION'].hist(bins=20)

### DAYS_ID_PUBLISH

In [None]:
user_df['DAYS_ID_PUBLISH'].describe()

In [None]:
user_df['DAYS_ID_PUBLISH'].isnull().value_counts()

In [None]:
user_df['DAYS_ID_PUBLISH'].hist(bins=100)

### OWN_CAR_AGE

In [None]:
user_df['OWN_CAR_AGE'].describe()

In [None]:
user_df['OWN_CAR_AGE'].isnull().value_counts()

In [None]:
# Count the rows whose OWN_CAR_AGE value is greater than 20.
car_age_over_20 = user_df['OWN_CAR_AGE'] > 20
user_df.loc[car_age_over_20, 'OWN_CAR_AGE'].count()

In [None]:
# Count the rows whose OWN_CAR_AGE value is greater than 60.
car_age_over_60 = user_df['OWN_CAR_AGE'] > 60
user_df.loc[car_age_over_60, 'OWN_CAR_AGE'].count()

In [None]:
user_df['OWN_CAR_AGE'].hist(bins=100)

### FLAG_MOBIL

In [None]:
user_df['FLAG_MOBIL'].describe()

In [None]:
user_df['FLAG_MOBIL'].value_counts()

### FLAG_EMP_PHONE

In [None]:
user_df['FLAG_EMP_PHONE'].describe()

In [None]:
user_df['FLAG_EMP_PHONE'].value_counts()

In [None]:
user_df['FLAG_EMP_PHONE'].value_counts().plot(kind='bar')

### FLAG_WORK_PHONE

In [None]:
user_df['FLAG_WORK_PHONE'].describe()

In [None]:
user_df['FLAG_WORK_PHONE'].value_counts()

In [None]:
user_df['FLAG_WORK_PHONE'].value_counts().plot(kind='bar')

### FLAG_CONT_MOBILE

In [None]:
user_df['FLAG_CONT_MOBILE'].describe()

In [None]:
user_df['FLAG_CONT_MOBILE'].value_counts().plot(kind='bar')

### FLAG_PHONE

In [None]:
user_df['FLAG_PHONE'].describe()

In [None]:
user_df['FLAG_PHONE'].isnull().value_counts()

In [None]:
user_df['FLAG_PHONE'].value_counts().plot(kind='bar')

### FLAG_EMAIL

In [None]:
user_df['FLAG_EMAIL'].describe()

In [None]:
user_df['FLAG_EMAIL'].isnull().value_counts()

In [None]:
user_df['FLAG_EMAIL'].value_counts().plot(kind='bar')

### OCCUPATION_TYPE

In [None]:
user_df['OCCUPATION_TYPE'].describe()

In [None]:
user_df['OCCUPATION_TYPE'].isnull().value_counts()

In [None]:
user_df['OCCUPATION_TYPE'].unique()

In [None]:
user_df['OCCUPATION_TYPE'].value_counts().plot(kind='bar')

### CNT_FAM_MEMBERS

In [None]:
user_df['CNT_FAM_MEMBERS'].describe()

In [None]:
user_df['CNT_FAM_MEMBERS'].isnull().value_counts()

In [None]:
user_df['CNT_FAM_MEMBERS'].unique()

In [None]:
user_df['CNT_FAM_MEMBERS'].value_counts().plot(kind='bar', logy=True)

### REGION_RATING_CLIENT

In [None]:
user_df['REGION_RATING_CLIENT'].describe()

In [None]:
user_df['REGION_RATING_CLIENT'].unique()

In [None]:
user_df['REGION_RATING_CLIENT'].value_counts().plot(kind='bar')

### REGION_RATING_CLIENT_W_CITY

In [None]:
user_df['REGION_RATING_CLIENT_W_CITY'].describe()

In [None]:
user_df['REGION_RATING_CLIENT_W_CITY'].unique()

In [None]:
user_df['REGION_RATING_CLIENT_W_CITY'].value_counts().plot(kind='bar')

### WEEKDAY_APPR_PROCESS_START

In [None]:
user_df['WEEKDAY_APPR_PROCESS_START'].describe()

In [None]:
user_df['WEEKDAY_APPR_PROCESS_START'].isnull().value_counts()

In [None]:
user_df['WEEKDAY_APPR_PROCESS_START'].value_counts().plot(kind='bar')

### HOUR_APPR_PROCESS_START

In [None]:
user_df['HOUR_APPR_PROCESS_START'].describe()

In [None]:
user_df['HOUR_APPR_PROCESS_START'].isnull().value_counts()

In [None]:
user_df['HOUR_APPR_PROCESS_START'].value_counts().plot(kind='bar')

### REG_REGION_NOT_LIVE_REGION

In [None]:
user_df['REG_REGION_NOT_LIVE_REGION'].describe()

In [None]:
user_df['REG_REGION_NOT_LIVE_REGION'].isnull().value_counts()

In [None]:
user_df['REG_REGION_NOT_LIVE_REGION'].value_counts().plot(kind='bar')

### REG_REGION_NOT_WORK_REGION

In [None]:
user_df['REG_REGION_NOT_WORK_REGION'].describe()

In [None]:
user_df['REG_REGION_NOT_WORK_REGION'].isnull().value_counts()

In [None]:
user_df['REG_REGION_NOT_WORK_REGION'].value_counts().plot(kind='bar')

### LIVE_REGION_NOT_WORK_REGION

In [None]:
user_df['LIVE_REGION_NOT_WORK_REGION'].describe()

In [None]:
user_df['LIVE_REGION_NOT_WORK_REGION'].value_counts().plot(kind='bar')

### REG_CITY_NOT_LIVE_CITY

In [None]:
user_df['REG_CITY_NOT_LIVE_CITY'].describe()

In [None]:
user_df['REG_CITY_NOT_LIVE_CITY'].value_counts().plot(kind='bar')

### REG_CITY_NOT_WORK_CITY

In [None]:
user_df['REG_CITY_NOT_WORK_CITY'].describe()

In [None]:
user_df['REG_CITY_NOT_WORK_CITY'].value_counts().plot(kind='bar')

### LIVE_CITY_NOT_WORK_CITY

In [None]:
user_df['LIVE_CITY_NOT_WORK_CITY'].describe()

In [None]:
user_df['LIVE_CITY_NOT_WORK_CITY'].value_counts().plot(kind='bar')

### ORGANIZATION_TYPE

In [None]:
user_df['ORGANIZATION_TYPE'].describe()

In [None]:
user_df['ORGANIZATION_TYPE'].unique()

In [None]:
user_df['ORGANIZATION_TYPE'].isnull().value_counts()

In [None]:
user_df['ORGANIZATION_TYPE'].value_counts().plot(kind='bar', figsize=[16, 9], logy=True)

### EXT_SOURCE_1

In [None]:
user_df['EXT_SOURCE_1'].describe()

In [None]:
user_df['EXT_SOURCE_1'].isnull().value_counts()

In [None]:
user_df['EXT_SOURCE_1'].hist(bins=100)

### EXT_SOURCE_2

In [None]:
user_df['EXT_SOURCE_2'].describe()

In [None]:
user_df['EXT_SOURCE_2'].isnull().value_counts()

In [None]:
user_df['EXT_SOURCE_2'].hist(bins=100)

### EXT_SOURCE_3

In [None]:
user_df['EXT_SOURCE_3'].describe()

In [None]:
user_df['EXT_SOURCE_3'].isnull().value_counts()

In [None]:
user_df['EXT_SOURCE_3'].hist(bins=100)

## Building Information

### APARTMENTS_AVG

In [None]:
user_df['APARTMENTS_AVG'].describe()

In [None]:
user_df['APARTMENTS_AVG'].isnull().value_counts()

In [None]:
user_df['APARTMENTS_AVG'].hist(bins=20)

### BASEMENTAREA_AVG

In [None]:
user_df['BASEMENTAREA_AVG'].describe()

In [None]:
user_df['BASEMENTAREA_AVG'].isnull().value_counts()

In [None]:
user_df['BASEMENTAREA_AVG'].hist(bins=20)

### YEARS_BEGINEXPLUATATION_AVG

In [None]:
user_df['YEARS_BEGINEXPLUATATION_AVG'].describe()

In [None]:
user_df['YEARS_BEGINEXPLUATATION_AVG'].isnull().value_counts()

In [None]:
user_df['YEARS_BEGINEXPLUATATION_AVG'].hist(bins=20)

### YEARS_BUILD_AVG

In [None]:
user_df['YEARS_BUILD_AVG'].describe()

In [None]:
user_df['YEARS_BUILD_AVG'].isnull().value_counts()

In [None]:
user_df['YEARS_BUILD_AVG'].hist(bins=20)

### COMMONAREA_AVG

In [None]:
user_df['COMMONAREA_AVG'].describe()

In [None]:
user_df['COMMONAREA_AVG'].isnull().value_counts()

In [None]:
user_df['COMMONAREA_AVG'].hist(bins=20)

### ELEVATORS_AVG

In [None]:
user_df['ELEVATORS_AVG'].describe()

In [None]:
user_df['ELEVATORS_AVG'].isnull().value_counts()

In [None]:
user_df['ELEVATORS_AVG'].hist(bins=20)

### ENTRANCES_AVG

In [None]:
user_df['ENTRANCES_AVG'].describe()

In [None]:
user_df['ENTRANCES_AVG'].isnull().value_counts()

In [None]:
user_df['ENTRANCES_AVG'].hist(bins=20)

### FLOORSMAX_AVG

In [None]:
user_df['FLOORSMAX_AVG'].describe()

In [None]:
user_df['FLOORSMAX_AVG'].isnull().value_counts()

In [None]:
user_df['FLOORSMAX_AVG'].hist(bins=20)

### FLOORSMIN_AVG

In [None]:
user_df['FLOORSMIN_AVG'].describe()

In [None]:
user_df['FLOORSMIN_AVG'].isnull().value_counts()

In [None]:
user_df['FLOORSMIN_AVG'].hist(bins=20)

### LANDAREA_AVG

In [None]:
user_df['LANDAREA_AVG'].describe()

In [None]:
user_df['LANDAREA_AVG'].isnull().value_counts()

In [None]:
user_df['LANDAREA_AVG'].hist(bins=20)

### LIVINGAPARTMENTS_AVG

In [None]:
user_df['LIVINGAPARTMENTS_AVG'].describe()

In [None]:
user_df['LIVINGAPARTMENTS_AVG'].isnull().value_counts()

In [None]:
user_df['LIVINGAPARTMENTS_AVG'].hist(bins=20)

### LIVINGAREA_AVG

In [None]:
user_df['LIVINGAREA_AVG'].describe()

In [None]:
user_df['LIVINGAREA_AVG'].isnull().value_counts()

In [None]:
user_df['LIVINGAREA_AVG'].hist(bins=20)

### NONLIVINGAPARTMENTS_AVG

In [None]:
user_df['NONLIVINGAPARTMENTS_AVG'].describe()

In [None]:
user_df['NONLIVINGAPARTMENTS_AVG'].isnull().value_counts()

In [None]:
user_df['NONLIVINGAPARTMENTS_AVG'].hist(bins=20)

### NONLIVINGAREA_AVG

In [None]:
user_df['NONLIVINGAREA_AVG'].describe()

In [None]:
user_df['NONLIVINGAREA_AVG'].isnull().value_counts()

In [None]:
user_df['NONLIVINGAREA_AVG'].hist(bins=20)

### APARTMENTS_MODE

In [None]:
user_df['APARTMENTS_MODE'].describe()

In [None]:
user_df['APARTMENTS_MODE'].isnull().value_counts()

In [None]:
user_df['APARTMENTS_MODE'].hist(bins=20)

### BASEMENTAREA_MODE

In [None]:
user_df['BASEMENTAREA_MODE'].describe()

In [None]:
user_df['BASEMENTAREA_MODE'].isnull().value_counts()

In [None]:
user_df['BASEMENTAREA_MODE'].hist(bins=20)

### YEARS_BEGINEXPLUATATION_MODE

In [None]:
user_df['YEARS_BEGINEXPLUATATION_MODE'].describe()

In [None]:
user_df['YEARS_BEGINEXPLUATATION_MODE'].isnull().value_counts()

In [None]:
user_df['YEARS_BEGINEXPLUATATION_MODE'].hist(bins=20)

### COMMONAREA_MODE

In [None]:
user_df['COMMONAREA_MODE'].describe()

In [None]:
user_df['COMMONAREA_MODE'].isnull().value_counts()

In [None]:
user_df['COMMONAREA_MODE'].hist(bins=20)

### ELEVATORS_MODE

In [None]:
user_df['ELEVATORS_MODE'].describe()

In [None]:
user_df['ELEVATORS_MODE'].isnull().value_counts()

In [None]:
user_df['ELEVATORS_MODE'].hist(bins=20)

### ENTRANCES_MODE

In [None]:
user_df['ENTRANCES_MODE'].describe()

In [None]:
user_df['ENTRANCES_MODE'].isnull().value_counts()

In [None]:
user_df['ENTRANCES_MODE'].hist(bins=20)

### FLOORSMAX_MODE

In [None]:
user_df['FLOORSMAX_MODE'].describe()

In [None]:
user_df['FLOORSMAX_MODE'].isnull().value_counts()

In [None]:
user_df['FLOORSMAX_MODE'].hist(bins=20)

### FLOORSMIN_MODE

In [None]:
user_df['FLOORSMIN_MODE'].describe()

In [None]:
user_df['FLOORSMIN_MODE'].isnull().value_counts()

In [None]:
user_df['FLOORSMIN_MODE'].hist(bins=20)

### LANDAREA_MODE

In [None]:
user_df['LANDAREA_MODE'].describe()

In [None]:
user_df['LANDAREA_MODE'].isnull().value_counts()

In [None]:
user_df['LANDAREA_MODE'].hist(bins=20)

### LIVINGAPARTMENTS_MODE

In [None]:
user_df['LIVINGAPARTMENTS_MODE'].describe()

In [None]:
user_df['LIVINGAPARTMENTS_MODE'].isnull().value_counts()

In [None]:
user_df['LIVINGAPARTMENTS_MODE'].hist(bins=20)

### LIVINGAREA_MODE

In [None]:
user_df['LIVINGAREA_MODE'].describe()

In [None]:
user_df['LIVINGAREA_MODE'].isnull().value_counts()

In [None]:
user_df['LIVINGAREA_MODE'].hist(bins=20)

### NONLIVINGAPARTMENTS_MODE

In [None]:
user_df['NONLIVINGAPARTMENTS_MODE'].describe()

In [None]:
user_df['NONLIVINGAPARTMENTS_MODE'].isnull().value_counts()

In [None]:
user_df['NONLIVINGAPARTMENTS_MODE'].hist(bins=20)

### NONLIVINGAREA_MODE

In [None]:
user_df['NONLIVINGAREA_MODE'].describe()

In [None]:
user_df['NONLIVINGAREA_MODE'].isnull().value_counts()

In [None]:
user_df['NONLIVINGAREA_MODE'].hist(bins=20)

### APARTMENTS_MEDI

In [None]:
user_df['APARTMENTS_MEDI'].describe()

In [None]:
user_df['APARTMENTS_MEDI'].isnull().value_counts()

In [None]:
user_df['APARTMENTS_MEDI'].hist(bins=20)

### BASEMENTAREA_MEDI

In [None]:
user_df['BASEMENTAREA_MEDI'].describe()

In [None]:
user_df['BASEMENTAREA_MEDI'].isnull().value_counts()

In [None]:
user_df['BASEMENTAREA_MEDI'].hist(bins=20)

### YEARS_BEGINEXPLUATATION_MEDI

In [None]:
user_df['YEARS_BEGINEXPLUATATION_MEDI'].describe()

In [None]:
user_df['YEARS_BEGINEXPLUATATION_MEDI'].isnull().value_counts()

In [None]:
user_df['YEARS_BEGINEXPLUATATION_MEDI'].hist(bins=20)

### YEARS_BUILD_MEDI

In [None]:
user_df['YEARS_BUILD_MEDI'].describe()

In [None]:
user_df['YEARS_BUILD_MEDI'].isnull().value_counts()

In [None]:
user_df['YEARS_BUILD_MEDI'].hist(bins=20)

### COMMONAREA_MEDI

In [None]:
user_df['COMMONAREA_MEDI'].describe()

In [None]:
user_df['COMMONAREA_MEDI'].isnull().value_counts()

In [None]:
user_df['COMMONAREA_MEDI'].hist(bins=20)

### ELEVATORS_MEDI

In [None]:
user_df['ELEVATORS_MEDI'].describe()

In [None]:
user_df['ELEVATORS_MEDI'].isnull().value_counts()

In [None]:
user_df['ELEVATORS_MEDI'].hist(bins=20)

### ENTRANCES_MEDI

In [None]:
user_df['ENTRANCES_MEDI'].describe()

In [None]:
user_df['ENTRANCES_MEDI'].isnull().value_counts()

In [None]:
user_df['ENTRANCES_MEDI'].hist(bins=20)

### FLOORSMAX_MEDI

In [None]:
user_df['FLOORSMAX_MEDI'].describe()

In [None]:
user_df['FLOORSMAX_MEDI'].isnull().value_counts()

In [None]:
user_df['FLOORSMAX_MEDI'].hist(bins=20)

### FLOORSMIN_MEDI

In [None]:
user_df['FLOORSMIN_MEDI'].describe()

In [None]:
user_df['FLOORSMIN_MEDI'].isnull().value_counts()

In [None]:
user_df['FLOORSMIN_MEDI'].hist(bins=20)

### LANDAREA_MEDI

In [None]:
user_df['LANDAREA_MEDI'].describe()

In [None]:
user_df['LANDAREA_MEDI'].isnull().value_counts()

In [None]:
user_df['LANDAREA_MEDI'].hist(bins=20)

### LIVINGAPARTMENTS_MEDI

In [None]:
user_df['LIVINGAPARTMENTS_MEDI'].describe()

In [None]:
user_df['LIVINGAPARTMENTS_MEDI'].isnull().value_counts()

In [None]:
user_df['LIVINGAPARTMENTS_MEDI'].hist(bins=20)

### LIVINGAREA_MEDI

In [None]:
user_df['LIVINGAREA_MEDI'].describe()

In [None]:
user_df['LIVINGAREA_MEDI'].isnull().value_counts()

In [None]:
user_df['LIVINGAREA_MEDI'].hist(bins=20)

### NONLIVINGAPARTMENTS_MEDI

In [None]:
user_df['NONLIVINGAPARTMENTS_MEDI'].describe()

In [None]:
user_df['NONLIVINGAPARTMENTS_MEDI'].isnull().value_counts()

In [None]:
user_df['NONLIVINGAPARTMENTS_MEDI'].hist(bins=20)

### NONLIVINGAREA_MEDI

In [None]:
user_df['NONLIVINGAREA_MEDI'].describe()

In [None]:
user_df['NONLIVINGAREA_MEDI'].isnull().value_counts()

In [None]:
user_df['NONLIVINGAREA_MEDI'].hist(bins=20)

### FONDKAPREMONT_MODE

In [None]:
user_df['FONDKAPREMONT_MODE'].describe()

In [None]:
user_df['FONDKAPREMONT_MODE'].value_counts()

In [None]:
user_df['FONDKAPREMONT_MODE'].isnull().value_counts()

In [None]:
user_df['FONDKAPREMONT_MODE'].value_counts().plot(kind='bar')

### HOUSETYPE_MODE

In [None]:
user_df['HOUSETYPE_MODE'].describe()

In [None]:
user_df['HOUSETYPE_MODE'].isnull().value_counts()

In [None]:
user_df['HOUSETYPE_MODE'].value_counts()

In [None]:
user_df['HOUSETYPE_MODE'].value_counts().plot(kind='bar')

### TOTALAREA_MODE

In [None]:
user_df['TOTALAREA_MODE'].describe()

In [None]:
user_df['TOTALAREA_MODE'].isnull().value_counts()

In [None]:
user_df['TOTALAREA_MODE'].hist(bins=20)

### WALLSMATERIAL_MODE

In [None]:
user_df['WALLSMATERIAL_MODE'].describe()

In [None]:
user_df['WALLSMATERIAL_MODE'].value_counts()

In [None]:
user_df['WALLSMATERIAL_MODE'].isnull().value_counts()

In [None]:
user_df['WALLSMATERIAL_MODE'].value_counts().plot(kind='bar')

### EMERGENCYSTATE_MODE

In [None]:
user_df['EMERGENCYSTATE_MODE'].describe()

In [None]:
user_df['EMERGENCYSTATE_MODE'].isnull().value_counts()

In [None]:
user_df['EMERGENCYSTATE_MODE'].value_counts()

In [None]:
user_df['EMERGENCYSTATE_MODE'].value_counts().plot(kind='bar')

### OBS_30_CNT_SOCIAL_CIRCLE

In [None]:
user_df['OBS_30_CNT_SOCIAL_CIRCLE'].describe()

In [None]:
user_df['OBS_30_CNT_SOCIAL_CIRCLE'].isnull().value_counts()

In [None]:
user_df['OBS_30_CNT_SOCIAL_CIRCLE'].value_counts()

In [None]:
user_df['OBS_30_CNT_SOCIAL_CIRCLE'].hist(bins=100)

### DEF_30_CNT_SOCIAL_CIRCLE

In [None]:
user_df['DEF_30_CNT_SOCIAL_CIRCLE'].describe()

In [None]:
user_df['DEF_30_CNT_SOCIAL_CIRCLE'].isnull().value_counts()

In [None]:
user_df['DEF_30_CNT_SOCIAL_CIRCLE'].hist(bins=20)

### OBS_60_CNT_SOCIAL_CIRCLE

In [None]:
user_df['OBS_60_CNT_SOCIAL_CIRCLE'].describe()

In [None]:
user_df['OBS_60_CNT_SOCIAL_CIRCLE'].isnull().value_counts()

In [None]:
user_df['OBS_60_CNT_SOCIAL_CIRCLE'].hist(bins=20)

### DEF_60_CNT_SOCIAL_CIRCLE

In [None]:
user_df['DEF_60_CNT_SOCIAL_CIRCLE'].describe()

In [None]:
user_df['DEF_60_CNT_SOCIAL_CIRCLE'].isnull().value_counts()

In [None]:
user_df['DEF_60_CNT_SOCIAL_CIRCLE'].hist(bins=20)

### DAYS_LAST_PHONE_CHANGE

In [None]:
user_df['DAYS_LAST_PHONE_CHANGE'].describe()

In [None]:
user_df['DAYS_LAST_PHONE_CHANGE'].isnull().value_counts()

In [None]:
user_df['DAYS_LAST_PHONE_CHANGE'].hist(bins=100)

### FLAG_DOCUMENT

In [None]:
# Column names FLAG_DOCUMENT_2 through FLAG_DOCUMENT_21 (range end is exclusive).
FLAG_DOCUMENTS = ['FLAG_DOCUMENT_%d' % (ind,) for ind in range(2, 22)]

# For each of those columns print a banner, the describe() summary, the
# missing/non-missing counts, and the frequency table of its values.
for item in FLAG_DOCUMENTS:
    flag_values = user_df[item]
    print('==========%s==========' % (item,))
    print(flag_values.describe())
    print(flag_values.isnull().value_counts())
    print(flag_values.value_counts())

### AMT_REQ_CREDIT_BUREAU_HOUR

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_HOUR'].describe()

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_HOUR'].isnull().value_counts()

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_HOUR'].value_counts()

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_HOUR'].hist(bins=4)

### AMT_REQ_CREDIT_BUREAU_DAY

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_DAY'].describe()

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_DAY'].isnull().value_counts()

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_DAY'].value_counts()

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_DAY'].hist(bins=10)

### AMT_REQ_CREDIT_BUREAU_WEEK

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_WEEK'].describe()

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_WEEK'].isnull().value_counts()

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_WEEK'].value_counts()

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_WEEK'].hist(bins=10)

### AMT_REQ_CREDIT_BUREAU_MON

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_MON'].describe()

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_MON'].value_counts()

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_MON'].isnull().value_counts()

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_MON'].value_counts().plot(kind='bar')

### AMT_REQ_CREDIT_BUREAU_QRT

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_QRT'].describe()

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_QRT'].isnull().value_counts()

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_QRT'].value_counts()

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_QRT'].value_counts().plot(kind='bar')

### AMT_REQ_CREDIT_BUREAU_YEAR

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_YEAR'].describe()

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_YEAR'].isnull().value_counts()

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_YEAR'].value_counts()

In [None]:
user_df['AMT_REQ_CREDIT_BUREAU_YEAR'].value_counts().plot(kind='bar')