In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the bureau table from bureau.csv; the path is relative, so the file
# must sit in the notebook's working directory.
df_bur = pd.read_csv('bureau.csv')

In [3]:
# List the column names to see which fields the bureau table provides.
df_bur.columns

Index(['SK_ID_CURR', 'SK_ID_BUREAU', 'CREDIT_ACTIVE', 'CREDIT_CURRENCY',
       'DAYS_CREDIT', 'CREDIT_DAY_OVERDUE', 'DAYS_CREDIT_ENDDATE',
       'DAYS_ENDDATE_FACT', 'AMT_CREDIT_MAX_OVERDUE', 'CNT_CREDIT_PROLONG',
       'AMT_CREDIT_SUM', 'AMT_CREDIT_SUM_DEBT', 'AMT_CREDIT_SUM_LIMIT',
       'AMT_CREDIT_SUM_OVERDUE', 'CREDIT_TYPE', 'DAYS_CREDIT_UPDATE',
       'AMT_ANNUITY'],
      dtype='object')

- `SK_ID_CURR` - ID of loan in our sample - one loan in our sample can have 0,1,2 or more related previous credits in credit bureau
- `SK_ID_BUREAU` - Recoded ID of previous Credit Bureau credit related to our loan (unique coding for each loan application)

In [4]:
# Preview the first rows to see typical values and where NaNs occur.
df_bur.head()

Unnamed: 0,SK_ID_CURR,SK_ID_BUREAU,CREDIT_ACTIVE,CREDIT_CURRENCY,DAYS_CREDIT,CREDIT_DAY_OVERDUE,DAYS_CREDIT_ENDDATE,DAYS_ENDDATE_FACT,AMT_CREDIT_MAX_OVERDUE,CNT_CREDIT_PROLONG,AMT_CREDIT_SUM,AMT_CREDIT_SUM_DEBT,AMT_CREDIT_SUM_LIMIT,AMT_CREDIT_SUM_OVERDUE,CREDIT_TYPE,DAYS_CREDIT_UPDATE,AMT_ANNUITY
0,215354,5714462,Closed,currency 1,-497,0,-153.0,-153.0,,0,91323.0,0.0,,0.0,Consumer credit,-131,
1,215354,5714463,Active,currency 1,-208,0,1075.0,,,0,225000.0,171342.0,,0.0,Credit card,-20,
2,215354,5714464,Active,currency 1,-203,0,528.0,,,0,464323.5,,,0.0,Consumer credit,-16,
3,215354,5714465,Active,currency 1,-203,0,,,,0,90000.0,,,0.0,Credit card,-16,
4,215354,5714466,Active,currency 1,-629,0,1197.0,,77674.5,0,2700000.0,,,0.0,Consumer credit,-21,


In [5]:
# Distinct credit statuses present in the data.
df_bur['CREDIT_ACTIVE'].unique()

array(['Closed', 'Active', 'Sold', 'Bad debt'], dtype=object)

In [6]:
# Distinct currency codes present in the data.
df_bur['CREDIT_CURRENCY'].unique()

array(['currency 1', 'currency 2', 'currency 4', 'currency 3'],
      dtype=object)

In [7]:
# Distinct values of DAYS_CREDIT. The recorded output includes 0, so the
# values are not strictly negative.
df_bur['DAYS_CREDIT'].unique()

array([-497, -208, -203, ...,    0,   -3,   -1])

In [8]:
# Distinct values of DAYS_CREDIT_ENDDATE. The recorded output contains both
# negative and positive day counts.
df_bur['DAYS_CREDIT_ENDDATE'].unique()

array([-153., 1075.,  528., ..., 5922., 3835., 5650.])

In [9]:
# Distinct values of DAYS_ENDDATE_FACT. NaN appears in the recorded output,
# so the column has missing entries.
df_bur['DAYS_ENDDATE_FACT'].unique()

array([ -153.,    nan, -1710., ..., -2919., -3042., -2891.])

In [10]:
# Distinct values of AMT_CREDIT_MAX_OVERDUE. NaN appears in the recorded
# output, so the column has missing entries.
df_bur['AMT_CREDIT_MAX_OVERDUE'].unique()

array([      nan, 77674.5  ,     0.   , ...,   173.07 , 11317.59 ,
        5693.625])

In [11]:
# Distinct prolongation counts; the recorded output shows the integers 0-9.
df_bur['CNT_CREDIT_PROLONG'].unique()

array([0, 2, 1, 4, 3, 5, 9, 8, 6, 7])

In [12]:
# Distinct values of AMT_CREDIT_SUM (continuous amounts; NumPy abbreviates
# the printed array with "...").
df_bur['AMT_CREDIT_SUM'].unique()

array([ 91323.  , 225000.  , 464323.5 , ...,  77861.43, 112204.35,
       108765.72])

In [13]:
# Distinct values of AMT_CREDIT_SUM_DEBT. NaN appears in the recorded output,
# so the column has missing entries.
df_bur['AMT_CREDIT_SUM_DEBT'].unique()

array([      0.   ,  171342.   ,         nan, ...,  830860.875,
        146502.   , 1092226.5  ])

In [14]:
# Distinct values of AMT_CREDIT_SUM_LIMIT. NaN appears in the recorded output,
# so the column has missing entries.
df_bur['AMT_CREDIT_SUM_LIMIT'].unique()

array([       nan, 108982.62 ,      0.   , ...,  10398.15 ,  10617.75 ,
       135035.865])

In [15]:
# Distinct values of AMT_CREDIT_SUM_OVERDUE (continuous amounts; NumPy
# abbreviates the printed array with "...").
df_bur['AMT_CREDIT_SUM_OVERDUE'].unique()

array([    0.   ,   231.525,   288.   , ...,   804.735, 22581.   ,
         352.62 ])

In [16]:
# Distinct credit types; the recorded output lists 15 categories.
df_bur['CREDIT_TYPE'].unique()

array(['Consumer credit', 'Credit card', 'Mortgage', 'Car loan',
       'Microloan', 'Loan for working capital replenishment',
       'Loan for business development', 'Real estate loan',
       'Unknown type of loan', 'Another type of loan',
       'Cash loan (non-earmarked)', 'Loan for the purchase of equipment',
       'Mobile operator loan', 'Interbank credit',
       'Loan for purchase of shares (margin lending)'], dtype=object)

In [17]:
# Distinct values of DAYS_CREDIT_UPDATE.
# NOTE(review): the recorded output includes -41918 (over a century in days),
# which looks like an outlier or data error - TODO confirm before using it.
df_bur['DAYS_CREDIT_UPDATE'].unique()

array([  -131,    -20,    -16, ...,  -2901, -41918,  -2844])

In [18]:
# Distinct values of AMT_ANNUITY. NaN appears in the recorded output, so the
# column has missing entries.
df_bur['AMT_ANNUITY'].unique()

array([      nan,     0.   ,  2691.   , ...,  7681.95 ,  4482.585,
       24351.435])

## Analysis

### Column Description

- `CREDIT_ACTIVE`          - Status of the credits reported by Credit Bureau (CB) - Closed/Active/Sold/Bad debt
    - Closed- credit is closed
    - Active - credit is active
    - Sold - credit has been sold to collection agency
    - Bad debt - credit is considered unrecoverable (this can happen, for example, after the death of the borrower)
- `CREDIT_CURRENCY`        - Currency in which the transaction was executed, recoded as `currency 1`/`currency 2`/`currency 3`/`currency 4`
- `DAYS_CREDIT`            - How many days before the current application the client applied for the Credit Bureau credit, i.e. the time lapse between the current application and the credit in CB. Values are zero or negative (days counted back from the application date).
- `CREDIT_DAY_OVERDUE`     - Number of days overdue of a payment for a particular loan by customer
- `DAYS_CREDIT_ENDDATE`    - Number of days remaining on the customer's CB credit at the time of application (negative values appear to mean the scheduled end date has already passed)
- `DAYS_ENDDATE_FACT`      - For a closed credit, it is the number of days since CB credit has ended at the time of current application in Home Credit
- `AMT_CREDIT_MAX_OVERDUE` - Maximal amount overdue on the Credit Bureau credit so far (at application date of loan in our sample)
- `CNT_CREDIT_PROLONG`     - How many times the credit's expiration date was prolonged
- `CREDIT_DAY_OVERDUE`     - (duplicate entry; this column is also described earlier in this list) Number of days the customer's credit is past due at the time of application
- `AMT_CREDIT_SUM`         - Current credit amount of the Credit Bureau credit (one value per bureau record, not a total across the customer's credits)
- `AMT_CREDIT_SUM_DEBT`    - Total amount yet to be repaid
- `AMT_CREDIT_SUM_LIMIT`   - Current credit limit of credit card reported in Credit Bureau
- `AMT_CREDIT_SUM_OVERDUE` - Current amount overdue on Credit Bureau credit
- `CREDIT_TYPE`            - Type of Credit Bureau credit (Car, cash,...)

       'Consumer credit', 'Credit card', 'Mortgage', 'Car loan',
       'Microloan', 'Loan for working capital replenishment',
       'Loan for business development', 'Real estate loan',
       'Unknown type of loan', 'Another type of loan',
       'Cash loan (non-earmarked)', 'Loan for the purchase of equipment',
       'Mobile operator loan', 'Interbank credit',
       'Loan for purchase of shares (margin lending)' --- 15 types of credit
- `DAYS_CREDIT_UPDATE`     - How many days before the loan application the last information about the Credit Bureau credit was received
- `AMT_ANNUITY`            - Annuity of the Credit Bureau credit