# Credit Card Default Prediction

## Data Preprocessing

In [21]:
from ucimlrepo import fetch_ucirepo
import pandas as pd

# Silence pandas' chained-assignment warning for the whole session; renaming
# columns in place on the fetched frames would otherwise trigger it.
# NOTE: this is a global setting and hides the warning everywhere in the notebook.
pd.set_option("mode.chained_assignment", None)

In [22]:
# Download dataset id 350 through the ucimlrepo client (requires network access).
default_of_credit_card_clients = fetch_ucirepo(id=350)

# Pull out the predictor columns and the label column(s) as pandas DataFrames.
features, targets = (
    default_of_credit_card_clients.data.features,
    default_of_credit_card_clients.data.targets,
)

Rename the feature columns (`X1`–`X23`) to descriptive names.

In [23]:
# Mapping from the dataset's anonymous column codes (X1..X23) to descriptive names.
# Monthly columns are prefixed with the month number of 2005 (09 = September ... 04 = April).
feature_column_names = {
    'X1': 'CREDIT_LIMIT',  # Credit limit (NT dollar)
    'X2': 'SEX',           # Gender (1 = male; 2 = female)
    'X3': 'EDUCATION',     # Education (1 = graduate school; 2 = university; 3 = high school; 4 = others)
    'X4': 'MARRIAGE',      # Marital status (1 = married; 2 = single; 3 = others)
    'X5': 'AGE',           # (years)

    # X6 - X11 is repayment status
    # The measurement scale for the repayment status is:
    # -1 = pay duly;
    # 1 = payment delay for one month;
    # 2 = payment delay for two months;
    # . . .;
    # 8 = payment delay for eight months;
    # 9 = payment delay for nine months and above.
    # NOTE: the data also contains the values -2 and 0 (visible in the preview
    # below), which the scale above does not define.
    'X6': '09_PAY_STATUS',   # repayment status in September, 2005
    'X7': '08_PAY_STATUS',   # repayment status in August, 2005
    'X8': '07_PAY_STATUS',   # repayment status in July, 2005
    'X9': '06_PAY_STATUS',   # repayment status in June, 2005
    'X10': '05_PAY_STATUS',  # repayment status in May, 2005
    'X11': '04_PAY_STATUS',  # repayment status in April, 2005

    # X12 - X17 is amount of bill statement (NT dollar)
    'X12': '09_BILL',  # amount of bill statement in September, 2005
    'X13': '08_BILL',  # amount of bill statement in August, 2005
    'X14': '07_BILL',  # amount of bill statement in July, 2005
    'X15': '06_BILL',  # amount of bill statement in June, 2005
    'X16': '05_BILL',  # amount of bill statement in May, 2005
    'X17': '04_BILL',  # amount of bill statement in April, 2005

    # X18 - X23 is amount of previous payment (NT dollar)
    'X18': '09_PAYMENT',  # amount paid in September, 2005
    'X19': '08_PAYMENT',  # amount paid in August, 2005
    'X20': '07_PAYMENT',  # amount paid in July, 2005
    'X21': '06_PAYMENT',  # amount paid in June, 2005
    'X22': '05_PAYMENT',  # amount paid in May, 2005
    'X23': '04_PAYMENT',  # amount paid in April, 2005
}

# Rebind `features` to a renamed copy instead of renaming in place: the frame
# held by the fetched dataset object is left untouched, and re-running this
# cell is harmless because keys that are no longer present are ignored.
features = features.rename(columns=feature_column_names)

# `rename` silently skips keys it cannot find, so verify the rename took effect.
missing_feature_columns = set(feature_column_names.values()) - set(features.columns)
assert not missing_feature_columns, f"columns not renamed: {sorted(missing_feature_columns)}"

features.head()

Unnamed: 0,CREDIT_LIMIT,SEX,EDUCATION,MARRIAGE,AGE,09_PAY_STATUS,08_PAY_STATUS,07_PAY_STATUS,06_PAY_STATUS,05_PAY_STATUS,...,07_BILL,06_BILL,05_BILL,04_BILL,09_PAYMENT,08_PAYMENT,07_PAYMENT,06_PAYMENT,05_PAYMENT,04_PAYMENT
0,20000,2,2,1,24,2,2,-1,-1,-2,...,689,0,0,0,0,689,0,0,0,0
1,120000,2,2,2,26,-1,2,0,0,0,...,2682,3272,3455,3261,0,1000,1000,1000,0,2000
2,90000,2,2,2,34,0,0,0,0,0,...,13559,14331,14948,15549,1518,1500,1000,1000,1000,5000
3,50000,2,2,1,37,0,0,0,0,0,...,49291,28314,28959,29547,2000,2019,1200,1100,1069,1000
4,50000,1,2,1,57,-1,0,-1,0,0,...,35835,20940,19146,19131,2000,36681,10000,9000,689,679


Rename the target column (`Y`) to a descriptive name.

In [24]:
# Rebind `targets` to a renamed copy rather than renaming in place, so the frame
# held by the fetched dataset object is not mutated and the cell can be re-run.
targets = targets.rename(
    columns={'Y': 'DEFAULT'}  # Default payment next month
)

# `rename` silently skips keys it cannot find, so verify the rename took effect.
assert 'DEFAULT' in targets.columns, "target column 'Y' was not renamed to 'DEFAULT'"

targets.head()

Unnamed: 0,DEFAULT
0,1
1,1
2,0
3,0
4,0
