In [1]:
import numpy as np
import pandas as pd

# **Google Colab Setup: Authentication and File Loading**

In [2]:
import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate the Colab user, then hand the resulting application-default
# credentials to the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Fetch the dataset by its hard-coded Drive file id and save it under the local
# file name that the "Read data" cell passes to pd.read_csv.
# NOTE(review): the authenticated account presumably needs read access to this
# file id — confirm the sharing settings before re-running elsewhere.
download = drive.CreateFile({'id': '17oiv9nFc9ytqligduGPulszeAFaoDLj9'})
download.GetContentFile('UCI_Credit_Card.csv')

# **Load libraries**

In [5]:
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.preprocessing import StandardScaler

# **Read data**

In [6]:
# Load the downloaded CSV into the working frame; later cells modify `data` step by step.
data=pd.read_csv('UCI_Credit_Card.csv')

In [7]:
data.head(5)

Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6,BILL_AMT1,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default.payment.next.month
0,1,20000.0,2,2,1,24,2,2,-1,-1,-2,-2,3913.0,3102.0,689.0,0.0,0.0,0.0,0.0,689.0,0.0,0.0,0.0,0.0,1
1,2,120000.0,2,2,2,26,-1,2,0,0,0,2,2682.0,1725.0,2682.0,3272.0,3455.0,3261.0,0.0,1000.0,1000.0,1000.0,0.0,2000.0,1
2,3,90000.0,2,2,2,34,0,0,0,0,0,0,29239.0,14027.0,13559.0,14331.0,14948.0,15549.0,1518.0,1500.0,1000.0,1000.0,1000.0,5000.0,0
3,4,50000.0,2,2,1,37,0,0,0,0,0,0,46990.0,48233.0,49291.0,28314.0,28959.0,29547.0,2000.0,2019.0,1200.0,1100.0,1069.0,1000.0,0
4,5,50000.0,1,2,1,57,-1,0,-1,0,0,0,8617.0,5670.0,35835.0,20940.0,19146.0,19131.0,2000.0,36681.0,10000.0,9000.0,689.0,679.0,0


In [8]:
# Drop the ID column: it is only a row identifier and carries no information
# for modeling and prediction.
# errors='ignore' keeps the cell idempotent, so re-running it after ID has
# already been removed does not raise a KeyError.
data = data.drop(columns=['ID'], errors='ignore')

In [7]:
# Inspect column headers
data.columns

Index(['LIMIT_BAL', 'SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_0', 'PAY_2',
       'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'BILL_AMT2',
       'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1',
       'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6',
       'default.payment.next.month'],
      dtype='object')

# **Feature Engineering**

In [9]:
# Process education column - merge categories 4 and above into 4
def reduce_duplicate_others(x):
    """Collapse EDUCATION codes of 4 and above into the single code 4.

    Parameters
    ----------
    x : mapping or pandas.Series
        One record; only its 'EDUCATION' entry is read.

    Returns
    -------
    int
        4 when the code is 4 or higher, otherwise the code itself as an int.

    Raises
    ------
    ValueError
        If the EDUCATION value is NaN (it cannot be converted to an int).
    """
    education = x['EDUCATION']
    if education >= 4:
        return 4
    # int() accepts NumPy scalars (what a DataFrame row yields) as well as plain
    # Python numbers, whereas a scalar .astype call exists only on NumPy types.
    return int(education)

# Row-wise apply (axis=1): every row is passed to the function and the returned
# code overwrites the EDUCATION column.
data['EDUCATION'] = data.apply(reduce_duplicate_others, axis=1)

In [10]:
data.EDUCATION.unique()

array([2, 1, 3, 4, 0])

In [11]:
# Add new features

# Interaction features: concatenate the category codes as strings, so that
# e.g. SEX=2 and MARRIAGE=1 produce the label '21'.
sex_code = data['SEX'].astype(str)
education_code = data['EDUCATION'].astype(str)
marriage_code = data['MARRIAGE'].astype(str)

data['Sex_Marriage'] = sex_code + marriage_code
data['Sex_Education'] = sex_code + education_code
data['Sex_Education_Marriage'] = sex_code + education_code + marriage_code

# Totals across the six BILL_AMT columns and the six PAY_AMT columns.
data['Total_Bill_Amount'] = (
    data['BILL_AMT1'] + data['BILL_AMT2'] + data['BILL_AMT3']
    + data['BILL_AMT4'] + data['BILL_AMT5'] + data['BILL_AMT6']
)
data['Total_payment_Amount'] = (
    data['PAY_AMT1'] + data['PAY_AMT2'] + data['PAY_AMT3']
    + data['PAY_AMT4'] + data['PAY_AMT5'] + data['PAY_AMT6']
)

In [12]:
# Bin Age
def AGE(x):
    """Return the age-bracket label for one record.

    Only x['AGE'] is read. The brackets are '<=28', '29 - 34', '35 - 41' and
    '>41'. A value that satisfies none of the comparisons (e.g. NaN) yields None.
    """
    age = x['AGE']
    # Guard clauses, checked from the youngest bracket upwards.
    if age <= 28:
        return '<=28'
    if age <= 34:
        return '29 - 34'
    if age <= 41:
        return '35 - 41'
    if age > 41:
        return '>41'
    return None

# Row-wise apply (axis=1) of the binning function; stores one bracket label per row.
data['Age_category'] = data.apply(AGE, axis =1)

In [13]:
# Drop AGE, since its information now lives in the Age_category bins.
# Reassigning instead of inplace=True, together with errors='ignore', keeps the
# cell safe to re-run: a second execution no longer raises a KeyError.
data = data.drop(columns=['AGE'], errors='ignore')
data.head()

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6,BILL_AMT1,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default.payment.next.month,Sex_Marriage,Sex_Education,Sex_Education_Marriage,Total_Bill_Amount,Total_payment_Amount,Age_category
0,20000.0,2,2,1,2,2,-1,-1,-2,-2,3913.0,3102.0,689.0,0.0,0.0,0.0,0.0,689.0,0.0,0.0,0.0,0.0,1,21,22,221,7704.0,689.0,<=28
1,120000.0,2,2,2,-1,2,0,0,0,2,2682.0,1725.0,2682.0,3272.0,3455.0,3261.0,0.0,1000.0,1000.0,1000.0,0.0,2000.0,1,22,22,222,17077.0,5000.0,<=28
2,90000.0,2,2,2,0,0,0,0,0,0,29239.0,14027.0,13559.0,14331.0,14948.0,15549.0,1518.0,1500.0,1000.0,1000.0,1000.0,5000.0,0,22,22,222,101653.0,11018.0,29 - 34
3,50000.0,2,2,1,0,0,0,0,0,0,46990.0,48233.0,49291.0,28314.0,28959.0,29547.0,2000.0,2019.0,1200.0,1100.0,1069.0,1000.0,0,21,22,221,231334.0,8388.0,35 - 41
4,50000.0,1,2,1,-1,0,-1,0,0,0,8617.0,5670.0,35835.0,20940.0,19146.0,19131.0,2000.0,36681.0,10000.0,9000.0,689.0,679.0,0,11,12,121,109339.0,59049.0,>41


In [14]:
list(data) # Check list of header names

['LIMIT_BAL',
 'SEX',
 'EDUCATION',
 'MARRIAGE',
 'PAY_0',
 'PAY_2',
 'PAY_3',
 'PAY_4',
 'PAY_5',
 'PAY_6',
 'BILL_AMT1',
 'BILL_AMT2',
 'BILL_AMT3',
 'BILL_AMT4',
 'BILL_AMT5',
 'BILL_AMT6',
 'PAY_AMT1',
 'PAY_AMT2',
 'PAY_AMT3',
 'PAY_AMT4',
 'PAY_AMT5',
 'PAY_AMT6',
 'default.payment.next.month',
 'Sex_Marriage',
 'Sex_Education',
 'Sex_Education_Marriage',
 'Total_Bill_Amount',
 'Total_payment_Amount',
 'Age_category']

In [15]:
# Categorical variables to dummy. The numeric columns (LIMIT_BAL, BILL_AMT1-6,
# PAY_AMT1-6, Total_Bill_Amount, Total_payment_Amount) and the target column
# 'default.payment.next.month' are deliberately left out.
col_to_dummy = (
    ['SEX', 'EDUCATION', 'MARRIAGE']
    + ['PAY_0', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6']
    + ['Sex_Marriage', 'Sex_Education', 'Sex_Education_Marriage', 'Age_category']
)

In [16]:
# Perform dummying
# dtype=int pins the indicator columns to 0/1 integers. Newer pandas releases
# default to bool indicators, which would make the exported test CSV contain
# True/False while the train CSV (cast with astype(int) further down) holds 0/1.
data_dummy = pd.get_dummies(data, columns=col_to_dummy, dtype=int)
data_dummy.head(10)

Unnamed: 0,LIMIT_BAL,BILL_AMT1,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default.payment.next.month,Total_Bill_Amount,Total_payment_Amount,SEX_1,SEX_2,EDUCATION_0,EDUCATION_1,EDUCATION_2,EDUCATION_3,EDUCATION_4,MARRIAGE_0,MARRIAGE_1,MARRIAGE_2,MARRIAGE_3,PAY_0_-2,PAY_0_-1,PAY_0_0,PAY_0_1,PAY_0_2,PAY_0_3,PAY_0_4,PAY_0_5,PAY_0_6,PAY_0_7,PAY_0_8,PAY_2_-2,PAY_2_-1,...,Sex_Education_23,Sex_Education_24,Sex_Education_Marriage_101,Sex_Education_Marriage_102,Sex_Education_Marriage_110,Sex_Education_Marriage_111,Sex_Education_Marriage_112,Sex_Education_Marriage_113,Sex_Education_Marriage_120,Sex_Education_Marriage_121,Sex_Education_Marriage_122,Sex_Education_Marriage_123,Sex_Education_Marriage_130,Sex_Education_Marriage_131,Sex_Education_Marriage_132,Sex_Education_Marriage_133,Sex_Education_Marriage_141,Sex_Education_Marriage_142,Sex_Education_Marriage_143,Sex_Education_Marriage_201,Sex_Education_Marriage_202,Sex_Education_Marriage_210,Sex_Education_Marriage_211,Sex_Education_Marriage_212,Sex_Education_Marriage_213,Sex_Education_Marriage_220,Sex_Education_Marriage_221,Sex_Education_Marriage_222,Sex_Education_Marriage_223,Sex_Education_Marriage_230,Sex_Education_Marriage_231,Sex_Education_Marriage_232,Sex_Education_Marriage_233,Sex_Education_Marriage_241,Sex_Education_Marriage_242,Sex_Education_Marriage_243,Age_category_29 - 34,Age_category_35 - 41,Age_category_<=28,Age_category_>41
0,20000.0,3913.0,3102.0,689.0,0.0,0.0,0.0,0.0,689.0,0.0,0.0,0.0,0.0,1,7704.0,689.0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0
1,120000.0,2682.0,1725.0,2682.0,3272.0,3455.0,3261.0,0.0,1000.0,1000.0,1000.0,0.0,2000.0,1,17077.0,5000.0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0
2,90000.0,29239.0,14027.0,13559.0,14331.0,14948.0,15549.0,1518.0,1500.0,1000.0,1000.0,1000.0,5000.0,0,101653.0,11018.0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0
3,50000.0,46990.0,48233.0,49291.0,28314.0,28959.0,29547.0,2000.0,2019.0,1200.0,1100.0,1069.0,1000.0,0,231334.0,8388.0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0
4,50000.0,8617.0,5670.0,35835.0,20940.0,19146.0,19131.0,2000.0,36681.0,10000.0,9000.0,689.0,679.0,0,109339.0,59049.0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
5,50000.0,64400.0,57069.0,57608.0,19394.0,19619.0,20024.0,2500.0,1815.0,657.0,1000.0,1000.0,800.0,0,238114.0,7772.0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
6,500000.0,367965.0,412023.0,445007.0,542653.0,483003.0,473944.0,55000.0,40000.0,38000.0,20239.0,13750.0,13770.0,0,2724595.0,180759.0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
7,100000.0,11876.0,380.0,601.0,221.0,-159.0,567.0,380.0,601.0,0.0,581.0,1687.0,1542.0,0,13486.0,4791.0,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0
8,140000.0,11285.0,14096.0,12108.0,12211.0,11793.0,3719.0,3329.0,0.0,432.0,1000.0,1000.0,1000.0,0,65212.0,6761.0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0
9,20000.0,0.0,0.0,0.0,0.0,13007.0,13912.0,0.0,0.0,0.0,13007.0,1122.0,0.0,0,26919.0,14129.0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0


In [17]:
list(data_dummy) # Check header names

['LIMIT_BAL',
 'BILL_AMT1',
 'BILL_AMT2',
 'BILL_AMT3',
 'BILL_AMT4',
 'BILL_AMT5',
 'BILL_AMT6',
 'PAY_AMT1',
 'PAY_AMT2',
 'PAY_AMT3',
 'PAY_AMT4',
 'PAY_AMT5',
 'PAY_AMT6',
 'default.payment.next.month',
 'Total_Bill_Amount',
 'Total_payment_Amount',
 'SEX_1',
 'SEX_2',
 'EDUCATION_0',
 'EDUCATION_1',
 'EDUCATION_2',
 'EDUCATION_3',
 'EDUCATION_4',
 'MARRIAGE_0',
 'MARRIAGE_1',
 'MARRIAGE_2',
 'MARRIAGE_3',
 'PAY_0_-2',
 'PAY_0_-1',
 'PAY_0_0',
 'PAY_0_1',
 'PAY_0_2',
 'PAY_0_3',
 'PAY_0_4',
 'PAY_0_5',
 'PAY_0_6',
 'PAY_0_7',
 'PAY_0_8',
 'PAY_2_-2',
 'PAY_2_-1',
 'PAY_2_0',
 'PAY_2_1',
 'PAY_2_2',
 'PAY_2_3',
 'PAY_2_4',
 'PAY_2_5',
 'PAY_2_6',
 'PAY_2_7',
 'PAY_2_8',
 'PAY_3_-2',
 'PAY_3_-1',
 'PAY_3_0',
 'PAY_3_1',
 'PAY_3_2',
 'PAY_3_3',
 'PAY_3_4',
 'PAY_3_5',
 'PAY_3_6',
 'PAY_3_7',
 'PAY_3_8',
 'PAY_4_-2',
 'PAY_4_-1',
 'PAY_4_0',
 'PAY_4_1',
 'PAY_4_2',
 'PAY_4_3',
 'PAY_4_4',
 'PAY_4_5',
 'PAY_4_6',
 'PAY_4_7',
 'PAY_4_8',
 'PAY_5_-2',
 'PAY_5_-1',
 'PAY_5_0',
 'PAY_5_2',


# **Train test split**

In [18]:
# Separate the target column from the feature matrix.
target_name = 'default.payment.next.month'
y = data_dummy[target_name]                 # target
X = data_dummy.drop(columns=[target_name])  # features

In [19]:
X.shape

(30000, 146)

In [20]:
# Random 80/20 split into train and held-out sets. stratify=y keeps the class
# ratio of the target in both parts; random_state fixes the shuffle.
split_parts = train_test_split(X, y, test_size=0.2, stratify=y, random_state=1)
train_X, test_X, train_y, test_y = split_parts

# Report the shape of each part, in the order train_X, test_X, train_y, test_y.
for part in split_parts:
    print(part.shape)

(24000, 146)
(6000, 146)
(24000,)
(6000,)


# **Scale before SMOTE**
(Important: SMOTE creates synthetic minority-class points by interpolating between k-nearest neighbours, and nearest-neighbour distances are sensitive to feature scale.)

In [21]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only and standardise every feature column.
# The result is re-wrapped in a DataFrame (fresh 0..n-1 index, original column names).
# NOTE(review): this standardises the 0/1 dummy columns as well as the numeric
# ones — confirm that is intended before the SMOTENC step.
scaler = StandardScaler()
train_X_scaled = pd.DataFrame(
    scaler.fit_transform(train_X),
    columns=train_X.columns,
)
train_X_scaled.head()

Unnamed: 0,LIMIT_BAL,BILL_AMT1,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,Total_Bill_Amount,Total_payment_Amount,SEX_1,SEX_2,EDUCATION_0,EDUCATION_1,EDUCATION_2,EDUCATION_3,EDUCATION_4,MARRIAGE_0,MARRIAGE_1,MARRIAGE_2,MARRIAGE_3,PAY_0_-2,PAY_0_-1,PAY_0_0,PAY_0_1,PAY_0_2,PAY_0_3,PAY_0_4,PAY_0_5,PAY_0_6,PAY_0_7,PAY_0_8,PAY_2_-2,PAY_2_-1,PAY_2_0,...,Sex_Education_23,Sex_Education_24,Sex_Education_Marriage_101,Sex_Education_Marriage_102,Sex_Education_Marriage_110,Sex_Education_Marriage_111,Sex_Education_Marriage_112,Sex_Education_Marriage_113,Sex_Education_Marriage_120,Sex_Education_Marriage_121,Sex_Education_Marriage_122,Sex_Education_Marriage_123,Sex_Education_Marriage_130,Sex_Education_Marriage_131,Sex_Education_Marriage_132,Sex_Education_Marriage_133,Sex_Education_Marriage_141,Sex_Education_Marriage_142,Sex_Education_Marriage_143,Sex_Education_Marriage_201,Sex_Education_Marriage_202,Sex_Education_Marriage_210,Sex_Education_Marriage_211,Sex_Education_Marriage_212,Sex_Education_Marriage_213,Sex_Education_Marriage_220,Sex_Education_Marriage_221,Sex_Education_Marriage_222,Sex_Education_Marriage_223,Sex_Education_Marriage_230,Sex_Education_Marriage_231,Sex_Education_Marriage_232,Sex_Education_Marriage_233,Sex_Education_Marriage_241,Sex_Education_Marriage_242,Sex_Education_Marriage_243,Age_category_29 - 34,Age_category_35 - 41,Age_category_<=28,Age_category_>41
0,1.332097,-0.578587,-0.395218,-0.524384,0.063334,-0.076339,-0.306303,0.89745,0.188805,2.286437,-0.244674,1.000386,1.088086,-0.332228,1.468521,-0.808793,0.808793,-0.022366,1.352105,-0.936589,-0.44123,-0.126159,-0.043822,-0.91463,0.938472,-0.104855,-0.318072,2.0667,-0.982323,-0.374139,-0.312401,-0.104855,-0.051302,-0.03029,-0.019369,-0.017081,-0.02328,-0.380193,1.991189,-1.049823,...,-0.326735,-0.101136,-0.009129,-0.015813,-0.006455,-0.24452,-0.309258,-0.030972,-0.006455,-0.293211,-0.327827,-0.046147,-0.020417,-0.1922,-0.172818,-0.038216,-0.054085,-0.050892,-0.006455,-0.006455,-0.011181,-0.009129,-0.268868,2.471214,-0.025828,-0.014435,-0.41761,-0.398643,-0.055614,-0.03356,-0.253019,-0.184008,-0.049219,-0.072066,-0.069691,-0.011181,1.699011,-0.543706,-0.601545,-0.575554
1,-0.522752,-0.257742,-0.249251,-0.188615,-0.12768,-0.101516,-0.059027,-0.334758,-0.124727,-0.181188,-0.306665,-0.180259,-0.097398,-0.178524,-0.340692,-0.808793,0.808793,-0.022366,-0.739588,1.067704,-0.44123,-0.126159,-0.043822,1.093339,-1.065561,-0.104855,-0.318072,-0.483863,-0.982323,2.672803,-0.312401,-0.104855,-0.051302,-0.03029,-0.019369,-0.017081,-0.02328,-0.380193,-0.502213,-1.049823,...,-0.326735,-0.101136,-0.009129,-0.015813,-0.006455,-0.24452,-0.309258,-0.030972,-0.006455,-0.293211,-0.327827,-0.046147,-0.020417,-0.1922,-0.172818,-0.038216,-0.054085,-0.050892,-0.006455,-0.006455,-0.011181,-0.009129,-0.268868,-0.404659,-0.025828,-0.014435,2.394579,-0.398643,-0.055614,-0.03356,-0.253019,-0.184008,-0.049219,-0.072066,-0.069691,-0.011181,-0.588578,1.839228,-0.601545,-0.575554
2,-0.290896,-0.688197,-0.662907,-0.657984,-0.673499,-0.664159,-0.608012,-0.217634,-0.192569,-0.290234,-0.306665,-0.136106,-0.081901,-0.694642,-0.351506,1.236411,-1.236411,-0.022366,-0.739588,-0.936589,2.266393,-0.126159,-0.043822,1.093339,-1.065561,-0.104855,-0.318072,2.0667,-0.982323,-0.374139,-0.312401,-0.104855,-0.051302,-0.03029,-0.019369,-0.017081,-0.02328,-0.380193,1.991189,-1.049823,...,-0.326735,-0.101136,-0.009129,-0.015813,-0.006455,-0.24452,-0.309258,-0.030972,-0.006455,-0.293211,-0.327827,-0.046147,-0.020417,5.202901,-0.172818,-0.038216,-0.054085,-0.050892,-0.006455,-0.006455,-0.011181,-0.009129,-0.268868,-0.404659,-0.025828,-0.014435,-0.41761,-0.398643,-0.055614,-0.03356,-0.253019,-0.184008,-0.049219,-0.072066,-0.069691,-0.011181,-0.588578,-0.543706,1.662385,-0.575554
3,-0.136325,-0.694987,-0.691068,-0.677534,-0.673499,-0.664159,-0.653272,-0.334758,-0.248376,-0.290234,-0.306665,-0.306592,-0.301299,-0.711944,-0.507434,-0.808793,0.808793,-0.022366,1.352105,-0.936589,-0.44123,-0.126159,-0.043822,1.093339,-1.065561,-0.104855,-0.318072,-0.483863,-0.982323,2.672803,-0.312401,-0.104855,-0.051302,-0.03029,-0.019369,-0.017081,-0.02328,2.630243,-0.502213,-1.049823,...,-0.326735,-0.101136,-0.009129,-0.015813,-0.006455,-0.24452,-0.309258,-0.030972,-0.006455,-0.293211,-0.327827,-0.046147,-0.020417,-0.1922,-0.172818,-0.038216,-0.054085,-0.050892,-0.006455,-0.006455,-0.011181,-0.009129,3.719291,-0.404659,-0.025828,-0.014435,-0.41761,-0.398643,-0.055614,-0.03356,-0.253019,-0.184008,-0.049219,-0.072066,-0.069691,-0.011181,-0.588578,-0.543706,-0.601545,1.737456
4,0.559243,1.711801,1.701083,1.720194,-0.673499,-0.664159,-0.653272,0.078281,0.020354,-0.290234,-0.306665,-0.306592,-0.301299,0.643025,-0.291892,-0.808793,0.808793,-0.022366,-0.739588,1.067704,-0.44123,-0.126159,-0.043822,1.093339,-1.065561,-0.104855,-0.318072,-0.483863,1.017995,-0.374139,-0.312401,-0.104855,-0.051302,-0.03029,-0.019369,-0.017081,-0.02328,-0.380193,-0.502213,0.952541,...,-0.326735,-0.101136,-0.009129,-0.015813,-0.006455,-0.24452,-0.309258,-0.030972,-0.006455,-0.293211,-0.327827,-0.046147,-0.020417,-0.1922,-0.172818,-0.038216,-0.054085,-0.050892,-0.006455,-0.006455,-0.011181,-0.009129,-0.268868,-0.404659,-0.025828,-0.014435,2.394579,-0.398643,-0.055614,-0.03356,-0.253019,-0.184008,-0.049219,-0.072066,-0.069691,-0.011181,-0.588578,-0.543706,-0.601545,1.737456


# **Categorical SMOTE (i.e. SMOTENC)**

In [22]:
# List each column with its position so the index range of the dummy
# (categorical) columns can be read off for SMOTENC.
for position, column_name in enumerate(train_X_scaled.columns):
    print(f"Index: {position} \tColumn: {column_name}")

Index: 0 	Column: LIMIT_BAL
Index: 1 	Column: BILL_AMT1
Index: 2 	Column: BILL_AMT2
Index: 3 	Column: BILL_AMT3
Index: 4 	Column: BILL_AMT4
Index: 5 	Column: BILL_AMT5
Index: 6 	Column: BILL_AMT6
Index: 7 	Column: PAY_AMT1
Index: 8 	Column: PAY_AMT2
Index: 9 	Column: PAY_AMT3
Index: 10 	Column: PAY_AMT4
Index: 11 	Column: PAY_AMT5
Index: 12 	Column: PAY_AMT6
Index: 13 	Column: Total_Bill_Amount
Index: 14 	Column: Total_payment_Amount
Index: 15 	Column: SEX_1
Index: 16 	Column: SEX_2
Index: 17 	Column: EDUCATION_0
Index: 18 	Column: EDUCATION_1
Index: 19 	Column: EDUCATION_2
Index: 20 	Column: EDUCATION_3
Index: 21 	Column: EDUCATION_4
Index: 22 	Column: MARRIAGE_0
Index: 23 	Column: MARRIAGE_1
Index: 24 	Column: MARRIAGE_2
Index: 25 	Column: MARRIAGE_3
Index: 26 	Column: PAY_0_-2
Index: 27 	Column: PAY_0_-1
Index: 28 	Column: PAY_0_0
Index: 29 	Column: PAY_0_1
Index: 30 	Column: PAY_0_2
Index: 31 	Column: PAY_0_3
Index: 32 	Column: PAY_0_4
Index: 33 	Column: PAY_0_5
Index: 34 	Column: 

In [23]:
# Specify index of categorical columns
np.arange(15, 146)

array([ 15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,
        28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,
        54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,
        67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
        80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,
        93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104, 105,
       106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118,
       119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
       132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144,
       145])

### Perform SMOTENC (i.e. SMOTE variant for data with both numerical and categorical values)

In [24]:
from imblearn.over_sampling import SMOTENC
# Positions 15-145 are the dummy columns (see the column/index listing printed
# above); positions 0-14 are the numeric columns. random_state fixes the sampling.
# NOTE(review): each dummy column is declared as its own categorical feature, so
# synthetic rows are presumably not guaranteed to keep exactly one active dummy
# per original variable — verify on the resampled output.
smote_nc = SMOTENC(categorical_features=np.arange(15, 146), random_state=0)

# Oversample the scaled training split only; the test split is not passed here.
X_resampled, y_resampled = smote_nc.fit_resample(train_X_scaled, train_y)



In [25]:
# Wrap the resampled (still scaled) features in a DataFrame with the original
# column names so the first rows can be displayed.
X_resampled_df = pd.DataFrame(X_resampled, columns=train_X_scaled.columns)
X_resampled_df.head()

Unnamed: 0,LIMIT_BAL,BILL_AMT1,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,Total_Bill_Amount,Total_payment_Amount,SEX_1,SEX_2,EDUCATION_0,EDUCATION_1,EDUCATION_2,EDUCATION_3,EDUCATION_4,MARRIAGE_0,MARRIAGE_1,MARRIAGE_2,MARRIAGE_3,PAY_0_-2,PAY_0_-1,PAY_0_0,PAY_0_1,PAY_0_2,PAY_0_3,PAY_0_4,PAY_0_5,PAY_0_6,PAY_0_7,PAY_0_8,PAY_2_-2,PAY_2_-1,PAY_2_0,...,Sex_Education_23,Sex_Education_24,Sex_Education_Marriage_101,Sex_Education_Marriage_102,Sex_Education_Marriage_110,Sex_Education_Marriage_111,Sex_Education_Marriage_112,Sex_Education_Marriage_113,Sex_Education_Marriage_120,Sex_Education_Marriage_121,Sex_Education_Marriage_122,Sex_Education_Marriage_123,Sex_Education_Marriage_130,Sex_Education_Marriage_131,Sex_Education_Marriage_132,Sex_Education_Marriage_133,Sex_Education_Marriage_141,Sex_Education_Marriage_142,Sex_Education_Marriage_143,Sex_Education_Marriage_201,Sex_Education_Marriage_202,Sex_Education_Marriage_210,Sex_Education_Marriage_211,Sex_Education_Marriage_212,Sex_Education_Marriage_213,Sex_Education_Marriage_220,Sex_Education_Marriage_221,Sex_Education_Marriage_222,Sex_Education_Marriage_223,Sex_Education_Marriage_230,Sex_Education_Marriage_231,Sex_Education_Marriage_232,Sex_Education_Marriage_233,Sex_Education_Marriage_241,Sex_Education_Marriage_242,Sex_Education_Marriage_243,Age_category_29 - 34,Age_category_35 - 41,Age_category_<=28,Age_category_>41
0,1.332097,-0.578587,-0.395218,-0.524384,0.063334,-0.076339,-0.306303,0.89745,0.188805,2.286437,-0.244674,1.000386,1.088086,-0.332228,1.468521,-0.808793,0.808793,-0.022366,1.352105,-0.936589,-0.44123,-0.126159,-0.043822,-0.91463,0.938472,-0.104855,-0.318072,2.0667,-0.982323,-0.374139,-0.312401,-0.104855,-0.051302,-0.03029,-0.019369,-0.017081,-0.02328,-0.380193,1.991189,-1.049823,...,-0.326735,-0.101136,-0.009129,-0.015813,-0.006455,-0.24452,-0.309258,-0.030972,-0.006455,-0.293211,-0.327827,-0.046147,-0.020417,-0.1922,-0.172818,-0.038216,-0.054085,-0.050892,-0.006455,-0.006455,-0.011181,-0.009129,-0.268868,2.471214,-0.025828,-0.014435,-0.41761,-0.398643,-0.055614,-0.03356,-0.253019,-0.184008,-0.049219,-0.072066,-0.069691,-0.011181,1.699011,-0.543706,-0.601545,-0.575554
1,-0.522752,-0.257742,-0.249251,-0.188615,-0.12768,-0.101516,-0.059027,-0.334758,-0.124727,-0.181188,-0.306665,-0.180259,-0.097398,-0.178524,-0.340692,-0.808793,0.808793,-0.022366,-0.739588,1.067704,-0.44123,-0.126159,-0.043822,1.093339,-1.065561,-0.104855,-0.318072,-0.483863,-0.982323,2.672803,-0.312401,-0.104855,-0.051302,-0.03029,-0.019369,-0.017081,-0.02328,-0.380193,-0.502213,-1.049823,...,-0.326735,-0.101136,-0.009129,-0.015813,-0.006455,-0.24452,-0.309258,-0.030972,-0.006455,-0.293211,-0.327827,-0.046147,-0.020417,-0.1922,-0.172818,-0.038216,-0.054085,-0.050892,-0.006455,-0.006455,-0.011181,-0.009129,-0.268868,-0.404659,-0.025828,-0.014435,2.394579,-0.398643,-0.055614,-0.03356,-0.253019,-0.184008,-0.049219,-0.072066,-0.069691,-0.011181,-0.588578,1.839228,-0.601545,-0.575554
2,-0.290896,-0.688197,-0.662907,-0.657984,-0.673499,-0.664159,-0.608012,-0.217634,-0.192569,-0.290234,-0.306665,-0.136106,-0.081901,-0.694642,-0.351506,1.236411,-1.236411,-0.022366,-0.739588,-0.936589,2.266393,-0.126159,-0.043822,1.093339,-1.065561,-0.104855,-0.318072,2.0667,-0.982323,-0.374139,-0.312401,-0.104855,-0.051302,-0.03029,-0.019369,-0.017081,-0.02328,-0.380193,1.991189,-1.049823,...,-0.326735,-0.101136,-0.009129,-0.015813,-0.006455,-0.24452,-0.309258,-0.030972,-0.006455,-0.293211,-0.327827,-0.046147,-0.020417,5.202901,-0.172818,-0.038216,-0.054085,-0.050892,-0.006455,-0.006455,-0.011181,-0.009129,-0.268868,-0.404659,-0.025828,-0.014435,-0.41761,-0.398643,-0.055614,-0.03356,-0.253019,-0.184008,-0.049219,-0.072066,-0.069691,-0.011181,-0.588578,-0.543706,1.662385,-0.575554
3,-0.136325,-0.694987,-0.691068,-0.677534,-0.673499,-0.664159,-0.653272,-0.334758,-0.248376,-0.290234,-0.306665,-0.306592,-0.301299,-0.711944,-0.507434,-0.808793,0.808793,-0.022366,1.352105,-0.936589,-0.44123,-0.126159,-0.043822,1.093339,-1.065561,-0.104855,-0.318072,-0.483863,-0.982323,2.672803,-0.312401,-0.104855,-0.051302,-0.03029,-0.019369,-0.017081,-0.02328,2.630243,-0.502213,-1.049823,...,-0.326735,-0.101136,-0.009129,-0.015813,-0.006455,-0.24452,-0.309258,-0.030972,-0.006455,-0.293211,-0.327827,-0.046147,-0.020417,-0.1922,-0.172818,-0.038216,-0.054085,-0.050892,-0.006455,-0.006455,-0.011181,-0.009129,3.719291,-0.404659,-0.025828,-0.014435,-0.41761,-0.398643,-0.055614,-0.03356,-0.253019,-0.184008,-0.049219,-0.072066,-0.069691,-0.011181,-0.588578,-0.543706,-0.601545,1.737456
4,0.559243,1.711801,1.701083,1.720194,-0.673499,-0.664159,-0.653272,0.078281,0.020354,-0.290234,-0.306665,-0.306592,-0.301299,0.643025,-0.291892,-0.808793,0.808793,-0.022366,-0.739588,1.067704,-0.44123,-0.126159,-0.043822,1.093339,-1.065561,-0.104855,-0.318072,-0.483863,1.017995,-0.374139,-0.312401,-0.104855,-0.051302,-0.03029,-0.019369,-0.017081,-0.02328,-0.380193,-0.502213,0.952541,...,-0.326735,-0.101136,-0.009129,-0.015813,-0.006455,-0.24452,-0.309258,-0.030972,-0.006455,-0.293211,-0.327827,-0.046147,-0.020417,-0.1922,-0.172818,-0.038216,-0.054085,-0.050892,-0.006455,-0.006455,-0.011181,-0.009129,-0.268868,-0.404659,-0.025828,-0.014435,2.394579,-0.398643,-0.055614,-0.03356,-0.253019,-0.184008,-0.049219,-0.072066,-0.069691,-0.011181,-0.588578,-0.543706,-0.601545,1.737456


In [26]:
# Sanity check on the SMOTE-ed data: the target is 0/1, so its sum is the number
# of positive-class rows.
print(f"Length of SMOTE-ed Y: {len(y_resampled)}, Number of positive class: {y_resampled.sum()}")
# The positive class is exactly half of the samples (18691 of 37382 in the recorded
# run), i.e. the classes are balanced; the SMOTE process has achieved our intent.

Length of SMOTE-ed Y: 37382, Number of positive class: 18691


In [27]:
# Undo the standardisation with the same fitted scaler so the exported training
# data is on the original scale (for interpretability), then restore column names.
X_resampled_original_scale = pd.DataFrame(
    scaler.inverse_transform(X_resampled),
    columns=train_X_scaled.columns,
)
X_resampled_original_scale.head()

Unnamed: 0,LIMIT_BAL,BILL_AMT1,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,Total_Bill_Amount,Total_payment_Amount,SEX_1,SEX_2,EDUCATION_0,EDUCATION_1,EDUCATION_2,EDUCATION_3,EDUCATION_4,MARRIAGE_0,MARRIAGE_1,MARRIAGE_2,MARRIAGE_3,PAY_0_-2,PAY_0_-1,PAY_0_0,PAY_0_1,PAY_0_2,PAY_0_3,PAY_0_4,PAY_0_5,PAY_0_6,PAY_0_7,PAY_0_8,PAY_2_-2,PAY_2_-1,PAY_2_0,...,Sex_Education_23,Sex_Education_24,Sex_Education_Marriage_101,Sex_Education_Marriage_102,Sex_Education_Marriage_110,Sex_Education_Marriage_111,Sex_Education_Marriage_112,Sex_Education_Marriage_113,Sex_Education_Marriage_120,Sex_Education_Marriage_121,Sex_Education_Marriage_122,Sex_Education_Marriage_123,Sex_Education_Marriage_130,Sex_Education_Marriage_131,Sex_Education_Marriage_132,Sex_Education_Marriage_133,Sex_Education_Marriage_141,Sex_Education_Marriage_142,Sex_Education_Marriage_143,Sex_Education_Marriage_201,Sex_Education_Marriage_202,Sex_Education_Marriage_210,Sex_Education_Marriage_211,Sex_Education_Marriage_212,Sex_Education_Marriage_213,Sex_Education_Marriage_220,Sex_Education_Marriage_221,Sex_Education_Marriage_222,Sex_Education_Marriage_223,Sex_Education_Marriage_230,Sex_Education_Marriage_231,Sex_Education_Marriage_232,Sex_Education_Marriage_233,Sex_Education_Marriage_241,Sex_Education_Marriage_242,Sex_Education_Marriage_243,Age_category_29 - 34,Age_category_35 - 41,Age_category_<=28,Age_category_>41
0,340000.0,8571.0,21012.0,10607.0,47258.0,35675.0,20691.0,21041.0,10607.0,47258.0,983.0,20691.0,23849.0,143814.0,124429.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,5.421011e-20,0.0,0.0,0.0,1.0,0.0,...,-1.387779e-17,0.0,0.0,0.0,0.0,0.0,1.387779e-17,0.0,0.0,0.0,0.0,0.0,5.421011e-20,0.0,-3.469447e-18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,6.938894e-18,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.775558e-17
1,100000.0,32196.0,31379.0,33862.0,35007.0,34147.0,35437.0,0.0,3000.0,2000.0,0.0,2000.0,3500.0,202028.0,10500.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,5.421011e-20,0.0,0.0,0.0,-2.775558e-17,0.0,...,-1.387779e-17,0.0,0.0,0.0,0.0,0.0,1.387779e-17,0.0,0.0,0.0,0.0,0.0,5.421011e-20,0.0,-3.469447e-18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,6.938894e-18,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.775558e-17
2,130000.0,500.0,2000.0,1354.0,0.0,0.0,2699.0,2000.0,1354.0,0.0,0.0,2699.0,3766.0,6553.0,9819.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,5.421011e-20,0.0,0.0,0.0,1.0,0.0,...,-1.387779e-17,0.0,0.0,0.0,0.0,0.0,1.387779e-17,0.0,0.0,0.0,0.0,0.0,5.421011e-20,1.0,-3.469447e-18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.938894e-18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.775558e-17
3,150000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.637979e-12,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,5.421011e-20,0.0,0.0,1.0,-2.775558e-17,0.0,...,-1.387779e-17,0.0,0.0,0.0,0.0,0.0,1.387779e-17,0.0,0.0,0.0,0.0,0.0,5.421011e-20,0.0,-3.469447e-18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.938894e-18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,240000.0,177221.0,169897.0,166064.0,0.0,0.0,0.0,7053.0,6520.0,0.0,0.0,0.0,0.0,513182.0,13573.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,5.421011e-20,0.0,0.0,0.0,-2.775558e-17,1.0,...,-1.387779e-17,0.0,0.0,0.0,0.0,0.0,1.387779e-17,0.0,0.0,0.0,0.0,0.0,5.421011e-20,0.0,-3.469447e-18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,6.938894e-18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [30]:
# Correct the ultra small values in some of the categorical columns.
# inverse_transform leaves floating-point residue in the dummy columns (values
# such as 1.387779e-17 instead of 0). Round to the nearest integer BEFORE casting:
# astype(int) alone truncates toward zero, so a value like 0.9999999999 would
# silently turn a 1 into a 0.
dummy_columns = X_resampled_original_scale.columns[15:146]  # positions 15-145 = dummy columns
rounded = X_resampled_original_scale.round({col: 0 for col in dummy_columns})
# Cast by column label on a new frame so the integer dtype is actually applied
# (a positional in-place assignment can leave the columns as float).
X_resampled_original_scale = rounded.astype({col: int for col in dummy_columns})

X_resampled_original_scale.head()

Unnamed: 0,LIMIT_BAL,BILL_AMT1,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,Total_Bill_Amount,Total_payment_Amount,SEX_1,SEX_2,EDUCATION_0,EDUCATION_1,EDUCATION_2,EDUCATION_3,EDUCATION_4,MARRIAGE_0,MARRIAGE_1,MARRIAGE_2,MARRIAGE_3,PAY_0_-2,PAY_0_-1,PAY_0_0,PAY_0_1,PAY_0_2,PAY_0_3,PAY_0_4,PAY_0_5,PAY_0_6,PAY_0_7,PAY_0_8,PAY_2_-2,PAY_2_-1,PAY_2_0,...,Sex_Education_23,Sex_Education_24,Sex_Education_Marriage_101,Sex_Education_Marriage_102,Sex_Education_Marriage_110,Sex_Education_Marriage_111,Sex_Education_Marriage_112,Sex_Education_Marriage_113,Sex_Education_Marriage_120,Sex_Education_Marriage_121,Sex_Education_Marriage_122,Sex_Education_Marriage_123,Sex_Education_Marriage_130,Sex_Education_Marriage_131,Sex_Education_Marriage_132,Sex_Education_Marriage_133,Sex_Education_Marriage_141,Sex_Education_Marriage_142,Sex_Education_Marriage_143,Sex_Education_Marriage_201,Sex_Education_Marriage_202,Sex_Education_Marriage_210,Sex_Education_Marriage_211,Sex_Education_Marriage_212,Sex_Education_Marriage_213,Sex_Education_Marriage_220,Sex_Education_Marriage_221,Sex_Education_Marriage_222,Sex_Education_Marriage_223,Sex_Education_Marriage_230,Sex_Education_Marriage_231,Sex_Education_Marriage_232,Sex_Education_Marriage_233,Sex_Education_Marriage_241,Sex_Education_Marriage_242,Sex_Education_Marriage_243,Age_category_29 - 34,Age_category_35 - 41,Age_category_<=28,Age_category_>41
0,340000.0,8571.0,21012.0,10607.0,47258.0,35675.0,20691.0,21041.0,10607.0,47258.0,983.0,20691.0,23849.0,143814.0,124429.0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
1,100000.0,32196.0,31379.0,33862.0,35007.0,34147.0,35437.0,0.0,3000.0,2000.0,0.0,2000.0,3500.0,202028.0,10500.0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0
2,130000.0,500.0,2000.0,1354.0,0.0,0.0,2699.0,2000.0,1354.0,0.0,0.0,2699.0,3766.0,6553.0,9819.0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
3,150000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.637979e-12,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4,240000.0,177221.0,169897.0,166064.0,0.0,0.0,0.0,7053.0,6520.0,0.0,0.0,0.0,0.0,513182.0,13573.0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1


In [31]:
# Put oversampled targets into a single-column dataframe named after the target,
# so it can be concatenated column-wise with the features in the export cell.
# Note: this rebinds y_resampled to a DataFrame from here on.
y_resampled = pd.DataFrame(y_resampled, columns=['default.payment.next.month'])
y_resampled.head()

Unnamed: 0,default.payment.next.month
0,0
1,1
2,1
3,0
4,1


In [32]:
# Export train data into csv files; download from Google Colab
from google.colab import files

# Assemble train features with their target values (column-wise concat; rows are
# matched on the index).
train_data = pd.concat([X_resampled_original_scale, y_resampled], axis=1)
train_data.to_csv('train_data.csv', index=False)  # index=False: leave the row index out of the file
files.download('train_data.csv')  # send the written file to the browser via the Colab helper

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [33]:
# Export test data into csv files; download from Google Colab
# (`files` is the google.colab helper imported in the train-export cell.)
# Turn the test target into a single-column frame; this rebinds test_y.
test_y = pd.DataFrame(test_y, columns=['default.payment.next.month'])

# Assemble test features with their target values
# test_X is used exactly as returned by train_test_split: it was neither scaled
# nor resampled anywhere above.
test_data = pd.concat([test_X, test_y], axis=1)
test_data.to_csv('test_data.csv', index=False)
files.download('test_data.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# **End**