In [1]:
" Import the libraries " 

import os
import sys 
import math
import copy

import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [2]:
" Import the scripts of SD for Explaining "

absFilePath = os.path.dirname(os.path.dirname(os.getcwd()))
newPath = os.path.join(absFilePath, 'SplitSD4X')
sys.path.append(newPath)

from fill_missing_values import *
from missing_values_table import *
from neighbors_generation import *
from patterns_extraction import *
from performances import *
from subgroups_discovery import *
from sp_lime import *

## Data Preparation 

In [3]:
"Loading and preparing data" 

datasets_path = os.path.join(os.path.dirname(os.path.dirname(os.getcwd())), 'Datasets\\')
url = datasets_path + 'data_credit_card.csv'
df = pd.read_csv(url)
df = df.drop(['Unnamed: 0'],axis =1)
df = df.rename(columns={'default payment next month': 'Credible'})
df.head()

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,Credible
0,20000,2,2,1,24,2,2,-1,-1,-2,...,0,0,0,0,689,0,0,0,0,1
1,120000,2,2,2,26,-1,2,0,0,0,...,3272,3455,3261,0,1000,1000,1000,0,2000,1
2,90000,2,2,2,34,0,0,0,0,0,...,14331,14948,15549,1518,1500,1000,1000,1000,5000,0
3,50000,2,2,1,37,0,0,0,0,0,...,28314,28959,29547,2000,2019,1200,1100,1069,1000,0
4,50000,1,2,1,57,-1,0,-1,0,0,...,20940,19146,19131,2000,36681,10000,9000,689,679,0


In [4]:
a = df['PAY_2'].values
df['PAY_2'].unique()

array([ 2,  0, -1, -2,  3,  5,  7,  4,  1,  6,  8], dtype=int64)

In [5]:
steps = (pd.cut(a,11, retbins=True,include_lowest=True))[1][1:-1]
steps = np.unique(np.trunc(steps))
steps

array([-1., -0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.])

In [6]:
" Handling data "

df['EDUCATION'] = df['EDUCATION'].replace({0:4, 5:4, 6:4})
df['MARRIAGE'] = df['MARRIAGE'].replace({0:3})
df['SEX'] = df['SEX'] - 1
df['EDUCATION'] = df['EDUCATION'] - 1
df['MARRIAGE'] = df['MARRIAGE'] - 1

In [7]:
" Decode Categorical Features " 
sex_mapper = {0 : 'M',  1 : 'F'}
sex_mapper_inv = dict(map(reversed, sex_mapper.items()))
df['SEX'] = df['SEX'].replace(sex_mapper)

education_mapper = {0 : 'gradution_school', 1 : 'university', 2 : 'high_school', 3 : 'others'}
education_mapper_inv = dict(map(reversed, education_mapper.items()))
df['EDUCATION'] = df['EDUCATION'].replace(education_mapper)

marital_mapper =  {0 : 'married' , 1 : 'single',  2 : 'others' }
marital_mapper_inv = dict(map(reversed, marital_mapper.items()))
df['MARRIAGE'] = df['MARRIAGE'].replace(marital_mapper)

df.head()

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,Credible
0,20000,F,university,married,24,2,2,-1,-1,-2,...,0,0,0,0,689,0,0,0,0,1
1,120000,F,university,single,26,-1,2,0,0,0,...,3272,3455,3261,0,1000,1000,1000,0,2000,1
2,90000,F,university,single,34,0,0,0,0,0,...,14331,14948,15549,1518,1500,1000,1000,1000,5000,0
3,50000,F,university,married,37,0,0,0,0,0,...,28314,28959,29547,2000,2019,1200,1100,1069,1000,0
4,50000,M,university,married,57,-1,0,-1,0,0,...,20940,19146,19131,2000,36681,10000,9000,689,679,0


In [8]:
" display the features types "
df.dtypes

LIMIT_BAL     int64
SEX          object
EDUCATION    object
MARRIAGE     object
AGE           int64
PAY_0         int64
PAY_2         int64
PAY_3         int64
PAY_4         int64
PAY_5         int64
PAY_6         int64
BILL_AMT1     int64
BILL_AMT2     int64
BILL_AMT3     int64
BILL_AMT4     int64
BILL_AMT5     int64
BILL_AMT6     int64
PAY_AMT1      int64
PAY_AMT2      int64
PAY_AMT3      int64
PAY_AMT4      int64
PAY_AMT5      int64
PAY_AMT6      int64
Credible      int64
dtype: object

In [9]:
" Checking missing values "
df.replace('?', np.nan, inplace=True)
missing_values_table(df)

Your slelected dataframe has 24 columns.
There are 0 columns that have missing values.


Unnamed: 0,Missing Values,% of Total Values


In [10]:
" separate the data and the target "
data_df = df.drop(columns=['Credible'])
target_df = df['Credible']

In [11]:
" calculate the categorical features mask "
categorical_feature_mask = (data_df.dtypes == object)
categorical_feature_mask

LIMIT_BAL    False
SEX           True
EDUCATION     True
MARRIAGE      True
AGE          False
PAY_0        False
PAY_2        False
PAY_3        False
PAY_4        False
PAY_5        False
PAY_6        False
BILL_AMT1    False
BILL_AMT2    False
BILL_AMT3    False
BILL_AMT4    False
BILL_AMT5    False
BILL_AMT6    False
PAY_AMT1     False
PAY_AMT2     False
PAY_AMT3     False
PAY_AMT4     False
PAY_AMT5     False
PAY_AMT6     False
dtype: bool

In [12]:
categorical_cols_names = data_df.columns[categorical_feature_mask].tolist()
categorical_cols_names

['SEX', 'EDUCATION', 'MARRIAGE']

In [13]:
numerical_cols_names = data_df.columns[~categorical_feature_mask].tolist()
numerical_cols_names

['LIMIT_BAL',
 'AGE',
 'PAY_0',
 'PAY_2',
 'PAY_3',
 'PAY_4',
 'PAY_5',
 'PAY_6',
 'BILL_AMT1',
 'BILL_AMT2',
 'BILL_AMT3',
 'BILL_AMT4',
 'BILL_AMT5',
 'BILL_AMT6',
 'PAY_AMT1',
 'PAY_AMT2',
 'PAY_AMT3',
 'PAY_AMT4',
 'PAY_AMT5',
 'PAY_AMT6']

In [14]:
" if no values missed we execute this code : "
data_df = pd.concat([data_df[numerical_cols_names].astype(float), data_df[categorical_cols_names]],axis = 1)
data_df.head()

Unnamed: 0,LIMIT_BAL,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6,BILL_AMT1,BILL_AMT2,...,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,SEX,EDUCATION,MARRIAGE
0,20000.0,24.0,2.0,2.0,-1.0,-1.0,-2.0,-2.0,3913.0,3102.0,...,0.0,0.0,689.0,0.0,0.0,0.0,0.0,F,university,married
1,120000.0,26.0,-1.0,2.0,0.0,0.0,0.0,2.0,2682.0,1725.0,...,3261.0,0.0,1000.0,1000.0,1000.0,0.0,2000.0,F,university,single
2,90000.0,34.0,0.0,0.0,0.0,0.0,0.0,0.0,29239.0,14027.0,...,15549.0,1518.0,1500.0,1000.0,1000.0,1000.0,5000.0,F,university,single
3,50000.0,37.0,0.0,0.0,0.0,0.0,0.0,0.0,46990.0,48233.0,...,29547.0,2000.0,2019.0,1200.0,1100.0,1069.0,1000.0,F,university,married
4,50000.0,57.0,-1.0,0.0,-1.0,0.0,0.0,0.0,8617.0,5670.0,...,19131.0,2000.0,36681.0,10000.0,9000.0,689.0,679.0,M,university,married


In [15]:
" Encoding categorical features" 

data_df['SEX'] = data_df['SEX'].replace(sex_mapper_inv)
data_df['EDUCATION'] = data_df['EDUCATION'].replace(education_mapper_inv)
data_df['MARRIAGE'] = data_df['MARRIAGE'].replace(marital_mapper_inv)

data_df.head()

Unnamed: 0,LIMIT_BAL,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6,BILL_AMT1,BILL_AMT2,...,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,SEX,EDUCATION,MARRIAGE
0,20000.0,24.0,2.0,2.0,-1.0,-1.0,-2.0,-2.0,3913.0,3102.0,...,0.0,0.0,689.0,0.0,0.0,0.0,0.0,1,1,0
1,120000.0,26.0,-1.0,2.0,0.0,0.0,0.0,2.0,2682.0,1725.0,...,3261.0,0.0,1000.0,1000.0,1000.0,0.0,2000.0,1,1,1
2,90000.0,34.0,0.0,0.0,0.0,0.0,0.0,0.0,29239.0,14027.0,...,15549.0,1518.0,1500.0,1000.0,1000.0,1000.0,5000.0,1,1,1
3,50000.0,37.0,0.0,0.0,0.0,0.0,0.0,0.0,46990.0,48233.0,...,29547.0,2000.0,2019.0,1200.0,1100.0,1069.0,1000.0,1,1,0
4,50000.0,57.0,-1.0,0.0,-1.0,0.0,0.0,0.0,8617.0,5670.0,...,19131.0,2000.0,36681.0,10000.0,9000.0,689.0,679.0,0,1,0


In [16]:
data_target_df = pd.concat([data_df, target_df], axis=1) 

In [17]:
" generate the Test SET "
nb_test_instances = 1000 
test_df = data_target_df.sample(n=nb_test_instances)
data_test_df = test_df.drop(columns=['Credible'])
target_test_df = test_df['Credible']

In [18]:
" generate the Training SET "
train_df = pd.concat([data_target_df,test_df]).drop_duplicates(keep=False)
data_train_df = train_df.drop(columns=['Credible'])
target_train_df = train_df['Credible']

In [19]:
" Extract values of the test set to generate the neighbors"

data_test = data_test_df.values
target_test = target_test_df.values

In [20]:
numerical_cols = np.arange(0,len(numerical_cols_names)) 
categorical_cols = np.arange(len(numerical_cols_names),data_df.shape[1])

## Neighbors Generation

In [21]:
nb_neighbors = 50
list_neigh = generate_all_neighbors(data_test,numerical_cols,categorical_cols,nb_neighbors)

In [22]:
" store all the neighbors together "
n = np.size(data_test,0)
all_neighbors = list_neigh[0]
for i in range(1,n) :
    all_neighbors = np.concatenate((all_neighbors, list_neigh[i]), axis=0)

### One hot encoding 

In [23]:
df_neigh = pd.DataFrame(data = all_neighbors,columns= numerical_cols_names + categorical_cols_names)
df_neigh[categorical_cols_names] = df_neigh[categorical_cols_names].astype(int,errors='ignore')

" Decode all the data neighbors to perform one hot encoding "
df_neigh['SEX'] = df_neigh['SEX'].replace(sex_mapper)
df_neigh['EDUCATION'] = df_neigh['EDUCATION'].replace(education_mapper)
df_neigh['MARRIAGE'] = df_neigh['MARRIAGE'].replace(marital_mapper)
df_neigh.head()

Unnamed: 0,LIMIT_BAL,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6,BILL_AMT1,BILL_AMT2,...,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,SEX,EDUCATION,MARRIAGE
0,210527.82966,41.237638,-2.002524,-1.966718,-1.060964,-0.940325,-0.954414,0.107962,-4650.580343,-2640.722191,...,36614.384385,558.714124,-637.063048,3411.634714,40173.845132,5298.996144,5.99413,F,high_school,married
1,242897.239997,41.189297,-2.037772,-2.270425,-1.156977,-1.261776,-1.172986,-0.200246,-15098.699584,-14293.176547,...,24862.18126,-1336.127755,1442.422214,3680.2885,35642.715957,6075.74127,858.909286,F,university,single
2,229433.948837,41.057827,-1.96853,-1.907876,-0.931836,-1.04759,-1.166639,-0.14198,1395.956611,-4896.927529,...,36927.414926,-2933.558128,-236.574134,3593.791938,39972.56372,5293.360996,5043.217604,F,university,married
3,251362.973854,42.654651,-1.973775,-2.103622,-1.004522,-1.062027,-1.093554,-0.027434,4880.687128,3820.230086,...,37940.89143,-1433.279214,1195.429644,3466.112279,38838.902685,5146.979551,655.689733,F,high_school,married
4,244192.328137,42.271117,-2.149528,-2.144614,-1.132466,-1.053969,-1.104467,-0.063027,-8546.492828,-1953.429653,...,27551.825471,1067.719722,-3024.059253,3337.190477,36197.602386,4659.051206,1844.960956,F,university,single


In [24]:
" One hot encoding "
df_neigh = pd.get_dummies(df_neigh, prefix_sep='_', drop_first=True)
df_neigh

Unnamed: 0,LIMIT_BAL,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6,BILL_AMT1,BILL_AMT2,...,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,SEX_M,EDUCATION_high_school,EDUCATION_others,EDUCATION_university,MARRIAGE_others,MARRIAGE_single
0,210527.829660,41.237638,-2.002524,-1.966718,-1.060964,-0.940325,-0.954414,0.107962,-4650.580343,-2640.722191,...,3411.634714,40173.845132,5298.996144,5.994130,0,1,0,0,0,0
1,242897.239997,41.189297,-2.037772,-2.270425,-1.156977,-1.261776,-1.172986,-0.200246,-15098.699584,-14293.176547,...,3680.288500,35642.715957,6075.741270,858.909286,0,0,0,1,0,1
2,229433.948837,41.057827,-1.968530,-1.907876,-0.931836,-1.047590,-1.166639,-0.141980,1395.956611,-4896.927529,...,3593.791938,39972.563720,5293.360996,5043.217604,0,0,0,1,0,0
3,251362.973854,42.654651,-1.973775,-2.103622,-1.004522,-1.062027,-1.093554,-0.027434,4880.687128,3820.230086,...,3466.112279,38838.902685,5146.979551,655.689733,0,1,0,0,0,0
4,244192.328137,42.271117,-2.149528,-2.144614,-1.132466,-1.053969,-1.104467,-0.063027,-8546.492828,-1953.429653,...,3337.190477,36197.602386,4659.051206,1844.960956,0,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49995,18199.502686,45.566190,0.066865,-0.041215,-0.032475,2.031919,0.024669,0.060227,3671.253873,5536.070218,...,-2399.740681,-895.211162,1735.187214,3643.885846,0,0,0,0,0,0
49996,58606.638076,45.838523,0.099324,-0.040741,-0.024186,1.915742,-0.188029,-0.066874,11164.697151,11005.198451,...,562.918236,4066.385031,835.085756,1370.816171,0,1,0,0,0,0
49997,33416.954255,44.298638,-0.083740,0.022604,0.079251,2.150624,0.046936,0.016723,10325.153398,13450.716940,...,532.767765,-2109.751650,960.985645,3843.826344,1,0,0,0,0,0
49998,48363.799914,46.203568,0.102353,-0.090172,-0.051240,1.992400,-0.071335,-0.052584,1099.428631,1979.053567,...,-233.387842,-1283.032787,3212.707149,4978.688563,0,0,0,0,0,0


In [25]:
" Scale the neighbors data "
data_neigh = df_neigh.values

scaler_neigh = StandardScaler()
data_neigh_s = scaler_neigh.fit_transform(data_neigh)

" Store the neighbors in a list "
n = np.size(data_test,0)
list_neigh = []
j = 0
for i in range(0,n):
    list_neigh.append(data_neigh_s[j:(j+nb_neighbors),:])
    j += nb_neighbors

####  One hot encoding for the training and the test sets

In [26]:
data_train_df['SEX'] = data_train_df['SEX'].replace(sex_mapper)
data_train_df['EDUCATION'] = data_train_df['EDUCATION'].replace(education_mapper)
data_train_df['MARRIAGE'] = data_train_df['MARRIAGE'].replace(marital_mapper)

In [27]:
data_train_df = pd.get_dummies(data_train_df, prefix_sep='_', drop_first=True)
data_train_df.head()

Unnamed: 0,LIMIT_BAL,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6,BILL_AMT1,BILL_AMT2,...,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,SEX_M,EDUCATION_high_school,EDUCATION_others,EDUCATION_university,MARRIAGE_others,MARRIAGE_single
0,20000.0,24.0,2.0,2.0,-1.0,-1.0,-2.0,-2.0,3913.0,3102.0,...,0.0,0.0,0.0,0.0,0,0,0,1,0,0
1,120000.0,26.0,-1.0,2.0,0.0,0.0,0.0,2.0,2682.0,1725.0,...,1000.0,1000.0,0.0,2000.0,0,0,0,1,0,1
2,90000.0,34.0,0.0,0.0,0.0,0.0,0.0,0.0,29239.0,14027.0,...,1000.0,1000.0,1000.0,5000.0,0,0,0,1,0,1
3,50000.0,37.0,0.0,0.0,0.0,0.0,0.0,0.0,46990.0,48233.0,...,1200.0,1100.0,1069.0,1000.0,0,0,0,1,0,0
4,50000.0,57.0,-1.0,0.0,-1.0,0.0,0.0,0.0,8617.0,5670.0,...,10000.0,9000.0,689.0,679.0,1,0,0,1,0,0


In [28]:
data_train = data_train_df.values
target_train = target_train_df.values

In [29]:
data_test_df['SEX'] = data_test_df['SEX'].replace(sex_mapper)
data_test_df['EDUCATION'] = data_test_df['EDUCATION'].replace(education_mapper)
data_test_df['MARRIAGE'] = data_test_df['MARRIAGE'].replace(marital_mapper)

In [30]:
data_test_df = pd.get_dummies(data_test_df, prefix_sep='_', drop_first=True)
data_test_df.head()

Unnamed: 0,LIMIT_BAL,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6,BILL_AMT1,BILL_AMT2,...,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,SEX_M,EDUCATION_high_school,EDUCATION_others,EDUCATION_university,MARRIAGE_others,MARRIAGE_single
28769,230000.0,42.0,-2.0,-2.0,-1.0,-1.0,-1.0,0.0,-138.0,-69.0,...,4749.0,38000.0,5000.0,3000.0,0,0,0,1,0,0
22988,500000.0,42.0,0.0,0.0,0.0,0.0,0.0,0.0,237066.0,171440.0,...,10000.0,50000.0,20000.0,50000.0,0,0,0,0,0,1
13938,50000.0,56.0,0.0,0.0,0.0,0.0,0.0,0.0,48316.0,34161.0,...,898.0,864.0,900.0,998.0,0,0,0,1,0,1
17243,110000.0,38.0,0.0,0.0,0.0,0.0,0.0,0.0,105433.0,107065.0,...,4031.0,4000.0,3000.0,2810.0,0,0,0,1,0,0
25580,150000.0,32.0,2.0,0.0,0.0,0.0,0.0,2.0,35099.0,35601.0,...,2000.0,3419.0,13.0,2088.0,0,0,0,0,0,1


In [31]:
data_test = data_test_df.values
target_test = target_test_df.values

In [32]:
" Scale the training and the test sets data"
scaler_train = StandardScaler()
data_train_s = scaler_train.fit_transform(data_train)

scaler_test = StandardScaler()
data_test_s = scaler_test.fit_transform(data_test)

In [33]:
" Define the functions to save and load data "
import pickle
def save_obj(obj, name):
    with open(name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)

In [34]:
'SAVE THE DATA'

path = './saved_data/'
save_obj(data_train_s, path + 'data_train_s')
save_obj(target_train, path + 'target_train')
save_obj(data_test, path  + 'data_test')
save_obj(data_test_s, path  + 'data_test_s')
save_obj(target_test, path + 'target_test')
save_obj(list_neigh, path + 'list_neighbors')

## Training the models

In [35]:
" Logistic Regression : "
lr = LogisticRegression(class_weight = "balanced",random_state=0,max_iter = 1000)
model_lr = lr.fit(data_train_s,target_train)
target_pred_lr = model_lr.predict(data_test_s)

In [36]:
" Random Forest : "
rdclassifier = RandomForestClassifier(class_weight = "balanced",n_estimators=100,max_depth=5, random_state=0) 
model_rd = rdclassifier.fit(data_train_s,target_train)
target_pred_rd = model_rd.predict(data_test_s)

In [37]:
" SVM : "
clf = svm.SVC(class_weight = "balanced",probability=True)
model_svm = clf.fit(data_train_s, target_train)
target_pred_svm = model_svm.predict(data_test_s)

In [38]:
" Sklearn MLP Classifier : "
mlp = MLPClassifier(hidden_layer_sizes=(50,30), max_iter=1000,
                    solver='adam', random_state=1,
                    learning_rate_init=.1)

model_nt = mlp.fit(data_train_s, target_train)
target_pred_mlp = model_nt.predict(data_test_s)

## Scores of the black box models 

In [39]:
print(f"{'The score of the logistic regression model is ' :<50}{': {}'.format(round(model_lr.score(data_test_s,target_test),4))}")
print(f"{'The score of the Random Forest  model is ' :<50}{': {}'.format(round(model_rd.score(data_test_s,target_test),4))}")
print(f"{'The score of the SVM model is ' :<50}{': {}'.format(round(model_svm.score(data_test_s,target_test),4))}")
print(f"{'The score of the Multi-Layer-Perceptron model is ' :<50}{': {}'.format(round(model_nt.score(data_test_s,target_test),4))}")

The score of the logistic regression model is     : 0.66
The score of the Random Forest  model is          : 0.809
The score of the SVM model is                     : 0.794
The score of the Multi-Layer-Perceptron model is  : 0.83


## Execution of Split Based Selection Form Algorithm : 


In [40]:
split_point = len(numerical_cols)
nb_models = 100
(L_Subgroups,P) = SplitBasedSelectionForm (data_test_s, target_test, nb_models, model_nt, list_neigh,split_point,2)

In [41]:
'SAVE THE LIST OF THE SUBGROUPS'
save_obj(L_Subgroups, path + 'list_subgroups')

## Subgroups Descriptions

In [42]:
att_names = data_test_df.columns
data_test_means = scaler_test.mean_
data_test_stds = np.sqrt(scaler_test.var_)
patt_descriptions = patterns_sc(P,split_point,data_test_s,att_names,data_test_means,data_test_stds)

subrgoup 0
0.22 < PAY_0 <= 8.0
-2.0 < PAY_2 <= -1.0
-------------------------------------------------------------------
subrgoup 1
0.22 < PAY_0 <= 8.0
-1.0 < PAY_2 <= 1.0
38905.9 < BILL_AMT2 <= 508581.0
-2.0 < PAY_4 <= -0.67
-------------------------------------------------------------------
subrgoup 2
-0.67 < PAY_0 <= 0.22
-2.0 < PAY_3 <= 0.0
-2.0 < PAY_4 <= 0.0
126000.0 < LIMIT_BAL <= 740000.0
-------------------------------------------------------------------
subrgoup 3
0.22 < PAY_0 <= 8.0
-1.0 < PAY_2 <= 1.0
-4338.0 < BILL_AMT2 <= 38905.9
21.0 < AGE <= 46.7
154000.0 < LIMIT_BAL <= 740000.0
-------------------------------------------------------------------
subrgoup 4
-2.0 < PAY_0 <= -0.5
-2.0 < PAY_3 <= 0.0
0.0 < PAY_4 <= 8.0
-------------------------------------------------------------------
subrgoup 5
0.22 < PAY_0 <= 8.0
3.0 < PAY_2 <= 7.0
0.5 < PAY_4 <= 8.0
-------------------------------------------------------------------
subrgoup 6
-2.0 < PAY_0 <= -0.67
-2.0 < PAY_3 <= 0.0
-2

-0.0 < PAY_6 <= 6.0
9425.8 < BILL_AMT1 <= 46224.4
1415.2 < PAY_AMT3 <= 215917.0
0.0 < PAY_AMT4 <= 2400.0
0.0 < PAY_AMT5 <= 1200.0
-------------------------------------------------------------------
subrgoup 44
-2.0 < PAY_0 <= 0.22
0.0 < PAY_3 <= 6.0
0.0 < PAY_4 <= 8.0
-0.0 < PAY_6 <= 6.0
0.0 < PAY_AMT1 <= 8420.0
0.0 < PAY_AMT5 <= 1150.5
-------------------------------------------------------------------
subrgoup 45
0.22 < PAY_0 <= 8.0
-1.0 < PAY_2 <= 1.0
38905.9 < BILL_AMT2 <= 508581.0
-0.67 < PAY_4 <= 8.0
-21295.0 < BILL_AMT6 <= 96604.2
2069.8 < PAY_AMT3 <= 2560.0
-------------------------------------------------------------------
subrgoup 46
0.22 < PAY_0 <= 8.0
-1.0 < PAY_2 <= 1.0
38905.9 < BILL_AMT2 <= 508581.0
-0.67 < PAY_4 <= 8.0
-21295.0 < BILL_AMT6 <= 96604.2
0.0 < PAY_AMT3 <= 2069.8
-------------------------------------------------------------------
subrgoup 47
-0.5 < PAY_0 <= 0.22
0.0 < PAY_3 <= 6.0
-2.0 < PAY_4 <= 0.0
10000.0 < LIMIT_BAL <= 118000.0
21.0 < AGE <= 33.2
1236.0 

10000.0 < LIMIT_BAL <= 118000.0
EDUCATION_high_school = 1
-------------------------------------------------------------------
subrgoup 77
-2.0 < PAY_0 <= -0.5
0.0 < PAY_3 <= 6.0
-2.0 < PAY_4 <= -1.5
10000.0 < LIMIT_BAL <= 118000.0
EDUCATION_high_school = 1
-------------------------------------------------------------------
subrgoup 78
-0.67 < PAY_0 <= 0.22
-2.0 < PAY_3 <= 0.0
-2.0 < PAY_4 <= 0.0
10000.0 < LIMIT_BAL <= 126000.0
37.2 < AGE <= 67.0
-0.0 < PAY_AMT2 <= 2300.0
-0.0 < PAY_6 <= 6.0
-165580.0 < BILL_AMT1 <= 46224.4
0.0 < PAY_AMT3 <= 1415.2
-21295.0 < BILL_AMT6 <= 11625.6
-37594.0 < BILL_AMT5 <= 13693.1
1783.4 < PAY_AMT1 <= 505000.0
-------------------------------------------------------------------
subrgoup 79
-0.67 < PAY_0 <= 0.22
-2.0 < PAY_3 <= 0.0
-2.0 < PAY_4 <= 0.0
10000.0 < LIMIT_BAL <= 126000.0
37.2 < AGE <= 67.0
-0.0 < PAY_AMT2 <= 2300.0
-0.0 < PAY_6 <= 6.0
-165580.0 < BILL_AMT1 <= 46224.4
0.0 < PAY_AMT3 <= 1415.2
-21295.0 < BILL_AMT6 <= 11625.6
-37594.0 < BILL_AMT5 <=

In [43]:
'SAVE THE SUBGROUPS PATTERNS'
save_obj(patt_descriptions, path + 'patterns')
save_obj(att_names, path + 'att_names')