# ***Predicting WBK Clients Status with Binary Classification***

In [267]:
import pandas                  as pd
import numpy                   as np
import matplotlib.pyplot       as plt
from   sklearn.model_selection import train_test_split
from   sklearn.metrics         import confusion_matrix, accuracy_score
from   sklearn.preprocessing   import LabelEncoder, OneHotEncoder, StandardScaler
from   sklearn.compose         import ColumnTransformer
from   sklearn.linear_model    import LogisticRegression

In [268]:
df = pd.read_csv('./WBKClientsTreated.csv')
df.shape

(250, 22)

In [269]:
df.head()

Unnamed: 0,Cliente,Estado,Sexo,Duracao,HistoricoCredito,Proposito,Valor,Investimento,Emprego,TempoParcelamento,...,ResidenciaDesde,Idade,OutrosFinanciamentos,Habitacao,EmprestimoExistente,Profissao,Dependentes,SocioEmpresa,Estrangeiro,Status
0,Adelina Buenaventura,RJ,M,2,Adelina Buenaventura,2,1,5951,2,3.0,...,3,2.0,22,3,1.0,1,4,1,0,0
1,Adelino Gago,RJ,M,3,Adelino Gago,1,2,2096,2,4.0,...,3,3.0,49,3,1.0,1,3,2,0,0
2,Adélio Lisboa,SE,M,4,Adélio Lisboa,2,3,7882,2,4.0,...,4,4.0,45,3,2.0,1,4,2,0,0
3,Adérito Bahía,MA,M,5,Adérito Bahía,3,4,4870,2,3.0,...,3,4.0,53,3,2.0,2,4,2,0,0
4,Adolfo Patrício,PE,M,6,Adolfo Patrício,2,2,9055,1,3.0,...,3,4.0,35,3,2.0,1,3,2,1,0


In [270]:
df.dtypes

Cliente                  object
Estado                   object
Sexo                     object
Duracao                   int64
HistoricoCredito         object
Proposito                 int64
Valor                     int64
Investimento              int64
Emprego                   int64
TempoParcelamento       float64
EstadoCivil               int64
Fiador                    int64
ResidenciaDesde           int64
Idade                   float64
OutrosFinanciamentos      int64
Habitacao                 int64
EmprestimoExistente     float64
Profissao                 int64
Dependentes               int64
SocioEmpresa              int64
Estrangeiro               int64
Status                    int64
dtype: object

In [271]:
df = df.drop(['Cliente', 'Estado', 'Sexo' ,'HistoricoCredito'], axis = 1)

In [272]:
df.head(3)

Unnamed: 0,Duracao,Proposito,Valor,Investimento,Emprego,TempoParcelamento,EstadoCivil,Fiador,ResidenciaDesde,Idade,OutrosFinanciamentos,Habitacao,EmprestimoExistente,Profissao,Dependentes,SocioEmpresa,Estrangeiro,Status
0,2,2,1,5951,2,3.0,2,1,3,2.0,22,3,1.0,1,4,1,0,0
1,3,1,2,2096,2,4.0,2,4,3,3.0,49,3,1.0,1,3,2,0,0
2,4,2,3,7882,2,4.0,2,4,4,4.0,45,3,2.0,1,4,2,0,0


### Getting dataframe values

In [273]:
data = df.iloc[:, :17].values
target = df.iloc[:, 17].values

print(f'DATA: {data[0]}\n\nTARGET: {target[0]}')

DATA: [2.000e+00 2.000e+00 1.000e+00 5.951e+03 2.000e+00 3.000e+00 2.000e+00
 1.000e+00 3.000e+00 2.000e+00 2.200e+01 3.000e+00 1.000e+00 1.000e+00
 4.000e+00 1.000e+00 0.000e+00]

TARGET: 0


In [274]:
data = np.asarray(data).astype(np.float32)

In [275]:
uniques, quant = np.unique(df['Status'], return_counts = True)
print(f'UNIQUES: {uniques}\nQUANT: {quant}')

UNIQUES: [0 1]
QUANT: [240  10]


### Separating TRAIN and TEST data

In [276]:
X_train, X_test, y_train, y_test = train_test_split(data, target,
                                                    test_size = 0.3,
                                                    random_state = 0)

print(len(X_train), len(X_test), len(y_train), len(y_test))

175 75 175 75


In [277]:
X_train

array([[113.,   2.,   4., ...,   3.,   1.,   0.],
       [253.,   2.,   4., ...,   4.,   1.,   0.],
       [  6.,   2.,   2., ...,   3.,   2.,   1.],
       ...,
       [124.,   1.,   4., ...,   4.,   1.,   1.],
       [ 55.,   3.,   4., ...,   4.,   1.,   1.],
       [181.,   3.,   6., ...,   4.,   1.,   0.]], dtype=float32)

In [278]:
standarscaler = StandardScaler()

X_train = standarscaler.fit_transform(X_train)
X_test = standarscaler.fit_transform(X_test)
print(f'{X_train[:1] = }\n\n{X_test[:1] = }')

X_train[:1] = array([[-0.20602888,  0.0113968 ,  0.21958455,  1.143798  , -0.28665602,
         1.2668138 ,  0.        ,  0.78677034, -0.03382551,  0.18842229,
        -0.73416895,  0.5050774 ,  2.1631784 , -0.68912935, -0.07970692,
        -0.4456794 , -0.8864052 ]], dtype=float32)

X_test[:1] = array([[ 1.4330301 , -1.0067341 , -1.0963516 , -0.60142833, -0.35175458,
         1.0826255 , -0.92688465,  0.75377834, -0.05170877, -1.4102205 ,
         0.57677054,  0.4346792 , -0.52223295,  2.4563034 , -0.12026672,
         2.5495098 , -0.8164966 ]], dtype=float32)


## ***CREATING THE MODEL***

In [279]:
classifier = LogisticRegression(random_state=0)
classifier.fit(X_train, y_train)

LogisticRegression(random_state=0)

### Predictions

In [280]:
predictions = classifier.predict(X_test)
predictions

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)

### Confusion Matrix

In [281]:
confusion_matrix(y_test, predictions)

array([[72,  0],
       [ 3,  0]], dtype=int64)

### Model Accuracy

In [282]:
print(f'ACCURACY: {accuracy_score(y_test, predictions) * 100:.0f}%')

# GOOD PERFORMANCE

ACCURACY: 96%
