In [7]:
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.compose import make_column_transformer

In [13]:
base = pd.read_csv('Credit2.csv', sep = ';')
base.head()

Unnamed: 0,ID,checking_status,credit_history,duration,credit_amount,installment_commitment,residence_since,age,existing_credits,num_dependents,class
0,1,<0,critical/other existing credit,6,1169,4,4,67,2,1,good
1,2,0<=X<200,existing paid,48,5951,2,2,22,1,1,bad
2,3,no checking,critical/other existing credit,12,2096,2,3,49,1,2,good
3,4,<0,existing paid,42,7882,2,4,45,1,2,good
4,5,<0,delayed previously,24,4870,3,4,53,2,2,bad


In [15]:
#Separacao das variaveis independentes da dependente, ignrando a primeira coluna, pois nao tem valor semantico
X = base.iloc[:,1:10].values
Y = base.iloc[:,10].values

In [19]:
#Utilizaremos o labelencoder para tranformar os dados da coluna 0 (checking_status)
#por ter 4 resultados possiveis, será transformado de 0 a 3
labelencoder = LabelEncoder()
X[:,0] = labelencoder.fit_transform(X[:,0])
X

array([[1, 'critical/other existing credit', 6, ..., 67, 2, 1],
       [0, 'existing paid', 48, ..., 22, 1, 1],
       [3, 'critical/other existing credit', 12, ..., 49, 1, 2],
       ...,
       [3, 'existing paid', 12, ..., 38, 1, 1],
       [1, 'existing paid', 45, ..., 23, 1, 1],
       [0, 'critical/other existing credit', 45, ..., 27, 1, 1]],
      dtype=object)

In [33]:
#Para credit_history, utilizaremos o one hot encoder, ele cria n colunas e deixa com 1 apenas onde representa o antigo valor
#Neste caso ele criará 5 colunas, pois é a quantidade de possibilidades presente na coluna
onehotencoder = make_column_transformer((OneHotEncoder(categories = 'auto', sparse = False), [1]), remainder = 'passthrough')
X = onehotencoder.fit_transform(X)
X



array([[0.0, 1.0, 0.0, ..., 67, 2, 1],
       [0.0, 0.0, 0.0, ..., 22, 1, 1],
       [0.0, 1.0, 0.0, ..., 49, 1, 2],
       ...,
       [0.0, 0.0, 0.0, ..., 38, 1, 1],
       [0.0, 0.0, 0.0, ..., 23, 1, 1],
       [0.0, 1.0, 0.0, ..., 27, 1, 1]], dtype=object)

In [35]:
#Deve se excluir uma das colunas para evitar dummy variable trap
X = X[:, 1:]
X

array([[1.0, 0.0, 0.0, ..., 67, 2, 1],
       [0.0, 0.0, 1.0, ..., 22, 1, 1],
       [1.0, 0.0, 0.0, ..., 49, 1, 2],
       ...,
       [0.0, 0.0, 1.0, ..., 38, 1, 1],
       [0.0, 0.0, 1.0, ..., 23, 1, 1],
       [1.0, 0.0, 0.0, ..., 27, 1, 1]], dtype=object)

In [49]:
#Aplicando labelencoder na classe
labelconderY = LabelEncoder()
Y = labelconderY.fit_transform(Y)
Y

array([1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0,
       1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,

In [57]:
x_treino, x_teste, y_treino, y_teste = train_test_split(X, Y, test_size = 0.2, random_state = 0)
print(len(x_treino),len(x_teste),len(y_treino),len(y_teste))

800 200 800 200


In [125]:
#Agora faremos o feature sclae, para padronizar os valores numericos, utilizando o Z-score
sc = StandardScaler()
x_treino = sc.fit_transform(x_treino)
x_teste = sc.fit_transform(x_teste)
x_teste
#Aqui acaba o tratamento de dados

array([[-0.60816364, -0.31448545,  0.89543386, ..., -0.40142098,
        -0.66494037, -0.45256964],
       [-0.60816364, -0.31448545,  0.89543386, ..., -0.79305121,
        -0.66494037,  2.2096047 ],
       [-0.60816364, -0.31448545,  0.89543386, ...,  0.87137725,
        -0.66494037, -0.45256964],
       ...,
       [ 1.64429429, -0.31448545, -1.11677706, ...,  0.47974703,
         0.97688771, -0.45256964],
       [-0.60816364,  3.17979734, -1.11677706, ...,  0.77346969,
         4.26054386, -0.45256964],
       [-0.60816364, -0.31448545,  0.89543386, ...,  0.77346969,
        -0.66494037,  2.2096047 ]])

In [127]:
#Criando a topologia da rede
modelo = Sequential()
modelo.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 12))
modelo.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
modelo.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
modelo.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [129]:
#Treinando o modelo
modelo.fit(x_treino, y_treino, batch_size = 10, epochs = 100)

Epoch 1/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7107 - loss: 0.6901
Epoch 2/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6775 - loss: 0.6711
Epoch 3/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 975us/step - accuracy: 0.6983 - loss: 0.6023
Epoch 4/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 950us/step - accuracy: 0.6610 - loss: 0.5672
Epoch 5/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 924us/step - accuracy: 0.6999 - loss: 0.5387
Epoch 6/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 924us/step - accuracy: 0.6830 - loss: 0.5506
Epoch 7/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6820 - loss: 0.5488  
Epoch 8/100
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 912us/step - accuracy: 0.6673 - loss: 0.5507
Epoch 9/100
[1m80/80[0m [32m━━━━━

<keras.src.callbacks.history.History at 0x20547237d10>

In [131]:
#Realizando uma previsao
y_previsao = modelo.predict(x_teste)
#Conversao em T or F
y_previsao = (y_previsao > 0.5)
y_previsao

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 


array([[False],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [False],
       [False],
       [False],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [ True],
       [ True],
       [False],
       [False],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [False],
       [ True],
       [False],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [

In [133]:
#Agora podemos gerar a matriz de confusao
matriz = confusion_matrix(y_teste, y_previsao)
matriz

array([[ 25,  33],
       [ 18, 124]], dtype=int64)