In [1]:
import pandas as pd

In [2]:
# Load the credit dataset from the CSV file into a DataFrame
base = pd.read_csv('credit_data.csv')

In [3]:
# Summary statistics of the dataset (count, mean, std, min, quartiles, max per column)
base.describe()

Unnamed: 0,i#clientid,income,age,loan,c#default
count,2000.0,2000.0,1997.0,2000.0,2000.0
mean,1000.5,45331.600018,40.807559,4444.369695,0.1415
std,577.494589,14326.327119,13.624469,3045.410024,0.348624
min,1.0,20014.48947,-52.42328,1.37763,0.0
25%,500.75,32796.459717,28.990415,1939.708847,0.0
50%,1000.5,45789.117313,41.317159,3974.719419,0.0
75%,1500.25,57791.281668,52.58704,6432.410625,0.0
max,2000.0,69995.685578,63.971796,13766.051239,1.0


In [4]:
# Preview the first rows of the dataset
base.head()

Unnamed: 0,i#clientid,income,age,loan,c#default
0,1,66155.925095,59.017015,8106.532131,0
1,2,34415.153966,48.117153,6564.745018,0
2,3,57317.170063,63.108049,8020.953296,0
3,4,42709.534201,45.751972,6103.64226,0
4,5,66952.688845,18.584336,8770.099235,1


In [5]:
# Show the rows whose age is negative (invalid values)
idade_negativa = base['age'] < 0
base.loc[idade_negativa]

Unnamed: 0,i#clientid,income,age,loan,c#default
15,16,50501.726689,-28.218361,3977.287432,0
21,22,32197.620701,-52.42328,4244.057136,0
26,27,63287.038908,-36.496976,9595.286289,0


In [6]:
### Ways to deal with the negative ages
#
# 1) Drop the whole column (not recommended in this case):
#      base.drop('age', 1, inplace=True)
#
# 2) Drop only the rows that contain the inconsistency:
#      base.drop(base[base.age < 0].index, inplace=True)
#
# 3) (used here) Overwrite the negative values with the column mean,
#    computed only from the values greater than zero.
idades = base['age']
media = idades[idades > 0].mean()
base.loc[idades < 0, 'age'] = media

In [7]:
# Show the rows whose age is missing (NaN)
idade_nula = base['age'].isnull()
base.loc[idade_nula]

Unnamed: 0,i#clientid,income,age,loan,c#default
28,29,59417.805406,,2082.625938,0
30,31,48528.852796,,6155.78467,0
31,32,23526.302555,,2862.010139,0


In [8]:
# Split the dataset into predictor variables and target.
# Columns are selected by name instead of by position, so a change in the CSV
# column order raises a KeyError rather than silently feeding the wrong
# columns (e.g. the client id) to the model.
previsores = base[['income', 'age', 'loan']].values
classe = base['c#default'].values

In [9]:
# Replace the missing values with the mean of each column.
# `sklearn.preprocessing.Imputer` was deprecated in scikit-learn 0.20 and
# removed in 0.22; `sklearn.impute.SimpleImputer` is its replacement and
# always imputes column-wise (the old `axis=0` behaviour).
from sklearn.impute import SimpleImputer

# NaN is SimpleImputer's default `missing_values` marker, so only the strategy is given.
imputer = SimpleImputer(strategy='mean')

# Learn the column means and write the imputed values back into the same array.
previsores[:, 0:3] = imputer.fit_transform(previsores[:, 0:3])

In [10]:
## Feature scaling of the predictor columns
from sklearn.preprocessing import StandardScaler

# Standardisation: learn the per-column statistics, then rescale the columns.
# NOTE(review): the scaler is fitted on all rows, before the train/test split
# done later in the notebook, so test rows influence the scaling statistics.
# Consider fitting on the training rows only.
scaler = StandardScaler().fit(previsores)
previsores = scaler.transform(previsores)

# Alternative (not used): min-max normalisation
# from sklearn.preprocessing import MinMaxScaler
# scaler = MinMaxScaler()
# previsores = scaler.fit_transform(previsores)

In [11]:
# Dividindo os dados em treino e teste
from sklearn.model_selection import train_test_split

In [12]:
# Hold out 25% of the rows for testing; the fixed random_state makes the split repeatable
(previsores_train,
 previsores_test,
 classe_train,
 classe_test) = train_test_split(
    previsores,
    classe,
    test_size=0.25,
    random_state=0,
)

In [13]:
# Modelo Redes Neurais com Keras
import tensorflow

In [14]:
from tensorflow import keras

In [15]:
# Fix TensorFlow's global random seed before any layer is created, so the
# random weight initialisation does not change from one run to the next.
# The value mirrors the random_state used for the train/test split.
# NOTE(review): `tensorflow.random.set_seed` is the TF 2.x API — confirm the installed version.
tensorflow.random.set_seed(0)

# Empty sequential model; layers are stacked onto it in the following cells.
classificador = keras.Sequential()

In [16]:
# Configure the layers of the neural network.
# First hidden layer: two ReLU neurons receiving the 3 input features.
camada_oculta_1 = keras.layers.Dense(units=2, activation='relu', input_dim=3)
classificador.add(camada_oculta_1)

In [17]:
# Second hidden layer: two ReLU neurons.
camada_oculta_2 = keras.layers.Dense(units=2, activation='relu')
classificador.add(camada_oculta_2)

In [18]:
# Output layer: a single sigmoid neuron (binary problem).
camada_saida = keras.layers.Dense(units=1, activation='sigmoid')
classificador.add(camada_saida)

In [19]:
# Compile the network: Adam optimizer, binary cross-entropy loss, accuracy reported during training
classificador.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [20]:
# Train the model: the weights are adjusted after every batch of 10 records,
# and the whole training set is passed through 100 times (epochs).
classificador.fit(
    x=previsores_train,
    y=classe_train,
    batch_size=10,
    epochs=100,
)

Train on 1500 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100

Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x1b34e8199e8>

In [21]:
# Evaluate the trained model on the test data:
# take the network output for each test row and label it True when it exceeds 0.5.
probabilidades = classificador.predict(previsores_test)
previsoes = probabilidades > 0.5

In [22]:
# Computing the accuracy of our model (fraction of correct predictions)
from sklearn.metrics import confusion_matrix, accuracy_score

In [23]:
# Accuracy on the test set: share of test rows whose predicted label equals the true label
precisao = accuracy_score(y_true=classe_test, y_pred=previsoes)
precisao

0.998

In [24]:
# Confusion matrix on the test set: rows are the true classes, columns the predicted classes
matriz = confusion_matrix(y_true=classe_test, y_pred=previsoes)
matriz

array([[435,   1],
       [  0,  64]], dtype=int64)

## Resultado
### Redes Neurais Tensorflow Keras
Acurácia no conjunto de teste: 0.998