In [1]:
import pandas as pd

In [2]:
# Importando database
base = pd.read_csv('credit_data.csv')

In [3]:
# Estatísticas do database
base.describe()

Unnamed: 0,i#clientid,income,age,loan,c#default
count,2000.0,2000.0,1997.0,2000.0,2000.0
mean,1000.5,45331.600018,40.807559,4444.369695,0.1415
std,577.494589,14326.327119,13.624469,3045.410024,0.348624
min,1.0,20014.48947,-52.42328,1.37763,0.0
25%,500.75,32796.459717,28.990415,1939.708847,0.0
50%,1000.5,45789.117313,41.317159,3974.719419,0.0
75%,1500.25,57791.281668,52.58704,6432.410625,0.0
max,2000.0,69995.685578,63.971796,13766.051239,1.0


In [4]:
# Amostra dos dados
base.head()

Unnamed: 0,i#clientid,income,age,loan,c#default
0,1,66155.925095,59.017015,8106.532131,0
1,2,34415.153966,48.117153,6564.745018,0
2,3,57317.170063,63.108049,8020.953296,0
3,4,42709.534201,45.751972,6103.64226,0
4,5,66952.688845,18.584336,8770.099235,1


In [5]:
# Verificando dados com idade negativa
base.loc[base['age'] < 0]

Unnamed: 0,i#clientid,income,age,loan,c#default
15,16,50501.726689,-28.218361,3977.287432,0
21,22,32197.620701,-52.42328,4244.057136,0
26,27,63287.038908,-36.496976,9595.286289,0


In [6]:
### Maneiras de contornar o problema das idades menores que zero

## 1) Apagar a coluna por inteiro (não recomendada, neste caso)
# base.drop('age', 1, inplace=True)

## 2) Apagar apenas os registros, por completo, que possuem essa incoerência
# base.drop(base[base.age < 0].index, inplace=True)

## 3) Preencher os valores com a média da coluna, apenas dos valores maiores que zero
media = base['age'][base.age > 0].mean()
base.loc[base.age < 0, 'age'] = media

In [7]:
# Verificando valores nulos
base.loc[pd.isnull(base['age'])]

Unnamed: 0,i#clientid,income,age,loan,c#default
28,29,59417.805406,,2082.625938,0
30,31,48528.852796,,6155.78467,0
31,32,23526.302555,,2862.010139,0


In [8]:
# Divisão do dataset entre variáveis preditoras e target
previsores = base.iloc[:, 1:4].values
classe = base.iloc[:, 4].values

In [9]:
# Substituindo os valores missing pela média de cada coluna
from sklearn.preprocessing import Imputer

imputer = Imputer(missing_values='NaN', strategy='mean', axis=0)
imputer.fit(previsores[:, 0:3])

previsores[:, 0:3] = imputer.transform(previsores[:, 0:3])

In [10]:
## Fazendo o escalonamento (normalização) dos atributos
from sklearn.preprocessing import StandardScaler

# Padronização
scaler = StandardScaler()
previsores = scaler.fit_transform(previsores)

# Normalização
# from sklearn.preprocessing import MinMaxScaler
# scaler = MinMaxScaler()
# previsores = scaler.fit_transform(previsores)

In [11]:
# Dividindo os dados em treino e teste
from sklearn.model_selection import train_test_split

In [12]:
previsores_train, previsores_test, classe_train, classe_test = train_test_split(previsores, classe, test_size=0.25, random_state=0)

In [13]:
# Modelo Redes Neurais
from sklearn.neural_network import MLPClassifier

In [41]:
classificador = MLPClassifier(
    verbose=True,
    max_iter=500,
    tol=0.00001,
    solver='adam',
    hidden_layer_sizes=(100),
    activation='relu',
    )

In [42]:
classificador.fit(previsores_train, classe_train)

Iteration 1, loss = 0.72719090
Iteration 2, loss = 0.65396861
Iteration 3, loss = 0.59078723
Iteration 4, loss = 0.53645600
Iteration 5, loss = 0.49037435
Iteration 6, loss = 0.45015770
Iteration 7, loss = 0.41523607
Iteration 8, loss = 0.38456154
Iteration 9, loss = 0.35728487
Iteration 10, loss = 0.33327620
Iteration 11, loss = 0.31144185
Iteration 12, loss = 0.29199821
Iteration 13, loss = 0.27458335
Iteration 14, loss = 0.25900377
Iteration 15, loss = 0.24493808
Iteration 16, loss = 0.23237911
Iteration 17, loss = 0.22116095
Iteration 18, loss = 0.21098915
Iteration 19, loss = 0.20172527
Iteration 20, loss = 0.19338834
Iteration 21, loss = 0.18588585
Iteration 22, loss = 0.17889662
Iteration 23, loss = 0.17259952
Iteration 24, loss = 0.16677579
Iteration 25, loss = 0.16142745
Iteration 26, loss = 0.15654611
Iteration 27, loss = 0.15205232
Iteration 28, loss = 0.14778792
Iteration 29, loss = 0.14397278
Iteration 30, loss = 0.14034830
Iteration 31, loss = 0.13696992
Iteration 32, los

Iteration 265, loss = 0.02262664
Iteration 266, loss = 0.02249718
Iteration 267, loss = 0.02237529
Iteration 268, loss = 0.02232641
Iteration 269, loss = 0.02224431
Iteration 270, loss = 0.02214494
Iteration 271, loss = 0.02206688
Iteration 272, loss = 0.02204699
Iteration 273, loss = 0.02186943
Iteration 274, loss = 0.02181784
Iteration 275, loss = 0.02176429
Iteration 276, loss = 0.02168752
Iteration 277, loss = 0.02161718
Iteration 278, loss = 0.02150481
Iteration 279, loss = 0.02147178
Iteration 280, loss = 0.02136835
Iteration 281, loss = 0.02131431
Iteration 282, loss = 0.02124676
Iteration 283, loss = 0.02117947
Iteration 284, loss = 0.02121326
Iteration 285, loss = 0.02098908
Iteration 286, loss = 0.02093694
Iteration 287, loss = 0.02085439
Iteration 288, loss = 0.02077838
Iteration 289, loss = 0.02069367
Iteration 290, loss = 0.02065514
Iteration 291, loss = 0.02063742
Iteration 292, loss = 0.02046849
Iteration 293, loss = 0.02048004
Iteration 294, loss = 0.02038452
Iteration 

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=100, learning_rate='constant',
       learning_rate_init=0.001, max_iter=500, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=1e-05, validation_fraction=0.1,
       verbose=True, warm_start=False)

In [43]:
# Testando o modelo criado à partir dos dados de treinamento
previsoes = classificador.predict(previsores_test)

In [44]:
# Calculando a precisão do nosso modelo
from sklearn.metrics import confusion_matrix, accuracy_score

In [45]:
precisao = accuracy_score(classe_test, previsoes)
precisao

0.996

In [46]:
matriz = confusion_matrix(classe_test, previsoes)
matriz

array([[435,   1],
       [  1,  63]], dtype=int64)

## Resultado
### Redes Neurais (MLP Classifier - multi layer perceptron)
0.996 - max_iter=200, activation='relu', tol=0.0001, solver='adam'    
0.996 - max_iter=500, activation='relu', tol=0.00001, solver='adam'  
