In [1]:
import pandas as pd

In [2]:
dados = pd.read_csv('.\Dados\creditcard.csv')
dados.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [3]:
n_transacoes = dados['Class'].count()
n_fraudes = dados['Class'].sum()
n_normais = n_transacoes - n_fraudes
fraude_porc = round((n_fraudes / n_transacoes)*100, 2)
normais_porc = round((n_normais / n_transacoes)*100, 2) 

print(f"Número de transações: {n_transacoes}")
print(f"Número de transações fraudes: {n_fraudes}")
print(f"Número de transações normais: {n_normais}")
print(f"Porcentagem de transações fraudes: {fraude_porc}%")
print(f"Porcentagem de transações normais: {normais_porc}%")

Número de transações: 284807
Número de transações fraudes: 492
Número de transações normais: 284315
Porcentagem de transações fraudes: 0.17%
Porcentagem de transações normais: 99.83%


In [4]:
from sklearn.model_selection import StratifiedShuffleSplit

def executarValidador(x, y):
    validador = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=0)
    for treino_id, teste_id in validador.split(x, y):
        x_train, x_test = x[treino_id], x[teste_id]
        y_train, y_test = y[treino_id], y[teste_id]
    return x_train, x_test, y_train, y_test

In [5]:
%%time
from sklearn import tree

def executarClassificador(classificador, x_train, x_test, y_train):
    arvore = classificador.fit(x_train, y_train)
    y_pred = arvore.predict(x_test)
    return y_pred

CPU times: total: 156 ms
Wall time: 270 ms


In [6]:
import matplotlib.pyplot as plt

def salvarArvore(classificador, nome):
    plt.figure(figsize=(200,100))
    tree.plot_tree(classificador, filled=True, fontsize=14)
    plt.savefig(nome)
    plt.close()

In [7]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

def validarArvore(y_test, y_pred):
    print("Accuracy Score: ", accuracy_score(y_test, y_pred))
    print()
    print("Precision Score: ", precision_score(y_test, y_pred))
    print()
    print("Recall Score: ", recall_score(y_test, y_pred))
    print()
    print(confusion_matrix(y_test, y_pred))

In [8]:
#execucao do validador
x = dados.drop('Class', axis=1).values
y = dados['Class'].values
x_train, x_test, y_train, y_test = executarValidador(x, y)

In [9]:
#execucao do classificador
classificador_arvore_decisao = tree.DecisionTreeClassifier()
y_pred_arvore_decisao = executarClassificador(classificador_arvore_decisao, x_train, x_test, y_train)

In [10]:
#criação da figura da arvore de decisão
salvarArvore(classificador_arvore_decisao, "arvore_decisao1.png")

In [11]:
#validação da arvore de decisao
validarArvore(y_test, y_pred_arvore_decisao)

Accuracy Score:  0.9990871107053826

Precision Score:  0.7555555555555555

Recall Score:  0.6938775510204082

[[28421    11]
 [   15    34]]


In [12]:
print(classificador_arvore_decisao)
print(classificador_arvore_decisao.get_depth())

DecisionTreeClassifier()
21


In [13]:
%%time
#execucao do classificador
classificador_arvore_decisao = tree.DecisionTreeClassifier(max_depth=10, random_state=0)
y_pred_arvore_decisao = executarClassificador(classificador_arvore_decisao, x_train, x_test, y_train)

CPU times: total: 13.3 s
Wall time: 13.4 s


In [14]:
validarArvore(y_test, y_pred_arvore_decisao)

Accuracy Score:  0.9994733330992591

Precision Score:  0.9473684210526315

Recall Score:  0.7346938775510204

[[28430     2]
 [   13    36]]


In [15]:
#execucao do classificador
classificador_arvore_decisao = tree.DecisionTreeClassifier(max_depth=10, random_state=0, min_samples_leaf=10)
y_pred_arvore_decisao = executarClassificador(classificador_arvore_decisao, x_train, x_test, y_train)

In [16]:
validarArvore(y_test, y_pred_arvore_decisao)

Accuracy Score:  0.9993679997191109

Precision Score:  0.8604651162790697

Recall Score:  0.7551020408163265

[[28426     6]
 [   12    37]]


In [17]:
#execucao do classificador
classificador_arvore_decisao = tree.DecisionTreeClassifier(max_depth=5, random_state=0)
y_pred_arvore_decisao = executarClassificador(classificador_arvore_decisao, x_train, x_test, y_train)

In [18]:
validarArvore(y_test, y_pred_arvore_decisao)

Accuracy Score:  0.999403110845827

Precision Score:  0.9210526315789473

Recall Score:  0.7142857142857143

[[28429     3]
 [   14    35]]


In [19]:
from sklearn.ensemble import RandomForestClassifier

In [20]:
%%time
classificador_random_forest = RandomForestClassifier(n_estimators=100, random_state=0)
y_pred_random_forest = executarClassificador(classificador_random_forest, x_train, x_test, y_train)

CPU times: total: 5min 16s
Wall time: 5min 49s


In [21]:
salvarArvore(classificador_random_forest.estimators_[0], "random_forest1.png")
salvarArvore(classificador_random_forest.estimators_[1], "random_forest2.png")

In [22]:
validarArvore(y_test, y_pred_random_forest)

Accuracy Score:  0.9995084442259752

Precision Score:  0.9487179487179487

Recall Score:  0.7551020408163265

[[28430     2]
 [   12    37]]


In [23]:
%%time
classificador_random_forest = RandomForestClassifier(n_estimators=50, random_state=0, max_depth=10)
y_pred_random_forest = executarClassificador(classificador_random_forest, x_train, x_test, y_train)

CPU times: total: 1min 23s
Wall time: 1min 24s


In [24]:
validarArvore(y_test, y_pred_random_forest)

Accuracy Score:  0.9995435553526912

Precision Score:  0.9736842105263158

Recall Score:  0.7551020408163265

[[28431     1]
 [   12    37]]


In [25]:
from sklearn.ensemble import AdaBoostClassifier

In [27]:
%%time
classificador_adaboost = AdaBoostClassifier(random_state=0)
y_pred_adaboost = executarClassificador(classificador_adaboost, x_train, x_test, y_train)

CPU times: total: 1min 29s
Wall time: 1min 34s


In [29]:
salvarArvore(classificador_adaboost.estimators_[0], "adaboost1")
salvarArvore(classificador_adaboost.estimators_[1], "adaboost2")

In [30]:
validarArvore(y_test, y_pred_adaboost)

Accuracy Score:  0.9992626663389628

Precision Score:  0.8888888888888888

Recall Score:  0.6530612244897959

[[28428     4]
 [   17    32]]


In [31]:
%%time
classificador_adaboost = AdaBoostClassifier(random_state=0, n_estimators=100)
y_pred_adaboost = executarClassificador(classificador_adaboost, x_train, x_test, y_train)

CPU times: total: 2min 58s
Wall time: 3min 9s


In [32]:
validarArvore(y_test, y_pred_adaboost)

Accuracy Score:  0.999403110845827

Precision Score:  0.8636363636363636

Recall Score:  0.7755102040816326

[[28426     6]
 [   11    38]]
