## Carregamento e visualização dos dados

In [55]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

dataset = pd.read_csv('/content/creditcard.csv', engine='python')
print(dataset['Time'].unique().sum())
dataset.info()

11509265043.0
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 31 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    284807 non-null  float64
 1   V1      284807 non-null  float64
 2   V2      284807 non-null  float64
 3   V3      284807 non-null  float64
 4   V4      284807 non-null  float64
 5   V5      284807 non-null  float64
 6   V6      284807 non-null  float64
 7   V7      284807 non-null  float64
 8   V8      284807 non-null  float64
 9   V9      284807 non-null  float64
 10  V10     284807 non-null  float64
 11  V11     284807 non-null  float64
 12  V12     284807 non-null  float64
 13  V13     284807 non-null  float64
 14  V14     284807 non-null  float64
 15  V15     284807 non-null  float64
 16  V16     284807 non-null  float64
 17  V17     284807 non-null  float64
 18  V18     284807 non-null  float64
 19  V19     284807 non-null  float64
 20  V20     284807 non-null  float64
 

Com base na documentação do dataset, sabe-se que ele é altamente desbalanceado. Das 284.807, apenas 492 são fraudes. Isso pode gerar um problema para a rede neural, que irá ser parcial para a classe em maior quantidade.

## Separe a label das features e o dataset em subsets de treinamento e teste

In [56]:
X = dataset.drop(['Time','Amount','Class'], axis=1).values
y = dataset['Class'].values.reshape(-1, 1)

print(X.shape)
print(y.shape)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

(284807, 28)
(284807, 1)


## Inicialização randômica dos pesos

In [3]:
def inicializacaoPesos(tamanhoDaCamada):
    pesos = []
    bias = []
    for i in range(len(tamanhoDaCamada) - 1):
        pesos.append(np.random.randn(tamanhoDaCamada[i], tamanhoDaCamada[i + 1]) * 0.01)
        bias.append(np.zeros((1, tamanhoDaCamada[i + 1])))
    return pesos, bias


## Defina a função de ativação e calcular sua derivada


In [4]:
def sigmoide(x):
    return 1 / (1 + np.exp(-x))

def sigmoideDerivada(x):
    return x * (1 - x)

def relu(x):
    return np.maximum(0, x)

def reluDerivada(x):
    return np.where(x > 0, 1, 0)

## Treine o modelo testando diferentes valores de épocas e learning rate, identificando quando acontece Overfitting e Underfitting

In [5]:
def feedforward(X, pesos, bias):
    activations = [X]
    input = X

    for i in range(len(pesos) - 1):
        z = np.dot(input, pesos[i]) + bias[i]
        a = relu(z)
        activations.append(a)
        input = a

    z = np.dot(input, pesos[-1]) + bias[-1]
    output = sigmoide(z)
    activations.append(output)

    return activations

def backpropagation(X, y, pesos, bias, activations, learningRate):
    m = X.shape[0]
    deltas = [None] * len(pesos)
    L = len(pesos) - 1

    output_error = activations[-1] - y
    deltas[L] = output_error * sigmoideDerivada(activations[-1])

    for i in range(L - 1, -1, -1):
        deltas[i] = np.dot(deltas[i + 1], pesos[i + 1].T) * reluDerivada(activations[i + 1])

    for i in range(len(pesos)):
        pesos[i] -= learningRate * np.dot(activations[i].T, deltas[i]) / m
        bias[i] -= learningRate * np.mean(deltas[i], axis=0, keepdims=True)

    return pesos, bias

In [11]:
def train(X, y, tamanhoDaCamada, learningRate, epocas):
    pesos, bias = inicializacaoPesos(tamanhoDaCamada)

    for epoca in range(epocas):
        activations = feedforward(X, pesos, bias)
        pesos, bias = backpropagation(X, y, pesos, bias, activations, learningRate)

        if epoca % 100 == 0:
            loss = np.mean((y - activations[-1])**2)
            print(f'Epoca {epoca}, Loss: {loss}')

    return pesos, bias

In [12]:
def predicao(X, pesos, bias):
    activations = feedforward(X, pesos, bias)
    return (activations[-1] > 0.5).astype(int)

## Fazer as previsões nos dados de teste e avaliar o modelo

Avalaiando conjunto de teste com Learning Ratte = 0.01 e 1000 epocas.

In [13]:
tamanhoDasCamadas = [X_train.shape[1], 16, 1]

pesos, bias = train(X_train, y_train, tamanhoDasCamadas, learningRate=0.01, epocas=1000)

y_pred = predicao(X_test, pesos, bias)

acuracia = np.mean(y_pred == y_test)
print(f'Accuracy: {acuracia * 100:.2f}%')

Epoca 0, Loss: 0.2498719778080381
Epoca 100, Loss: 0.22057816902627592
Epoca 200, Loss: 0.19507272359982356
Epoca 300, Loss: 0.17305983407141745
Epoca 400, Loss: 0.15415606384198333
Epoca 500, Loss: 0.13795431041975775
Epoca 600, Loss: 0.12406378047010848
Epoca 700, Loss: 0.1121308630085563
Epoca 800, Loss: 0.10184716077441329
Epoca 900, Loss: 0.092949858566766
Accuracy: 99.82%


Avaliando conjunto de train com Learning Ratte = 0.01 e 1000 epocas a fim de comparar com o conjunto de teste.

In [14]:
tamanhoDasCamadas = [X_train.shape[1], 16, 1]

pesos, bias = train(X_train, y_train, tamanhoDasCamadas, learningRate=0.01, epocas=1000)

y_predTrain = predicao(X_train, pesos, bias)

acuracia = np.mean(y_predTrain == y_train)
print(f'Accuracy: {acuracia * 100:.2f}%')

Epoca 0, Loss: 0.2500633973684645
Epoca 100, Loss: 0.22075361051536535
Epoca 200, Loss: 0.19523624353380858
Epoca 300, Loss: 0.17321463266827306
Epoca 400, Loss: 0.1543045940258705
Epoca 500, Loss: 0.1380983971903174
Epoca 600, Loss: 0.1242047655082461
Epoca 700, Loss: 0.1122697316068186
Epoca 800, Loss: 0.10198462661807348
Epoca 900, Loss: 0.09308641853621243
Accuracy: 99.83%


Avalaiando conjunto de teste com Learning Ratte = 0.1 e 500 epocas.

In [15]:
tamanhoDasCamadas = [X_train.shape[1], 16, 1]

pesos, bias = train(X_train, y_train, tamanhoDasCamadas, learningRate=0.1, epocas=500)

y_pred = predicao(X_test, pesos, bias)

acuracia = np.mean(y_pred == y_test)
print(f'Accuracy: {acuracia * 100:.2f}%')

Epoca 0, Loss: 0.2501434577265461
Epoca 100, Loss: 0.08528772440172797
Epoca 200, Loss: 0.043759151992386405
Epoca 300, Loss: 0.02824160502218727
Epoca 400, Loss: 0.020600900935516264
Accuracy: 99.82%


## Sabendo que o dataset é desbalanceado, podemos replicar os casos de fraude que acontecem menos frequentemente, porém o dataset dobraria de tamanho ficando com cerca de 400 mil observações. Outa alternativa ser seria diminuir o caso de maior frequencia, de forma a equilibrar o dataset, porém muita informação seria perdida nesse processo, ja que passaria de mais de 200 mil observações no total para cerca de 1000.

In [16]:
from sklearn.utils import resample


majoritaria = dataset[dataset['Class'] == 0]
minoritaria = dataset[dataset['Class'] == 1]

majoritariaUndersampled = resample(majoritaria, replace=False, n_samples=len(minoritaria), random_state=0)

datasetBalanceado = pd.concat([majoritariaUndersampled, minoritaria])

In [18]:
XBalanceado = datasetBalanceado.drop(['Time','Amount','Class'], axis=1).values
yBalanceado = datasetBalanceado['Class'].values.reshape(-1, 1)

print(XBalanceado.shape)
print(yBalanceado.shape)

X_trainBalanceado, X_testBalanceado, y_trainBalanceado, y_testBalanceado = train_test_split(XBalanceado, yBalanceado, test_size=0.2, random_state=0)

(984, 28)
(984, 1)


In [19]:
tamanhoDasCamadas = [X_trainBalanceado.shape[1], 16, 1]

pesos, bias = train(X_trainBalanceado, y_trainBalanceado, tamanhoDasCamadas, learningRate=0.01, epocas=1000)

y_predTrainBalanceado = predicao(X_trainBalanceado, pesos, bias)

acuracia = np.mean(y_predTrainBalanceado == y_trainBalanceado)
print(f'Accuracy: {acuracia * 100:.2f}%')

Epoca 0, Loss: 0.25044851598460577
Epoca 100, Loss: 0.24378136692544097
Epoca 200, Loss: 0.21014068896471325
Epoca 300, Loss: 0.17534971052408513
Epoca 400, Loss: 0.15891002546120958
Epoca 500, Loss: 0.14871303768818275
Epoca 600, Loss: 0.14112120320896482
Epoca 700, Loss: 0.13488314844034177
Epoca 800, Loss: 0.12948223983400264
Epoca 900, Loss: 0.1246755600216264
Accuracy: 93.77%


Ao realizar o Undersampling do dataset, o modelo perde bastante acurácia, porém a acurácia anterior poderia ser falsa, já que o valor obtido era o mesmo caso todos os casos fossem classificados como 0 (não fraude).

Testando outros valores de Learning Rate e épocas.

In [49]:
tamanhoDasCamadas = [X_trainBalanceado.shape[1], 16, 1]

pesos, bias = train(X_trainBalanceado, y_trainBalanceado, tamanhoDasCamadas, learningRate=0.99, epocas=3000)

y_predTrainBalanceado = predicao(X_trainBalanceado, pesos, bias)

acuracia = np.mean(y_predTrainBalanceado == y_trainBalanceado)
print(f'Accuracy: {acuracia * 100:.2f}%')

Epoca 0, Loss: 0.24978242025769673
Epoca 100, Loss: 0.05003636238278257
Epoca 200, Loss: 0.04347072894509122
Epoca 300, Loss: 0.04028155534177372
Epoca 400, Loss: 0.03723066006366523
Epoca 500, Loss: 0.0343070320624898
Epoca 600, Loss: 0.03179169563781678
Epoca 700, Loss: 0.029438631080737486
Epoca 800, Loss: 0.027100645548561668
Epoca 900, Loss: 0.024576687884413875
Epoca 1000, Loss: 0.02189546797718931
Epoca 1100, Loss: 0.019447585166789138
Epoca 1200, Loss: 0.0172511781670081
Epoca 1300, Loss: 0.015393770858675692
Epoca 1400, Loss: 0.01379752957481008
Epoca 1500, Loss: 0.012367076758453954
Epoca 1600, Loss: 0.011272122081481905
Epoca 1700, Loss: 0.01032979415967882
Epoca 1800, Loss: 0.009528889402792094
Epoca 1900, Loss: 0.008844587063769224
Epoca 2000, Loss: 0.008252275500557935
Epoca 2100, Loss: 0.007724947051533806
Epoca 2200, Loss: 0.007238530727709422
Epoca 2300, Loss: 0.006800925984719128
Epoca 2400, Loss: 0.006418673349381452
Epoca 2500, Loss: 0.006078810014524079
Epoca 2600,

Ao mudar o Learning Rate de 0.01 para 0.99 e o número de épocas de 1000 para 3000. temos uma grande melhora na acurácia, saindo de 93.77% para 99.75%.

## Utilizando TensorFlow para a implementação da rede neural.

In [57]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
print(X_train.shape)
model.add(Dense(16, input_dim=X_train.shape[1], activation='relu'))

model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

(227845, 28)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [58]:
model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2)

Epoch 1/100
[1m5697/5697[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 2ms/step - accuracy: 0.9829 - loss: 0.0750 - val_accuracy: 0.9993 - val_loss: 0.0038
Epoch 2/100
[1m5697/5697[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2ms/step - accuracy: 0.9992 - loss: 0.0043 - val_accuracy: 0.9993 - val_loss: 0.0033
Epoch 3/100
[1m5697/5697[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2ms/step - accuracy: 0.9995 - loss: 0.0026 - val_accuracy: 0.9994 - val_loss: 0.0031
Epoch 4/100
[1m5697/5697[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2ms/step - accuracy: 0.9994 - loss: 0.0031 - val_accuracy: 0.9990 - val_loss: 0.0036
Epoch 5/100
[1m5697/5697[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2ms/step - accuracy: 0.9992 - loss: 0.0038 - val_accuracy: 0.9988 - val_loss: 0.0038
Epoch 6/100
[1m5697/5697[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2ms/step - accuracy: 0.9994 - loss: 0.0028 - val_accuracy: 0.9994 - val_loss: 0.0029
Epoc

<keras.src.callbacks.history.History at 0x7e016b85ac50>

Testando o modelo no conjunto de dados de teste

In [59]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")

[1m1781/1781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9991 - loss: 0.0072
Loss: 0.0068, Accuracy: 0.9991


## Considerações finais:

O modelo utilizando o TensorFlow teve os melhores resultados, com uma acurácia de 0.9991. Porém os modelo feito from scratch tbm tiveram bons resultados. E o modelo from scratch cujo teve o dataset balanceado, teve um resultado parecido com uma acuracia de o.9975 e uma chance de ter overfiting nesse modelo baixíssima. E no modelo from scratch cujo dataset não foi balanceado, o ajuste da quantidade de épocas e o Learning Rate mostrou uma grande melhoria para a acurácia do modelo.