# Atividade 3 - CIS

Essa atividade tem como objetivo implementar um Perceptron, esse é o tipo mais simples de rede neural. Para implementar essa rede foi necessário entender conceitos como feedforward e backpropagation. Para testar esse modelo foi utilizado um conjunto de dados de cartões de créditos. Ao final da atividade foi criado outro modelo usando um TensorFlow para fins de comparação.

In [3]:
import numpy as np 
import pandas as pd
from imblearn.over_sampling import SMOTE
from collections import Counter

# Explorando o Dado

A primeira etapa foi explorar o dataset para ver como os dados estavam distribuidos.

In [4]:
df = pd.read_csv('/kaggle/input/creditcard/creditcard.csv')

In [7]:
df.describe()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
count,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,...,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0
mean,94813.859575,1.168375e-15,3.416908e-16,-1.379537e-15,2.074095e-15,9.604066e-16,1.487313e-15,-5.556467e-16,1.213481e-16,-2.406331e-15,...,1.654067e-16,-3.568593e-16,2.578648e-16,4.473266e-15,5.340915e-16,1.683437e-15,-3.660091e-16,-1.22739e-16,88.349619,0.001727
std,47488.145955,1.958696,1.651309,1.516255,1.415869,1.380247,1.332271,1.237094,1.194353,1.098632,...,0.734524,0.7257016,0.6244603,0.6056471,0.5212781,0.482227,0.4036325,0.3300833,250.120109,0.041527
min,0.0,-56.40751,-72.71573,-48.32559,-5.683171,-113.7433,-26.16051,-43.55724,-73.21672,-13.43407,...,-34.83038,-10.93314,-44.80774,-2.836627,-10.2954,-2.604551,-22.56568,-15.43008,0.0,0.0
25%,54201.5,-0.9203734,-0.5985499,-0.8903648,-0.8486401,-0.6915971,-0.7682956,-0.5540759,-0.2086297,-0.6430976,...,-0.2283949,-0.5423504,-0.1618463,-0.3545861,-0.3171451,-0.3269839,-0.07083953,-0.05295979,5.6,0.0
50%,84692.0,0.0181088,0.06548556,0.1798463,-0.01984653,-0.05433583,-0.2741871,0.04010308,0.02235804,-0.05142873,...,-0.02945017,0.006781943,-0.01119293,0.04097606,0.0165935,-0.05213911,0.001342146,0.01124383,22.0,0.0
75%,139320.5,1.315642,0.8037239,1.027196,0.7433413,0.6119264,0.3985649,0.5704361,0.3273459,0.597139,...,0.1863772,0.5285536,0.1476421,0.4395266,0.3507156,0.2409522,0.09104512,0.07827995,77.165,0.0
max,172792.0,2.45493,22.05773,9.382558,16.87534,34.80167,73.30163,120.5895,20.00721,15.59499,...,27.20284,10.50309,22.52841,4.584549,7.519589,3.517346,31.6122,33.84781,25691.16,1.0


In [9]:
df["Class"].value_counts().tolist()

[284315, 492]

É possível perceber que o dado é extremamente desbalanceado, 284315 são transações legítimas e apenas 492 são transações fraudulentas. Além disso, as features do dataset não apresentam labels que nos ajudam a entender o problema.

----

# Criando o Perceptron

In [164]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

class Perceptron():
    def __init__(self, data, target):
        self.inputs = data
        self.outputs = np.zeros((1,len(target)))[0]
        self.target = target
        self.input_layer = self.inputs[0]
        self.v_hidden_layer = [0, 0]
        self.weights_l1 = np.random.rand(1,2*len(self.input_layer))[0]
        self.weights_l2 = [0.1,0.1]
        self.output_v = [0]
        
    def hidden_layer(self):
        for i in range(len(self.v_hidden_layer)):
            for j in range(len(self.input_layer)):
                self.v_hidden_layer[i] += self.input_layer[j] * self.weights_l1[i*len(self.input_layer)+j]
            self.v_hidden_layer[i] = sigmoid(self.v_hidden_layer[i])

    def output(self):
        self.output_v[0] = sigmoid(np.dot(self.v_hidden_layer, self.weights_l2))
        
    def forward(self):
        for k in range(len(self.inputs)):
            self.input_layer = self.inputs[k]
            self.hidden_layer()
            self.output()
            self.outputs[k] = self.output_v[0]
        
    def loss(self):
        return np.mean(1/2*(np.array(p.outputs) - np.array(p.target))**2)
        
    def backpropagation(self, lr):
        for i in range(len(self.target)):
            self.weights_l2[0] += (self.outputs[i]-self.target[i])*self.outputs[i]*(1-self.outputs[i])*self.v_hidden_layer[0] * lr
            self.weights_l2[1] += (self.outputs[i]-self.target[i])*self.outputs[i]*(1-self.outputs[i])*self.v_hidden_layer[1] * lr
            self.weights_l1[0] += (self.outputs[i]-self.target[i])*self.outputs[i]*(1-self.outputs[i])*self.weights_l2[0]*self.v_hidden_layer[0]*(1*self.v_hidden_layer[0])*self.input_layer[0] * lr
            self.weights_l1[1] += (self.outputs[i]-self.target[i])*self.outputs[i]*(1-self.outputs[i])*self.weights_l2[0]*self.v_hidden_layer[0]*(1*self.v_hidden_layer[0])*self.input_layer[1] * lr
            self.weights_l1[2] += (self.outputs[i]-self.target[i])*self.outputs[i]*(1-self.outputs[i])*self.weights_l2[0]*self.v_hidden_layer[1]*(1*self.v_hidden_layer[1])*self.input_layer[0] * lr
            self.weights_l1[3] += (self.outputs[i]-self.target[i])*self.outputs[i]*(1-self.outputs[i])*self.weights_l2[0]*self.v_hidden_layer[1]*(1*self.v_hidden_layer[1])*self.input_layer[1] * lr
            
    def train(self, epoch, lr):
        for i in range(epoch):
            self.forward()
            print(f'loss = {self.loss()}')
            self.backpropagation(lr)
        

In [115]:
# Separando as features do target
df_x = df.drop('Class', axis = 1)
df_y = df['Class']

In [117]:
data = df_x.values.tolist()

In [170]:
#Realizando o treinamento da rede
p = Perceptron(data, df_y)
p.train(4, 0.1)

loss = 0.1510726241683578


  return 1/(1+np.exp(-x))


loss = 0.49913625718468996
loss = 0.49913625718468996
loss = 0.49913625718468996


In [171]:
# Verificando como estava a saída do modelo
p.outputs

array([1., 1., 1., ..., 1., 1., 1.])

# Aplicando o Oversampling

Para balancear o conjunto de dados eu fiz uso da técnica de oversampling, já que na última atividade essa técnica trouxe uma melhora significativa para o modelo.

In [148]:
data_x = df_x.values
target = df_y.values

In [149]:
smote = SMOTE(random_state=42, k_neighbors=1)

X_smote, y_smote = smote.fit_resample(data_x, target)

In [166]:
#Realizando o treinamento da rede
p = Perceptron(X_smote, y_smote)
p.train(4, 0.1)

loss = 0.12624171141326673


  return 1/(1+np.exp(-x))


loss = 0.25
loss = 0.25
loss = 0.25


In [161]:
np.count_nonzero(p.outputs)

0

Em ambos os treinamentos os modelos não se saíram bem, acredito que esse problema seja causado por algum erro na implementação do algoritmo Perceptron. 

# Usando o TensorFlow

In [None]:
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split

Nessa parte eu fiz uso do tensorflow para criar um modelo e tentar obter melhores resultados.

In [194]:
#Separando o dado em treinamento e validação
X_train, X_test, y_train, y_test = train_test_split(X_smote, y_smote, test_size=0.2, random_state=42)

In [195]:
model = keras.Sequential([
    keras.layers.Dense(10, input_shape=(30,), activation='sigmoid')
])

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']) #Como o dado ficou balanceado optei por continuar utilizando a acurácia como métrica
  
model.fit(X_train, y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7c88b424c520>

In [196]:
model.evaluate(X_test, y_test)



[1.8540579080581665, 0.9623041152954102]

Usando o TensorFlow com Oversampling foi possível consegui um resultado bem melhor (Acurácia de 96%).

# Conclusão 

Nessa atividade foi criado uma rede neural Perceptron do zero, para isso foi necessário entender os conceitos das redes neurais de forma mais profunda, como o backpropagation. Devido a isso, foi uma atividade que eu encontrei mais dificuldade.