
# Treinamento CIS - 4º Período (Redes Neurais)
---

Nome: Gustavo M. Tonnera

Repositório: https://github.com/GMTonnera/ieee-cis-trainee

## Imports

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

Importar os datasets do Google Drive

In [2]:
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


## Funções auxiliares

In [3]:
def checkNullValues(df):
    """Report how many values are missing in each column of ``df``.

    Prints one ``<column> = <count>`` line for every column that contains at
    least one null value. If no column has nulls, prints a single message
    saying that no data is missing. Nothing is returned.
    """
    found_missing = False
    for name in df.columns.values:
        missing_count = df[name].isnull().sum()
        if missing_count <= 0:
            # Complete column: nothing to report.
            continue
        print(f'{name} = {missing_count}')
        found_missing = True

    if found_missing:
        return
    print("Nenhum dado faltante!")

def fillNullWithMode(df, columns):
    """Return a copy of ``df`` with nulls in ``columns`` replaced by the mode.

    Args:
        df: DataFrame to clean. It is copied, never modified.
        columns: Iterable of column names whose missing values are filled.

    Returns:
        A new DataFrame in which every listed column has its nulls replaced
        by the first value returned by ``Series.mode()`` for that column.
        A column without any non-null value has no mode and is left as is.
    """
    newDf = df.copy()
    for column in columns:
        modes = newDf[column].mode()
        if modes.empty:
            # All values are null: there is nothing to fill with.
            continue
        newDf[column] = newDf[column].fillna(modes.iloc[0])

    return newDf

def fillNullWithMedian(df, columns):
    """Return a copy of ``df`` with nulls in ``columns`` replaced by the median.

    Args:
        df: DataFrame to clean. It is copied, never modified.
        columns: Iterable of column names whose missing values are filled.

    Returns:
        A new DataFrame where each listed column has its nulls replaced by
        that column's median.
    """
    filled = df.copy()
    for name in columns:
        series = filled[name]
        filled[name] = series.fillna(series.median())
    return filled

def barChart(xValues, yValues, yLabel, title, yRange, rotation=0):
    """Draw and show a vertical bar chart.

    Args:
        xValues: Positions/labels of the bars on the x axis.
        yValues: Heights of the bars.
        yLabel: Label of the y axis.
        title: Title of the chart.
        yRange: Limits of the y axis, passed as-is to ``plt.ylim``.
        rotation: Rotation, in degrees, of the x tick labels.
    """
    fig, ax = plt.subplots(figsize=(15, 5))
    ax.bar(xValues, yValues)

    # Honour the caller's rotation; it used to be ignored in favour of a
    # hard-coded 90 degrees.
    plt.xticks(rotation=rotation)
    ax.set_ylabel(yLabel)
    ax.set_title(title)
    plt.ylim(yRange)

    plt.show()


def horizontalBarChart(yValues, xValues, title, yLabel, xLabel):
    """Draw and show a horizontal bar chart on a tall (5 x 15) figure.

    Args:
        yValues: Positions/labels of the bars on the y axis.
        xValues: Lengths of the bars.
        title: Title of the chart.
        yLabel: Label of the y axis.
        xLabel: Label of the x axis.
    """
    fig, ax = plt.subplots(figsize=(5, 15))
    ax.barh(yValues, xValues)
    ax.set_title(title)
    ax.set_ylabel(yLabel)
    ax.set_xlabel(xLabel)
    plt.show()


def oneHotEncoding(df, columns):
    """Return a copy of ``df`` with ``columns`` replaced by 0/1 indicator columns.

    Each listed column is dropped and one integer column per distinct value
    is joined to the frame, named ``<column>-<value>``.

    Args:
        df: DataFrame to encode. It is copied, never modified.
        columns: Iterable of column names to one-hot encode.

    Returns:
        The encoded copy of ``df``.
    """
    encoded = df.copy()
    for column in columns:
        dummies = pd.get_dummies(encoded[column])
        # Prefix every indicator with the source column name.
        dummies = dummies.rename(columns=lambda level: f'{column}-{level}')
        # Store the indicators as integers.
        dummies = dummies.astype(int)
        encoded = encoded.drop(column, axis=1).join(dummies)

    return encoded

## Datasets

In [4]:
# Show every column when a DataFrame is displayed instead of truncating.
pd.set_option("display.max_columns", None)

# Load the credit-card dataset from the mounted Google Drive folder.
TRAINING_DATAFRAME = pd.read_csv(
    '/content/drive/MyDrive/datasets/neuralNetwork/creditcard.csv'
)

In [5]:
# Display the first rows of the loaded dataset.
TRAINING_DATAFRAME.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,0.090794,-0.5516,-0.617801,-0.99139,-0.311169,1.468177,-0.470401,0.207971,0.025791,0.403993,0.251412,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,-0.166974,1.612727,1.065235,0.489095,-0.143772,0.635558,0.463917,-0.114805,-0.183361,-0.145783,-0.069083,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,0.207643,0.624501,0.066084,0.717293,-0.165946,2.345865,-2.890083,1.109969,-0.121359,-2.261857,0.52498,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,-0.054952,-0.226487,0.178228,0.507757,-0.287924,-0.631418,-1.059647,-0.684093,1.965775,-1.232622,-0.208038,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,0.753074,-0.822843,0.538196,1.345852,-1.11967,0.175121,-0.451449,-0.237033,-0.038195,0.803487,0.408542,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


## Tratamento de dados

Como descrito na documentação do dataset, as colunas V1,...,V28 são os componentes principais da aplicação de um PCA. Por isso, essas colunas não serão tratadas.

Nas colunas "Time" e "Amount", será aplicada uma normalização do tipo Min-Max, para que os valores fiquem no intervalo [0, 1].

In [6]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

# Rescale each raw column on its own; the scaler is re-fit for every column,
# so after the loop it holds the statistics of the last one ('Amount').
# NOTE(review): the scaler is fit on the whole dataset, before the train/test
# split done later in the notebook, so test-set statistics influence the
# scaling - confirm this is acceptable.
for column_name in ('Time', 'Amount'):
    column_values = TRAINING_DATAFRAME[column_name].values.reshape(-1, 1)
    TRAINING_DATAFRAME[column_name] = scaler.fit_transform(column_values)

TRAINING_DATAFRAME.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,0.090794,-0.5516,-0.617801,-0.99139,-0.311169,1.468177,-0.470401,0.207971,0.025791,0.403993,0.251412,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,0.005824,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,-0.166974,1.612727,1.065235,0.489095,-0.143772,0.635558,0.463917,-0.114805,-0.183361,-0.145783,-0.069083,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,0.000105,0
2,6e-06,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,0.207643,0.624501,0.066084,0.717293,-0.165946,2.345865,-2.890083,1.109969,-0.121359,-2.261857,0.52498,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,0.014739,0
3,6e-06,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,-0.054952,-0.226487,0.178228,0.507757,-0.287924,-0.631418,-1.059647,-0.684093,1.965775,-1.232622,-0.208038,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,0.004807,0
4,1.2e-05,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,0.753074,-0.822843,0.538196,1.345852,-1.11967,0.175121,-0.451449,-0.237033,-0.038195,0.803487,0.408542,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,0.002724,0


## Tarefas

### 1. Criar uma rede neural “from scratch” de classificação binária para prever fraudes nas transações com cartões de crédito. Use como embasamento a playlist Neural Networks from Scratch in Python.

#### a) A rede deve conter uma camada oculta (quantidade de neurônios a critério).

#### b) Separe a label das features e o dataset em subsets de treinamento e teste;

#### c) Inicialização randômica dos pesos;

#### d) Defina a função de ativação e calcule sua derivada (sinta-se à vontade para experimentar mais de uma);

#### e) Treine o modelo testando diferentes valores de épocas e learning rate, identificando quando acontece Overfitting e Underfitting.

### Rede Neural

In [46]:
from math import exp

class Layer:
    """Fully connected layer holding a weight matrix and a bias row."""

    def __init__(self, n_inputs, n_neurons):
        """Create the layer parameters.

        Args:
            n_inputs: Number of input features (rows of the weight matrix).
            n_neurons: Number of neurons (columns of the weight matrix).
        """
        weight_shape = (n_inputs, n_neurons)
        bias_shape = (1, n_neurons)
        # c) Random weight initialization: uniform samples in [0, 1).
        self.weights = np.random.rand(*weight_shape)
        # Biases start at zero.
        self.biases = np.zeros(bias_shape)

    def forward(self, inputs):
        """Return ``inputs . weights + biases`` for a batch of inputs."""
        weighted_sum = np.dot(inputs, self.weights)
        return weighted_sum + self.biases

# d) Funcao de ativacao
class Activation_ReLU:
    """Rectified linear unit activation and its derivative."""

    def forward(self, inputs):
        """Return ``inputs`` with every negative entry replaced by 0."""
        return np.maximum(0, inputs)

    @staticmethod
    def derivative(x):
        """Return the element-wise ReLU derivative: 1 where ``x > 0``, else 0.

        Declared as a static method so it can be called both on the class
        (``Activation_ReLU.derivative(x)``) and on an instance; without the
        decorator the instance call raises ``TypeError`` because the function
        takes no ``self``.
        """
        return np.where(x > 0, 1, 0)


class Softmax:
    """Softmax activation applied to each row of a 2-D batch."""

    def forward(self, inputs):
        """Normalize each row of ``inputs`` so that it sums to 1."""
        # Shift every row by its maximum before exponentiating so the
        # largest exponent is exp(0); the result of softmax is unchanged.
        row_max = np.max(inputs, axis=1, keepdims=True)
        unnormalized = np.exp(inputs - row_max)
        row_totals = np.sum(unnormalized, axis=1, keepdims=True)
        return unnormalized / row_totals


class Loss:
    """Base class for losses; subclasses provide ``forward(output, y)``."""

    def calculate(self, output, y):
        """Return the mean of the per-sample losses from ``self.forward``."""
        sample_losses = self.forward(output, y)
        return np.mean(sample_losses)


class Loss_CategoricalCrossentropy(Loss):
    """Categorical cross-entropy over rows of predicted probabilities."""

    def forward(self, y_pred, y_true):
        """Return the per-sample negative log-likelihood of the true class.

        Args:
            y_pred: 2-D array of predicted probabilities, one row per sample.
            y_true: Either one-hot targets with the same shape as ``y_pred``
                or a 1-D sequence of integer class indices.

        Returns:
            1-D array with ``-log(p_true)`` for each sample.
        """
        # Clip away exact 0 and 1 so the logarithm never returns inf.
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        labels = np.asarray(y_true)
        if labels.ndim == 1:
            # Sparse labels: pick the probability at each sample's class index.
            probabilities = np.asarray(y_pred_clipped)
            correct_confidences = probabilities[np.arange(len(probabilities)), labels]
        else:
            # One-hot labels: the mask zeroes every probability but the true one.
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)

        return -np.log(correct_confidences)


def train(epochs, learing_rate):
    """Train the module-level network with full-batch gradient descent.

    The function works on names defined at module level instead of on
    arguments: it reads ``x_train``, ``y_train``, ``activation_function`` and
    ``softmax``, and updates the weights and biases of ``hidden_layer`` and
    ``output_layer`` in place.

    Args:
        epochs: Index of the last epoch. The loop runs iterations
            ``0..epochs`` inclusive, i.e. ``epochs + 1`` updates.
        learing_rate: Step size applied to every gradient. The misspelled
            name is kept so existing keyword callers keep working.

    Returns:
        None. The training loss is printed about ten times over the run.
    """
    # Use the argument itself; the body used to read a global named
    # ``learning_rate`` and silently ignore this parameter.
    learning_rate = learing_rate
    n_samples = x_train.shape[0]
    # Guard against ``epochs < 10``, where ``epochs // 10`` is 0 and the
    # modulo below would raise ZeroDivisionError.
    log_every = max(1, epochs // 10)
    # The loss object is stateless, so build it once instead of per epoch.
    loss = Loss_CategoricalCrossentropy()

    for i in range(epochs + 1):
        # Forward pass through the hidden layer
        hidden_output = hidden_layer.forward(x_train)
        activation_output = activation_function.forward(hidden_output)

        # Forward pass through the output layer
        output = output_layer.forward(activation_output)
        softmax_output = softmax.forward(output)

        # Loss of the current parameters (computed before this epoch's update)
        loss_value = loss.calculate(softmax_output, y_train)

        # Backpropagation
        ## Error at the output layer: gradient of softmax + cross-entropy
        ## with respect to the output-layer pre-activations
        output_layer_error = softmax_output - y_train
        ## Gradients of the output-layer weights and biases
        gradient_weights_output_layer = np.dot(activation_output.T, output_layer_error) / n_samples
        gradient_biases_output_layer = np.sum(output_layer_error, axis=0, keepdims=True) / n_samples

        ## Error propagated back to the hidden layer, gated by the ReLU derivative
        hidden_layer_error = np.dot(output_layer_error, output_layer.weights.T) * Activation_ReLU.derivative(hidden_output)
        ## Gradients of the hidden-layer weights and biases
        gradient_weights_hidden_layer = np.dot(x_train.T, hidden_layer_error) / n_samples
        gradient_biases_hidden_layer = np.sum(hidden_layer_error, axis=0, keepdims=True) / n_samples

        # Gradient-descent update of weights and biases
        output_layer.weights -= learning_rate * gradient_weights_output_layer
        output_layer.biases -= learning_rate * gradient_biases_output_layer
        hidden_layer.weights -= learning_rate * gradient_weights_hidden_layer
        hidden_layer.biases -= learning_rate * gradient_biases_hidden_layer

        if i % log_every == 0:
            print(f'Epoch: {i}, Loss: {loss_value}')

In [39]:
from sklearn.model_selection import train_test_split

# b) Separate the label from the features ...
y = TRAINING_DATAFRAME['Class']
X = TRAINING_DATAFRAME.drop(columns='Class')

# ... and split the dataset into training (80%) and test (20%) subsets.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=57
)

# One-hot encode the labels: row k of the 2x2 identity matrix encodes class k.
identity = np.eye(2)
y_train = identity[y_train]
y_test = identity[y_test]

In [44]:
# Number of training iterations
epochs = 100

# Activation function of the output layer
softmax = Softmax()
# Activation function of the hidden layer
activation_function = Activation_ReLU()

In [48]:
# Learning rates to compare: 0.5, 1.0, ..., 5.0
learning_rates = [0.5 * step for step in range(1, 11)]
for learning_rate in learning_rates:
    print(f"Learning rate = {learning_rate}")
    # Start every run from freshly initialized layers:
    # one hidden layer of 50 neurons and a 2-neuron output layer.
    hidden_layer = Layer(X.shape[1], 50)
    output_layer = Layer(50, 2)
    # Train the network
    train(epochs, learning_rate)
    # Evaluate on the test subset: same forward pass used during training
    hidden_output = hidden_layer.forward(x_test)
    activation_output = activation_function.forward(hidden_output)
    output = output_layer.forward(activation_output)
    softmax_output = softmax.forward(output)
    # Test loss, followed by a blank separator line
    loss = Loss_CategoricalCrossentropy()
    loss_value = loss.calculate(softmax_output, y_test)
    print(f'Test Loss = {loss_value}\n')

Learning rate = 0.5
Epoch: 0, Loss: 0.4814454532297881
Epoch: 10, Loss: 0.061093756887525816
Epoch: 20, Loss: 0.04188978872810692
Epoch: 30, Loss: 0.03292986861222469
Epoch: 40, Loss: 0.02778192050316156
Epoch: 50, Loss: 0.024461501047712584
Epoch: 60, Loss: 0.022153708463583457
Epoch: 70, Loss: 0.020463451718102106
Epoch: 80, Loss: 0.01917612030454221
Epoch: 90, Loss: 0.01816537741633864
Epoch: 100, Loss: 0.01735198380501993
Test Loss = 0.015924691040830913

Learning rate = 1.0
Epoch: 0, Loss: 0.5462044469113057
Epoch: 10, Loss: 0.03650615397229705
Epoch: 20, Loss: 0.02622809623355745
Epoch: 30, Loss: 0.021431924414266938
Epoch: 40, Loss: 0.01870923973608758
Epoch: 50, Loss: 0.01696747461749843
Epoch: 60, Loss: 0.01575897213231886
Epoch: 70, Loss: 0.014865043109083542
Epoch: 80, Loss: 0.014167795220098872
Epoch: 90, Loss: 0.013591590132248818
Epoch: 100, Loss: 0.013101335664510088
Test Loss = 0.012082279578702303

Learning rate = 1.5
Epoch: 0, Loss: 2.056477461320617
Epoch: 10, Loss: 

O modelo apresenta overfitting para todos os valores de "learning_rate" testados. Isso ocorre porque o dataset é fortemente desbalanceado, ou seja, possui muito mais amostras de uma classe do que da outra: a classe que representa uma transação fraudulenta possui menos de 500 amostras, enquanto a classe que representa uma transação não fraudulenta possui mais de 280.000 amostras.

In [49]:
# Count how many samples belong to each class.
TRAINING_DATAFRAME['Class'].value_counts()

Unnamed: 0_level_0,count
Class,Unnamed: 1_level_1
0,284315
1,492
