# Trabalho 2 - Redes
Criar um detector de anomalias usando pytorch. Faça um experimento com um dataset. Neste experimento você deve gerar o gráfico com curvas de erro de treino e de validação. Você deve escolher o melhor modelo de acordo com este gráfico e aplicar num conjunto de teste separado este modelo. Você deve ao final saber a taxa de acerto de anomalias vc não anomalias. Utilize métricas como f1 por classe e mcc (mathews correlation coeficient).

Importante. Para a detecção de anomalias, é necessário a escolha (tanto devios padrão da média) ou calibração (conjunto de validação) de um limiar para o erro de reconstrução.

## Imports
Primeiro é preciso importar as bibliotecas e funções a serem utilizadas.

In [20]:
import torch
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
from torch import nn
import matplotlib.pyplot as plt
import pandas as pd
import torch.utils.data as data_utils
import numpy as np
from sklearn.model_selection import train_test_split

In [55]:
df = pd.read_csv("data/creditcard.csv")
df["Time"] = df["Time"].apply(lambda x : x / 3600 % 24)
y = df['Class']
X = df.drop('Class', axis = 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [60]:
train = torch.tensor(X_train.values.astype(np.float32))
train_target = torch.tensor(X_train.values.astype(np.float32))
train_tensor = data_utils.TensorDataset(train, train_target) 
train_loader = data_utils.DataLoader(dataset = train_tensor, batch_size = 64, shuffle = True)

In [61]:
test_target = torch.tensor(X_test.values.astype(np.float32))
test = torch.tensor(X_test.values.astype(np.float32)) 
test_tensor = data_utils.TensorDataset(test, test_target) 
test_loader = data_utils.DataLoader(dataset = test_tensor, batch_size = 64, shuffle = True)

In [5]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))

Using cpu device


In [46]:
class NeuralNetwork(nn.Module):
    
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.autoencoder = nn.Sequential(
            torch.nn.Linear(30, 16),
            torch.nn.ReLU(),
            torch.nn.Linear(16, 8),
            torch.nn.ReLU(),
            torch.nn.Linear(8, 4),
            torch.nn.Linear(4, 8),
            torch.nn.ReLU(),
            torch.nn.Linear(8, 16),
            torch.nn.ReLU(),
            torch.nn.Linear(16, 30)
        )
        
    def forward(self, x):
        logits = self.autoencoder(x)
        return logits

In [47]:
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (autoencoder): Sequential(
    (0): Linear(in_features=30, out_features=16, bias=True)
    (1): ReLU()
    (2): Linear(in_features=16, out_features=8, bias=True)
    (3): ReLU()
    (4): Linear(in_features=8, out_features=4, bias=True)
    (5): Linear(in_features=4, out_features=8, bias=True)
    (6): ReLU()
    (7): Linear(in_features=8, out_features=16, bias=True)
    (8): ReLU()
    (9): Linear(in_features=16, out_features=30, bias=True)
  )
)


In [48]:
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [83]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    loss_history = []
    batch_history = []
    
    for batch, (X, y) in enumerate(dataloader):

        pred = model(X)
        loss = loss_fn(pred, y)
        
            
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
                        
            loss_history.append(loss)
            batch_history.append(current)
            
    return loss_history, batch_history

In [84]:
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    test_loss, correct = 0, 0

    with torch.no_grad():
      for X, y in dataloader:
          pred = model(X)
          test_loss += loss_fn(pred, y).item()

    test_loss /= size
    correct   /= size  
    print(f"Test Error: \n Accuracy: {100*correct:>0.1f}%, Avg loss: {test_loss:>8f} \n")
    
    return test_loss, correct

In [86]:
epochs = 10
loss_test_history = []
acc_history = []
for t in range(epochs):
    print(f"Epoch {t+1}\n-----------------------------------")
    loss_train_history, batch_history = train_loop(train_loader, model, loss_fn, optimizer)
    loss, accuracy = test_loop(test_loader, model, loss_fn)
    loss_test_history.append(loss)
    acc_history.append(accuracy)

Epoch 1
-----------------------------------
loss: 1628.358765 [    0/227845]
loss: 1053.160522 [ 6400/227845]
loss: 1080.104858 [12800/227845]
loss: 712.869934 [19200/227845]
loss: 390.994080 [25600/227845]
loss: 550.815063 [32000/227845]
loss: 1697.056885 [38400/227845]
loss: 245.271011 [44800/227845]
loss: 334.425446 [51200/227845]
loss: 269.451080 [57600/227845]
loss: 1449.752441 [64000/227845]
loss: 1517.780640 [70400/227845]
loss: 3977.324707 [76800/227845]
loss: 727.254028 [83200/227845]
loss: 493.111267 [89600/227845]
loss: 2044.235718 [96000/227845]
loss: 1516.672363 [102400/227845]
loss: 1701.577637 [108800/227845]
loss: 1394.666870 [115200/227845]
loss: 35979.398438 [121600/227845]
loss: 356.709381 [128000/227845]
loss: 584.702393 [134400/227845]
loss: 341915.187500 [140800/227845]
loss: 348.034698 [147200/227845]
loss: 385.774689 [153600/227845]
loss: 1887.979614 [160000/227845]
loss: 510.031708 [166400/227845]
loss: 1007.337769 [172800/227845]
loss: 1422.181152 [179200/2278

loss: 974.345093 [89600/227845]
loss: 2057.885498 [96000/227845]
loss: 408.443695 [102400/227845]
loss: 319.407410 [108800/227845]
loss: 1858.038696 [115200/227845]
loss: 388.467651 [121600/227845]
loss: 1559.246826 [128000/227845]
loss: 399.226227 [134400/227845]
loss: 287.955292 [140800/227845]
loss: 215.487640 [147200/227845]
loss: 922.305237 [153600/227845]
loss: 3363.707764 [160000/227845]
loss: 1021.307007 [166400/227845]
loss: 295.991577 [172800/227845]
loss: 2549.912354 [179200/227845]
loss: 629.899780 [185600/227845]
loss: 231.352646 [192000/227845]
loss: 11781.158203 [198400/227845]
loss: 332.823730 [204800/227845]
loss: 669.564331 [211200/227845]
loss: 7509.319336 [217600/227845]
loss: 2571.292236 [224000/227845]
Test Error: 
 Accuracy: 0.0%, Avg loss: 27.559310 

Epoch 8
-----------------------------------
loss: 315.859985 [    0/227845]
loss: 1507.178467 [ 6400/227845]
loss: 812.703857 [12800/227845]
loss: 330.524048 [19200/227845]
loss: 343.722076 [25600/227845]
loss: 210