In [1]:
from torch import nn
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import confusion_matrix
from tqdm import tqdm
from typing import Tuple
from datetime import datetime as dt

import random 
import torch.optim as optim
import torch
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
SEED = 1701
EPOCHS = 20
MODEL_REPO = "/home/luizp/projects/pibit/src/cleaner/cicClean.csv"
BATCH_SIZE = 512
LEARNING_RATE = 1e-3

In [3]:
class ToImage:
    def __call__(self, array: torch.Tensor, keep_normalization=True):
        """ The idea is to convert a 1d array to a 2d array by resizing (with padding) to the square root of the 1d shape

        Ex: 
            - shape: 2048  
            - sqrt(shape) = 45.25 -> round to ceil (46)
            - resize the feature vector to 46x46 
            - return the new feature vector as a RGB PIL Image for torchvision transforms
         """
        #feat = array.shape[0]
        feat = array.shape[0]
        n = int(np.ceil(feat ** 0.5))

        array = array.cpu().numpy().copy()
        
        # Squared size with padding
        array.resize((n, n))
        if not keep_normalization:
            return (array * 255).astype(np.uint8)

        return torch.Tensor(array.astype(np.float32)).unsqueeze(0)

In [4]:
class CustomDataset(Dataset):
    """ Custom dataset class used for applying transforms to the features. """
    def __init__(self, subset: Tuple[torch.Tensor, torch.Tensor], transform=None):
        self.subset = subset
        self.transform = transform
        
    def __getitem__(self, index):
        x, y = self.subset[0][index], self.subset[1][index]
    
        if self.transform:
            x = self.transform(x)

        return x, y
        
    def __len__(self):
        return self.subset[0].size(0)

In [5]:
def validate(device: str, epoch: int, loss_fn, MODEL, dataset: DataLoader):
    # Validation
    MODEL.eval()
    it_eval = tqdm(enumerate(dataset), total=len(dataset))
    running_loss = 0.
    correct = 0
    qt = 1
    metrics = dict(tp=0, tn=0, fp=0, fn=0)
    y_pred = list()
    y_true = list()
    with torch.no_grad():
        for _, (x, y) in it_eval:
            x = x.to(device)
            y = y.to(device)

            output = MODEL(x)
            running_loss += loss_fn(output, y).item()
            y_pred.extend(torch.argmax(output, 1).cpu().numpy())
            y_true.extend(y.data.cpu().numpy())
            correct += torch.sum(torch.argmax(output, 1).eq(y)).item()
            qt += len(x)
            desc = f"[{now()}] Epoch {str(epoch).zfill(3)} Val. Acc: {correct/qt:.4f} Val. Loss: {running_loss / len(dataset):.8f}"
            it_eval.set_description(desc)
    
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    metrics["tp"] = tp
    metrics["fp"] = fp
    metrics["tn"] = tn
    metrics["fn"] = fn
    return running_loss / len(dataset), correct/qt, metrics

In [6]:
def now():
    return dt.now().strftime("%d-%m-%Y %H-%M-%S")

In [7]:
def train(device: str, epoch: int, optimizer, loss_fn, MODEL, dataset: DataLoader):
    # Put the MODEL in the training mode
    MODEL.train()
    running_loss = 0.
    qt = 1
    correct = 0

    # Just add a fancy progress bar to the terminal...
    it = tqdm(enumerate(dataset), total=len(dataset))

    for _, (x, y) in it:
        x = x.to(device)
        y = y.to(device)
        
        # Make predictions for this batch
        outputs = MODEL(x)

        # Zero your gradients for every batch!
        optimizer.zero_grad()
        loss = loss_fn(outputs, y)
        loss.backward()

        # Adjust learning weights
        optimizer.step()
        correct += torch.sum(torch.argmax(outputs, 1).eq(y)).item()
        qt += len(x)
    
        # Gather data and report
        running_loss += loss.item()

        desc = f"[{now()}] Epoch {str(epoch).zfill(3)} Acc: {correct/qt:.4f} Loss: {running_loss / len(dataset):.8f}"
        it.set_description(desc)
    
    # Loss / Accuracy
    return running_loss / len(dataset), correct/qt

In [8]:
class AlexNet(nn.Module):
  def __init__(self, num_classes=2):
    super(AlexNet, self).__init__()
    self.features = nn.Sequential(
      nn.Conv2d(1, 128, kernel_size=7, stride=4, padding=2),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=1, stride=2),
        nn.Conv2d(128, 128, kernel_size=1, padding=2),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.Conv2d(128, 128, kernel_size=2, padding=1),
        nn.ReLU(inplace=True),            
        nn.Conv2d(128, 128, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),            
        nn.Conv2d(128, 128, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
    )
        
    self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
    self.classifier = nn.Sequential(
      nn.Dropout(),
      nn.Linear(4608, 128),
      nn.ReLU(inplace=True),            
      nn.Dropout(),
      nn.Linear(128, 128),
      nn.ReLU(inplace=True),            
      nn.Linear(128, num_classes)
    )

  def forward(self, x):
    x = self.features(x)
    x = self.avgpool(x)
    x = torch.flatten(x, 1)
    x = self.classifier(x)
    return x

In [9]:
torch.cuda.is_available()

True

In [11]:
def no_update():
    # Reproducibility
    torch.manual_seed(SEED)
    np.random.seed(SEED)
    random.seed(SEED)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(device)

    MODEL = AlexNet().to('cuda')

    #if MODEL is None or not isinstance(nn.Module):
    #    raise TypeError("The model does not exist or isn't an instance of nn.Module (PyTorch)")
    
    transform = transforms.Compose([
        ToImage(),
    ])
    csv_path = "/home/luizp/projects/pibit/src/cleaner/cicClean.csv"
    df = pd.read_csv(csv_path)
    y = df.iloc[:, -1].values
    x = df.iloc[:, :-1].values
    y = y[::-1]
    x = np.squeeze(x)
    #x = x.reshape((1, -1))
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.1, random_state=42
    )

    #raise Exception("Precisa carregar e separar (treino, validação e teste) um dataset qualquer para executar")
    x_train = torch.Tensor(x_train)
    y_train = torch.LongTensor(y_train)
    x_test  = torch.Tensor(x_test)
    y_test  = torch.LongTensor(y_test)
    #x_val   = torch.Tensor(x_val)
    #y_val   = torch.LongTensor(y_val)

    # A parte de fazer o reshape está quando passo uma transformada (ToImage()) como parâmetro
    cd_train = CustomDataset(subset=(x_train, y_train), transform=transform)
    cd_test  = CustomDataset(subset=(x_test, y_test), transform=transform)
    #cd_val   = CustomDataset(subset=(X_val, y_val), transform=transform)

    data_train = DataLoader(cd_train, shuffle=True, batch_size=BATCH_SIZE, num_workers=8)
    data_test  = DataLoader(cd_test, shuffle=True, batch_size=BATCH_SIZE, num_workers=8)
    #data_val   = DataLoader(cd_val, shuffle=True, batch_size=BATCH_SIZE, num_workers=8)

    loss_fn   = nn.CrossEntropyLoss()
    optimizer = optim.Adam(MODEL.parameters(), lr=LEARNING_RATE)

    for epoch in range(1, EPOCHS + 1):
        train_loss, train_acc = train(device, epoch, optimizer, loss_fn, MODEL, data_train)
        #val_loss, val_acc, _ = validate(device, epoch, optimizer, loss_fn, MODEL, data_val)

        _, _, metrics = validate(device, epoch, loss_fn, MODEL, data_test)
        print(metrics)


In [12]:
if __name__ == "__main__":
    no_update()
    print("Finished experiment!")

cuda


[31-01-2024 09-54-08] Epoch 001 Acc: 0.6555 Loss: 0.63297328: 100%|██████████| 1844/1844 [02:00<00:00, 15.31it/s]
[31-01-2024 09-54-14] Epoch 001 Val. Acc: 0.6561 Val. Loss: 0.63252811: 100%|██████████| 205/205 [00:05<00:00, 34.25it/s]


{'tp': 68795, 'tn': 0, 'fp': 36063, 'fn': 0}


[31-01-2024 09-56-19] Epoch 002 Acc: 0.6557 Loss: 0.63211529: 100%|██████████| 1844/1844 [02:04<00:00, 14.87it/s]
[31-01-2024 09-56-25] Epoch 002 Val. Acc: 0.6561 Val. Loss: 0.63212484: 100%|██████████| 205/205 [00:05<00:00, 37.29it/s]


{'tp': 68795, 'tn': 0, 'fp': 36063, 'fn': 0}


[31-01-2024 09-58-27] Epoch 003 Acc: 0.6558 Loss: 0.63194792: 100%|██████████| 1844/1844 [02:01<00:00, 15.24it/s]
[31-01-2024 09-58-32] Epoch 003 Val. Acc: 0.6561 Val. Loss: 0.63158665: 100%|██████████| 205/205 [00:05<00:00, 37.22it/s]


{'tp': 68795, 'tn': 0, 'fp': 36063, 'fn': 0}


[31-01-2024 10-00-35] Epoch 004 Acc: 0.6559 Loss: 0.63203318: 100%|██████████| 1844/1844 [02:01<00:00, 15.14it/s]
[31-01-2024 10-00-41] Epoch 004 Val. Acc: 0.6561 Val. Loss: 0.63192009: 100%|██████████| 205/205 [00:05<00:00, 35.37it/s]


{'tp': 68795, 'tn': 0, 'fp': 36063, 'fn': 0}


[31-01-2024 10-02-44] Epoch 005 Acc: 0.6556 Loss: 0.63201356: 100%|██████████| 1844/1844 [02:02<00:00, 15.08it/s]
[31-01-2024 10-02-50] Epoch 005 Val. Acc: 0.6561 Val. Loss: 0.63205908: 100%|██████████| 205/205 [00:05<00:00, 34.96it/s]


{'tp': 68795, 'tn': 0, 'fp': 36063, 'fn': 0}


[31-01-2024 10-04-54] Epoch 006 Acc: 0.6556 Loss: 0.63197632: 100%|██████████| 1844/1844 [02:02<00:00, 15.02it/s]
[31-01-2024 10-05-00] Epoch 006 Val. Acc: 0.6561 Val. Loss: 0.63196921: 100%|██████████| 205/205 [00:05<00:00, 37.07it/s]


{'tp': 68795, 'tn': 0, 'fp': 36063, 'fn': 0}


[31-01-2024 10-07-03] Epoch 007 Acc: 0.6555 Loss: 0.63191644: 100%|██████████| 1844/1844 [02:02<00:00, 15.00it/s]
[31-01-2024 10-07-09] Epoch 007 Val. Acc: 0.6561 Val. Loss: 0.63201082: 100%|██████████| 205/205 [00:05<00:00, 35.23it/s]


{'tp': 68795, 'tn': 0, 'fp': 36063, 'fn': 0}


[31-01-2024 10-09-13] Epoch 008 Acc: 0.6555 Loss: 0.63189860: 100%|██████████| 1844/1844 [02:03<00:00, 14.98it/s]
[31-01-2024 10-09-19] Epoch 008 Val. Acc: 0.6561 Val. Loss: 0.63189306: 100%|██████████| 205/205 [00:05<00:00, 35.30it/s]


{'tp': 68795, 'tn': 0, 'fp': 36063, 'fn': 0}


[31-01-2024 10-11-23] Epoch 009 Acc: 0.6556 Loss: 0.63186521: 100%|██████████| 1844/1844 [02:03<00:00, 14.98it/s]
[31-01-2024 10-11-29] Epoch 009 Val. Acc: 0.6545 Val. Loss: 0.63203266: 100%|██████████| 205/205 [00:05<00:00, 35.55it/s]


{'tp': 60255, 'tn': 8377, 'fp': 27686, 'fn': 8540}


[31-01-2024 10-13-33] Epoch 010 Acc: 0.6552 Loss: 0.63188502: 100%|██████████| 1844/1844 [02:03<00:00, 14.97it/s]
[31-01-2024 10-13-39] Epoch 010 Val. Acc: 0.6561 Val. Loss: 0.63190730: 100%|██████████| 205/205 [00:05<00:00, 34.98it/s]


{'tp': 68795, 'tn': 0, 'fp': 36063, 'fn': 0}


[31-01-2024 10-15-43] Epoch 011 Acc: 0.6557 Loss: 0.63182784: 100%|██████████| 1844/1844 [02:03<00:00, 14.97it/s]
[31-01-2024 10-15-49] Epoch 011 Val. Acc: 0.6545 Val. Loss: 0.63202446: 100%|██████████| 205/205 [00:05<00:00, 35.12it/s]


{'tp': 60259, 'tn': 8375, 'fp': 27688, 'fn': 8536}


[31-01-2024 10-17-53] Epoch 012 Acc: 0.6556 Loss: 0.63246456: 100%|██████████| 1844/1844 [02:03<00:00, 14.95it/s]
[31-01-2024 10-18-00] Epoch 012 Val. Acc: 0.6561 Val. Loss: 0.63431229: 100%|██████████| 205/205 [00:05<00:00, 34.78it/s]


{'tp': 68795, 'tn': 0, 'fp': 36063, 'fn': 0}


[31-01-2024 10-20-04] Epoch 013 Acc: 0.6560 Loss: 0.63400803: 100%|██████████| 1844/1844 [02:03<00:00, 14.95it/s]
[31-01-2024 10-20-10] Epoch 013 Val. Acc: 0.6561 Val. Loss: 0.63240052: 100%|██████████| 205/205 [00:05<00:00, 34.37it/s]


{'tp': 68795, 'tn': 0, 'fp': 36063, 'fn': 0}


[31-01-2024 10-22-14] Epoch 014 Acc: 0.6558 Loss: 0.63208248: 100%|██████████| 1844/1844 [02:03<00:00, 14.95it/s]
[31-01-2024 10-22-20] Epoch 014 Val. Acc: 0.6561 Val. Loss: 0.63288143: 100%|██████████| 205/205 [00:05<00:00, 34.84it/s]


{'tp': 68795, 'tn': 0, 'fp': 36063, 'fn': 0}


[31-01-2024 10-24-24] Epoch 015 Acc: 0.6558 Loss: 0.63201031: 100%|██████████| 1844/1844 [02:03<00:00, 14.96it/s]
[31-01-2024 10-24-31] Epoch 015 Val. Acc: 0.6561 Val. Loss: 0.63244671: 100%|██████████| 205/205 [00:05<00:00, 34.91it/s]


{'tp': 68795, 'tn': 1, 'fp': 36062, 'fn': 0}


[31-01-2024 10-26-35] Epoch 016 Acc: 0.6556 Loss: 0.63198838: 100%|██████████| 1844/1844 [02:03<00:00, 14.88it/s]
[31-01-2024 10-26-42] Epoch 016 Val. Acc: 0.6561 Val. Loss: 0.63224859: 100%|██████████| 205/205 [00:05<00:00, 34.76it/s]


{'tp': 68795, 'tn': 1, 'fp': 36062, 'fn': 0}


[31-01-2024 10-28-46] Epoch 017 Acc: 0.6558 Loss: 0.63194420: 100%|██████████| 1844/1844 [02:03<00:00, 14.87it/s]
[31-01-2024 10-28-52] Epoch 017 Val. Acc: 0.6561 Val. Loss: 0.63225664: 100%|██████████| 205/205 [00:05<00:00, 34.74it/s]


{'tp': 68795, 'tn': 0, 'fp': 36063, 'fn': 0}


[31-01-2024 10-30-59] Epoch 018 Acc: 0.6554 Loss: 0.63199108: 100%|██████████| 1844/1844 [02:03<00:00, 14.91it/s]
[31-01-2024 10-31-06] Epoch 018 Val. Acc: 0.6561 Val. Loss: 0.63197990: 100%|██████████| 205/205 [00:05<00:00, 34.73it/s]


{'tp': 68789, 'tn': 4, 'fp': 36059, 'fn': 6}


[31-01-2024 10-33-11] Epoch 019 Acc: 0.6555 Loss: 0.63192780: 100%|██████████| 1844/1844 [02:04<00:00, 14.78it/s]
[31-01-2024 10-33-17] Epoch 019 Val. Acc: 0.6561 Val. Loss: 0.63205421: 100%|██████████| 205/205 [00:05<00:00, 34.26it/s]


{'tp': 68795, 'tn': 1, 'fp': 36062, 'fn': 0}


[31-01-2024 10-35-23] Epoch 020 Acc: 0.6557 Loss: 0.63192527: 100%|██████████| 1844/1844 [02:05<00:00, 14.74it/s]
[31-01-2024 10-35-30] Epoch 020 Val. Acc: 0.6560 Val. Loss: 0.63198259: 100%|██████████| 205/205 [00:05<00:00, 34.77it/s]


{'tp': 68724, 'tn': 61, 'fp': 36002, 'fn': 71}
Finished experiment!


: 