In [1]:
from torch import nn
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import confusion_matrix
from tqdm import tqdm
from typing import Tuple
from datetime import datetime as dt

import random 
import torch.optim as optim
import torch
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
SEED = 1701
EPOCHS = 20
MODEL_REPO = "/home/luiz/pibit/src/cleaner/botClean.csv"
BATCH_SIZE = 128
LEARNING_RATE = 1e-3

In [3]:
class ToImage:
    def __call__(self, array: torch.Tensor, keep_normalization=True):
        """ The idea is to convert a 1d array to a 2d array by resizing (with padding) to the square root of the 1d shape

        Ex: 
            - shape: 2048  
            - sqrt(shape) = 45.25 -> round to ceil (46)
            - resize the feature vector to 46x46 
            - return the new feature vector as a RGB PIL Image for torchvision transforms
         """
        #feat = array.shape[0]
        feat = array.shape[0]
        n = int(np.ceil(feat ** 0.5))

        array = array.cpu().numpy().copy()
        
        # Squared size with padding
        array.resize((n, n))
        if not keep_normalization:
            return (array * 255).astype(np.uint8)

        return torch.Tensor(array.astype(np.float32)).unsqueeze(0)

In [4]:
class CustomDataset(Dataset):
    """ Custom dataset class used for applying transforms to the features. """
    def __init__(self, subset: Tuple[torch.Tensor, torch.Tensor], transform=None):
        self.subset = subset
        self.transform = transform
        
    def __getitem__(self, index):
        x, y = self.subset[0][index], self.subset[1][index]
    
        if self.transform:
            x = self.transform(x)

        return x, y
        
    def __len__(self):
        return self.subset[0].size(0)

In [5]:
def validate(device: str, epoch: int, loss_fn, MODEL, dataset: DataLoader):
    # Validation
    MODEL.eval()
    it_eval = tqdm(enumerate(dataset), total=len(dataset))
    running_loss = 0.
    correct = 0
    qt = 1
    metrics = dict(tp=0, tn=0, fp=0, fn=0)
    y_pred = list()
    y_true = list()
    with torch.no_grad():
        for _, (x, y) in it_eval:
            x = x.to(device)
            y = y.to(device)

            output = MODEL(x)
            running_loss += loss_fn(output, y).item()
            y_pred.extend(torch.argmax(output, 1).cpu().numpy())
            y_true.extend(y.data.cpu().numpy())
            correct += torch.sum(torch.argmax(output, 1).eq(y)).item()
            qt += len(x)
            desc = f"[{now()}] Epoch {str(epoch).zfill(3)} Val. Acc: {correct/qt:.4f} Val. Loss: {running_loss / len(dataset):.8f}"
            it_eval.set_description(desc)
    
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    metrics["tp"] = tp
    metrics["fp"] = fp
    metrics["tn"] = tn
    metrics["fn"] = fn
    return running_loss / len(dataset), correct/qt, metrics

In [6]:
def now():
    return dt.now().strftime("%d-%m-%Y %H-%M-%S")

In [7]:
def train(device: str, epoch: int, optimizer, loss_fn, MODEL, dataset: DataLoader):
    # Put the MODEL in the training mode
    MODEL.train()
    running_loss = 0.
    qt = 1
    correct = 0

    # Just add a fancy progress bar to the terminal...
    it = tqdm(enumerate(dataset), total=len(dataset))

    for _, (x, y) in it:
        x = x.to(device)
        y = y.to(device)
        
        # Make predictions for this batch
        outputs = MODEL(x)

        # Zero your gradients for every batch!
        optimizer.zero_grad()
        loss = loss_fn(outputs, y)
        loss.backward()

        # Adjust learning weights
        optimizer.step()
        correct += torch.sum(torch.argmax(outputs, 1).eq(y)).item()
        qt += len(x)
    
        # Gather data and report
        running_loss += loss.item()

        desc = f"[{now()}] Epoch {str(epoch).zfill(3)} Acc: {correct/qt:.4f} Loss: {running_loss / len(dataset):.8f}"
        it.set_description(desc)
    
    # Loss / Accuracy
    return running_loss / len(dataset), correct/qt

In [8]:
class AlexNet(nn.Module):
  def __init__(self, num_classes=2):
    super(AlexNet, self).__init__()
    self.features = nn.Sequential(
      nn.Conv2d(1, 128, kernel_size=7, stride=4, padding=2),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=1, stride=2),
        nn.Conv2d(128, 128, kernel_size=1, padding=2),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.Conv2d(128, 128, kernel_size=2, padding=1),
        nn.ReLU(inplace=True),            
        nn.Conv2d(128, 128, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),            
        nn.Conv2d(128, 128, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
    )
        
    self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
    self.classifier = nn.Sequential(
      nn.Dropout(),
      nn.Linear(4608, 128),
      nn.ReLU(inplace=True),            
      nn.Dropout(),
      nn.Linear(128, 128),
      nn.ReLU(inplace=True),            
      nn.Linear(128, num_classes)
    )

  def forward(self, x):
    x = self.features(x)
    x = self.avgpool(x)
    x = torch.flatten(x, 1)
    x = self.classifier(x)
    return x

In [9]:
torch.cuda.is_available()

True

In [10]:
def no_update():
    # Reproducibility
    torch.manual_seed(SEED)
    np.random.seed(SEED)
    random.seed(SEED)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(device)

    MODEL = AlexNet().to('cuda')

    #if MODEL is None or not isinstance(nn.Module):
    #    raise TypeError("The model does not exist or isn't an instance of nn.Module (PyTorch)")
    
    transform = transforms.Compose([
        ToImage(),
    ])
    csv_path = "../../pibit/src/cleaner/botClean.csv"
    df = pd.read_csv(csv_path)
    y = df.iloc[:, -1].values
    x = df.iloc[:, :-1].values
    y = y[::-1]
    x = np.squeeze(x)
    #x = x.reshape((1, -1))
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.1, random_state=42
    )

    #raise Exception("Precisa carregar e separar (treino, validação e teste) um dataset qualquer para executar")
    x_train = torch.Tensor(x_train)
    y_train = torch.LongTensor(y_train)
    x_test  = torch.Tensor(x_test)
    y_test  = torch.LongTensor(y_test)
    #x_val   = torch.Tensor(x_val)
    #y_val   = torch.LongTensor(y_val)

    # A parte de fazer o reshape está quando passo uma transformada (ToImage()) como parâmetro
    cd_train = CustomDataset(subset=(x_train, y_train), transform=transform)
    cd_test  = CustomDataset(subset=(x_test, y_test), transform=transform)
    #cd_val   = CustomDataset(subset=(X_val, y_val), transform=transform)

    data_train = DataLoader(cd_train, shuffle=True, batch_size=BATCH_SIZE, num_workers=8)
    data_test  = DataLoader(cd_test, shuffle=True, batch_size=BATCH_SIZE, num_workers=8)
    #data_val   = DataLoader(cd_val, shuffle=True, batch_size=BATCH_SIZE, num_workers=8)

    loss_fn   = nn.CrossEntropyLoss()
    optimizer = optim.Adam(MODEL.parameters(), lr=LEARNING_RATE)

    for epoch in range(1, EPOCHS + 1):
        train_loss, train_acc = train(device, epoch, optimizer, loss_fn, MODEL, data_train)
        #val_loss, val_acc, _ = validate(device, epoch, optimizer, loss_fn, MODEL, data_val)

        _, _, metrics = validate(device, epoch, loss_fn, MODEL, data_test)
        print(metrics)


In [11]:
if __name__ == "__main__":
    no_update()
    print("Finished experiment!")

cuda


[17-01-2024 13-05-33] Epoch 001 Acc: 0.9991 Loss: 0.01371267: 100%|██████████| 3516/3516 [01:11<00:00, 49.17it/s]
[17-01-2024 13-05-37] Epoch 001 Val. Acc: 0.9990 Val. Loss: 0.00833749: 100%|██████████| 391/391 [00:04<00:00, 96.77it/s] 


{'tp': 0, 'tn': 49949, 'fp': 0, 'fn': 51}


[17-01-2024 13-06-54] Epoch 002 Acc: 0.9991 Loss: 0.00933641: 100%|██████████| 3516/3516 [01:16<00:00, 45.92it/s]
[17-01-2024 13-06-58] Epoch 002 Val. Acc: 0.9990 Val. Loss: 0.00804073: 100%|██████████| 391/391 [00:04<00:00, 92.32it/s]


{'tp': 0, 'tn': 49949, 'fp': 0, 'fn': 51}


[17-01-2024 13-08-17] Epoch 003 Acc: 0.9991 Loss: 0.00835600: 100%|██████████| 3516/3516 [01:18<00:00, 44.99it/s]
[17-01-2024 13-08-21] Epoch 003 Val. Acc: 0.9990 Val. Loss: 0.00991317: 100%|██████████| 391/391 [00:04<00:00, 94.94it/s]


{'tp': 0, 'tn': 49949, 'fp': 0, 'fn': 51}


[17-01-2024 13-09-41] Epoch 004 Acc: 0.9991 Loss: 0.00827173: 100%|██████████| 3516/3516 [01:19<00:00, 44.15it/s]
[17-01-2024 13-09-46] Epoch 004 Val. Acc: 0.9990 Val. Loss: 0.00827812: 100%|██████████| 391/391 [00:04<00:00, 89.58it/s]


{'tp': 0, 'tn': 49949, 'fp': 0, 'fn': 51}


[17-01-2024 13-11-07] Epoch 005 Acc: 0.9991 Loss: 0.00790517: 100%|██████████| 3516/3516 [01:20<00:00, 43.70it/s]
[17-01-2024 13-11-11] Epoch 005 Val. Acc: 0.9990 Val. Loss: 0.00812390: 100%|██████████| 391/391 [00:04<00:00, 89.75it/s]


{'tp': 0, 'tn': 49949, 'fp': 0, 'fn': 51}


[17-01-2024 13-12-33] Epoch 006 Acc: 0.9991 Loss: 0.00766364: 100%|██████████| 3516/3516 [01:21<00:00, 43.21it/s]
[17-01-2024 13-12-38] Epoch 006 Val. Acc: 0.9990 Val. Loss: 0.00843223: 100%|██████████| 391/391 [00:04<00:00, 87.75it/s]


{'tp': 0, 'tn': 49949, 'fp': 0, 'fn': 51}


[17-01-2024 13-14-00] Epoch 007 Acc: 0.9991 Loss: 0.00766413: 100%|██████████| 3516/3516 [01:21<00:00, 43.00it/s]
[17-01-2024 13-14-04] Epoch 007 Val. Acc: 0.9990 Val. Loss: 0.00804377: 100%|██████████| 391/391 [00:04<00:00, 88.55it/s]


{'tp': 0, 'tn': 49949, 'fp': 0, 'fn': 51}


[17-01-2024 13-15-29] Epoch 008 Acc: 0.9991 Loss: 0.00763732: 100%|██████████| 3516/3516 [01:24<00:00, 41.76it/s]
[17-01-2024 13-15-33] Epoch 008 Val. Acc: 0.9990 Val. Loss: 0.00808509: 100%|██████████| 391/391 [00:04<00:00, 92.39it/s]


{'tp': 0, 'tn': 49949, 'fp': 0, 'fn': 51}


[17-01-2024 13-17-02] Epoch 009 Acc: 0.9991 Loss: 0.00762502: 100%|██████████| 3516/3516 [01:28<00:00, 39.61it/s]
[17-01-2024 13-17-07] Epoch 009 Val. Acc: 0.9990 Val. Loss: 0.00804565: 100%|██████████| 391/391 [00:04<00:00, 92.09it/s]


{'tp': 0, 'tn': 49949, 'fp': 0, 'fn': 51}


[17-01-2024 13-18-45] Epoch 010 Acc: 0.9991 Loss: 0.00763468: 100%|██████████| 3516/3516 [01:37<00:00, 36.15it/s]
[17-01-2024 13-18-49] Epoch 010 Val. Acc: 0.9990 Val. Loss: 0.00804406: 100%|██████████| 391/391 [00:04<00:00, 92.57it/s]


{'tp': 0, 'tn': 49949, 'fp': 0, 'fn': 51}


[17-01-2024 13-20-18] Epoch 011 Acc: 0.9991 Loss: 0.00954055: 100%|██████████| 3516/3516 [01:28<00:00, 39.57it/s]
[17-01-2024 13-20-23] Epoch 011 Val. Acc: 0.9990 Val. Loss: 0.00915355: 100%|██████████| 391/391 [00:04<00:00, 96.09it/s]


{'tp': 0, 'tn': 49949, 'fp': 0, 'fn': 51}


[17-01-2024 13-21-42] Epoch 012 Acc: 0.9991 Loss: 0.00784138: 100%|██████████| 3516/3516 [01:18<00:00, 44.76it/s]
[17-01-2024 13-21-46] Epoch 012 Val. Acc: 0.9990 Val. Loss: 0.00806245: 100%|██████████| 391/391 [00:04<00:00, 95.98it/s]


{'tp': 0, 'tn': 49949, 'fp': 0, 'fn': 51}


[17-01-2024 13-23-04] Epoch 013 Acc: 0.9991 Loss: 0.00762851: 100%|██████████| 3516/3516 [01:18<00:00, 44.95it/s]
[17-01-2024 13-23-09] Epoch 013 Val. Acc: 0.9990 Val. Loss: 0.00817182: 100%|██████████| 391/391 [00:04<00:00, 92.17it/s]


{'tp': 0, 'tn': 49949, 'fp': 0, 'fn': 51}


[17-01-2024 13-24-28] Epoch 014 Acc: 0.9991 Loss: 0.00758672: 100%|██████████| 3516/3516 [01:18<00:00, 44.87it/s]
[17-01-2024 13-24-32] Epoch 014 Val. Acc: 0.9990 Val. Loss: 0.00811244: 100%|██████████| 391/391 [00:04<00:00, 91.77it/s]


{'tp': 0, 'tn': 49949, 'fp': 0, 'fn': 51}


[17-01-2024 13-25-51] Epoch 015 Acc: 0.9991 Loss: 0.01158754: 100%|██████████| 3516/3516 [01:18<00:00, 44.86it/s]
[17-01-2024 13-25-55] Epoch 015 Val. Acc: 0.9990 Val. Loss: 0.00893141: 100%|██████████| 391/391 [00:04<00:00, 95.55it/s]


{'tp': 0, 'tn': 49949, 'fp': 0, 'fn': 51}


[17-01-2024 13-27-14] Epoch 016 Acc: 0.9991 Loss: 0.00793733: 100%|██████████| 3516/3516 [01:18<00:00, 44.90it/s]
[17-01-2024 13-27-18] Epoch 016 Val. Acc: 0.9990 Val. Loss: 0.00807012: 100%|██████████| 391/391 [00:04<00:00, 96.35it/s]


{'tp': 0, 'tn': 49949, 'fp': 0, 'fn': 51}


[17-01-2024 13-28-37] Epoch 017 Acc: 0.9991 Loss: 0.00784509: 100%|██████████| 3516/3516 [01:18<00:00, 44.84it/s]
[17-01-2024 13-28-42] Epoch 017 Val. Acc: 0.9990 Val. Loss: 0.00806483: 100%|██████████| 391/391 [00:04<00:00, 90.48it/s]


{'tp': 0, 'tn': 49949, 'fp': 0, 'fn': 51}


[17-01-2024 13-30-00] Epoch 018 Acc: 0.9991 Loss: 0.00758805: 100%|██████████| 3516/3516 [01:18<00:00, 44.90it/s]
[17-01-2024 13-30-05] Epoch 018 Val. Acc: 0.9990 Val. Loss: 0.00822665: 100%|██████████| 391/391 [00:04<00:00, 90.98it/s]


{'tp': 0, 'tn': 49949, 'fp': 0, 'fn': 51}


[17-01-2024 13-31-23] Epoch 019 Acc: 0.9991 Loss: 0.00758863: 100%|██████████| 3516/3516 [01:18<00:00, 44.90it/s]
[17-01-2024 13-31-28] Epoch 019 Val. Acc: 0.9990 Val. Loss: 0.00807164: 100%|██████████| 391/391 [00:04<00:00, 90.80it/s]


{'tp': 0, 'tn': 49949, 'fp': 0, 'fn': 51}


[17-01-2024 13-32-47] Epoch 020 Acc: 0.9991 Loss: 0.00758739: 100%|██████████| 3516/3516 [01:18<00:00, 44.83it/s]
[17-01-2024 13-32-51] Epoch 020 Val. Acc: 0.9990 Val. Loss: 0.00803751: 100%|██████████| 391/391 [00:04<00:00, 90.92it/s]


{'tp': 0, 'tn': 49949, 'fp': 0, 'fn': 51}
Finished experiment!
