**IMPORTS**

In [1]:
!pip install torchinfo
!pip install livelossplot

Collecting torchinfo
  Downloading torchinfo-1.6.5-py3-none-any.whl (21 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.6.5
Collecting livelossplot
  Downloading livelossplot-0.5.5-py3-none-any.whl (22 kB)
Collecting ipython==7.*
  Downloading ipython-7.32.0-py3-none-any.whl (793 kB)
[K     |████████████████████████████████| 793 kB 6.7 MB/s 
Collecting prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0
  Downloading prompt_toolkit-3.0.29-py3-none-any.whl (381 kB)
[K     |████████████████████████████████| 381 kB 41.2 MB/s 
Installing collected packages: prompt-toolkit, ipython, livelossplot
  Attempting uninstall: prompt-toolkit
    Found existing installation: prompt-toolkit 1.0.18
    Uninstalling prompt-toolkit-1.0.18:
      Successfully uninstalled prompt-toolkit-1.0.18
  Attempting uninstall: ipython
    Found existing installation: ipython 5.5.0
    Uninstalling ipython-5.5.0:
      Successfully uninstalled ipython-5.5.0
[31mERROR: pip's dependency res

In [1]:
!pip install livelossplot



In [2]:
import torch
print(torch.__version__)

1.11.0


In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, classification_report
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torch import Tensor
from torch.nn import Linear
from torch.nn import ReLU
from torch.nn import Sigmoid
from torch.nn import Module
from torch.optim import SGD, Adam
from torch.nn import BCELoss, BCEWithLogitsLoss
from torch.nn.init import kaiming_uniform_
from torch.nn.init import xavier_uniform_
from IPython.display import display
import seaborn as sns
import matplotlib.pyplot as plt
from torchinfo import summary
from livelossplot import PlotLosses

**Preparar os Dados**

In [4]:
PATH = 'Datasets/train.csv'

device = torch.device("cpu")

EPOCHS = 50
BATCH_SIZE = 64
LEARNING_RATE = 0.001

In [5]:
class CSVDataset(Dataset):

  def __init__(self, path):
    df = pd.read_csv(path, header=None)
    self.X = df.values[1:,:]
    self.X = np.delete(self.X,1,1) #remover a idade
    self.y = df.values[1:, 1]
    self.X = self.X.astype('float32')
    self.y = self.y.astype('float32')
    self.y = self.y.reshape((len(self.y), 1)) 

  def __len__(self):
    return len(self.X)

  def __getitem__(self, idx):
    return [self.X[idx], self.y[idx]] 
  
  def get_splits(self, n_test=0.33): 
    test_size = round(n_test * len(self.X)) 
    train_size = len(self.X) - test_size 
    return random_split(self, [train_size, test_size])

In [6]:
def prepare_data(path):
        dataset = CSVDataset(path)
        train, test = dataset.get_splits()
        train_dl = DataLoader(train, batch_size=len(train), shuffle=True)
        test_dl = DataLoader(test, batch_size=1024, shuffle=False)
        train_dl_all = DataLoader(train, batch_size=len(train), shuffle=False)
        test_dl_all = DataLoader(test, batch_size=len(test), shuffle=False)
        return train_dl, test_dl, train_dl_all, test_dl_all
        
train_dl, test_dl,  train_dl_all, test_dl_all = prepare_data(PATH)

x,y = next(iter(train_dl))
print(x.shape, y.shape)
x,y = next(iter(test_dl))
print(x.shape, y.shape)

torch.Size([75, 3]) torch.Size([75, 1])
torch.Size([37, 3]) torch.Size([37, 1])


**Visualizar os Dados**

In [7]:
def visualize_data(path):
  df = pd.read_csv(path, header=None)
  display(df)

def visualize_dataset(train_dl, test_dl):
  print(f"Quantidade de casos de Treino:{len(train_dl.dataset)}")
  print(f"Quantidade de casos de Teste:{len(test_dl.dataset)}")
  x, y = next(iter(train_dl))
  print(f"Shape tensor batch casos treino, input: {x.shape}, output: {y.shape}")
  x, y = next(iter(test_dl))
  print(f"Shape tensor batch casos teste, input: {x.shape}, output: {y.shape}")

visualize_data(PATH)
visualize_dataset(train_dl, test_dl)

Unnamed: 0,0,1,2,3
0,id,age,sex,education
1,1,13,1,7
2,2,14,0,8
3,3,15,1,9
4,4,15,1,9
...,...,...,...,...
108,108,77,1,4
109,109,67,0,4
110,110,55,0,4
111,111,76,1,3


Quantidade de casos de Treino:75
Quantidade de casos de Teste:37
Shape tensor batch casos treino, input: torch.Size([75, 3]), output: torch.Size([75, 1])
Shape tensor batch casos teste, input: torch.Size([37, 3]), output: torch.Size([37, 1])


In [36]:
def visualize_holdout_balance(y_train, y_test):
  _, y_train = next(iter(train_dl_all))
  _, y_test = next(iter(test_dl_all))
  sns.set_style('whitegrid')
  casos_treino=len(y_train)
  casos_test=len(y_test)
  b_Train=np.count_nonzero(y_train == 0)
  g_Train = np.count_nonzero(y_train == 1)
  b_Test=np.count_nonzero(y_test == 0)
  g_Test = np.count_nonzero(y_test == 1)
  print("casos_treino:",casos_treino)
  print("g_Train: ", g_Train)
  print("b_Train: ", b_Train)
  print("g_Train/b_Train: ", g_Train/b_Train) 
  print("casos_test:",casos_test)
  print("g_Test: ", g_Test)
  print("b_Test: ", b_Test)
  print("g_Test/b_Test: ", g_Test/b_Test) 
  grafico=sns.barplot(
      x=['g_Train','b_Train', 'g_Test', 'b_Test'], 
      y=[g_Train,b_Train, g_Test, b_Test])
  grafico.set_title('Data balance ')
  plt.xticks(rotation=70)
  plt.tight_layout()
  plt.show() 

visualize_holdout_balance(train_dl_all, test_dl_all)

ValueError: ignored

**Preparar o Modelo**

In [None]:
class MLP(Module):
  
  def __init__(self, n_inputs):
    super(MLP, self).__init__()
    self.hidden1 = Linear(n_inputs, 10)
    kaiming_uniform_(self.hidden1.weight, nonlinearity='relu')
    self.act1 = ReLU()
    self.hidden2 = Linear(10, 8)
    kaiming_uniform_(self.hidden2.weight, nonlinearity='relu')
    self.act2 = ReLU()
    self.hidden3 = Linear(8, 1)
    xavier_uniform_(self.hidden3.weight)
    self.act3 = Sigmoid()

  def forward(self, X): 
    X = self.hidden1(X) 
    X = self.act1(X) 
    X = self.hidden2(X) 
    X = self.act2(X) 
    X = self.hidden3(X) 
    X = self.act3(X) 
    return X

model = MLP(34)
print(summary(model, input_size=(BATCH_SIZE, 34), verbose=0))
model.to(device)

Layer (type:depth-idx)                   Output Shape              Param #
MLP                                      --                        --
├─Linear: 1-1                            [64, 10]                  350
├─ReLU: 1-2                              [64, 10]                  --
├─Linear: 1-3                            [64, 8]                   88
├─ReLU: 1-4                              [64, 8]                   --
├─Linear: 1-5                            [64, 1]                   9
├─Sigmoid: 1-6                           [64, 1]                   --
Total params: 447
Trainable params: 447
Non-trainable params: 0
Total mult-adds (M): 0.03
Input size (MB): 0.01
Forward/backward pass size (MB): 0.01
Params size (MB): 0.00
Estimated Total Size (MB): 0.02


MLP(
  (hidden1): Linear(in_features=34, out_features=10, bias=True)
  (act1): ReLU()
  (hidden2): Linear(in_features=10, out_features=8, bias=True)
  (act2): ReLU()
  (hidden3): Linear(in_features=8, out_features=1, bias=True)
  (act3): Sigmoid()
)

In [None]:
def train_model(train_dl, model):
  liveloss = PlotLosses()
  criterion = BCEWithLogitsLoss()
  optimizer = Adam(model.parameters(), lr=LEARNING_RATE)
  for epoch in range(EPOCHS):
        logs = {}
        epoch_loss  = 0 
        epoch_acc  = 0 
        for i, (inputs, labels) in enumerate(train_dl):
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            acc = accuracy_score(labels.numpy(), np.argmax(outputs.detach().numpy(), axis=1))
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            epoch_acc += acc.item()
        print(f'Epoch {epoch:03}: | Loss: {epoch_loss/len(train_dl):.5f} | Acc: {epoch_acc/len(train_dl):.3f}')      
        logs['loss'] = epoch_loss
        logs['accuracy'] = epoch_acc/len(train_dl)
        liveloss.update(logs)
        liveloss.send()

train_model(train_dl, model)

NameError: ignored