In [None]:
import torch
import pandas as pd
import numpy as np

# Import dataset utils
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

import importlib
if importlib.util.find_spec('ipywidgets') is not None:
    from tqdm.auto import tqdm
else:
    from tqdm import tqdm

In [None]:
# Read the semicolon-delimited CSV into a DataFrame.
dataframe = pd.read_csv(filepath_or_buffer='../data/final.csv', delimiter=';')
# Show the first rows as the cell output to sanity-check the parse.
dataframe.head()

In [None]:
# Keep only the target ('Suicidio') and the predictor columns used by the
# following cells. `.copy()` makes df_suic an independent frame, so the edits
# below never act on (or warn about) a view of `dataframe`.
df_suic = dataframe[['Suicidio','sexo', 'Estado_civil', 'Tipo_Resid','idade',
                   'Alcoolatra', 'Droga', 'Suic_familia', 'Dep_familia',
                   'Alc_familia', 'Drog_familia',
                   'Neuro',
                   'psiquiatrica', 'Anos educacao formal', 'Capaz de desfrutar das coisas',
                   'Impacto de sua familia e amigos',
                   'Capaz de tomar decisões importantes', 'Estudante',
                   'Insonia',
                   'Deprimido', 'Ansiedade',
                   'Perda de insights', 'Apetite', 'Perda de peso', 'Ansiedade somática',
                   'Hipocondriase', 'Sentimentos_culpa', 
                   'Trabalho e interesses', 'Energia', 'Lentidao pensamento e fala',
                   'Agitação', 'Libido', 'Pontuação total', 'TOC']].copy()

# Encode 'sexo' as M -> 0, F -> 1 and default missing entries to 0.
# The result is assigned back explicitly: calling replace/fillna with
# inplace=True on `df_suic['sexo']` mutates a temporary selection, which
# triggers chained-assignment warnings and has no effect on df_suic when
# pandas Copy-on-Write is active.
df_suic['sexo'] = df_suic['sexo'].replace({'M': 0, 'F': 1}).fillna(0)

# Drop every row that still has a missing value, then cast all columns to int
# (the dataset class below feeds these values to an embedding layer as indices).
df_suic = df_suic.dropna().astype(int)

In [None]:
class MyDataset(Dataset):
  """Dataset exposing either the train or the test part of a DataFrame.

  The frame is split once with ``train_test_split`` (fixed ``random_state=42``),
  so two instances built from the same frame with the same ``train_ratio``
  produce matching, non-overlapping train/test parts.

  Args:
    input_dataframe: DataFrame holding the target column and the feature columns.
    split: Which part this instance serves, ``"train"`` or ``"test"``.
    target: Name of the label column.
    ignore_columns: Optional iterable of column names to drop before splitting;
      names that are not present in the frame are skipped.
    train_ratio: Fraction of rows assigned to the train part.

  Attributes:
    classification_dim: Number of distinct values in the target column.
    data_dim: Number of feature columns (all remaining columns minus the target).
    embbeding_dim: Largest value found in any remaining column (target
      included) plus one.
    x_train, x_test, y_train, y_test: Arrays returned by ``train_test_split``.

  Raises:
    ValueError: If ``split`` is neither ``"train"`` nor ``"test"``.
  """

  _VALID_SPLITS = ("train", "test")

  def __init__(self, input_dataframe, split="train", target="Suicidio", ignore_columns=None, train_ratio=0.8):
    # Fail fast: a bad split name would otherwise only surface on the first
    # len()/indexing call, typically deep inside a DataLoader.
    if split not in self._VALID_SPLITS:
      raise ValueError("Split must be train or test")

    self.split = split
    self.target = target
    # None instead of a mutable [] default, so instances never share one list.
    self.ignore_columns = [] if ignore_columns is None else list(ignore_columns)

    present = [name for name in self.ignore_columns if name in input_dataframe.columns]
    if present:
      input_dataframe = input_dataframe.drop(columns=present)

    self.classification_dim = len(input_dataframe[self.target].unique())
    self.data_dim = len(input_dataframe.columns) - 1
    # Plain Python int rather than a numpy scalar, so it can be handed
    # directly to layer constructors.
    self.embbeding_dim = int(input_dataframe.max().max()) + 1

    y = input_dataframe[target].values
    x = input_dataframe.drop(target, axis=1).values

    self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(x, y, test_size=1-train_ratio, random_state=42)

  def _active_arrays(self):
    """Return the (features, labels) pair for the currently selected split."""
    if self.split == "train":
      return self.x_train, self.y_train
    if self.split == "test":
      return self.x_test, self.y_test
    # Still guarded here because `split` is a public attribute that can be
    # reassigned after construction.
    raise ValueError("Split must be train or test")

  def __len__(self):
    """Number of samples in the selected split."""
    features, _ = self._active_arrays()
    return len(features)

  def __getitem__(self, idx):
    """Return ``(features, label)`` for row ``idx`` as ``torch.long`` tensors."""
    features, labels = self._active_arrays()
    return torch.tensor(features[idx], dtype=torch.long), torch.tensor(labels[idx], dtype=torch.long)


In [None]:
# Both datasets are built from the same frame with the same ratio; MyDataset
# splits with a fixed random_state, so the train and test parts are consistent.
train_dataset = MyDataset(df_suic, split="train", target="Suicidio", ignore_columns=[], train_ratio=0.8)
test_dataset = MyDataset(df_suic, split="test", target="Suicidio", ignore_columns=[], train_ratio=0.8)

# Reshuffle the training samples every epoch so batches are not always composed
# of the same rows in the same order; keep evaluation order fixed.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

In [None]:
## Define an MLP model with an embedding layer

import torch.nn as nn
import torch.nn.functional as F

class MLP(nn.Module):
    """Embedding lookup followed by a two-layer perceptron.

    Every integer in the input is mapped to an ``embedding_out``-sized vector
    through one shared table of ``embedding_dim`` rows; the vectors of a sample
    are concatenated and passed through ``fc1`` -> ReLU -> ``fc2``.

    Args:
        input_dim: Number of integer features per sample.
        embedding_dim: Number of rows in the embedding table.
        embedding_out: Size of each embedding vector.
        hidden_dim: Width of the hidden linear layer.
        output_dim: Width of the output linear layer.
    """

    def __init__(self, input_dim, embedding_dim, embedding_out, hidden_dim, output_dim):
        super().__init__()

        # Width of one sample once all of its embeddings are concatenated.
        flattened_width = embedding_out * input_dim

        self.embedding = nn.Embedding(num_embeddings=embedding_dim, embedding_dim=embedding_out)
        self.fc1 = nn.Linear(in_features=flattened_width, out_features=hidden_dim)
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Return the raw ``fc2`` outputs for a batch of integer feature rows."""
        embedded = self.embedding(x)
        # Keep the leading dimension and merge everything after it.
        flat = embedded.view(embedded.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

# Smoke-test the network on one training batch.
example_batch = next(iter(train_loader))
example_data, example_targets = example_batch
# Size the embedding table from the dataset (largest value + 1), matching the
# model built for training, instead of a hard-coded row count that would raise
# an index error as soon as a feature value reaches it.
model = MLP(
    input_dim=train_dataset.data_dim,
    embedding_dim=train_dataset.embbeding_dim,
    embedding_out=64,
    hidden_dim=128,
    output_dim=train_dataset.classification_dim,
)
print(model)
print("Batch shape:", example_data.shape)
res = model(example_data)
print("Output shape:", res.shape)
print("Output:", res[0])




In [None]:
## Make Lightning Module
from pytorch_lightning import LightningModule

class BaseModel(LightningModule):
    """LightningModule that trains the embedding ``MLP`` with an L1 loss.

    Implemented hooks:
        - ``__init__``: builds the wrapped ``MLP``.
        - ``training_step`` / ``validation_step``: compute the loss via ``step``
          and log it as ``train_loss`` / ``val_loss``.
        - ``configure_optimizers``: Adam with a learning rate of 1e-3.

    No ``test_step`` or ``predict_step`` is defined on this class.

    Args:
        input_dim: Number of integer features per sample.
        embedding_dim: Number of rows in the embedding table.
        embedding_out: Size of each embedding vector.
        hidden_dim: Width of the hidden layer.
        output_dim: Width of the network output.
    """

    def __init__(self, input_dim, embedding_dim, embedding_out, hidden_dim, output_dim):
        super().__init__()
        self.model = MLP(input_dim, embedding_dim, embedding_out, hidden_dim, output_dim)

    def step(self, batch):
        """Compute the L1 loss between the network output and the targets.

        Args:
            batch: ``(x, y)`` pair as yielded by the dataloader.

        Returns:
            Scalar loss tensor.
        """
        x, y = batch
        # Drop only the trailing singleton output dimension. A bare squeeze()
        # would also remove the batch dimension when a batch holds one sample,
        # leaving prediction and target with different shapes.
        y_hat = self.model(x).squeeze(-1).float()
        # Targets arrive as integer tensors; cast explicitly so the loss does
        # not depend on implicit int/float promotion.
        # loss = F.binary_cross_entropy_with_logits(y_hat, y)
        # L1 Loss
        loss = F.l1_loss(y_hat, y.float())
        return loss

    def training_step(self, batch, batch_idx):
        """Run one training batch and log ``train_loss``."""
        loss = self.step(batch)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Run one validation batch and log ``val_loss``."""
        loss = self.step(batch)
        self.log('val_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters (lr=1e-3)."""
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer


In [None]:
# Import trainer
from pytorch_lightning.trainer import Trainer


# Instantiate the Lightning wrapper: the embedding table is sized from the
# dataset and the network has a single output unit.
model = BaseModel(
    input_dim=train_dataset.data_dim,
    embedding_dim=train_dataset.embbeding_dim,
    embedding_out=64,
    hidden_dim=128,
    output_dim=1,
)

In [None]:
# Import callbacks
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

# Callbacks, both driven by the logged 'val_loss' (lower is better).

# Stop training after 10 validation checks without improvement.
early_stopping = EarlyStopping(
    monitor='val_loss', mode='min', min_delta=0.00, patience=10, verbose=False
)

# Keep only the single best checkpoint under checkpoints/.
checkpoint_callback = ModelCheckpoint(
    dirpath='checkpoints/', filename='best-checkpoint', monitor='val_loss', mode='min', save_top_k=1
)

callbacks = [checkpoint_callback, early_stopping]


# Initialize a trainer.
# accelerator='auto' lets Lightning pick the available hardware, so the notebook
# also runs on machines without a GPU instead of failing at construction.
# Validation runs only every 10 epochs; since early stopping counts validation
# checks, its patience of 10 corresponds to 100 training epochs.
trainer = Trainer(
    accelerator='auto',
    devices=1,
    check_val_every_n_epoch=10,
    log_every_n_steps=10,
    callbacks=callbacks,
)

In [None]:
# Train the model ⚡
# NOTE(review): the test loader is passed as the validation loader, so early
# stopping and checkpoint selection are driven by the test split.
trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=test_loader)