#### Metadata

1. Updated implementation of the perturbation process using the Uniform distribution and setting the perturbation factor.

2. Correct rule criterion (use the $\math{{y}^{^}})

$$ y^{~^} $$

In [1]:
import pandas as pd
import torch

In [2]:
data = pd.read_csv("data/cardio_train.csv", sep = ";")

In [3]:
data.head()

Unnamed: 0,id,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
0,0,18393,2,168,62.0,110,80,1,1,0,0,1,0
1,1,20228,1,156,85.0,140,90,3,1,0,0,1,1
2,2,18857,1,165,64.0,130,70,3,1,0,0,0,1
3,3,17623,2,169,82.0,150,100,1,1,0,0,1,1
4,4,17474,1,156,56.0,100,60,1,1,0,0,0,0


In [4]:
data.shape

(70000, 13)

In [5]:
# Drop `id` feature
data.drop(labels = ["id"], axis = 1, inplace = True)

In [6]:
# Cardinality
data.nunique()

age            8076
gender            2
height          109
weight          287
ap_hi           153
ap_lo           157
cholesterol       3
gluc              3
smoke             2
alco              2
active            2
cardio            2
dtype: int64

In [7]:
categorical_features = [
    "gender",
    "cholesterol",
    "gluc",
    "smoke",
    "alco",
    "active",
]

scale_features = [
    "age",
    "height",
    "weight",
    "ap_hi",
    "ap_lo"
]

In [8]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split

In [9]:
encoder = OneHotEncoder()

In [10]:
# Get features and target
X, y = data.drop(labels = ["cardio"], axis = 1), data["cardio"]

X_train, X_test, y_train,  y_test = train_test_split(X, y, test_size = .4, stratify = y)

In [11]:
X_train.head()

Unnamed: 0,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active
6184,22586,1,165,68.0,160,80,1,1,0,0,1
16111,16110,2,175,78.0,120,80,1,1,1,0,1
31190,21934,1,158,78.0,130,80,1,1,0,0,1
24372,22669,2,174,88.0,160,90,3,3,0,0,1
68629,23389,1,156,74.0,140,100,1,1,0,0,0


In [12]:
# Fit encoder
encoder.fit(X_train.loc[:, categorical_features])

In [13]:
encoder.get_feature_names_out()

array(['gender_1', 'gender_2', 'cholesterol_1', 'cholesterol_2',
       'cholesterol_3', 'gluc_1', 'gluc_2', 'gluc_3', 'smoke_0',
       'smoke_1', 'alco_0', 'alco_1', 'active_0', 'active_1'],
      dtype=object)

In [14]:
encoder.transform(X_train.loc[:, categorical_features]).shape

(42000, 14)

In [15]:
def transform_data(train, test, categorical_features):
    X_train, y_train = train
    X_test, y_test = test

    encoder = ColumnTransformer(
        transformers=[
            ("encoder", OneHotEncoder(), categorical_features),
            ("scaler", StandardScaler(), scale_features)
        ],
        remainder = "passthrough"
    )

    encoder.fit(X_train)
    columns = encoder.get_feature_names_out()

    X_train = encoder.transform(X_train)
    X_test = encoder.transform(X_test)

    X_train = pd.DataFrame(data = X_train, columns = columns)
    X_test = pd.DataFrame(data = X_test, columns = columns)

    return X_train, y_train, X_test, y_test

In [16]:
X_train, y_train, X_test, y_test = transform_data([X_train, y_train], [X_test, y_test], categorical_features)

In [17]:
X_train.head()

Unnamed: 0,encoder__gender_1,encoder__gender_2,encoder__cholesterol_1,encoder__cholesterol_2,encoder__cholesterol_3,encoder__gluc_1,encoder__gluc_2,encoder__gluc_3,encoder__smoke_0,encoder__smoke_1,encoder__alco_0,encoder__alco_1,encoder__active_0,encoder__active_1,scaler__age,scaler__height,scaler__weight,scaler__ap_hi,scaler__ap_lo
0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.263613,0.075246,-0.427516,0.205307,-0.092932
1,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,-1.358803,1.285136,0.265649,-0.057572,-0.092932
2,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.99959,-0.771676,0.265649,0.008148,-0.092932
3,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.297223,1.164147,0.958815,0.205307,-0.036404
4,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.588783,-1.013654,-0.011617,0.073867,0.020124


### Model Design

Architectures of interest:
+ Encoders: [FC100,ReLU,FC16]
+ Decision block: [FC1,Sigmoid]

In [18]:
from torch import nn

In [19]:
class Encoder(nn.Module):
    def __init__(self, in_features, out_features):
        super().__init__()

        self.in_features = in_features
        self.out_features = out_features
        
        self.fc1 = nn.Linear(in_features, 100)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(100, out_features)

    def forward(self, x,):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

In [20]:
class DecisionBlock(nn.Module):
    def __init__(self, in_features):
        super().__init__()
        self.fc = nn.Linear(in_features, 1)

    def forward(self, x):
        x = self.fc(x)
        return torch.sigmoid(x)

In [21]:
class Combiner(nn.Module):
    """Combines rule and data encoders and decision block."""
    
    def __init__(self, in_features, out_features, combination_method="concat"):
        super().__init__()

        self.method = combination_method # How to combine rule and data results
        
        self.data_encoder = Encoder(in_features = in_features, out_features = out_features)
        self.rule_encoder = Encoder(in_features = in_features, out_features = out_features)

        if self.method == "concat":
            decision_input_size = out_features * 2
        elif self.method == "add":
            decision_input_size = out_features
        
        self.decision_block = DecisionBlock(in_features = decision_input_size)

    def combine_encodings(self, rule_encoding, data_encoding, alpha_factor):
        if self.method == "add":
            x = self.add_encodings(rule_encoding=rule_encoding, data_encoding=data_encoding, alpha_factor=alpha_factor)
        elif self.method == "concat":
            x = self.concatenate_encodings(rule_encoding=rule_encoding, data_encoding=data_encoding, alpha_factor=alpha_factor)
        
        return x

    def add_encodings(self, rule_encoding, data_encoding, alpha_factor):
        x = (alpha_factor * rule_encoding) + ((1 - alpha_factor) * data_encoding)
        return x

    def concatenate_encodings(self, rule_encoding, data_encoding, alpha_factor):
        x = torch.cat([(alpha_factor * rule_encoding), ((1 - alpha_factor) * data_encoding)], dim = -1)
        return x

    def forward(self, x, alpha_factor=0.):
        rule_encoding = self.rule_encoder(x) # Get rule encoding
        data_encoding = self.data_encoder(x) # Get data encoding

        combined_encodings = self.combine_encodings(
            rule_encoding=rule_encoding,
            data_encoding=data_encoding,
            alpha_factor=alpha_factor
        ) # Combine rule and data encoding

        y = self.decision_block(combined_encodings) # Get final prediction
        
        return y

---

### Data Preparation

In [22]:
from torch.utils.data import TensorDataset, DataLoader

In [23]:
BATCH_SIZE = 32

In [24]:
# Prepare Datasets
train = TensorDataset(torch.as_tensor(X_train.values, dtype = torch.float32), torch.as_tensor(y_train.values, dtype = torch.float32))
test = TensorDataset(torch.as_tensor(X_test.values, dtype = torch.float32), torch.as_tensor(y_test.values, dtype = torch.float32))

In [25]:
# Prepare DataLoader
train_dl = DataLoader(train, batch_size = BATCH_SIZE, shuffle = True)
test_dl = DataLoader(test, batch_size = BATCH_SIZE, shuffle = True)

In [26]:
 # Sample DataLoader
a, b = next(iter(train_dl))

In [27]:
a.shape

torch.Size([32, 19])

In [28]:
a.type()

'torch.FloatTensor'

In [29]:
b.shape

torch.Size([32])

In [30]:
# Test model architectures
model = Combiner(in_features = 19, out_features = 16, combination_method = "add")

In [31]:
model(a, .9).shape

torch.Size([32, 1])

---

### Training

In [32]:
from sklearn.metrics import accuracy_score
from torch import optim
from torch.distributions import Beta, Uniform

In [33]:
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
EPOCHS = 1000
LR = 1e-3

In [34]:
# Instantiate model
model = Combiner(in_features = 19, out_features = 16, combination_method = "add").to(DEVICE)

In [35]:
# Loss function and optimizer
criterion = nn.BCELoss()
opt = optim.Adam(params=model.parameters(), lr = LR)

In [36]:
def rule_criterion(x, x_perturbed, y, y_perturbed):
    # MBP = diastolic blood pressure (DBP) + 1/3 [systolic blood pressure (SBP) – DBP].
    # BLOOD_PRESSURE = x[:,5] + ((x[:,4] - x[:,5]) / 3) # Calculate mean blood pressure
    BLOOD_PRESSURE = x[:,-1] + ((x[:,-2] - x[:,-1]) / 3) # Calculate mean blood pressure
    BLOOD_PRESSURE_PERTUBED = x_perturbed[:,-1] + ((x_perturbed[:,-2] - x_perturbed[:,-1]) / 3)
    
    metric = (y_perturbed - y) / (BLOOD_PRESSURE_PERTUBED - BLOOD_PRESSURE)
    
    return torch.relu(metric).mean()

In [37]:
# Test rule criterion
rule_criterion(a, a+100, b, b-1000).mean()

tensor(0.)

In [38]:
def get_loss(x, x_perturbed, y, y_perturbed, y_pred, alpha_factor = 0.):
    # L = αbLrule + (1 − αb)Ltask
    task_loss = criterion(y_pred, y)
    # rule_loss = rule_criterion(x, x_perturbed, y, y_perturbed)
    rule_loss = rule_criterion(x = x, x_perturbed = x_perturbed, y = y_pred, y_perturbed = y_perturbed)
    loss = (alpha_factor * rule_loss) + ((1 - alpha_factor) * task_loss)
    
    return task_loss, rule_loss, loss

In [39]:
# Instantiate Beta distribution instance
beta_distribution = Beta(torch.tensor([.1]), torch.tensor([.1]))

In [40]:
# Test Beta distribution instance
beta_distribution.sample()

tensor([0.0127])

In [41]:
def perturb_data(x, factor = .01):
    # Instantiate Uniform distribution instance
    uniform_distribution = Uniform(low = 0, high = factor)
    gamma = uniform_distribution.sample()

    delta_x = gamma * torch.pow(torch.pow(x, 2).sum(), .5)
    
    return (x + delta_x)

In [None]:
TRAIN_LOSSES, TEST_LOSSES = [], []
TRAIN_ACCS, TEST_ACCS = [], []

for epoch in range(EPOCHS):
    train_losses, test_losses = [], []
    train_accs, test_accs = [], []
    
    train_task_losses, test_task_losses = [], []
    train_rule_losses, test_rule_losses = [], []
    
    model.train()
    
    for X, y in iter(train_dl):
        opt.zero_grad()
        X, y = X.to(DEVICE), y.to(DEVICE)

        # Get alpha factor
        alpha_factor = beta_distribution.sample().to(DEVICE)
        
        y_pred = model(x = X.to(DEVICE), alpha_factor = alpha_factor).squeeze()

        X_perturbed = perturb_data(X, .1)
        y_perturbed = model(x = X_perturbed, alpha_factor = alpha_factor)

        # Get task and rule losses
        
        train_task_loss, train_rule_loss, train_loss = get_loss(
            x = X, x_perturbed = X_perturbed,
            y = y, y_perturbed = y_perturbed,
            y_pred = y_pred, alpha_factor = alpha_factor
        )
        
        train_loss.backward()
        opt.step()
        
        train_losses.append(train_loss.item())
        
        train_task_losses.append(train_task_loss.item())
        train_rule_losses.append(train_rule_loss.item())

        train_acc = accuracy_score(y.cpu().numpy(), (y_pred.detach().cpu().numpy() > .5).astype(int))
        train_accs.append(train_acc)

    with torch.no_grad():
        model.eval()
        for X_, y_ in iter(test_dl):
            X_, y_ = X_.to(DEVICE), y_.to(DEVICE)

            alpha_factor = beta_distribution.sample().to(DEVICE)
            
            y_p = model(x = X_, alpha_factor = alpha_factor).squeeze()

            X__perturbed = perturb_data(X_, .1)
            y__perturbed = model(x = X__perturbed, alpha_factor = alpha_factor)
            
            test_task_loss, test_rule_loss, test_loss = get_loss(
                x = X_, x_perturbed = X__perturbed,
                y = y_, y_perturbed = y__perturbed,
                y_pred = y_p, alpha_factor = alpha_factor
            )
            
            test_losses.append(test_loss.item())

            test_task_losses.append(test_task_loss.item())
            test_rule_losses.append(test_rule_loss.item())

            test_acc = accuracy_score(y_.cpu().numpy(), (y_p.detach().cpu().numpy() > .5).astype(int))
            test_accs.append(test_acc)
        
    avg_train_loss = sum(train_losses) / len(train_losses)
    avg_test_loss = sum(test_losses) / len(test_losses)

    avg_train_acc = sum(train_accs) / len(train_accs)
    avg_test_acc = sum(test_accs) / len(test_accs)

    # Average task losses
    avg_train_task_loss = sum(train_task_losses) / len(train_task_losses)
    avg_test_task_loss = sum(test_task_losses) / len(test_task_losses)

    # Average rule losses
    avg_train_rule_loss = sum(train_rule_losses) / len(train_rule_losses)
    avg_test_rule_loss = sum(test_rule_losses) / len(test_rule_losses)

    # print(
    #     f"Epoch: {epoch+1} | Train loss: {avg_train_loss: .3f} (Task: {avg_train_task_loss: .3f} | Rule: {avg_train_rule_loss: .3f}) |",
    #     f" Test loss: {avg_test_loss: .3f} (Task: {avg_test_task_loss: .3f} | Rule: {avg_test_rule_loss: .3f}) |",
    #     f"\n\tTrain accuracy: {avg_train_acc: .3f} | Test accuracy: {avg_test_acc: .3f}"
    # )
    print(
        f"Epoch: {epoch+1} | Train loss: {avg_train_loss: .3f} | Test loss: {avg_test_loss: .3f} |",
        f" Train accuracy: {avg_train_acc: .3f} | Test accuracy: {avg_test_acc: .3f}"
    )

    TRAIN_LOSSES.append(avg_train_loss)
    TEST_LOSSES.append(avg_test_loss)

    TRAIN_ACCS.append(avg_train_acc)
    TEST_ACCS.append(avg_test_acc)

Epoch: 1 | Train loss:  0.357 | Test loss:  0.329 |  Train accuracy:  0.596 | Test accuracy:  0.601
Epoch: 2 | Train loss:  0.374 | Test loss:  0.364 |  Train accuracy:  0.598 | Test accuracy:  0.563
Epoch: 3 | Train loss:  0.324 | Test loss:  0.330 |  Train accuracy:  0.598 | Test accuracy:  0.611
Epoch: 4 | Train loss:  0.298 | Test loss:  0.307 |  Train accuracy:  0.613 | Test accuracy:  0.613
Epoch: 5 | Train loss:  0.319 | Test loss:  0.411 |  Train accuracy:  0.622 | Test accuracy:  0.619
Epoch: 6 | Train loss:  0.296 | Test loss:  0.289 |  Train accuracy:  0.622 | Test accuracy:  0.620
Epoch: 7 | Train loss:  0.353 | Test loss:  0.311 |  Train accuracy:  0.616 | Test accuracy:  0.627
Epoch: 8 | Train loss:  0.297 | Test loss:  0.326 |  Train accuracy:  0.625 | Test accuracy:  0.633
Epoch: 9 | Train loss:  0.319 | Test loss:  0.342 |  Train accuracy:  0.623 | Test accuracy:  0.634
Epoch: 10 | Train loss:  0.308 | Test loss:  0.306 |  Train accuracy:  0.630 | Test accuracy:  0.633