In [1]:
import os, sys
# Make the parent of the current working directory importable so that the
# project package (`Sparse`) can be imported from this notebook.
project_dir = os.path.join(os.getcwd(),'..')
if project_dir not in sys.path:
    sys.path.append(project_dir)

# NOTE(review): a class named `LinearCD` is also defined in a later cell of this
# notebook; once that cell runs it shadows the class imported here.
from Sparse.modules.variational import LinearCD
import torch

In [111]:
from torch import nn
import torch.nn.functional as F
import numpy as np

class LinearCD(nn.Linear):
    r'''
        Linear layer with Concrete Dropout regularization on its input features.

        Code strongly inspired by: 
            https://github.com/danielkelshaw/ConcreteDropout/blob/master/condrop/concrete_dropout.py

        One drop logit (``logit_p``) is learned per input feature:

            * in training mode the input is multiplied by a relaxed (concrete)
              Bernoulli keep-mask sampled from ``sigmoid(logit_p)``;
            * in evaluation mode the weight columns of every feature whose
              ``logit_p`` is not below ``logit_threshold`` are zeroed out.

        Note the relationship between the weight regularizer (w_reg) and dropout regularization (drop_reg):
        
            w_reg/drop_reg = (l^2)/2 
        
        with prior lengthscale l (number of in_features). 
        
        Note also that the factor of two should be ignored for cross-entropy loss, and used only for the
        Euclidean loss.

        Args:
            in_features: size of each input sample.
            out_features: size of each output sample.
            bias: whether the layer learns an additive bias.
            threshold: drop probability at or above which a feature is
                discarded in evaluation mode (stored as a logit).
            init_min: lower bound of the uniform range used to initialise the
                drop probabilities.
            init_max: upper bound of that range.
            temperature: temperature of the concrete relaxation.
    '''
    def __init__(self, in_features, out_features, bias=True, threshold=.95, init_min=0.5, init_max=0.51,
                 temperature=.1):
        super(LinearCD, self).__init__(in_features, out_features, bias)
        logit_init_min = np.log(init_min) - np.log(1. - init_min)
        logit_init_max = np.log(init_max) - np.log(1. - init_max)

        # Logit of the probability of deactivating each input feature, drawn
        # uniformly between the logits of init_min and init_max.
        self.logit_p = nn.Parameter(torch.rand(in_features) * (logit_init_max - logit_init_min) + logit_init_min)
        self.logit_threshold = np.log(threshold) - np.log(1. - threshold)
        self.temperature = temperature

        # Filled in by the first training-mode forward pass; see reg().
        self.activation_reg = None

    def forward(self, x):
        '''Apply the layer: stochastic feature mask when training, hard feature mask otherwise.'''
        if self.training:
            return F.linear(self.concrete_bernoulli(x), self.weight, self.bias)

        # Broadcasts over the rows of `weight`, zeroing whole input-feature columns.
        keep_mask = (self.logit_p < self.logit_threshold).to(self.weight.dtype)
        return F.linear(x, self.weight * keep_mask, self.bias)

    def concrete_bernoulli(self, x):
        '''
            Multiply ``x`` by a freshly sampled relaxed Bernoulli keep-mask.

            While the module is in training mode, the mean (over samples) number
            of active features in the mask is stored in ``activation_reg``.
            The masked input is returned as is: no rescaling by the retain
            probability is applied.
        '''
        eps = 1e-8
        # rand_like follows x's shape, dtype and device, so the noise can never
        # end up on a different device than the input.
        unif_noise = torch.rand_like(x)

        p = torch.sigmoid(self.logit_p)

        drop_logit = (torch.log(p + eps) - torch.log((1. - p) + eps)
                      + torch.log(unif_noise + eps) - torch.log((1. - unif_noise) + eps))
        drop_prob = torch.sigmoid(drop_logit / self.temperature)

        random_tensor = 1 - drop_prob

        if self.training:
            # Penalizing the number of features activated: sum over the feature
            # (last) axis, then average over everything else.
            self.activation_reg = random_tensor.sum(dim=-1).mean()

        return torch.mul(x, random_tensor)

    def reg(self):
        '''Return the regularisation term of the last training forward pass (zero if there was none).'''
        if self.activation_reg is None:
            return self.logit_p.new_zeros(())
        return self.activation_reg

# Breast Cancer Wisconsin Dataset

In [277]:
from sklearn import datasets
from torch.utils.data import Dataset, DataLoader

class BreastCancer(Dataset):
    r'''
        Breast Cancer Wisconsin Dataset

        Wraps ``sklearn.datasets.load_breast_cancer`` as a torch ``Dataset``
        yielding ``(features, target)`` pairs.

        Args:
            normalize: when True, every feature column is divided by its
                maximum over the whole dataset.
    '''
    def __init__(self, normalize=False):
        dataset = datasets.load_breast_cancer()
        self.data = torch.tensor(dataset.data).float()
        # Class indices must be int64 for nn.CrossEntropyLoss; the dtype numpy
        # gives the targets is platform-dependent, so cast explicitly.
        self.targets = torch.tensor(dataset.target).long()

        if normalize:
            col_max = torch.max(self.data, dim=0)[0]
            # A column whose maximum is zero is left unscaled instead of
            # being turned into NaN/inf by a division by zero.
            self.data /= torch.where(col_max == 0, torch.ones_like(col_max), col_max)

    def __getitem__(self, idx):
        return self.data[idx], self.targets[idx]

    def __len__(self):
        return len(self.data)

In [319]:
import torch
from torch import nn

class Model(nn.Module):
    r'''
        Small classifier whose first layer is a ``LinearCD`` feature selector.

        Architecture: LinearCD(in_features -> nb_features, no bias) -> ReLU
        -> Linear(nb_features -> nb_features // 2) -> ReLU
        -> Linear(nb_features // 2 -> nb_classes).

        Args:
            nb_features: width of the first hidden layer.
            threshold: evaluation-time drop threshold forwarded to ``LinearCD``.
            in_features: number of input features (30 by default).
            nb_classes: number of output logits (2 by default).
    '''
    def __init__(self, nb_features, threshold = .75, in_features=30, nb_classes=2):
        super(Model, self).__init__()
        self.model = nn.Sequential(
            LinearCD(in_features, nb_features, bias=False, threshold=threshold),
            nn.ReLU(),
            nn.Linear(nb_features, nb_features//2),
            nn.ReLU(),
            nn.Linear(nb_features//2, nb_classes)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        '''Return the unnormalised class scores (logits) for ``x``.'''
        return self.model(x)

In [332]:
# NOTE(review): normalisation uses column maxima of the full dataset, i.e. it is
# computed before the train/eval split below.
dataset = BreastCancer(normalize=True)

eval_len = len(dataset) // 5 # 20% of the dataset
# Seed the split so that train/eval membership is the same on every run.
train_set, eval_set = torch.utils.data.random_split(
    dataset,
    [len(dataset) - eval_len, eval_len],
    generator=torch.Generator().manual_seed(0),
)

# Evaluation loader: shuffling brings nothing here and makes inspected batches
# differ between runs, so it is turned off.
loader = DataLoader(eval_set, batch_size=128, shuffle=False)

In [367]:
from tqdm import tqdm

def train(model, dataset, batch_size = 128, n_epochs=10, lr=1e-3, reg_init=1e-6, reg_step=1e-5, reg_max=5e-2):
    '''
        Train ``model`` on ``dataset`` with cross-entropy plus the LinearCD regulariser.

        The regularisation weight starts at ``reg_init`` and is increased by
        ``reg_step`` at the start of every epoch, capped at ``reg_max``.

        Args:
            model: module to optimise; moved to CUDA when available, CPU otherwise.
            dataset: dataset yielding ``(inputs, targets)`` pairs.
            batch_size: mini-batch size of the (shuffled) training loader.
            n_epochs: number of passes over ``dataset``.
            lr: Adam learning rate.
            reg_init: initial weight of the regularisation term.
            reg_step: per-epoch increment of that weight.
            reg_max: upper bound of that weight.

        Returns:
            The trained model (on the training device, left in training mode).
    '''
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = model.to(device)
    # Make sure the stochastic masks are active even if the caller previously
    # switched the model to eval mode; otherwise LinearCD.reg() would be stale.
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    reg = reg_init

    epoch_iterator = tqdm(
            range(n_epochs),
            leave=True,
            unit="epoch",
            postfix={"tls": "%.4f" % 1},
        )

    # Every concrete-dropout layer contributes its own regularisation term.
    cd_layers = [module for module in model.modules() if isinstance(module, LinearCD)]

    for _ in epoch_iterator:
        reg = min(reg + reg_step, reg_max)
        for idx, (inputs, targets) in enumerate(loader):
            optimizer.zero_grad()

            inputs = inputs.to(device)
            targets = targets.to(device)
            pred = model(inputs)

            # Must be read after the forward pass, which refreshes each layer's term.
            reg_value = sum(layer.reg() for layer in cd_layers)

            loss = criterion(pred, targets) + reg*reg_value
            loss.backward()
            optimizer.step()

            if idx % 10 == 0:
                epoch_iterator.set_postfix(tls="%.4f" % loss.item())

    # Final regularisation weight reached by the schedule.
    print(reg)
    return model

In [368]:
# Build a classifier with a 512-unit first hidden layer and fit it on the
# training split for 500 epochs.
model = train(Model(512, threshold=.75), train_set, n_epochs=500)

100%|██████████| 500/500 [00:09<00:00, 53.40epoch/s, tls=0.1820]

0.005000999999999965





In [369]:
# Boolean mask over the input features: True where the learned drop probability is below 0.5.
(torch.sigmoid(model.model[0].logit_p).detach().cpu().numpy() < .5)

array([ True,  True, False,  True, False, False,  True,  True, False,
        True,  True, False,  True,  True, False,  True,  True, False,
       False,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True, False])

In [370]:
# Rank the input features by learned drop probability, lowest (most retained) first.
features_score, index = torch.sort(torch.sigmoid(model.model[0].logit_p))

# Column names of the dataset, reordered to match the sorted scores.
feature_columns = datasets.load_breast_cancer(as_frame=True).data.columns
features_names = feature_columns[index.cpu()]

print('Features:{}'.format(features_names))
print('Features Score:{}'.format(features_score))

Features:['worst area' 'area error' 'mean concave points' 'worst concave points'
 'mean concavity' 'mean area' 'mean fractal dimension' 'concavity error'
 'worst perimeter' 'worst compactness' 'worst texture' 'mean texture'
 'worst concavity' 'worst radius' 'fractal dimension error' 'radius error'
 'compactness error' 'worst smoothness' 'worst symmetry' 'perimeter error'
 'mean radius' 'mean perimeter' 'smoothness error' 'mean smoothness'
 'mean symmetry' 'symmetry error' 'mean compactness' 'texture error'
 'concave points error' 'worst fractal dimension']
Features Score:tensor([0.3493, 0.3732, 0.3772, 0.3806, 0.4115, 0.4185, 0.4258, 0.4310, 0.4372,
        0.4438, 0.4445, 0.4451, 0.4456, 0.4488, 0.4500, 0.4536, 0.4812, 0.4842,
        0.4843, 0.4861, 0.4898, 0.5139, 0.5196, 0.5216, 0.5275, 0.5303, 0.5307,
        0.5345, 0.5371, 0.5551], device='cuda:0', grad_fn=<SortBackward0>)


In [325]:

Features:['area error' 'mean concave points' 'worst concavity' 'mean concavity'
 'worst area' 'worst concave points' 'fractal dimension error'
 'compactness error' 'worst smoothness' 'mean compactness'
 'worst compactness' 'mean texture' 'concave points error' 'mean area'
 'mean radius' 'radius error' 'mean symmetry' 'worst texture'
 'mean perimeter' 'smoothness error' 'mean smoothness' 'worst radius'
 'mean fractal dimension' 'texture error' 'worst perimeter'
 'worst fractal dimension' 'symmetry error' 'worst symmetry'
 'perimeter error' 'concavity error']
Features Score:tensor([0.1855, 0.2411, 0.2672, 0.2806, 0.2903, 0.2936, 0.2979, 0.3000, 0.3354,
        0.3424, 0.3433, 0.3501, 0.3506, 0.3542, 0.3587, 0.3588, 0.3607, 0.3622,
        0.3638, 0.3640, 0.3642, 0.3652, 0.3667, 0.3734, 0.3784, 0.3801, 0.3804,
        0.3987, 0.4080, 0.4085], device='cuda:0', grad_fn=<SortBackward0>)

SyntaxError: invalid syntax (3944691740.py, line 10)

In [371]:
# Draw one batch (features, targets) from the evaluation loader.
x, y = next(iter(loader))

In [375]:
# Override the evaluation-time drop threshold of the first (LinearCD) layer;
# it is stored as a logit, hence the log-odds transform.
threshold = .6
model.model[0].logit_threshold = torch.tensor(np.log(threshold) - np.log(1. - threshold))
model.eval()

# Run on whichever device the model lives on instead of hard-coding CUDA, and
# skip autograd bookkeeping since this is pure inference.
device = next(model.parameters()).device
with torch.no_grad():
    predictions = torch.argmax(torch.softmax(model(x.to(device)), dim=1), dim=1)
predictions

tensor([1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1,
        1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0,
        0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
        0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
        0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0')

In [376]:
# Ground-truth targets of the batch drawn from `loader`, for comparison with the predictions.
y

tensor([1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1,
        1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0,
        0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1,
        0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1,
        0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1])

In [377]:
# Accuracy on the evaluation batch. The model is put in eval mode here so the
# cell does not depend on an earlier cell having done it, and the batch is moved
# to the model's device rather than assuming CUDA.
model.eval()
device = next(model.parameters()).device
with torch.no_grad():
    accuracy = (torch.argmax(torch.softmax(model(x.to(device)), dim=1), dim=1) == y.to(device)).float().mean()
accuracy

tensor(0.9646, device='cuda:0')

In [266]:
# Scratch check: total number of samples in the dataset.
len(dataset)

569

In [273]:
# Scratch check: 20% of the 569 samples, truncated to an integer.
int(569 * .2)

113

In [276]:
# Scratch check: the same evaluation-split size via integer division, as used for `eval_len`.
569 // 5

113

In [385]:
# Mean of the first layer's weights along axis 0, giving one value per input feature.
model.model[0].weight.mean(axis=0)

tensor([-0.0150, -0.0165, -0.0161, -0.0251, -0.0167, -0.0141, -0.0267, -0.0231,
        -0.0137, -0.0221, -0.0198, -0.0128, -0.0275, -0.0310, -0.0075, -0.0228,
        -0.0126, -0.0235, -0.0144, -0.0143, -0.0109, -0.0138, -0.0307, -0.0285,
        -0.0140, -0.0238, -0.0226, -0.0229, -0.0124, -0.0095], device='cuda:0',
       grad_fn=<MeanBackward1>)