In [1]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from tqdm import trange
from pymoo.factory import get_reference_directions
from pymoo.factory import get_performance_indicator

from phn import EPOSolver, LinearScalarizationSolver
from phn.utils import *

from transformer.Network import *

In [2]:
no_cuda = True
device = torch.device("cuda" if torch.cuda.is_available() and not no_cuda else "cpu")

In [3]:
def circle_points(K, min_angle=None, max_angle=None):
    # generate evenly distributed preference vector
    ang0 = 1e-6 if min_angle is None else min_angle
    ang1 = np.pi / 2 - ang0 if max_angle is None else max_angle
    angles = np.linspace(ang0, ang1, K, endpoint=True)
    x = np.cos(angles)
    y = np.sin(angles)
    return np.c_[x, y]

In [4]:
@torch.no_grad()
def evaluate(hypernet, targetnet, loader, rays, device, criterion):
    hypernet.eval()
    results = {"ray" : [], "loss" : []}
    
    for ray in rays:
        ray = torch.from_numpy(ray.astype(np.float32)).to(device)

        ray /= ray.sum()

        full_losses = []
        for batch in loader:
            hypernet.zero_grad()

            batch = (t.to(device) for t in batch)
            X, Y = batch

            weights = hypernet(ray)
            
            transferParameters(targetnet, weights)
            
            pred = targetnet(X)
            pred = convertIntoBinary(pred)  # pred in 0 or 1

            # Loss
            curr_losses = criterion(pred, Y)
            
            # Metrics
            ray = ray.squeeze(0)

            # Losses
            full_losses.append(curr_losses.detach().cpu().numpy())


        results["ray"].append(ray.cpu().numpy().tolist())
        results["loss"].append(np.array(full_losses).mean(0).tolist())
        
    hv = get_performance_indicator(
        "hv",
        ref_point=np.ones(
            7,
        ),
    )
    results["hv"] = hv.do(np.array(results["loss"]))

    return results

In [5]:
def train(net, loaders, criterions, solver_type: str, hidden_dim: int, no_val_eval: bool, no_test_eval: bool, alpha: float, 
          n_rays: int, epochs: int, lr: float, wd: float):
    # ----
    # Hypernetwork
    # ----
    hnet = HyperNet(ray_hidden_dim=hidden_dim, model=net)

    net = net.to(device)
    hnet = hnet.to(device)

    optimizer = torch.optim.Adam(hnet.parameters(), lr=lr, weight_decay=wd)

    # ------
    # Solver
    # ------
    solvers = dict(ls=LinearScalarizationSolver, epo=EPOSolver)

    solver_method = solvers[solver_type]
    if solver_type == "epo":
        nb_params = sum(p.numel() for p in hnet.parameters() if p.requires_grad)
        solver = solver_method(n_tasks=2, n_params=nb_params)
    else:
        # ls
        solver = solver_method(n_tasks=2)

    # ----
    # Data
    # ----
    train_loader = loaders['train_loader']
    val_loader = loaders['val_loader']
    test_loader = loaders['test_loader']
    
    # ----
    # Losses
    # ----
    train_criterion = criterions['train_loss']
    val_criterion = criterions['val_loss']
    test_criterion = criterions['test_loss']
    
    # ----------
    # Train loop
    # ----------
    epoch_iter = trange(epochs)
 
    for epoch in epoch_iter:
        for batch in train_loader:
            hnet.train()
            optimizer.zero_grad()
            
            X, Y = batch
            X = X.to(device)
            Y = Y.to(device)
            
            # Compute ray
            if alpha > 0:
                ray = torch.from_numpy(
                    np.random.dirichlet([alpha] * 2, 1).astype(np.float32).flatten()
                ).to(device)
            else:
                alpha = torch.empty(
                    1,
                ).uniform_(0.0, 1.0).item()
                ray = torch.tensor([alpha, 1 - alpha]).to(device)
   
            weights = hnet(ray)  
            
            # Save model paramaters
            params, parameter_names = save_model_parameters(net) 
            # This operation does not break the back propagation graph. 
            # But 'net.load_state_dict(weights)' will
            transferParameters(net, weights) 
            
            pred = net(X)
            pred = convertIntoBinary(pred)  # pred in 0 or 1
                  
            losses = train_criterion(pred, Y)

            ray = ray.squeeze(0)
            # If you are not willingly using all of you layer, put allow_unused=True at line 78 in phn.solver.py
            loss = solver(losses, ray, list(hnet.parameters()), feat=X, label=Y, model=net, weights=weights, 
                          criterion=train_criterion)       

            # Load model paramaters
            # We can't just use 'net.load_state_dict(weights)' because the state_dict is already empty
            load_model_parameters(params, parameter_names, weights) 

            loss.backward()
            optimizer.step()
                    
            epoch_iter.set_description(
                f"total weighted loss: {loss.item():.3f}"
                #f", ray {ray.cpu().numpy().tolist()}"
            )          

    min_angle = 0.1
    max_angle = np.pi / 2 - 0.1
    test_rays = circle_points(n_rays, min_angle=min_angle, max_angle=max_angle)
    
    results = dict()
    if not no_val_eval:
        eval_epoch_results = evaluate(
            hypernet=hnet,
            targetnet=net,
            loader=val_loader,
            rays=test_rays,
            device=device,
            criterion=val_criterion
        )
        results['val'] = eval_epoch_results
    
    if not no_test_eval:
        test_epoch_results = evaluate(
            hypernet=hnet,
            targetnet=net,
            loader=test_loader,
            rays=test_rays,
            device=device,
            criterion=test_criterion
        )
        results['test'] = test_epoch_results
    
        
    return net, results

In [6]:
from experiments.multimnist.data import Dataset

from phn.Hypernetwork import HyperNet

In [7]:
df = pd.read_csv('data/pre_processed_dataset.csv')

# Train = 0.6
# Val = 0.1
# Test = 0.3
from sklearn.model_selection import train_test_split

samplelist = df["Patient_ID"].unique()
training_samp, split_samp = train_test_split(samplelist, train_size=0.6, test_size=0.4, random_state=5, shuffle=True)
validation_samp, test_samp = train_test_split(samplelist, train_size=0.25, test_size=0.75, random_state=5, shuffle=True)
    
train_df = df[df['Patient_ID'].isin(training_samp)]
val_df = df[df['Patient_ID'].isin(validation_samp)]
test_df = df[df['Patient_ID'].isin(test_samp)]

In [8]:
def splitDatasetPerPatient(dataset, window_size=6):
    data = []
    label = []

    for patientId in dataset['Patient_ID'].unique():
        tmp_data = dataset[dataset['Patient_ID'] == patientId]
        if(len(tmp_data) >= window_size):
            data.append(tmp_data.drop(['Hour', 'Patient_ID', 'SepsisLabel'], axis=1).to_numpy())
            label.append(tmp_data['SepsisLabel'].to_numpy())
            
    return data, label

In [9]:
train_data, train_label = splitDatasetPerPatient(train_df)
val_data, val_label = splitDatasetPerPatient(val_df)
test_data, test_label = splitDatasetPerPatient(test_df)

In [10]:
def toTimeSeriesDataloader(feat, label, window_size=6):
    data_labels = []

    # One patient per batch
    data_loader = []

    for i in range(len(feat)):
        patient_data = feat[i]
        labels = label[i]
        X_data = []
        Y_data = []

        for j in range(len(patient_data) - (window_size - 1)):
            X_data.append(patient_data[j:(j + window_size)])
            Y_data.append([labels[(j + window_size - 1)]])
            data_labels.append(labels[(j + window_size - 1)])

        data_loader.append([torch.Tensor(X_data), torch.Tensor(Y_data)])
        
    return data_loader, data_labels

In [11]:
train_loader, train_label_ts = toTimeSeriesDataloader(train_data, train_label)
val_loader, val_label_ts = toTimeSeriesDataloader(val_data, val_label)
test_loader, test_label_ts = toTimeSeriesDataloader(test_data, test_label)

In [12]:
loaders = {'train_loader': train_loader, 'val_loader': val_loader, 'test_loader': test_loader}

In [13]:
def convertIntoBinary(pred):
    pred = nn.functional.sigmoid(pred) # pred in ]0, 1[
    pred = nn.functional.threshold(pred, 0.5, 0.0) # pred in 0 or ]0.5, 1[
    pred = torch.sub(torch.tensor(1.0), pred) # pred in ]0, 0.5[ or 1
    pred = nn.functional.threshold(pred, 0.5, 0.0) # pred in 0 or 1 (reverted)
    pred = torch.sub(torch.tensor(1.0), pred) # pred in 0 or 1
    return pred

In [14]:
'''
# Our old version => Specific to the transfomer
def transferParameters(model, weights):  
    for name in weights:
        obj = model._modules
        names = name.split('.')
        index = 0

        while(index < len(names) - 1):
            if (obj.__class__.__name__ == 'OrderedDict'): # Dict
                obj = obj[names[index]]
            elif (obj.__class__.__name__ == 'ModuleList'): # List
                obj = obj[int(names[index])]
            else: # Object
                obj = getattr(obj, names[index])

            index += 1

        if(names[index] == 'weight'):
            del obj.weight           
            obj.weight = weights[name]
        elif(names[index] == 'bias'):
            del obj.bias
            obj.bias = weights[name]
        elif(names[index] == 'pe'):
            del obj.pe
            obj.pe = weights[name]
'''

def transferParameters(model, weights):  
    deleted_parameters = []
    deleted_buffers = []
    
    for full_name, m in model.named_modules():
        # Get parameters reference
        for _, _ in m.named_parameters(recurse=False):
            deleted_parameters.append((full_name, m))
             
        # Get buffers reference
        for n, _ in m.named_buffers(recurse=False):
            if (getattr(m, n).__class__.__name__ == 'Tensor'): # Maybe adding Linear or Parameter for some cases
                deleted_buffers.append((full_name, n, m))

        # Transfer parameters
        for name, parameter in deleted_parameters:
            del parameter.weight
            parameter.weight = weights[name + '.weight']
            if(parameter.bias != None):
                del parameter.bias
                parameter.bias = weights[name + '.bias']
          
        # Transfer buffers
        for name, attribute, buffer in deleted_buffers:
            delattr(buffer, attribute)
            setattr(buffer, attribute, weights[name + '.' + attribute])

In [35]:
# Preference vector 
alphas = [45.0, 90.0, 180.0] 

# Output
trained_nets = []
results = []

# Loss
trainLossWeight = torch.tensor([train_label_ts.count(0) / train_label_ts.count(1)]).to(device)
valLossWeight = torch.tensor([val_label_ts.count(0) / val_label_ts.count(1)]).to(device)
testLossWeight = torch.tensor([test_label_ts.count(0) / test_label_ts.count(1)]).to(device)
criterions = dict()
criterions['train_loss'] = nn.BCEWithLogitsLoss(pos_weight = trainLossWeight)
criterions['val_loss'] = nn.BCEWithLogitsLoss(pos_weight = valLossWeight)  
criterions['test_loss'] = nn.BCEWithLogitsLoss(pos_weight = testLossWeight)  

for alpha in alphas:
    net = Transformer(dim_val=10, dim_attn=2, input_size=12, dec_seq_len=6, out_seq_len=1, 
                      n_decoder_layers=0, n_encoder_layers=4, n_heads=4)

    trained_net, result = train(
        net=net,
        loaders=loaders,
        criterions = criterions,
        solver_type="epo",       
        hidden_dim=10, #100
        no_val_eval=True,
        no_test_eval=True,
        alpha=alpha,
        n_rays=20, #25
        epochs=6,
        lr=0.002,
        wd=0.0
    )
    
    trained_nets.append(trained_net)
    results.append(result)

total weighted loss: 1.386: 100%|██████████| 6/6 [5:08:14<00:00, 3082.38s/it]   
total weighted loss: 1.386: 100%|██████████| 6/6 [5:10:46<00:00, 3107.82s/it]   
total weighted loss: 2.627: 100%|██████████| 6/6 [5:13:16<00:00, 3132.71s/it]   


<h1> Score</h1>

In [26]:
load_result = False

if load_result:
    trained_nets = []
    trained_nets.append(torch.load("weights/hn_0.0", map_location=torch.device(device)))
    trained_nets.append(torch.load("weights/hn_0.1", map_location=torch.device(device)))
    trained_nets.append(torch.load("weights/hn_0.2", map_location=torch.device(device)))
    trained_nets.append(torch.load("weights/hn_0.3", map_location=torch.device(device)))
    trained_nets.append(torch.load("weights/hn_0.4", map_location=torch.device(device)))
    trained_nets.append(torch.load("weights/hn_0.5", map_location=torch.device(device)))
    trained_nets.append(torch.load("weights/hn_0.6", map_location=torch.device(device)))
    trained_nets.append(torch.load("weights/hn_0.7", map_location=torch.device(device)))
    trained_nets.append(torch.load("weights/hn_0.8", map_location=torch.device(device)))
    trained_nets.append(torch.load("weights/hn_0.9", map_location=torch.device(device)))
    trained_nets.append(torch.load("weights/hn_1.0", map_location=torch.device(device)))

In [36]:
from metric import *

BCEResult = []
PhysionetLoss = []
sumOfWeight = []

testLossWeight = torch.tensor([test_label_ts.count(0) / test_label_ts.count(1)]).to(device)
criterion = nn.BCEWithLogitsLoss(pos_weight = testLossWeight)

for net in trained_nets:
    test_preds = []
    loss_preds = []
    
    for b in test_loader:
        X, Y = b
        X = X.to(device)
        Y = Y.to(device)
        
        pred = net(X)
        pred = convertIntoBinary(pred)  # pred in 0 or 1
  
        losses = criterion(pred, Y)
        loss_preds = np.append(loss_preds, compute_prediction_utility(pred.detach().cpu().numpy().flatten(), Y))
        
        test_preds = np.append(test_preds, pred.detach().cpu().numpy().flatten())
             
    BCEResult.append(criterion(torch.tensor(test_preds).to(device), torch.tensor(test_label_ts).to(device)))
    PhysionetLoss.append(sum(loss_preds) / len(loss_preds))
    print("BCEWithLogits loss: ", BCEResult[-1])
    print("Physionet loss: ", PhysionetLoss[-1])

    res = torch.tensor(0.0)
    for _, w in net.state_dict().items():
        res = torch.add(res, w.abs().sum())

    print("sum of weights: ", res)
    sumOfWeight.append(res)

BCEWithLogits loss:  tensor(1.3609, dtype=torch.float64)
Physionet loss:  -0.030669046674600135
sum of weights:  tensor(10168.8574)
BCEWithLogits loss:  tensor(1.3609, dtype=torch.float64)
Physionet loss:  -0.030669046674600135
sum of weights:  tensor(11141.1660)
BCEWithLogits loss:  tensor(1.5967, dtype=torch.float64)
Physionet loss:  -9.497682430546403
sum of weights:  tensor(9874.4316)


In [37]:
BCEResult

[tensor(1.3609, dtype=torch.float64),
 tensor(1.3609, dtype=torch.float64),
 tensor(1.5967, dtype=torch.float64)]

In [38]:
PhysionetLoss

[-0.030669046674600135, -0.030669046674600135, -9.497682430546403]

In [39]:
sumOfWeight

[tensor(10168.8574), tensor(11141.1660), tensor(9874.4316)]