In [1]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from tqdm import trange
from pymoo.factory import get_reference_directions
from pymoo.factory import get_performance_indicator

from phn import EPOSolver, LinearScalarizationSolver

from transformer.utils import *
from transformer.Network import *

In [2]:
no_cuda = False
device = torch.device("cuda" if torch.cuda.is_available() and not no_cuda else "cpu")

In [3]:
def circle_points(K, min_angle=None, max_angle=None):
    # generate evenly distributed preference vector
    ang0 = 1e-6 if min_angle is None else min_angle
    ang1 = np.pi / 2 - ang0 if max_angle is None else max_angle
    angles = np.linspace(ang0, ang1, K, endpoint=True)
    x = np.cos(angles)
    y = np.sin(angles)
    return np.c_[x, y]

In [4]:
@torch.no_grad()
def evaluate(hypernet, targetnet, loader, rays, device, lossWeight):
    hypernet.eval()
    results = {"ray" : [], "loss" : []}
    
    for ray in rays:
        ray = torch.from_numpy(ray.astype(np.float32)).to(device)

        ray /= ray.sum()

        total = 0.0
        full_losses = []
        for batch in loader:
            hypernet.zero_grad()

            batch = (t.to(device) for t in batch)
            X, Y = batch
            bs = len(Y)

            weights = hypernet(ray)
            
            transferParameters(targetnet, weights)
            
            pred = targetnet(X)

            # loss
            criterion = nn.BCEWithLogitsLoss(pos_weight = lossWeight)
            curr_losses = criterion(pred, Y)
            
            # metrics
            ray = ray.squeeze(0)

            # losses
            full_losses.append(curr_losses.detach().cpu().numpy())
            total += bs

        results["ray"].append(ray.cpu().numpy().tolist())
        results["loss"].append(np.array(full_losses).mean(0).tolist())
        
    hv = get_performance_indicator(
        "hv",
        ref_point=np.ones(
            7,
        ),
    )
    results["hv"] = hv.do(np.array(results["loss"]))

    return results

In [204]:
def train(net, loaders, solver_type: str, hidden_dim: int, no_val_eval: bool, alpha: float, 
          n_rays: int, epochs: int, lr: float, wd: float):
    # ----
    # Hypernetwork
    # ----
    hnet = HyperNet(ray_hidden_dim=hidden_dim, model=net)

    net = net.to(device)
    hnet = hnet.to(device)

    optimizer = torch.optim.Adam(hnet.parameters(), lr=lr, weight_decay=wd)

    # ------
    # Solver
    # ------
    solvers = dict(ls=LinearScalarizationSolver, epo=EPOSolver)

    solver_method = solvers[solver_type]
    if solver_type == "epo":
        nb_params = sum(p.numel() for p in hnet.parameters() if p.requires_grad)
        solver = solver_method(n_tasks=2, n_params=nb_params)
    else:
        # ls
        solver = solver_method(n_tasks=2)

    # ----
    # Data
    # ----
    train_loader = loaders['train_loader']
    val_loader = loaders['val_loader']
    test_loader = loaders['test_loader']

    # ----------
    # Train loop
    # ----------
    epoch_iter = trange(epochs)

    val_results = dict()
    test_results = dict()

    trainLossWeight = torch.tensor([train_label_ts.count(0) / train_label_ts.count(1)]).to(device)
    valLossWeight = torch.tensor([val_label_ts.count(0) / val_label_ts.count(1)]).to(device)
    testLossWeight = torch.tensor([test_label_ts.count(0) / test_label_ts.count(1)]).to(device)
    for epoch in epoch_iter:
        for i, batch in enumerate(train_loader):
            hnet.train()
            optimizer.zero_grad()
            X, Y = batch
            X = X.to(device)
            Y = Y.to(device)
            
            if alpha > 0:
                ray = torch.from_numpy(
                    np.random.dirichlet([alpha] * 2, 1).astype(np.float32).flatten()
                ).to(device)
            else:
                alpha = torch.empty(
                    1,
                ).uniform_(0.0, 1.0).item()
                ray = torch.tensor([alpha, 1 - alpha]).to(device)
   
            weights = hnet(ray)  
            
            # Save state dict
            '''
            params = []
            buffers = []
            for m in net.modules():
                for n, _ in m.named_parameters(recurse=False):
                    params.append((m, n))

                for n, _ in m.named_buffers(recurse=False):
                    buffers.append((m, n))
            '''
       
            #net.load_state_dict(weights) # Will break the back propagation         
            transferParameters(net, weights)
            
            pred = net(X)
                  
            criterion = nn.BCEWithLogitsLoss(pos_weight = trainLossWeight)
            losses = criterion(pred, Y)

            ray = ray.squeeze(0)
            loss = solver(losses, ray, list(hnet.parameters()), feat=X, label=Y, model=net, weights=weights, 
                          lossWeight=trainLossWeight)
            # Load state dict
            '''
            for m, n in params:
                setattr(m, n, nn.parameter.Parameter(w)) 

            for m, n in buffers:
                setattr(m, n, nn.parameter.Parameter(w))
            '''

            loss.backward()
                    
            epoch_iter.set_description(
                f"total weighted loss: {loss.item():.3f}"
                #f", ray {ray.cpu().numpy().tolist()}"
            )
            
            optimizer.step()

    min_angle = 0.1
    max_angle = np.pi / 2 - 0.1
    test_rays = circle_points(n_rays, min_angle=min_angle, max_angle=max_angle)
    results = {}
    if not no_val_eval:
        epoch_results = evaluate(
            hypernet=hnet,
            targetnet=net,
            loader=val_loader,
            rays=test_rays,
            device=device,
            lossWeight=valLossWeight
        )
        results['val'] = epoch_results
    '''
    test_epoch_results = evaluate(
        hypernet=hnet,
        targetnet=net,
        loader=test_loader,
        rays=test_rays,
        device=device,
        lossWeight=testLossWeight
    )
    results['test'] = test_epoch_results
    '''
        
    return net, results

In [6]:
from experiments.multimnist.data import Dataset

from models import HyperNet

In [7]:
df = pd.read_csv('data/pre_processed_dataset.csv')

# Train = 0.6
# Val = 0.1
# Test = 0.3
from sklearn.model_selection import train_test_split

samplelist = df["Patient_ID"].unique()
training_samp, split_samp = train_test_split(samplelist, train_size=0.6, test_size=0.4, random_state=5, shuffle=True)
validation_samp, test_samp = train_test_split(samplelist, train_size=0.25, test_size=0.75, random_state=5, shuffle=True)
    
train_df = df[df['Patient_ID'].isin(training_samp)]
val_df = df[df['Patient_ID'].isin(validation_samp)]
test_df = df[df['Patient_ID'].isin(test_samp)]

In [8]:
def splitDatasetPerPatient(dataset, window_size=6):
    data = []
    label = []

    for patientId in dataset['Patient_ID'].unique():
        tmp_data = dataset[dataset['Patient_ID'] == patientId]
        if(len(tmp_data) >= window_size):
            data.append(tmp_data.drop(['Hour', 'Patient_ID', 'SepsisLabel'], axis=1).to_numpy())
            label.append(tmp_data['SepsisLabel'].to_numpy())
            
    return data, label

In [9]:
train_data, train_label = splitDatasetPerPatient(train_df)
val_data, val_label = splitDatasetPerPatient(val_df)
test_data, test_label = splitDatasetPerPatient(test_df)

In [10]:
def toTimeSeriesDataloader(feat, label, window_size=6):
    data_labels = []

    # One patient per batch
    data_loader = []

    for i in range(len(feat)):
        patient_data = feat[i]
        labels = label[i]
        X_data = []
        Y_data = []

        for j in range(len(patient_data) - (window_size - 1)):
            X_data.append(patient_data[j:(j + window_size)])
            Y_data.append([labels[(j + window_size - 1)]])
            data_labels.append(labels[(j + window_size - 1)])

        data_loader.append([torch.Tensor(X_data), torch.Tensor(Y_data)])
        
    return data_loader, data_labels

In [11]:
train_loader, train_label_ts = toTimeSeriesDataloader(train_data, train_label)
val_loader, val_label_ts = toTimeSeriesDataloader(val_data, val_label)
test_loader, test_label_ts = toTimeSeriesDataloader(test_data, test_label)

In [12]:
loaders = {'train_loader': train_loader, 'val_loader': val_loader, 'test_loader': test_loader}

In [13]:
def transferParameters(model, weights):  
    for name in weights:
        obj = model._modules
        names = name.split('.')
        index = 0

        while(index < len(names) - 1):
            if (obj.__class__.__name__ == 'OrderedDict'): # Dict
                obj = obj[names[index]]
            elif (obj.__class__.__name__ == 'ModuleList'): # List
                obj = obj[int(names[index])]
            else: # Object
                obj = getattr(obj, names[index])

            index += 1

        if(names[index] == 'weight'):
            del obj.weight           
            obj.weight = weights[name]
        elif(names[index] == 'bias'):
            del obj.bias
            obj.bias = weights[name]
        elif(names[index] == 'pe'):
            del obj.pe
            obj.pe = weights[name]

In [None]:
alphas = [0.5, 1.0, 0.0]
trained_nets = []
results = []

for alpha in alphas:
    net = Transformer(dim_val=10, dim_attn=2, input_size=12, dec_seq_len=6, out_seq_len=1, 
                      n_decoder_layers=1, n_encoder_layers=4, n_heads=4)

    trained_net, result = train(
        net=net,
        loaders=loaders,
        solver_type="epo",       
        hidden_dim=10, #100
        no_val_eval=True,
        alpha=alpha,
        n_rays=20, #25
        epochs=6,
        lr=1e-2,
        wd=0.0
    )
    
    trained_nets.append(trained_net)
    results.append(result)

In [150]:
my_weights = []
mod = trained_nets[0]._modules

def rec_save_ini():
    for m in trained_nets[0]._modules:
        rec_save(mod[m], 0)
        

def rec_save(obj, i):
    if(i < 20):
        if (obj.__class__.__name__ == 'OrderedDict'): # Dict
            for a in obj:
                rec_save(obj[a], i+1)
        elif (obj.__class__.__name__ == 'ModuleList'): # List
            for a in range(len(obj)):
                rec_save(obj[a], i+1)
        elif (obj.__class__.__name__ == 'EncoderLayer'):
            rec_save(obj.attn, i+1)
            rec_save(obj.fc1, i+1)
            rec_save(obj.fc2, i+1)
            rec_save(obj.norm1, i+1)
            rec_save(obj.norm2, i+1)
        elif (obj.__class__.__name__ == 'DecoderLayer'):
            rec_save(obj.attn1, i+1)
            rec_save(obj.attn2, i+1)
            rec_save(obj.fc1, i+1)
            rec_save(obj.fc2, i+1)
            rec_save(obj.norm1, i+1)
            rec_save(obj.norm2, i+1)
            rec_save(obj.norm3, i+1)
        elif (obj.__class__.__name__ == 'PositionalEncoding'):
            rec_save(obj.pe, i+1)
        elif (obj.__class__.__name__ == 'Linear'):
            rec_save(obj.weight, i+1)
            if (obj.bias != None):
                rec_save(obj.bias, i+1)
        elif (obj.__class__.__name__ == 'MultiHeadAttentionBlock'):
            rec_save(obj.heads, i+1)
            rec_save(obj.fc, i+1)
        elif (obj.__class__.__name__ == 'AttentionBlock'):
            rec_save(obj.value.fc1, i+1)
            rec_save(obj.key.fc1, i+1)
            rec_save(obj.query.fc1, i+1)
        elif (obj.__class__.__name__ == 'LayerNorm'):
            rec_save(obj.weight, i+1)
            if (obj.bias != None):
                rec_save(obj.bias, i+1)
            
        elif (obj.__class__.__name__ == 'Tensor' or obj.__class__.__name__ == 'Parameter'):
            my_weights.append(obj)
              
        else:
            print("ERREUR", obj.__class__.__name__)
    else:
        print("OUT")

In [151]:
rec_save_ini()

In [185]:
names = list(weights.keys())

if(len(my_weights) == len(weights)):
    for i in range(len(my_weights)):
        weights[names[i]] = my_weights[i]

In [None]:
#torch.save(weights, 'weights/hn_0.5')

<h1> Somme des poids </h1>

In [281]:
hn_00 = torch.load('weights/hn_0.0')
hn_05 = torch.load('weights/hn_0.5')
hn_10 = torch.load('weights/hn_1.0')

In [272]:
res = torch.tensor(0.0)

for _, w in hn_00.items():
    res = torch.add(res, w.abs().sum())
    
print(res)

tensor(15071.8516, grad_fn=<AddBackward0>)


In [273]:
res = torch.tensor(0.0)

for _, w in hn_05.items():
    res = torch.add(res, w.abs().sum())
    
print(res)

tensor(13979.3398, device='cuda:0', grad_fn=<AddBackward0>)


In [274]:
res = torch.tensor(0.0)

for _, w in hn_10.items():
    res = torch.add(res, w.abs().sum())
    
print(res)

tensor(15687.4170, grad_fn=<AddBackward0>)


<h1> Score</h1>

In [282]:
%%capture
net1 = Transformer(10, 2, 12, 6, 1, 1, 4, 4).to(device)
net2 = Transformer(10, 2, 12, 6, 1, 1, 4, 4).to(device)
net3 = Transformer(10, 2, 12, 6, 1, 1, 4, 4).to(device)

net1.load_state_dict(hn_00)
net1.eval()
net2.load_state_dict(hn_05)
net2.eval()
net3.load_state_dict(hn_10)
net3.eval()

In [284]:
from metric import *
test_preds1 = []
test_preds2 = []
test_preds3 = []

loss_preds1 = []
loss_preds2 = []
loss_preds3 = []

for b in test_loader:
    X, Y = b
    X = X.to(device)
    Y = Y.detach().cpu().numpy()

    pred1 = net1(X).detach().cpu().numpy()
    pred_np1 = np.where(pred1.flatten() >= 0.5, 1, 0)
    pred2 = net2(X).detach().cpu().numpy()
    pred_np2 = np.where(pred2.flatten() >= 0.5, 1, 0)
    pred3 = net3(X).detach().cpu().numpy()
    pred_np3 = np.where(pred3.flatten() >= 0.5, 1, 0)
    
    loss_preds1 = np.append(loss_preds1, compute_prediction_utility(pred_np1, Y))
    loss_preds2 = np.append(loss_preds2, compute_prediction_utility(pred_np2, Y))
    loss_preds3 = np.append(loss_preds3, compute_prediction_utility(pred_np3, Y))

    test_preds1 = np.append(test_preds1, pred1)
    test_preds2 = np.append(test_preds2, pred2)
    test_preds3 = np.append(test_preds3, pred3)

In [279]:
testLossWeight = torch.tensor([test_label_ts.count(0) / test_label_ts.count(1)]).to(device)
criterion = nn.BCEWithLogitsLoss(pos_weight = testLossWeight)

In [280]:
print(criterion(torch.tensor(test_preds1).to(device), torch.tensor(test_label_ts).to(device)))
print(criterion(torch.tensor(test_preds2).to(device), torch.tensor(test_label_ts).to(device)))
print(criterion(torch.tensor(test_preds3).to(device), torch.tensor(test_label_ts).to(device)))

tensor(1.4019, device='cuda:0', dtype=torch.float64)
tensor(1.3720, device='cuda:0', dtype=torch.float64)
tensor(1.4018, device='cuda:0', dtype=torch.float64)


In [286]:
print(sum(loss_preds1) / len(loss_preds1))
print(sum(loss_preds2) / len(loss_preds2))
print(sum(loss_preds3) / len(loss_preds3))

-0.030669046674600135
-0.030669046674600135
-0.030669046674600135


In [120]:
net = Transformer(10, 2, 12, 6, 1, 1, 4, 4).to(device)
hnet = HyperNet(model=net).to(device)

X, Y = next(iter(train_loader))
X = X.to(device)
Y = Y.to(device)

ray = torch.from_numpy(np.random.dirichlet([0.5] * 2, 1).astype(np.float32).flatten()).to(device)
weights = hnet(ray)

In [369]:
params = []
buffers = []
for m in net.modules():
    for n, _ in m.named_parameters(recurse=False):
        params.append((m, n))
              
    for n, _ in m.named_buffers(recurse=False):
        buffers.append((m, n))

In [370]:
transferParameters(net, weights)

In [371]:
pred = net(X)
            
criterion = nn.BCEWithLogitsLoss(pos_weight = torch.tensor([51.0]).to(device))
losses = criterion(pred, Y)

torch.autograd.grad(losses, list(hnet.parameters()), retain_graph=True)

(tensor([[ 0.0000,  0.0000],
         [ 0.0050,  0.0019],
         [-0.0049, -0.0018],
         [ 0.0000,  0.0000],
         [ 0.0000,  0.0000],
         [ 0.0000,  0.0000],
         [ 0.0000,  0.0000],
         [-0.0066, -0.0025],
         [ 0.0015,  0.0006],
         [ 0.0000,  0.0000],
         [ 0.0000,  0.0000],
         [ 0.0000,  0.0000],
         [-0.0029, -0.0011],
         [ 0.0000,  0.0000],
         [ 0.0000,  0.0000],
         [-0.0031, -0.0012],
         [ 0.0000,  0.0000],
         [-0.0064, -0.0024],
         [ 0.0000,  0.0000],
         [-0.0021, -0.0008],
         [ 0.0000,  0.0000],
         [-0.0033, -0.0012],
         [-0.0050, -0.0019],
         [ 0.0000,  0.0000],
         [ 0.0000,  0.0000],
         [ 0.0080,  0.0030],
         [ 0.0000,  0.0000],
         [ 0.0000,  0.0000],
         [ 0.0000,  0.0000],
         [ 0.0000,  0.0000],
         [-0.0144, -0.0054],
         [ 0.0070,  0.0026],
         [ 0.0045,  0.0017],
         [ 0.0107,  0.0040],
         [-0.0

In [None]:
for m, n, w in (params, weights):
    print(w)

In [376]:
for m, n in params:
    setattr(m, n, nn.parameter.Parameter(w)) 
    
for m, n in buffers:
    setattr(m, n, nn.parameter.Parameter(w)) 

In [121]:
for name, w in net.state_dict().items():
    print(name)

encs.0.attn.heads.0.value.fc1.weight
encs.0.attn.heads.0.key.fc1.weight
encs.0.attn.heads.0.query.fc1.weight
encs.0.attn.heads.1.value.fc1.weight
encs.0.attn.heads.1.key.fc1.weight
encs.0.attn.heads.1.query.fc1.weight
encs.0.attn.heads.2.value.fc1.weight
encs.0.attn.heads.2.key.fc1.weight
encs.0.attn.heads.2.query.fc1.weight
encs.0.attn.heads.3.value.fc1.weight
encs.0.attn.heads.3.key.fc1.weight
encs.0.attn.heads.3.query.fc1.weight
encs.0.attn.fc.weight
encs.0.fc1.weight
encs.0.fc1.bias
encs.0.fc2.weight
encs.0.fc2.bias
encs.0.norm1.weight
encs.0.norm1.bias
encs.0.norm2.weight
encs.0.norm2.bias
encs.1.attn.heads.0.value.fc1.weight
encs.1.attn.heads.0.key.fc1.weight
encs.1.attn.heads.0.query.fc1.weight
encs.1.attn.heads.1.value.fc1.weight
encs.1.attn.heads.1.key.fc1.weight
encs.1.attn.heads.1.query.fc1.weight
encs.1.attn.heads.2.value.fc1.weight
encs.1.attn.heads.2.key.fc1.weight
encs.1.attn.heads.2.query.fc1.weight
encs.1.attn.heads.3.value.fc1.weight
encs.1.attn.heads.3.key.fc1.weight

In [None]:
X, Y = next(iter(train_loader))

net = Transformer(10, 2, 12, 6, 1, 1, 4, 4)
hnet = TransformerHyper(model=net)

ray = torch.from_numpy(np.random.dirichlet([alpha] * 2, 1).astype(np.float32).flatten()).to(device)
weights = hnet(ray)

transferParameters(net, weights)
pred = net(X)
            
criterion = nn.BCEWithLogitsLoss(pos_weight = torch.tensor([51.0]))
losses = criterion(pred, Y)

torch.autograd.grad(losses, list(hnet.parameters()), retain_graph=True)

In [None]:
net = Transformer(10, 2, 12, 6, 1, 1, 4, 4)
hnet = TransformerHyper(model=net)

ray = torch.from_numpy(np.random.dirichlet([alpha] * 2, 1).astype(np.float32).flatten()).to(device)
weights = hnet(ray)

In [None]:
transferParameters(net, weights)

In [None]:
for i in net.named_parameters():
    print(i)

In [None]:
for i in net.parameters():
    print(i)

In [None]:
for x in weights.items():
    print(x)

In [None]:
for x in net.state_dict().items():
    print(x)