In [1]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from tqdm import trange

from phn import EPOSolver, LinearScalarizationSolver

from transformer.utils import *
from transformer.Network import *

In [2]:
device = torch.device(f"cuda:{gpus}" if torch.cuda.is_available() and not no_cuda else "cpu")

In [3]:
def circle_points(K, min_angle=None, max_angle=None):
    # generate evenly distributed preference vector
    ang0 = 1e-6 if min_angle is None else min_angle
    ang1 = np.pi / 2 - ang0 if max_angle is None else max_angle
    angles = np.linspace(ang0, ang1, K, endpoint=True)
    x = np.cos(angles)
    y = np.sin(angles)
    return np.c_[x, y]

In [17]:
@torch.no_grad()
def evaluate(hypernet, targetnet, loader, rays, device, lossWeight):
    hypernet.eval()
    results = {"ray" : [], "loss" : [], "hv" : []}#defaultdict(list)
    
    for ray in rays:
        ray = torch.from_numpy(ray.astype(np.float32)).to(device)

        ray /= ray.sum()

        total = 0.0
        full_losses = []
        for batch in loader:
            hypernet.zero_grad()

            batch = (t.to(device) for t in batch)
            X, Y = batch
            bs = len(Y)

            weights = hypernet(ray)
            
            transferParameters(targetnet, weights)
            
            pred = targetnet(X)

            # loss
            criterion = nn.BCEWithLogitsLoss(pos_weight = torch.tensor([lossWeight]))
            curr_losses = criterion(pred, Y)
            
            # metrics
            ray = ray.squeeze(0)

            # losses
            full_losses.append(curr_losses.detach().cpu().numpy())
            total += bs

        results["ray"].append(ray.cpu().numpy().tolist())
        results["loss"].append(np.array(full_losses).mean(0).tolist())

    hv = get_performance_indicator(
        "hv",
        ref_point=np.ones(
            7,
        ),
    )
    results["hv"] = hv.do(np.array(results["loss"]))

    return results

In [None]:
def train(net, loaders, solver_type: str, hidden_dim: int, no_val_eval: bool, eval_every: int, alpha: float, 
          n_rays: int, epochs: int, lr: float, wd: float):
    # ----
    # Hypernetwork
    # ----
    hnet = TransformerHyper(ray_hidden_dim=hidden_dim, model=net)

    net = net.to(device)
    hnet = hnet.to(device)

    optimizer = torch.optim.Adam(hnet.parameters(), lr=lr, weight_decay=wd)

    # ------
    # Solver
    # ------
    solvers = dict(ls=LinearScalarizationSolver, epo=EPOSolver)

    solver_method = solvers[solver_type]
    if solver_type == "epo":
        nb_params = sum(p.numel() for p in hnet.parameters() if p.requires_grad)
        solver = solver_method(n_tasks=2, n_params=nb_params)
    else:
        # ls
        solver = solver_method(n_tasks=2)

    # ----
    # Data
    # ----
    
    ### TODO: dataset to test_loader, val_loader, test_loader
    train_loader = loaders['train_loader']
    val_loader = loaders['val_loader']
    test_loader = loaders['test_loader']
    
    ### TODO: DELETE UP

    min_angle = 0.1
    max_angle = np.pi / 2 - 0.1
    test_rays = circle_points(n_rays, min_angle=min_angle, max_angle=max_angle)

    # ----------
    # Train loop
    # ----------
    last_eval = -1
    epoch_iter = trange(epochs)

    val_results = dict()
    test_results = dict()

    trainLossWeight = train_label_ts.count(0) / train_label_ts.count(1)
    valLossWeight = val_label_ts.count(0) / val_label_ts.count(1)
    testLossWeight = test_label_ts.count(0) / test_label_ts.count(1)
    for epoch in epoch_iter:
        for i, batch in enumerate(train_loader):
            hnet.train()
            optimizer.zero_grad()
            X, Y = batch
            X = X.to(device)
            Y = Y.to(device)

            if alpha > 0:
                ray = torch.from_numpy(
                    np.random.dirichlet([alpha] * 2, 1).astype(np.float32).flatten()
                ).to(device)
            else:
                alpha = torch.empty(
                    1,
                ).uniform_(0.0, 1.0)
                ray = torch.tensor([alpha.item(), 1 - alpha.item()]).to(device)

                
            weights = hnet(ray)  
       
            #net.load_state_dict(weights)          
            transferParameters(net, weights)
            
            pred = net(X)
                  
            criterion = nn.BCEWithLogitsLoss(pos_weight = torch.tensor([trainLossWeight]))
            losses = criterion(pred, Y)

            ray = ray.squeeze(0)
            loss = solver(losses, ray, list(hnet.parameters()), feat=X, label=Y, model=net, weights=weights)

            loss.backward()
            
            '''
            epoch_iter.set_description(
                f"total weighted loss: {loss.item():.3f}"
                # f", ray {ray.cpu().numpy().tolist()}"
            )
            '''

            optimizer.step()

        if (epoch + 1) % eval_every == 0:
            last_eval = epoch
            if not no_val_eval:
                epoch_results = evaluate(
                    hypernet=hnet,
                    targetnet=net,
                    loader=val_loader,
                    rays=test_rays,
                    device=device,
                    lossWeight=valLossWeight
                )
                #val_results[f"epoch_{epoch + 1}"] = epoch_results

            test_epoch_results = evaluate(
                hypernet=hnet,
                targetnet=net,
                loader=test_loader,
                rays=test_rays,
                device=device,
                lossWeight=testLossWeight
            )
            #test_results[f"epoch_{epoch + 1}"] = test_epoch_results

    if epoch != last_eval:
        if not no_val_eval:
            epoch_results = evaluate(
                hypernet=hnet,
                targetnet=net,
                loader=val_loader,
                rays=test_rays,
                device=device,
                lossWeight=valLossWeight
            )
            #val_results[f"epoch_{epoch + 1}"] = epoch_results

        test_epoch_results = evaluate(
            hypernet=hnet,
            targetnet=net,
            loader=test_loader,
            rays=test_rays,
            device=device,
            lossWeight=testLossWeight
        )
        #test_results[f"epoch_{epoch + 1}"] = test_epoch_results
        
    return net

In [6]:
from experiments.multimnist.data import Dataset

from models import TransformerHyper, TargetTransformer

In [7]:
df = pd.read_csv('data/pre_processed_dataset.csv')

# Train = 0.6
# Val = 0.1
# Test = 0.3
from sklearn.model_selection import train_test_split

samplelist = df["Patient_ID"].unique()
training_samp, split_samp = train_test_split(samplelist, train_size=0.6, test_size=0.4, random_state=5, shuffle=True)
validation_samp, test_samp = train_test_split(samplelist, train_size=0.25, test_size=0.75, random_state=5, shuffle=True)
    
train_df = df[df['Patient_ID'].isin(training_samp)]
val_df = df[df['Patient_ID'].isin(validation_samp)]
test_df = df[df['Patient_ID'].isin(test_samp)]

In [8]:
def splitDatasetPerPatient(dataset, window_size=6):
    data = []
    label = []

    for patientId in dataset['Patient_ID'].unique():
        tmp_data = dataset[dataset['Patient_ID'] == patientId]
        if(len(tmp_data) >= window_size):
            data.append(tmp_data.drop(['Hour', 'Patient_ID', 'SepsisLabel'], axis=1).to_numpy())
            label.append(tmp_data['SepsisLabel'].to_numpy())
            
    return data, label

In [9]:
train_data, train_label = splitDatasetPerPatient(train_df)
val_data, val_label = splitDatasetPerPatient(val_df)
test_data, test_label = splitDatasetPerPatient(test_df)

In [10]:
def toTimeSeriesDataloader(feat, label, window_size=6):
    data_labels = []

    # One patient per batch
    data_loader = []

    for i in range(len(feat)):
        patient_data = feat[i]
        labels = label[i]
        X_data = []
        Y_data = []

        for j in range(len(patient_data) - (window_size - 1)):
            X_data.append(patient_data[j:(j + window_size)])
            Y_data.append([labels[(j + window_size - 1)]])
            data_labels.append(labels[(j + window_size - 1)])

        data_loader.append([torch.Tensor(X_data), torch.Tensor(Y_data)])
        
    return data_loader, data_labels

In [11]:
train_loader, train_label_ts = toTimeSeriesDataloader(train_data, train_label)
val_loader, val_label_ts = toTimeSeriesDataloader(val_data, val_label)
test_loader, test_label_ts = toTimeSeriesDataloader(test_data, test_label)

  data_loader.append([torch.Tensor(X_data), torch.Tensor(Y_data)])


In [12]:
loaders = {'train_loader': train_loader, 'val_loader': val_loader, 'test_loader': test_loader}

In [13]:
def transferParameters(model, weights):
    for name in weights:
        obj = model._modules
        #names = name.replace(".", "/").split('/')
        names = name.split('.')
        index = 0

        while(index < len(names) - 1):

            if (obj.__class__.__name__ == 'OrderedDict'): # Dict
                obj = obj[names[index]]
            elif (obj.__class__.__name__ == 'ModuleList'): # List
                obj = obj[int(names[index])]
            else: # Object
                obj = getattr(obj, names[index])

            index += 1

        #name = name.replace("/", ".")
        if(names[index] == 'weight'):
            del obj.weight           
            obj.weight = weights[name]
            #model.register_parameter(name.replace(".", "/"), nn.Parameter(weights[name]))
        elif(names[index] == 'bias'):
            del obj.bias
            obj.bias = weights[name]
            #model.register_parameter(name.replace(".", "/"), nn.Parameter(weights[name]))
        elif(names[index] == 'pe'):
            del obj.pe
            obj.pe = weights[name]
            #model.register_parameter(name.replace(".", "/"), nn.Parameter(weights[name]))

In [None]:
net = Transformer(10, 2, 12, 6, 1, 1, 4, 4)

trained_net = train(
    net = net,
    loaders=loaders,
    solver_type="epo",       
    hidden_dim=10,  #100
    eval_every=1,
    no_val_eval=False,
    alpha=0.2,
    n_rays = 25,
    epochs = 6,
    lr = 1e-2,
    wd = 0.0
)


  0%|          | 0/6 [00:00<?, ?it/s]

epoch: 0


In [None]:
np.random.dirichlet(0.0 * 2, 1).astype(np.float32).flatten()

In [None]:
np.random.dirichlet(0.5 * 2, 1).astype(np.float32).flatten()

In [None]:
np.random.dirichlet(0.1 * 2, 1).astype(np.float32).flatten()

In [None]:
X, Y = next(iter(train_loader))

net = Transformer(10, 2, 12, 6, 1, 1, 4, 4)
hnet = TransformerHyper(model=net)

ray = torch.from_numpy(np.random.dirichlet([alpha] * 2, 1).astype(np.float32).flatten()).to(device)
weights = hnet(ray)

transferParameters(net, weights)
pred = net(X)
            
criterion = nn.BCEWithLogitsLoss(pos_weight = torch.tensor([51.0]))
losses = criterion(pred, Y)

torch.autograd.grad(losses, list(hnet.parameters()), retain_graph=True)

In [None]:
net = Transformer(10, 2, 12, 6, 1, 1, 4, 4)
hnet = TransformerHyper(model=net)

ray = torch.from_numpy(np.random.dirichlet([alpha] * 2, 1).astype(np.float32).flatten()).to(device)
weights = hnet(ray)

In [None]:
transferParameters(net, weights)

In [None]:
for i in net.named_parameters():
    print(i)

In [None]:
for i in net.parameters():
    print(i)

In [None]:
for x in weights.items():
    print(x)

In [None]:
for x in net.state_dict().items():
    print(x)