In [26]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from tqdm import trange

from phn import EPOSolver, LinearScalarizationSolver

In [41]:
# Device selection. `gpus` and `no_cuda` are not defined anywhere else in this
# notebook, so the original line raised NameError whenever CUDA was available
# (the names are only evaluated after `torch.cuda.is_available()` returns True).
gpus = 0          # index of the CUDA device to use
no_cuda = False   # set to True to force CPU execution
device = torch.device(f"cuda:{gpus}" if torch.cuda.is_available() and not no_cuda else "cpu")

lr = 1e-4         # Adam learning rate used in train()
wd = 0.0          # Adam weight decay used in train()
batch_size = 256  # NOTE(review): not referenced by any visible cell -- confirm it is still needed
n_rays = 25       # number of preference rays evaluated in train()
alpha = 0.2       # Dirichlet concentration passed to train() for ray sampling
epochs = 5        # number of training epochs in train()

In [27]:
def circle_points(K, min_angle=None, max_angle=None):
    """Return K evenly spaced preference vectors on the unit circle.

    Angles run from ``min_angle`` (default 1e-6) to ``max_angle`` (default
    ``pi/2 - min_angle``), both endpoints included.

    Returns:
        Array of shape ``(K, 2)`` whose rows are ``(cos(angle), sin(angle))``.
    """
    start = min_angle if min_angle is not None else 1e-6
    stop = max_angle if max_angle is not None else np.pi / 2 - start
    theta = np.linspace(start, stop, K, endpoint=True)
    return np.column_stack((np.cos(theta), np.sin(theta)))

In [12]:
@torch.no_grad()
def evaluate(hypernet, targetnet, loader, rays, device):
    """Evaluate the hypernetwork/target-network pair on `loader` for each preference ray.

    Args:
        hypernet: module called as ``hypernet(ray)`` to produce the target weights;
            it is switched to eval mode and left in that mode.
        targetnet: callable invoked as ``targetnet(X, weights)`` that returns one
            logits tensor per task.
        loader: iterable of ``(X, Y)`` batches; ``Y[:, 0]`` and ``Y[:, 1]`` are
            used as the class targets of task 1 and task 2.
        rays: iterable of NumPy preference vectors; each one is normalised to
            sum to 1 before being fed to the hypernetwork.
        device: torch device the ray and the batches are moved to.

    Returns:
        A ``defaultdict(list)`` with the keys ``"ray"``, ``"task1_acc"``,
        ``"task2_acc"``, ``"task1_loss"`` and ``"task2_loss"``, holding one entry
        per ray. Metrics are sample-weighted means over the loader; if the loader
        yields no samples they are recorded as NaN.
    """
    # Imported locally: no import cell of the notebook brings defaultdict into scope.
    from collections import defaultdict

    hypernet.eval()
    loss1 = nn.CrossEntropyLoss()
    loss2 = nn.CrossEntropyLoss()

    results = defaultdict(list)

    for ray in rays:
        ray = torch.from_numpy(ray.astype(np.float32)).to(device)
        ray = ray / ray.sum()

        # The ray is fixed for the whole pass and the hypernetwork is in eval
        # mode, so the generated weights are computed once instead of per batch.
        weights = hypernet(ray)

        total = 0
        task1_correct, task2_correct = 0, 0
        l1, l2 = 0.0, 0.0

        for X, Y in loader:
            X = X.to(device)
            Y = Y.to(device)
            bs = len(Y)

            logit1, logit2 = targetnet(X, weights)

            # CrossEntropyLoss returns the batch mean; re-weight by the batch
            # size so the final division yields a per-sample mean.
            l1 += loss1(logit1, Y[:, 0]).item() * bs
            l2 += loss2(logit2, Y[:, 1]).item() * bs

            # Predicted class = index of the largest logit.
            task1_correct += logit1.argmax(dim=1).eq(Y[:, 0]).sum().item()
            task2_correct += logit2.argmax(dim=1).eq(Y[:, 1]).sum().item()

            total += bs

        def _mean(value):
            # An empty loader has no defined metric; report NaN instead of crashing.
            return value / total if total else float("nan")

        results["ray"].append(ray.squeeze(0).cpu().numpy().tolist())
        results["task1_acc"].append(_mean(task1_correct))
        results["task2_acc"].append(_mean(task2_correct))
        results["task1_loss"].append(_mean(l1))
        results["task2_loss"].append(_mean(l2))

    return results

In [44]:
def train(loaders, solver_type: str, hidden_dim: int, no_val_eval: bool, eval_every: int, alpha: float):
    """Train the hypernetwork on randomly sampled preference rays and evaluate it periodically.

    Reads the notebook-level settings ``device``, ``lr``, ``wd``, ``n_rays`` and
    ``epochs``.

    Args:
        loaders: mapping with the keys ``'train_loader'``, ``'val_loader'`` and
            ``'test_loader'``; each value is iterated to obtain ``(X, Y)`` batches.
        solver_type: ``"ls"`` (LinearScalarizationSolver) or ``"epo"`` (EPOSolver);
            any other value raises ``KeyError``.
        hidden_dim: forwarded to ``LeNetHyper`` as ``ray_hidden_dim``.
        no_val_eval: when True the validation loader is not evaluated.
        eval_every: evaluation runs after every ``eval_every``-th epoch, and once
            more after the last epoch if that epoch was not already evaluated.
        alpha: Dirichlet concentration used to sample the training ray; when it
            is not positive the ray is ``(u, 1 - u)`` with ``u ~ U(0, 1)``.

    Returns:
        ``(val_results, test_results)``: two dicts keyed by ``"epoch_<n>"`` whose
        values are the per-ray metrics returned by ``evaluate``.

    NOTE(review): the loss uses ``Y[:, 0]`` and ``Y[:, 1]`` as class targets, while
    ``toTimeSeriesDataloader`` in this notebook builds float ``Y`` with a single
    column, and the recorded run fails inside the LeNet target network on the
    input shape -- confirm the model/data pairing.
    """
    # ----
    # Nets
    # ----
    hnet = LeNetHyper([9, 5], ray_hidden_dim=hidden_dim).to(device)  # hypernetwork
    net = LeNetTarget([9, 5]).to(device)                             # target network

    # ---------
    # Task loss
    # ---------
    loss1 = nn.CrossEntropyLoss()
    loss2 = nn.CrossEntropyLoss()

    # Only the hypernetwork's parameters are handed to the optimizer.
    optimizer = torch.optim.Adam(hnet.parameters(), lr=lr, weight_decay=wd)

    # ------
    # solver
    # ------
    solvers = dict(ls=LinearScalarizationSolver, epo=EPOSolver)

    solver_method = solvers[solver_type]
    if solver_type == "epo":
        nb_params = sum(p.numel() for p in hnet.parameters() if p.requires_grad)
        solver = solver_method(n_tasks=2, n_params=nb_params)
    else:
        # ls
        solver = solver_method(n_tasks=2)

    # ----
    # data
    # ----
    # TODO (kept from the original cell): revisit how the dataset is turned into
    # the train/val/test loaders.
    train_loader = loaders['train_loader']
    val_loader = loaders['val_loader']
    test_loader = loaders['test_loader']

    min_angle = 0.1
    max_angle = np.pi / 2 - 0.1
    test_rays = circle_points(n_rays, min_angle=min_angle, max_angle=max_angle)

    val_results = dict()
    test_results = dict()

    def _evaluate_splits(epoch_number):
        # Record validation (unless disabled) and test metrics for one epoch.
        key = f"epoch_{epoch_number}"
        if not no_val_eval:
            val_results[key] = evaluate(
                hypernet=hnet,
                targetnet=net,
                loader=val_loader,
                rays=test_rays,
                device=device,
            )
        test_results[key] = evaluate(
            hypernet=hnet,
            targetnet=net,
            loader=test_loader,
            rays=test_rays,
            device=device,
        )

    # ----------
    # Train loop
    # ----------
    last_eval = -1
    epoch_iter = trange(epochs)

    for epoch in epoch_iter:
        # evaluate() leaves the hypernetwork in eval mode, so switch back here.
        hnet.train()

        for X, Y in train_loader:
            optimizer.zero_grad()
            X = X.to(device)
            Y = Y.to(device)

            if alpha > 0:
                ray = torch.from_numpy(
                    np.random.dirichlet((alpha, alpha), 1).astype(np.float32).flatten()
                ).to(device)
            else:
                # Use a separate local so the `alpha` argument is not overwritten;
                # overwriting it made every later iteration take the Dirichlet branch.
                mix = torch.empty(1).uniform_(0.0, 1.0).item()
                ray = torch.tensor([mix, 1 - mix]).to(device)

            weights = hnet(ray)
            logit1, logit2 = net(X, weights)

            l1 = loss1(logit1, Y[:, 0])
            l2 = loss2(logit2, Y[:, 1])
            losses = torch.stack((l1, l2))

            ray = ray.squeeze(0)
            # feat, label and model were added to the solver call
            loss = solver(losses, ray, list(hnet.parameters()), feat=X, label=Y, model=net)

            loss.backward()

            epoch_iter.set_description(
                f"total weighted loss: {loss.item():.3f}, loss 1: {l1.item():.3f}, loss 2: {l2.item():.3f}"
            )

            optimizer.step()

        if (epoch + 1) % eval_every == 0:
            last_eval = epoch
            _evaluate_splits(epoch + 1)

    # Make sure the final model is evaluated even when `epochs` is not a multiple
    # of `eval_every`; nothing to evaluate when no epoch ran at all.
    if epochs > 0 and last_eval != epochs - 1:
        _evaluate_splits(epochs)

    return val_results, test_results

In [23]:
from experiments.multimnist.data import Dataset

from experiments.multimnist.models import (
    LeNetHyper,
    LeNetTarget,
    ResnetHyper,
    ResNetTarget,
)

In [29]:
df = pd.read_csv('data/pre_processed_dataset.csv')

# Patient-level split so that no patient appears in more than one subset:
# Train = 0.6
# Val = 0.1
# Test = 0.3
from sklearn.model_selection import train_test_split

samplelist = df["Patient_ID"].unique()
training_samp, split_samp = train_test_split(samplelist, train_size=0.6, test_size=0.4, random_state=5, shuffle=True)
# Split the 40% hold-out (not the full patient list) into 25% / 75%, i.e. 10% / 30%
# of all patients; splitting `samplelist` again leaked training patients into
# the validation and test sets.
validation_samp, test_samp = train_test_split(split_samp, train_size=0.25, test_size=0.75, random_state=5, shuffle=True)

# The three patient groups must be pairwise disjoint.
assert not set(training_samp) & set(validation_samp)
assert not set(training_samp) & set(test_samp)
assert not set(validation_samp) & set(test_samp)

train_df = df[df['Patient_ID'].isin(training_samp)]
val_df = df[df['Patient_ID'].isin(validation_samp)]
test_df = df[df['Patient_ID'].isin(test_samp)]

In [31]:
def splitDatasetPerPatient(dataset, window_size=6):
    """Group the rows of `dataset` by patient into feature and label arrays.

    Patients are visited in order of first appearance of their ``Patient_ID``.
    A patient with fewer than ``window_size`` rows is left out.

    Returns:
        ``(data, label)``: two parallel lists with one entry per kept patient.
        ``data[i]`` holds every column except ``Hour``, ``Patient_ID`` and
        ``SepsisLabel`` as a NumPy array; ``label[i]`` holds the ``SepsisLabel``
        column as a NumPy array.
    """
    excluded_columns = ['Hour', 'Patient_ID', 'SepsisLabel']
    features_per_patient, labels_per_patient = [], []

    for pid in dataset['Patient_ID'].unique():
        records = dataset[dataset['Patient_ID'] == pid]
        if len(records) < window_size:
            continue
        features_per_patient.append(records.drop(excluded_columns, axis=1).to_numpy())
        labels_per_patient.append(records['SepsisLabel'].to_numpy())

    return features_per_patient, labels_per_patient

In [33]:
# Per-patient feature/label arrays for each split (default window_size).
(train_data, train_label), (val_data, val_label), (test_data, test_label) = [
    splitDatasetPerPatient(split_df) for split_df in (train_df, val_df, test_df)
]

In [34]:
def toTimeSeriesDataloader(feat, label, window_size=6):
    """Turn per-patient sequences into sliding-window batches, one batch per patient.

    For every patient, each run of ``window_size`` consecutive rows becomes one
    sample, labelled with the label of the last row in the window.

    Args:
        feat: sequence of per-patient 2-D arrays (rows are time steps).
        label: sequence of per-patient 1-D label arrays, parallel to ``feat``.
        window_size: number of consecutive rows per sample.

    Returns:
        ``(data_loader, data_labels)``. ``data_loader`` is a list of ``[X, Y]``
        pairs of float32 tensors, with ``X`` of shape
        ``(n_windows, window_size, n_features)`` and ``Y`` of shape
        ``(n_windows, 1)``. ``data_labels`` is the flat list of all window labels
        in the same order. Patients with fewer than ``window_size`` rows are
        skipped, so they no longer yield an empty batch.
    """
    data_labels = []

    # One patient per batch
    data_loader = []

    for patient_data, labels in zip(feat, label):
        patient_data = np.asarray(patient_data)
        labels = np.asarray(labels)

        n_windows = len(patient_data) - (window_size - 1)
        if n_windows <= 0:
            # Too short for a single window; an empty tensor would break
            # downstream indexing such as Y[:, 0].
            continue

        # Stack into one ndarray first: building a tensor from a Python list of
        # ndarrays is the slow path PyTorch warns about.
        windows = np.stack(
            [patient_data[start:start + window_size] for start in range(n_windows)]
        )
        # Label of the last time step of each window; explicit indices keep the
        # IndexError when `labels` is shorter than `patient_data`.
        window_labels = labels[np.arange(window_size - 1, window_size - 1 + n_windows)]

        data_labels.extend(window_labels)
        data_loader.append([
            torch.as_tensor(windows, dtype=torch.float32),
            torch.as_tensor(window_labels, dtype=torch.float32).reshape(-1, 1),
        ])

    return data_loader, data_labels

In [36]:
# Sliding-window batches (one per patient) and flat window labels for each split.
(train_loader, train_label_ts), (val_loader, val_label_ts), (test_loader, test_label_ts) = [
    toTimeSeriesDataloader(split_data, split_label)
    for split_data, split_label in (
        (train_data, train_label),
        (val_data, val_label),
        (test_data, test_label),
    )
]

In [37]:
# Bundle the three splits under the keys that train() looks up.
loaders = dict(
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader,
)

In [45]:
# Run training with the EPO solver; `alpha` is the value set in the config cell.
train(
    alpha=alpha,
    eval_every=10,
    no_val_eval=False,
    hidden_dim=100,
    solver_type="epo",
    loaders=loaders,
)

  0%|          | 0/5 [00:00<?, ?it/s]


RuntimeError: Given groups=1, weight of size [10, 1, 9, 9], expected input[1, 38, 6, 12] to have 1 channels, but got 38 channels instead