### Import data and declare hyperparameters

In [1]:
import numpy as np

from utils import *

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from data_loading import *

from standardized_residuals import StandardizedResiduals

# Fix the random seed before any data handling or training.
# seed_everything is not defined in this notebook; it comes from one of the
# star-imports above. Presumably it seeds the Python / NumPy / torch RNGs — TODO confirm.
seed_everything(42)

In [2]:
# Location of the preprocessed dataset, relative to this notebook.
load_path = "../data/processed_data_3Dmin/casp.npz"
X, Y = load_data(load_path)

# Split configuration handed to the project helper split_and_preprocess.
normalize = True
splits = [0.7, 0.1, 0.1, 0.1]
subsets = split_and_preprocess(X, Y, splits=splits, normalize=normalize)

# Pull each (inputs, targets) pair out of the returned mapping.
x_train, y_train = subsets["X_train"], subsets["Y_train"]
x_calibration, y_calibration = subsets["X_calibration"], subsets["Y_calibration"]
x_test, y_test = subsets["X_test"], subsets["Y_test"]
x_stop, y_stop = subsets["X_stop"], subsets["Y_stop"]

print("X_train shape:", x_train.shape, "Y_train shape:", y_train.shape)
print("X_cal shape:", x_calibration.shape, "Y_cal shape:", y_calibration.shape)
print("X_test shape:", x_test.shape, "Y_test shape:", y_test.shape)
print("X_stop shape:", x_stop.shape, "Y_stop shape:", y_stop.shape)

# Problem dimensions (second axis) and subset sizes (first axis).
n_train, input_dim = x_train.shape[0], x_train.shape[1]
output_dim = y_train.shape[1]
n_calibration = x_calibration.shape[0]
n_test = x_test.shape[0]
n_stop = x_stop.shape[0]

# Convert every subset to a torch tensor of one common dtype.
dtype = torch.float32
(
    x_train_tensor, y_train_tensor,
    x_stop_tensor, y_stop_tensor,
    x_calibration_tensor, y_calibration_tensor,
    x_test_tensor, y_test_tensor,
) = (
    torch.tensor(array, dtype=dtype)
    for array in (
        x_train, y_train,
        x_stop, y_stop,
        x_calibration, y_calibration,
        x_test, y_test,
    )
)

# Level passed as `alpha` to every conformalize* call further down.
alpha = 0.1

X_train shape: (32010, 7) Y_train shape: (32010, 3)
X_cal shape: (4573, 7) Y_cal shape: (4573, 3)
X_test shape: (4574, 7) Y_test shape: (4574, 3)
X_stop shape: (4573, 7) Y_stop shape: (4573, 3)


### Train the models

In [7]:
class ZeroPred:
    """Constant predictor that outputs a zero vector for every input row.

    Calling an instance with a batch ``x`` returns a NumPy array of zeros
    with shape ``(len(x), y_dim)``.
    """

    def __init__(self, y_dim):
        # Number of output coordinates produced per input row.
        self.y_dim = y_dim

    def __call__(self, x):
        n_rows = len(x)
        out_shape = (n_rows, self.y_dim)
        return np.zeros(out_shape)

# Center model passed to StandardizedResiduals in the training cell:
# a ZeroPred, i.e. it returns zeros for every input.
center_model = ZeroPred(output_dim)

In [8]:
# Re-seed immediately before building the loaders and the model.
# (seed_everything comes from the project's star-imports; presumably it seeds
# the Python / NumPy / torch RNGs — TODO confirm.)
seed_everything(42)

# Architecture and optimisation settings.
hidden_dim, num_layers = 128, 3
batch_size, num_epochs, lr = 32, 30, 1e-3

# Mini-batch loaders over the training and "stop" subsets; both reshuffle.
# NOTE(review): the training log prints a "Best Stop Loss", so the stop set
# looks like an early-stopping / model-selection set — confirm in fit().
train_dataset = torch.utils.data.TensorDataset(x_train_tensor, y_train_tensor)
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
stop_dataset = torch.utils.data.TensorDataset(x_stop_tensor, y_stop_tensor)
stoploader = torch.utils.data.DataLoader(stop_dataset, batch_size=batch_size, shuffle=True)

standardized_residuals = StandardizedResiduals(
    input_dim,
    output_dim,
    hidden_dim=hidden_dim,
    num_layers=num_layers,
    center_model=center_model,
)

# Kept as the cell's final expression, exactly like the original call.
standardized_residuals.fit(trainloader, stoploader, num_epochs=num_epochs, lr=lr, verbose=2)

Epoch 1: Avg NLL Loss = 0.2670 -- Stop loss: 0.1523 -- Best Stop Loss: inf
Epoch 2: Avg NLL Loss = 0.0900 -- Stop loss: 0.0989 -- Best Stop Loss: 0.15226999519171414
Epoch 3: Avg NLL Loss = 0.0349 -- Stop loss: 0.0134 -- Best Stop Loss: 0.09888782586667921
Epoch 4: Avg NLL Loss = 0.0247 -- Stop loss: 0.0158 -- Best Stop Loss: 0.013418140736493197
Epoch 5: Avg NLL Loss = -0.0168 -- Stop loss: -0.0374 -- Best Stop Loss: 0.013418140736493197
Epoch 6: Avg NLL Loss = -0.0367 -- Stop loss: 0.0472 -- Best Stop Loss: -0.03735227275874231
Epoch 7: Avg NLL Loss = -0.0383 -- Stop loss: 0.0183 -- Best Stop Loss: -0.03735227275874231
Epoch 8: Avg NLL Loss = -0.0613 -- Stop loss: -0.0083 -- Best Stop Loss: -0.03735227275874231
Epoch 9: Avg NLL Loss = -0.0699 -- Stop loss: -0.0471 -- Best Stop Loss: -0.03735227275874231
Epoch 10: Avg NLL Loss = -0.0868 -- Stop loss: -0.0504 -- Best Stop Loss: -0.04709116132421927
Epoch 11: Avg NLL Loss = -0.0961 -- Stop loss: -0.1067 -- Best Stop Loss: -0.05035777627

In [7]:
# Query the fitted model on the test inputs.
# Judging by the names, this returns a predicted mean and covariance per test
# point — TODO confirm against StandardizedResiduals.get_distribution.
# NOTE(review): mu and Sigma are not referenced again in the visible cells.
mu, Sigma = standardized_residuals.get_distribution(x_test_tensor)

### Full output

In [4]:
# Calibrate on the calibration split with the level `alpha` set in the data cell.
# The number displayed under the cell is whatever conformalize returns
# (presumably the calibrated threshold / quantile — TODO confirm).
standardized_residuals.conformalize(x=x_calibration_tensor, y=y_calibration_tensor, alpha = alpha)

5.511889457702637

In [5]:
# Evaluate the full-output calibration on the test split.
# get_coverage receives inputs and targets; get_average_volume receives inputs only.
coverage = standardized_residuals.get_coverage(x_test_tensor, y_test_tensor)
volumes  = standardized_residuals.get_average_volume(x_test_tensor)

print("Coverage:", coverage)
print("Average Volume:", volumes)

Coverage: 0.9121119379997253
Average Volume: 1.5569067001342773


### Revealed outputs

In [6]:
# Indices of the output coordinates treated as revealed: here only coordinate 0.
# NOTE(review): `idx_knowned` presumably means "known"; the name is kept because
# the evaluation cell below reads it.
idx_knowned = np.array([0])

# Calibrate on the calibration split for the revealed-output setting.
# The number displayed under the cell is whatever conformalize_revealed returns.
standardized_residuals.conformalize_revealed(idx_revealed = idx_knowned,
                                            x = x_calibration_tensor, 
                                            y = y_calibration_tensor, 
                                            alpha = alpha
                                            )

3.5451841354370117

In [7]:
# Evaluate the revealed-output calibration on the test split.
# The volume call is given only the revealed columns of the test targets
# (y_test_tensor[:, idx_knowned]); the coverage call gets the full targets.
coverage = standardized_residuals.get_coverage_revealed(x_test_tensor, y_test_tensor)
volumes  = standardized_residuals.get_average_volume_revealed(x_test_tensor, y_test_tensor[:, idx_knowned])

print("Coverage:", coverage)
print("Average Volume:", volumes)

TODO : checker si la definition du q_alpha reste la meme.
Coverage: 0.914516806602478
Average Volume: 1.5590709447860718


### Projection of the output

In [8]:
# Random (2, output_dim) projection matrix with standard-normal entries.
# No generator is passed, so the values come from the global torch RNG and
# depend on the RNG state left by the cells executed before this one.
projection_matrix_tensor =  torch.randn((2, output_dim), dtype=dtype)

# Calibrate on the calibration split for the projected-output setting.
# The number displayed under the cell is whatever conformalize_projection returns.
standardized_residuals.conformalize_projection(
                                            projection_matrix = projection_matrix_tensor,
                                            x = x_calibration_tensor, 
                                            y = y_calibration_tensor, 
                                            alpha = alpha
                                            )

4.126266956329346

In [9]:
# Evaluate the projected-output calibration on the test split.
# The projection matrix is not passed again here; presumably it was stored on
# the model by conformalize_projection — TODO confirm.
coverage = standardized_residuals.get_coverage_projection(x_test_tensor, y_test_tensor)
volumes  = standardized_residuals.get_average_volume_projection(x_test_tensor)

print("Coverage:", coverage)
print("Average Volume:", volumes)

Coverage: 0.9112374186515808
Average Volume: 1.7222936153411865


### Missing outputs

In [10]:
# Build copies of the calibration and test targets with NaN entries injected.
# add_nan is a project helper; presumably each row receives between min_nan and
# max_nan NaNs, so with max_nan = output_dim - 1 at least one coordinate stays
# observed — TODO confirm in add_nan.

y_calibration_nan = add_nan(y_calibration, min_nan=1, max_nan=output_dim-1)
y_calibration_nan_tensor = torch.tensor(y_calibration_nan, dtype=dtype)

y_test_nan = add_nan(y_test, min_nan=1, max_nan=output_dim-1)
y_test_nan_tensor = torch.tensor(y_test_nan, dtype=dtype)

In [11]:
# Calibrate using the calibration targets that contain NaN entries.
# The number displayed under the cell is whatever conformalize_missing returns.
standardized_residuals.conformalize_missing(x = x_calibration_tensor,
                                            y = y_calibration_nan_tensor, 
                                            alpha = alpha
                                            )

0.8660470843315125

In [12]:
# Coverage is measured twice on the test split:
#   - get_coverage_missing on the targets with injected NaNs,
#   - get_coverage on the complete targets.
coverage_with_nan    = standardized_residuals.get_coverage_missing(x_test_tensor, y_test_nan_tensor)
coverage_full_vector = standardized_residuals.get_coverage(x_test_tensor, y_test_tensor)

print("Coverage with NaN:", coverage_with_nan)
print("Coverage full vector:", coverage_full_vector)

Coverage with NaN: 0.9079580307006836
Coverage full vector: 0.9121119379997253
