In [1]:
from dante.latent_features.extractor import ModelFeatureExtractor
from examples.TITAN.data_loading import PrepareTitan

import os
import torch

# Prepare data and model

```titan_data_dir``` should correspond to the path where data from https://ibm.box.com/v/titan-dataset is stored.

In [2]:
titan_data_dir = './titan_data'
model_dir = os.path.join(titan_data_dir, 'trained_model')

In [3]:
titan = PrepareTitan(model_path='../anomaly/titan_data/trained_model/')

In [4]:
train_loader = titan.get_data(
    test=False,
    affinity_filepath=os.path.join(titan_data_dir, f'trained_model_train.csv'),
    receptor_filepath=os.path.join(titan_data_dir, 'tcr_full.csv'),
    ligand_filepath=os.path.join(titan_data_dir, 'epitopes.smi')
)

test_loader = titan.get_data(
    test=True,
    affinity_filepath=os.path.join(titan_data_dir, f'trained_model_test.csv'),
    receptor_filepath=os.path.join(titan_data_dir, 'tcr_full.csv'),
    ligand_filepath=os.path.join(titan_data_dir, 'epitopes.smi')
)

# Load model
model = titan.get_model()
model.cuda()
model.eval();

# Extract hidden activations

To choose a layer indicate the full submodule list that can be explored calling ```model```.

In [5]:
layers = [['dense_layers', 'dense_0'], ['dense_layers', 'dense_1'], ['dense_layers', 'dense_2']]

Override ```batch_to_input``` to indicate which part of the batch contains the model's input.

In [6]:
class TitanExtractor(ModelFeatureExtractor):
    
    def batch_to_input(self, batch):
        
        l, r, y = batch
        
        return l, r

- Extract features from the given model ```model```, for each instance in ```test_loader```.
- The output will be stored in the ```out_dir``` ```output```, that should be either created if non-existing or replaced by another path.

In [7]:
t = TitanExtractor(
    model = model, 
    dataloaders = [(test_loader, 'test')], 
    layers = layers,
    model_name = 'prova13',
    out_dir = './output'
).run()

  0%|          | 0/18 [00:00<?, ?it/s]

0
0
0
0
0
0
0
0


KeyboardInterrupt: 

# Save prediction (aleatoric) uncertainty and true labels

In [9]:
def extract_y_hat_true(model, data_loader):
    
    y_true = []
    y_hat = []

    for l, r, batch_y_true in data_loader:

        batch_y_true = batch_y_true.cpu()
        batch_y_true = batch_y_true.flatten()
        y_true.append(batch_y_true)

        with torch.no_grad():
            batch_y_hat, _ = model(l, r)

        batch_y_hat = batch_y_hat.cpu()
        batch_y_hat = batch_y_hat.flatten()
        y_hat.append(batch_y_hat)

    y_hat = torch.cat(y_hat)
    y_true = torch.cat(y_true)
    
    return y_hat, y_true

In [10]:
y_hat, y_true = extract_y_hat_true(model, test_loader)

# Extract Epistemic Uncertainty

## Mahalanobis

In [11]:
from dante.layer_scoring.mahalanobis import Mahalanobis

In [14]:
data_path = './output'
m_name = 'prova7'
layer = 'dense_0'

data_path = os.path.join(data_path, m_name)

fpath = os.path.join(data_path, f'{m_name}_latent_test_{layer}.pt')
latent_train = torch.load(fpath).cpu()
    
maha = Mahalanobis().fit(latent_train.numpy(), y_true.numpy())
    
fpath = os.path.join(data_path, f'{m_name}_latent_test_{layer}.pt')
latent_test = torch.load(fpath).cpu()
    
maha_scores = -maha.score(latent_test.numpy()).numpy()
    
#fpath = os.path.join(data_path, f'{m_name}_maha_{layer}.npy')
#np.save(fpath, maha_scores)

ValueError: Expected 2D array, got 1D array instead:
array=[-0.0027213   0.          0.         ...  0.00103707  0.0094914
 -0.00138495].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.