# Ray et al. 2013 Interpretation
**Authorship:**
Adam Klie, *09/03/2022*
***
**Description:**
Notebook to interpret the best trained models on the Ray et al. (2013) dataset.
***

In [1]:
# Load the IPython autoreload extension only if it is not already loaded,
# so that re-running this cell does not load it a second time.
if 'autoreload' not in get_ipython().extension_manager.loaded:
    %load_ext autoreload
# Autoreload mode 2: re-import modules before executing code, so edits to
# imported source files are picked up without restarting the kernel.
%autoreload 2

import os
import glob
import logging
import torch
import numpy as np
import pandas as pd
import eugene as eu
import matplotlib.pyplot as plt
import matplotlib

# Font type 42 selects TrueType embedding for the PDF and PostScript backends
# (see matplotlib's rcParams documentation), which keeps text in saved figures
# editable in vector-graphics tools.
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

Global seed set to 13
2022-09-03 20:25:06.976903: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-09-03 20:25:06.976943: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
  min_coords = np.vstack(data.min(0) for data in polygons_data).min(0)
  max_coords = np.vstack(data.max(0) for data in polygons_data).max(0)


In [2]:
# Point each EUGENe settings directory at its ray13 project folder.
for setting_name, setting_path in (
    ("dataset_dir", "../../../_datasets/ray13"),
    ("output_dir", "../../../_output/ray13"),
    ("logging_dir", "../../../_logs/ray13"),
    ("config_dir", "../../../_configs/ray13"),
):
    setattr(eu.settings, setting_name, setting_path)

# The figure directory is kept as a plain notebook variable, not an eu.settings attribute.
figure_dir = "../../../_figures/ray13"

# Set EUGENe's verbosity to the ERROR level of the standard logging module.
eu.settings.verbosity = logging.ERROR

# Load the test `SeqData`

In [29]:
# Read the test-set SeqData (with stored predictions) from the output directory.
test_h5sd_path = os.path.join(eu.settings.output_dir, "norm_test_predictions.h5sd")
sdata_test = eu.dl.read_h5sd(test_h5sd_path)

# Target columns are the "RNCMPT" annotation columns whose names do not contain
# "MT" or "ST" (the suffixes carried by the prediction columns).
annot_columns = sdata_test.seqs_annot.columns
is_rncmpt_column = annot_columns.str.contains("RNCMPT")
is_prediction_column = annot_columns.str.contains("MT|ST")
target_mask = is_rncmpt_column & ~is_prediction_column
target_cols = annot_columns[target_mask]
sdata_test

SeqData object with = 100 seqs
seqs = (100,)
names = (100,)
rev_seqs = None
ohe_seqs = (100, 41, 4)
ohe_rev_seqs = None
seqs_annot: 'Probe_Set', 'RNCMPT00001', 'RNCMPT00001_predictions_MT', 'RNCMPT00001_predictions_ST', 'RNCMPT00002', 'RNCMPT00002_predictions_MT', 'RNCMPT00002_predictions_ST', 'RNCMPT00003', 'RNCMPT00003_predictions_MT', 'RNCMPT00003_predictions_ST', 'RNCMPT00004', 'RNCMPT00004_predictions_MT', 'RNCMPT00004_predictions_ST', 'RNCMPT00005', 'RNCMPT00005_predictions_MT', 'RNCMPT00005_predictions_ST', 'RNCMPT00006', 'RNCMPT00006_predictions_MT', 'RNCMPT00007', 'RNCMPT00007_predictions_MT', 'RNCMPT00008', 'RNCMPT00009', 'RNCMPT00010', 'RNCMPT00010_predictions_MT', 'RNCMPT00011', 'RNCMPT00011_predictions_MT', 'RNCMPT00012', 'RNCMPT00012_predictions_MT', 'RNCMPT00013', 'RNCMPT00013_predictions_MT', 'RNCMPT00014', 'RNCMPT00014_predictions_MT', 'RNCMPT00015', 'RNCMPT00016', 'RNCMPT00016_predictions_MT', 'RNCMPT00017', 'RNCMPT00017_predictions_MT', 'RNCMPT00018', 'RNCMPT00018_pr

# Feature Attribution

## Single task

In [40]:
# Candidate attribution methods; only the first one is computed in this cell.
saliency_methods = ["InputXGradient", "DeepLift", "GradientSHAP"]
saliency_method = saliency_methods[0]

# Compute feature attributions on the test set with each of the first five
# single-task DeepBind models; results are stored under the "ST_<target>" prefix.
for target_col in target_cols[:5]:
    print(f"Testing DeepBind SingleTask model on {target_col}")
    checkpoint_pattern = os.path.join(eu.settings.logging_dir, "DeepBind_ST", target_col, "checkpoints", "*")
    # glob returns matches in arbitrary order: sort so the same checkpoint is
    # chosen on every run, and fail with a clear message when none exists.
    checkpoint_files = sorted(glob.glob(checkpoint_pattern))
    if not checkpoint_files:
        raise FileNotFoundError(f"No checkpoint found matching {checkpoint_pattern}")
    model_file = checkpoint_files[0]
    model = eu.models.DeepBind.load_from_checkpoint(model_file)

    eu.interpret.feature_attribution(
        model,
        sdata_test,
        saliency_method=saliency_method,
        prefix=f"ST_{target_col}"
    )

Testing DeepBind SingleTask model on RNCMPT00001
No transforms given, assuming just need to tensorize).


Computing saliency on batches: 0it [00:00, ?it/s]

Testing DeepBind SingleTask model on RNCMPT00002
No transforms given, assuming just need to tensorize).


Computing saliency on batches: 0it [00:00, ?it/s]

Testing DeepBind SingleTask model on RNCMPT00003
No transforms given, assuming just need to tensorize).


Computing saliency on batches: 0it [00:00, ?it/s]

Testing DeepBind SingleTask model on RNCMPT00004
No transforms given, assuming just need to tensorize).


Computing saliency on batches: 0it [00:00, ?it/s]

Testing DeepBind SingleTask model on RNCMPT00005
No transforms given, assuming just need to tensorize).


Computing saliency on batches: 0it [00:00, ?it/s]

## Multi-task

In [46]:
# Load the multi-task DeepBind checkpoint for the selected training version.
print(f"Testing DeepBind MultiTask model on {target_cols[0]}")
version = 0
checkpoint_pattern = os.path.join(eu.settings.logging_dir, "DeepBind_MT", f"v{version}", "checkpoints", "*")
# glob returns matches in arbitrary order: sort so the same checkpoint is
# chosen on every run, and fail with a clear message when none exists.
checkpoint_files = sorted(glob.glob(checkpoint_pattern))
if not checkpoint_files:
    raise FileNotFoundError(f"No checkpoint found matching {checkpoint_pattern}")
model_file = checkpoint_files[0]
model = eu.models.DeepBind.load_from_checkpoint(model_file)

# Candidate attribution methods; only the first one is computed in this cell.
saliency_methods = ["InputXGradient", "DeepLift", "GradientSHAP"]
saliency_method = saliency_methods[0]

# Compute attributions for the first five targets, stored under "MT_<target>".
# NOTE(review): `target=i` assumes the model's output index i corresponds to
# target_cols[i]. Some targets in seqs_annot have no "_predictions_MT" column,
# so the multi-task model may cover only a subset of targets -- confirm that
# the ordering matches before extending this beyond the first five.
for i, target_col in enumerate(target_cols[:5]):
    eu.interpret.feature_attribution(
        model,
        sdata_test,
        saliency_method=saliency_method,
        target=i,
        prefix=f"MT_{target_col}"
    )

Testing DeepBind MultiTask model on RNCMPT00001
No transforms given, assuming just need to tensorize).


Computing saliency on batches: 0it [00:00, ?it/s]

No transforms given, assuming just need to tensorize).


Computing saliency on batches: 0it [00:00, ?it/s]

No transforms given, assuming just need to tensorize).


Computing saliency on batches: 0it [00:00, ?it/s]

No transforms given, assuming just need to tensorize).


Computing saliency on batches: 0it [00:00, ?it/s]

No transforms given, assuming just need to tensorize).


Computing saliency on batches: 0it [00:00, ?it/s]

# Filter visualization

## Single task 

In [51]:
# Generate PFMs for each of the first five single-task DeepBind models,
# stored under the key "ST_<target>_pfms".
for target_col in target_cols[:5]:
    print(f"Testing DeepBind SingleTask model on {target_col}")
    checkpoint_pattern = os.path.join(eu.settings.logging_dir, "DeepBind_ST", target_col, "checkpoints", "*")
    # glob returns matches in arbitrary order: sort so the same checkpoint is
    # chosen on every run, and fail with a clear message when none exists.
    checkpoint_files = sorted(glob.glob(checkpoint_pattern))
    if not checkpoint_files:
        raise FileNotFoundError(f"No checkpoint found matching {checkpoint_pattern}")
    model_file = checkpoint_files[0]
    model = eu.models.DeepBind.load_from_checkpoint(model_file)
    eu.interpret.generate_pfms(model, sdata_test, key_name=f"ST_{target_col}_pfms")

Testing DeepBind SingleTask model on RNCMPT00001
No transforms given, assuming just need to tensorize).


Getting maximial activating seqlets: 0it [00:00, ?it/s]

Getting PFMs from filters:   0%|          | 0/32 [00:00<?, ?it/s]

Testing DeepBind SingleTask model on RNCMPT00002
No transforms given, assuming just need to tensorize).


Getting maximial activating seqlets: 0it [00:00, ?it/s]

Getting PFMs from filters:   0%|          | 0/32 [00:00<?, ?it/s]

Testing DeepBind SingleTask model on RNCMPT00003
No transforms given, assuming just need to tensorize).


Getting maximial activating seqlets: 0it [00:00, ?it/s]

Getting PFMs from filters:   0%|          | 0/32 [00:00<?, ?it/s]

Testing DeepBind SingleTask model on RNCMPT00004
No transforms given, assuming just need to tensorize).


Getting maximial activating seqlets: 0it [00:00, ?it/s]

Getting PFMs from filters:   0%|          | 0/32 [00:00<?, ?it/s]

Testing DeepBind SingleTask model on RNCMPT00005
No transforms given, assuming just need to tensorize).


Getting maximial activating seqlets: 0it [00:00, ?it/s]

Getting PFMs from filters:   0%|          | 0/32 [00:00<?, ?it/s]

## Multi-task

In [52]:
# Reload the multi-task checkpoint explicitly. The single-task filter-viz loop
# earlier in this notebook rebinds `model`, so relying on the leftover variable
# would compute the "MT_pfms" from the last single-task model instead.
version = 0
mt_checkpoint_pattern = os.path.join(eu.settings.logging_dir, "DeepBind_MT", f"v{version}", "checkpoints", "*")
# glob returns matches in arbitrary order: sort so the same checkpoint is
# chosen on every run, and fail with a clear message when none exists.
mt_checkpoint_files = sorted(glob.glob(mt_checkpoint_pattern))
if not mt_checkpoint_files:
    raise FileNotFoundError(f"No checkpoint found matching {mt_checkpoint_pattern}")
model = eu.models.DeepBind.load_from_checkpoint(mt_checkpoint_files[0])

# TODO: move the model to GPU before generating PFMs, e.g. model.to("cuda")
eu.interpret.generate_pfms(model, sdata_test, key_name="MT_pfms")

No transforms given, assuming just need to tensorize).


Getting maximial activating seqlets: 0it [00:00, ?it/s]

Getting PFMs from filters:   0%|          | 0/32 [00:00<?, ?it/s]

In [53]:
# Re-display the SeqData summary now that the attribution and PFM cells have run on it.
sdata_test

SeqData object with = 100 seqs
seqs = (100,)
names = (100,)
rev_seqs = None
ohe_seqs = (100, 41, 4)
ohe_rev_seqs = None
seqs_annot: 'Probe_Set', 'RNCMPT00001', 'RNCMPT00001_predictions_MT', 'RNCMPT00001_predictions_ST', 'RNCMPT00002', 'RNCMPT00002_predictions_MT', 'RNCMPT00002_predictions_ST', 'RNCMPT00003', 'RNCMPT00003_predictions_MT', 'RNCMPT00003_predictions_ST', 'RNCMPT00004', 'RNCMPT00004_predictions_MT', 'RNCMPT00004_predictions_ST', 'RNCMPT00005', 'RNCMPT00005_predictions_MT', 'RNCMPT00005_predictions_ST', 'RNCMPT00006', 'RNCMPT00006_predictions_MT', 'RNCMPT00007', 'RNCMPT00007_predictions_MT', 'RNCMPT00008', 'RNCMPT00009', 'RNCMPT00010', 'RNCMPT00010_predictions_MT', 'RNCMPT00011', 'RNCMPT00011_predictions_MT', 'RNCMPT00012', 'RNCMPT00012_predictions_MT', 'RNCMPT00013', 'RNCMPT00013_predictions_MT', 'RNCMPT00014', 'RNCMPT00014_predictions_MT', 'RNCMPT00015', 'RNCMPT00016', 'RNCMPT00016_predictions_MT', 'RNCMPT00017', 'RNCMPT00017_predictions_MT', 'RNCMPT00018', 'RNCMPT00018_pr

# Save

In [57]:
# Write the annotated test SeqData to the output directory.
# The file name's spelling is kept exactly as-is so existing readers of this path keep working.
interpretations_h5sd_path = os.path.join(
    eu.settings.output_dir,
    "norm_test_predictions_and_intepretations.h5sd",
)
sdata_test.write_h5sd(interpretations_h5sd_path)

---

# Scratch