In [1]:
from binn import BINNClassifier, Network, SuperLogger, BINNExplainer
import pandas as pd

input_data = pd.read_csv("../data/TestQM.csv")
translation = pd.read_csv("../data/translation.tsv", sep="\t")
pathways = pd.read_csv("../data/pathways.tsv", sep="\t")

# Create the model
network = Network(
    input_data=input_data,
    pathways=pathways,
    mapping=translation,
    verbose=False
)

binn = BINNClassifier(
    pathways=network,
    n_layers=4,
    dropout=0.2,
    validate=False,
    epochs=10,
    threads=10,
    logger=SuperLogger("logs/test"),
)

  from .autonotebook import tqdm as notebook_tqdm


Base case reached
Final number of unique connections in pathway:  1003


Missing logger folder: logs/test/lightning_logs


Number of copies made for 4 layers: 26


In [2]:
# Train the model

from UtilForExamples import fit_protein_matrix_to_network_input, generate_data
import torch
from pytorch_lightning import Trainer

design_matrix = pd.read_csv('../data/design_matrix.tsv', sep="\t")
protein_matrix = pd.read_csv('../data/TestQM.csv')

protein_matrix = fit_protein_matrix_to_network_input(
    protein_matrix, features=network.inputs)

X, y = generate_data(protein_matrix, design_matrix=design_matrix)

dataloader = torch.utils.data.DataLoader(dataset=torch.utils.data.TensorDataset(torch.Tensor(X), torch.LongTensor(y)),
                                            batch_size=8,
                                            num_workers=12,
                                            shuffle=True)
trainer = Trainer(max_epochs=10)
trainer.fit(binn.clf, dataloader)

GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available but not used. Set `accelerator` and `devices` using `Trainer(accelerator='gpu', devices=1)`.
You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.

  | Name   | Type             | Params
--------------------------------------------
0 | layers | Sequential       | 364 K 
1 | loss   | CrossEntropyLoss | 0     
--------------------------------------------
364 K     Trainable params
0         Non-trainable params
364 K     Total params
1.457     Total estimated model params size (MB)
The number of training batches (25) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 9: 100%|██████████| 25/25 [00:00<00:00, 49.52it/s, loss=0.591, v_num=4, train_loss=0.584, train_acc=0.746]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 25/25 [00:00<00:00, 45.54it/s, loss=0.591, v_num=4, train_loss=0.584, train_acc=0.746]


In [3]:
from binn.shap import BINNExplainer

test_data = torch.Tensor(X)
background_data = torch.Tensor(X)

explainer = BINNExplainer(binn.clf)

importance_df = explainer.explain(test_data, background_data)

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.


Number of copies made for 4 layers: 26


In [4]:
importance_df

Unnamed: 0,source,target,value,type,source layer,target layer
0,A0M8Q6_0,R-HSA-166663_1,0.002587,0,0,1
1,A0M8Q6_0,R-HSA-166663_1,0.002143,1,0,1
2,A0M8Q6_0,R-HSA-977606_1,0.002587,0,0,1
3,A0M8Q6_0,R-HSA-977606_1,0.002143,1,0,1
4,A0M8Q6_0,R-HSA-2029481_1,0.002587,0,0,1
...,...,...,...,...,...,...
6901,R-HSA-162582_4,root_5,0.021227,1,4,5
6902,R-HSA-74160_4,root_5,0.002481,0,4,5
6903,R-HSA-74160_4,root_5,0.012042,1,4,5
6904,R-HSA-4839726_4,root_5,0.110370,0,4,5


In [5]:
from binn.plot import complete_shap_sankey

complete_shap_sankey(importance_df, savename='test.png')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/

In [6]:
from binn.network import ImportanceNetwork

IG = ImportanceNetwork(importance_df)

query_node = 'P02766'

IG.generate_sankey(query_node, upstream=False)

TypeError: create_graph() takes 1 positional argument but 2 were given