In [5]:
# BINN is imported from `binn.NN`, matching the other model-building cell in this
# notebook. NOTE(review): confirm this is the module layout of the installed binn version.
from binn.NN import BINN

# Build the network from the quantification matrix, the parent/child pathway table
# and the input-to-pathway translation table.
model = BINN(
    # Same tab-separated matrix the other cells read (was 'data/TestQM.csv').
    input_data='data/TestQM.tsv',
    pathways='data/pathways.tsv',
    translation_mapping='data/translation.tsv',
    input_data_column='Protein',
    activation='tanh',
    # NOTE(review): no other cell in this notebook passes `learning_rate`;
    # confirm the constructor accepts it.
    learning_rate=1e-4,
    n_layers=4,
    scheduler='plateau',
    optimizer='adam',
    validate=True,
    n_outputs=2,
)
# Last expression: display the model's layers as the cell output.
model.layers


ModuleNotFoundError: No module named 'binn'

In [3]:
# Read the per-layer node names from `model`, which must already exist in the
# kernel (it is created by a model-building cell, not by this one).
layers = model.layer_names
# Last expression: display the layer names as the cell output.
layers

[Index(['A0M8Q6', 'O00194', 'O00391', 'O14786', 'O14791', 'O15145', 'O43707',
        'O75369', 'O75594', 'O75636',
        ...
        'Q9HD89', 'Q9UBE0', 'Q9UBR2', 'Q9UBX5', 'Q9UGM3', 'Q9UK55', 'Q9UNW1',
        'Q9Y490', 'Q9Y4L1', 'Q9Y6Z7'],
       dtype='object', length=446),
 Index(['R-HSA-111452', 'R-HSA-111453', 'R-HSA-111471', 'R-HSA-114294',
        'R-HSA-114452', 'R-HSA-75108', 'R-HSA-111465', 'R-HSA-181429',
        'R-HSA-181430', 'R-HSA-210500',
        ...
        'R-HSA-381753', 'R-HSA-9717189', 'R-HSA-9706019', 'R-HSA-9715370',
        'R-HSA-9748787', 'R-HSA-9749641', 'R-HSA-9753281', 'R-HSA-9754706',
        'R-HSA-983189', 'R-HSA-936837'],
       dtype='object', length=953),
 Index(['R-HSA-429914', 'R-HSA-75158', 'R-HSA-5602358', 'R-HSA-8979227',
        'R-HSA-8963693', 'R-HSA-8964540', 'R-HSA-5601884', 'R-HSA-5668541',
        'R-HSA-5662702', 'R-HSA-72086',
        ...
        'R-HSA-2990846', 'R-HSA-202403', 'R-HSA-8956321', 'R-HSA-917977',
        'R-HSA-136807

In [1]:
from binn.Process import generate_pathway_file
# Collect the file locations and the identifier column in one place, then hand
# them to generate_pathway_file as keyword arguments.
pathway_file_kwargs = {
    "pathways": 'data/pathways.tsv',
    "input_data": 'data/TestQM.tsv',
    "translation_mapping": "data/translation.tsv",
    "input_data_column": "Protein",
}
# The three results are reused by the Network cell further down.
pathways, inputs, mapping_to_all_layers = generate_pathway_file(**pathway_file_kwargs)


Number of reactome ids before subsetting: 3484
Unique proteins in reactome df: 458
Function called 1 times.
Values in idx_list: 0
Function called 2 times.
Values in idx_list: 652
Function called 3 times.
Values in idx_list: 989
Function called 4 times.
Values in idx_list: 1173
Function called 5 times.
Values in idx_list: 1264
Function called 6 times.
Values in idx_list: 1304
Function called 7 times.
Values in idx_list: 1322
Function called 8 times.
Values in idx_list: 1328
Function called 9 times.
Values in idx_list: 1329
Base case reached
Final number of unique connections in pathway:  1856


In [37]:
from binn.Network import Network
# Assemble the pathway graph from the outputs of generate_pathway_file
# (`inputs`, `pathways`, `mapping_to_all_layers` must already be in the kernel).
network = Network(
    inputs=inputs,
    pathways=pathways,
    mapping=mapping_to_all_layers,
)
# Last expression: show the graph summary returned by info().
network.info()

'DiGraph with 1858 nodes and 1884 edges'

In [38]:
import pandas as pd
# Load the three tab-separated input files in one pass (same order as before:
# quantification matrix, pathway table, translation table).
test_data, pathways, translation = (
    pd.read_csv(tsv_path, sep="\t")
    for tsv_path in ('data/TestQM.tsv', 'data/pathways.tsv', 'data/translation.tsv')
)

# Pair each heading with the frame to preview; the quantification matrix is
# narrowed to its peptide and protein columns before previewing.
previews = (
    ("Test data (quantmatrix) \n", test_data[['PeptideSequence', 'Protein']]),
    ("Pathways file\n", pathways),
    ("Translation file\n", translation),
)
for heading, frame in previews:
    print(heading, frame.head())


Test data (quantmatrix) 
                       PeptideSequence Protein
0  VDRDVAPGTLC(UniMod:4)DVAGWGIVNHAGR  P00746
1  VDRDVAPGTLC(UniMod:4)DVAGWGIVNHAGR  P00746
2                          VDTVDPPYPR  P04004
3                      AVTEQGAELSNEER  P27348
4                     VDVIPVNLPGEHGQR  P02751
Pathways file
          parent          child
0  R-HSA-109581   R-HSA-109606
1  R-HSA-109581   R-HSA-169911
2  R-HSA-109581  R-HSA-5357769
3  R-HSA-109581    R-HSA-75153
4  R-HSA-109582   R-HSA-140877
Translation file
    Unnamed: 0       input    translation
0        1323  A0A075B6P5   R-HSA-166663
1        1324  A0A075B6P5   R-HSA-173623
2        1325  A0A075B6P5   R-HSA-198933
3        1326  A0A075B6P5   R-HSA-202733
4        1327  A0A075B6P5  R-HSA-2029481


In [6]:
from binn.NN import BINN
import torch.nn as nn
import torch

# Instead of a single activation name, one activation module can be given per layer.
activations = [activation_cls() for activation_cls in (nn.Sigmoid, nn.Tanh, nn.ReLU, nn.ReLU)]
# Likewise, one dropout ratio per layer.
dropouts = [0.5, 0.3, 0.1, 0.1]

model = BINN(
    # Input files and the column holding the input identifiers.
    input_data='data/TestQM.tsv',
    pathways='data/pathways.tsv',
    translation_mapping='data/translation.tsv',
    input_data_column='Protein',
    # Architecture: four layers, each with its own activation and dropout.
    n_layers=4,
    activation=activations,
    dropout=dropouts,
    n_outputs=2,
    # Training configuration.
    optimizer="adam",
    scheduler="plateau",
    validate=True,
)
# Last expression: display the module structure as the cell output.
model

Number of reactome ids before subsetting: 3484
Unique proteins in reactome df: 458
Function called 1 times.
Values in idx_list: 0
Function called 2 times.
Values in idx_list: 652
Function called 3 times.
Values in idx_list: 989
Function called 4 times.
Values in idx_list: 1173
Function called 5 times.
Values in idx_list: 1264
Function called 6 times.
Values in idx_list: 1304
Function called 7 times.
Values in idx_list: 1322
Function called 8 times.
Values in idx_list: 1328
Function called 9 times.
Values in idx_list: 1329
Base case reached
Final number of unique connections in pathway:  1856
Network:  DiGraph with 1858 nodes and 1884 edges
Number of copies made for 4 layers: 72


BINN(
  (layers): Sequential(
    (Layer_0): Linear(in_features=446, out_features=953, bias=True)
    (BatchNorm_0): BatchNorm1d(953, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (Dropout_0): Dropout(p=0.5, inplace=False)
    (Activation_0): Sigmoid()
    (Layer_1): Linear(in_features=953, out_features=455, bias=True)
    (BatchNorm_1): BatchNorm1d(455, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (Dropout_1): Dropout(p=0.3, inplace=False)
    (Activation_1): Tanh()
    (Layer_2): Linear(in_features=455, out_features=162, bias=True)
    (BatchNorm_2): BatchNorm1d(162, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (Dropout_2): Dropout(p=0.1, inplace=False)
    (Activation_2): ReLU()
    (Layer_3): Linear(in_features=162, out_features=28, bias=True)
    (BatchNorm_3): BatchNorm1d(28, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (Dropout_3): Dropout(p=0.1, inplace=False)
    (Activation_3): ReLU()
 