In [1]:
from binn import BINNClassifier, Network, SuperLogger, BINNExplainer
import pandas as pd
import numpy as np
import seaborn as sns

In [2]:
pathways = pd.read_csv("../data/pathways.tsv", sep="\t")
pathways

Unnamed: 0,parent,child
0,R-HSA-109581,R-HSA-109606
1,R-HSA-109581,R-HSA-169911
2,R-HSA-109581,R-HSA-5357769
3,R-HSA-109581,R-HSA-75153
4,R-HSA-109582,R-HSA-140877
...,...,...
2598,R-HSA-983705,R-HSA-983695
2599,R-HSA-983712,R-HSA-2672351
2600,R-HSA-983712,R-HSA-936837
2601,R-HSA-991365,R-HSA-170670


In [3]:
translation = pd.read_csv("../data/translation.tsv", sep="\t")
translation

Unnamed: 0.1,Unnamed: 0,input,translation
0,1323,A0A075B6P5,R-HSA-166663
1,1324,A0A075B6P5,R-HSA-173623
2,1325,A0A075B6P5,R-HSA-198933
3,1326,A0A075B6P5,R-HSA-202733
4,1327,A0A075B6P5,R-HSA-2029481
...,...,...,...
49731,311659,Q9Y6Y9,R-HSA-937072
49732,311660,Q9Y6Y9,R-HSA-9707616
49733,311661,Q9Y6Y9,R-HSA-975163
49734,311662,Q9Y6Z7,R-HSA-166662


In [4]:
input_data = pd.read_csv("../data/TestQM.tsv", sep="\t")
input_data

Unnamed: 0,PeptideSequence,Charge,Decoy,Protein,CK_P1912_146,CK_P1912_147,CK_P1912_148,CK_P1912_150,CK_P1912_151,CK_P1912_152,...,TM_M2012_191,TM_M2012_192,TM_M2012_196,TM_M2012_197,TM_M2012_198,TM_M2012_199,TM_M2012_200,TM_M2012_202,TM_M2012_203,RetentionTime
0,VDRDVAPGTLC(UniMod:4)DVAGWGIVNHAGR,3,False,P00746,7238870.0,,,,,,...,,,,,,,,,,3749.820
1,VDRDVAPGTLC(UniMod:4)DVAGWGIVNHAGR,4,False,P00746,2681940.0,2634110.0,2297470.0,1935300.0,2181160.0,2615960.0,...,,519698.0,,,,,,2221730.0,,3593.610
2,VDTVDPPYPR,2,False,P04004,28535800.0,34874600.0,34586900.0,25820800.0,24657400.0,30830100.0,...,12486000.0,11995900.0,24003800.0,9802000.0,6933130.0,7297560.0,4328240.0,13002400.0,4716600.0,2502.150
3,AVTEQGAELSNEER,2,False,P27348,,,,,,,...,,,,340523.0,336960.0,435119.0,257422.0,,,1790.840
4,VDVIPVNLPGEHGQR,2,False,P02751,652100.0,,,,,,...,,,,,,,,,,3158.430
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7331,GSPMEISLPIALSK,3,False,P09960,,,,,,,...,,,,,,,,,,4348.000
7332,NAYAVLYDIILK,3,False,Q06323,,,,,,,...,,,,,,,,,,5361.965
7333,PVWLGFLGPIIK,3,False,P00450,,,,,,,...,3247630.0,,1245630.0,,1041380.0,1032760.0,881958.0,1441870.0,564108.0,5510.955
7334,RVLVTGAGK,2,False,Q7Z4W1,,,,,,,...,,,221930.0,319971.0,447743.0,1107330.0,546236.0,101497.0,293407.0,1011.060


In [5]:
design_matrix = pd.read_csv("../data/design_matrix.tsv", sep="\t")
design_matrix

Unnamed: 0,sample,group
0,TM_P1911_190,2
1,TM_P1911_191,2
2,TM_P1911_192,2
3,TM_P1911_193,2
4,TM_P1911_194,2
...,...,...
192,TM_M2012_198,2
193,TM_M2012_199,2
194,TM_M2012_200,2
195,TM_M2012_202,2


In [6]:
from dpks.quant_matrix import QuantMatrix

quantified_proteins = QuantMatrix(
    quantification_file=input_data,
    design_matrix_file=design_matrix
)\
    .normalize(method="mean")\
    .quantify(method="maxlfq", level="protein", threads=10)\
    .compare_groups(
        method="linregress",
        group_a=1,
        group_b=2
    )

In [7]:
quantified_proteins.to_df()

Unnamed: 0,Protein,Group1Mean,Group2Mean,Log2FoldChange1-2,PValues1-2,Group1RepCounts,Group2RepCounts,CorrectedPValue,TM_P1911_190,TM_P1911_191,...,TM_M2012_190,TM_M2012_191,TM_M2012_192,TM_M2012_196,TM_M2012_197,TM_M2012_198,TM_M2012_199,TM_M2012_200,TM_M2012_202,TM_M2012_203
0,P08603,22.704976,22.778449,-0.073473,0.167402,74,123,0.134529,22.491483,22.858132,...,23.207890,23.246586,23.313050,23.021662,22.886529,23.148738,23.224264,23.173166,23.788795,23.174173
1,P02671,24.711628,24.731697,-0.020069,0.841549,74,123,0.442396,25.191728,25.320787,...,24.764504,24.806082,24.994612,22.741682,23.874753,24.096868,24.189893,24.664228,24.877911,24.519495
2,P01042,22.455992,22.555733,-0.099741,0.092295,74,123,0.081025,21.898323,21.943888,...,22.829843,22.913729,22.856636,22.941577,22.718110,22.870361,22.888370,22.778884,23.891875,22.818564
3,P00450,22.848118,23.019525,-0.171407,0.006293,74,123,0.007971,22.833492,23.386586,...,23.531525,23.695056,23.637887,22.932646,22.906274,23.271090,23.497650,23.517777,23.808450,23.553383
4,P05156,21.192046,21.294379,-0.102333,0.220999,74,123,0.165594,21.056419,21.366153,...,21.604318,21.660310,21.682339,21.289973,21.071472,21.397457,21.357681,21.395317,22.103208,21.370211
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
549,P07333,18.769939,19.788901,-1.018962,0.000123,12,54,0.000234,,,...,,,,18.481107,,,,,,
550,Q06323,19.514362,18.022822,1.491540,0.161708,2,5,0.130742,,,...,,,,17.150877,18.086225,,,19.517408,,17.283127
551,P16035,18.542429,18.466542,0.075887,0.759335,14,30,0.404306,,19.124122,...,,,,,,,,,,
552,O95633,,,,,0,3,,,,...,,,,18.054630,,,,,,


In [None]:
network = Network(
    input_data=input_data,
    pathways=pathways,
    mapping=translation,
    verbose=True
)

In [None]:
binn = BINNClassifier(
    pathways=network,
    n_layers=4,
    dropout=0.2,
    validate=True,
    epochs=10,
    threads=10,
    logger=SuperLogger("logs/test")
)