In [1]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

# import seaborn as sns
import statsmodels.api as sm
from scipy.interpolate import interp1d
from scipy import linalg
from scipy.special import digamma, polygamma
from scipy.stats import t
import sys, os

from time import time

In [2]:
import logging

# Configure logging once for the whole notebook: DEBUG and above, every record
# prefixed with a day-month-year timestamp, the logger name and the level.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    datefmt="%d-%b-%y %H:%M:%S",
)

# FedProt on DIA data

### Input
##### Coordinator:
* contrast variables (= class labels to compare)

##### Each participant:
* intensities rows = protein groups ~~(peptides)~~, columns = samples
* ~~Peptides (or PSM)~~ Precursor counts per protein group
* design matrix: rows = samples, columns = variables and sample_type

### Workflow

1. **Reading inputs.**

~~2. Normalize intensities to sample load~~  MaxLFQ column of DIA-NN report was used, so data are normalized.

~~3. Normalize to reference samples within each TMT-plex~~  MaxLFQ column of DIA-NN report was used, so data are normalized.

4. **Filtering**: 
   1. TMT: Remove proteins found in < 50% of all TMT batches; optionally, remove protein groups supported by just a single peptide (federated - trivial)
   2. LFQ: "**Filter protein table. DEqMS require minimum two values for each group.**" - we will require at least 50% non-NA samples in each group.

5. ? Batch-effect correction ? - TBD 
   1. fedComBat
   2. fed_removeBatchEffect from limma

6. **limma** (without voom).

7. **DEqMS**.

In [3]:
# --- Defined by each participant ---
# Alternative input folder, kept for reference:
# data_dir = "/home/yuliya/repos/cosybio/FedDEqMS/data/00_prot_matrices/balanced"  # path to data folder
data_dir = "/home/yuliya/repos/cosybio/FedDEqMS/data/00_prot_matrices_LABS/balanced"  # folder with the per-cohort input files
cohorts = ["lab_A", "lab_B", "lab_C", "lab_D", "lab_E"]  # one client per name; the name is also the file-name prefix of that cohort's inputs

# --- Defined by the coordinator ---
target_classes = ["Pyr", "Glu"]  # classes compared in the DE test
covariates = []  # covariates in the linear model (none in this run)
# Missing-value threshold passed to client.apply_filters() and server.update_prot_names().
# Author's description: for TMT, maximal fraction of TMT-plexes with NA (in the local dataset) to keep a
# feature; for DIA, maximal fraction of samples with NA in a class to keep a feature.
# NOTE(review): the client log says protein groups "detected in less than 0.8 of each target class will be
# excluded", which reads like a minimum detection fraction - confirm which meaning applies.
min_f = 0.8
remove_single_peptide_prots = False  # whether to remove protein groups supported by a single peptide
experiment_type = "DIA"  # "TMT": TMT data are expected and additional checks are done; "DIA": DIA data are expected without additional checks
# NOTE(review): presumably states whether the input intensities are ALREADY log-transformed; with False the
# clients log "Log2(x+1) transformed intensities" - confirm against the Client class.
log_transformed = False

## 1. Reading  and validating inputs
 * each client reads its intensities, counts and design matrix
 * each client joins the server
 * inputs are checked, and intensities and counts are subset to the shared features (protein groups)

In [4]:
from FedProt.clientDIA import Client
from FedProt.serverDIA import Server

In [5]:
# Fresh client registry (filled by the joining loop, keyed by cohort name)
# and a new coordinator for the configured classes and covariates.
store_clients = dict()
server = Server(target_classes, covariates)

In [6]:
# Clients are joining: every cohort loads its own files and registers with the server.
for cohort_name in cohorts:
    # Per-cohort inputs, all located in data_dir:
    #   - matrix of intensities
    #   - matrix of summarized protein counts (for TMT data, TMT-plex and sample_type are mandatory columns)
    #   - design matrix
    intensity_file_path = f"{data_dir}/{cohort_name}_protein_groups_matrix.tsv"
    count_file_path = f"{data_dir}/{cohort_name}_protein_counts.tsv"
    annotation_file_path = f"{data_dir}/{cohort_name}_design.tsv"
    # Precursor counts are currently not used:
    # count_pep_file_path = f"{data_dir}/{cohort_name}_precursor_counts.tsv"

    client = Client(
        cohort_name,
        intensity_file_path,
        count_file_path,
        annotation_file_path,
        experiment_type,
        # count_pep_file_path,
        log_transformed,
    )

    # keep a handle to the client, then announce it to the server
    store_clients[client.cohort_name] = client
    server.join_client(client.cohort_name, client.prot_names, client.n_samples)

# Summary of the server state after all clients have joined.
print("===== Clients joined =====")
for label, value in (
    ("Client names:", server.client_names),
    ("Samples per client:", server.n_samples_per_cli),
    ("Target classes:", server.target_classes),
    ("Covariates:", server.covariates),
    ("Stored protein group names:", len(server.stored_features)),
):
    print(label, value)

N = np.sum(server.n_samples_per_cli)  # total number of samples
print("Samples in total:", N)


# TMT prints:
# print("TMT-plexes per client:", server.n_tmt_per_cli)
# Ntmt = np.sum(server.n_tmt_per_cli)
# print("TMT-plexes in total:", Ntmt)

25-Jan-24 19:17:11 - root - INFO - Client lab_A: Log2(x+1) transformed intensities.
25-Jan-24 19:17:11 - root - INFO - Client lab_A: Loaded 23 samples and 2600 proteins.
25-Jan-24 19:17:11 - root - INFO - Server: joined client  lab_A
25-Jan-24 19:17:11 - root - INFO - Client lab_B: Log2(x+1) transformed intensities.
25-Jan-24 19:17:11 - root - INFO - Client lab_B: Loaded 23 samples and 2806 proteins.
25-Jan-24 19:17:11 - root - INFO - Server: joined client  lab_B
25-Jan-24 19:17:11 - root - INFO - Client lab_C: Log2(x+1) transformed intensities.
25-Jan-24 19:17:11 - root - INFO - Client lab_C: Loaded 23 samples and 2834 proteins.
25-Jan-24 19:17:11 - root - INFO - Server: joined client  lab_C
25-Jan-24 19:17:11 - root - INFO - Client lab_D: Log2(x+1) transformed intensities.
25-Jan-24 19:17:11 - root - INFO - Client lab_D: Loaded 24 samples and 2927 proteins.
25-Jan-24 19:17:11 - root - INFO - Server: joined client  lab_D
25-Jan-24 19:17:11 - root - INFO - Client lab_E: Log2(x+1) trans

===== Clients joined =====
Client names: ['lab_A', 'lab_B', 'lab_C', 'lab_D', 'lab_E']
Samples per client: [23, 23, 23, 24, 24]
Target classes: ['Glu', 'Pyr']
Covariates: []
Stored protein group names: 2347
Samples in total: 117


In [7]:
# Cleaning inputs: each client validates its data against the server's feature and
# variable lists, then gains cohort-effect columns in its design matrix.
for cohort in cohorts:
    client = store_clients[cohort]
    client.validate_inputs(server.stored_features, server.variables)
    # the first client name is left out, i.e. one column fewer than the number of cohorts
    client.add_cohort_effects_to_design(server.client_names[1:])

    print(f"Samples in {client.cohort_name} data: {len(client.sample_names)}")
    print(f"Protein groups in {client.cohort_name} data:  {len(client.prot_names)}")

# Mirror the new cohort columns in the server-side variable list,
# printing the list before and after the extension.
print(server.variables)
server.variables = [*server.variables, *server.client_names[1:]]
print(server.variables)

25-Jan-24 19:17:11 - root - INFO - Client lab_A:	2 columns are excluded from the design matrix:
25-Jan-24 19:17:11 - root - INFO - Client lab_A: Validated 23 samples and 2347 proteins.
25-Jan-24 19:17:11 - root - INFO - Client lab_B:	2 columns are excluded from the design matrix:
25-Jan-24 19:17:11 - root - INFO - Client lab_B: Validated 23 samples and 2347 proteins.
25-Jan-24 19:17:11 - root - INFO - Client lab_C:	2 columns are excluded from the design matrix:
25-Jan-24 19:17:11 - root - INFO - Client lab_C: Validated 23 samples and 2347 proteins.
25-Jan-24 19:17:11 - root - INFO - Client lab_D:	2 columns are excluded from the design matrix:
25-Jan-24 19:17:11 - root - INFO - Client lab_D: Validated 24 samples and 2347 proteins.
25-Jan-24 19:17:11 - root - INFO - Client lab_E:	2 columns are excluded from the design matrix:
25-Jan-24 19:17:11 - root - INFO - Client lab_E: Validated 24 samples and 2347 proteins.


Samples in lab_A data: 23
Protein groups in lab_A data:  2347
Samples in lab_B data: 23
Protein groups in lab_B data:  2347
Samples in lab_C data: 23
Protein groups in lab_C data:  2347
Samples in lab_D data: 24
Protein groups in lab_D data:  2347
Samples in lab_E data: 24
Protein groups in lab_E data:  2347
['Glu', 'Pyr']
['Glu', 'Pyr', 'lab_B', 'lab_C', 'lab_D', 'lab_E']


## 2. Filtering
* For TMT: remove proteins found in less than a user-defined % of TMT batches in each client
* For LFQ: remove proteins found in less than a user-defined % of the samples of each target class
* [Optionally:] remove protein groups supported by just a single peptide

In [8]:
# State before filtering: shared protein groups and total sample count.
N = np.sum(server.n_samples_per_cli)  # total number of samples
print("Stored protein group names:", len(server.stored_features))
print("Samples in total:", N)

Stored protein group names: 2347
Samples in total: 117


In [9]:
# Step 1: every client applies its local filters and hands the two returned
# objects (NA counts per variable, samples per class) to the server.
for cohort in cohorts:
    na_count_in_variable, samples_per_class = store_clients[cohort].apply_filters(
        min_f=min_f,
        remove_single_peptide_prots=remove_single_peptide_prots,
    )
    server.create_na_df(na_count_in_variable, samples_per_class)

# Step 2: the server filters out proteins with too many NAs.
server.update_prot_names(min_f)
print("Server:\tProtein groups remained:", len(server.stored_features), file=sys.stderr)

# Step 3: every client adopts the server's protein list and order.
for cohort in cohorts:
    store_clients[cohort].update_prot_names(server.stored_features)

25-Jan-24 19:17:13 - root - INFO - Client lab_A:	Protein groups detected in less than 0.8 of each target class will be excluded:
25-Jan-24 19:17:13 - root - INFO - Client lab_B:	Protein groups detected in less than 0.8 of each target class will be excluded:
25-Jan-24 19:17:13 - root - INFO - Client lab_C:	Protein groups detected in less than 0.8 of each target class will be excluded:
25-Jan-24 19:17:13 - root - INFO - Client lab_D:	Protein groups detected in less than 0.8 of each target class will be excluded:
25-Jan-24 19:17:13 - root - INFO - Client lab_E:	Protein groups detected in less than 0.8 of each target class will be excluded:
Server:	Protein groups remained: 2346
25-Jan-24 19:17:13 - root - INFO - Samples in lab_A data: 23, protein groups: 2346
25-Jan-24 19:17:13 - root - INFO - Samples in lab_B data: 23, protein groups: 2346
25-Jan-24 19:17:13 - root - INFO - Samples in lab_C data: 23, protein groups: 2346
25-Jan-24 19:17:13 - root - INFO - Samples in lab_D data: 24, protei

## 3. Limma

In [10]:
### 1) XtX, XtY, beta and stdev
# Empty per-client accumulators; the next cell appends one entry per cohort.
XtX_list, XtY_list = [], []
server.n_samples_per_cli = []

In [11]:
# Re-create the accumulators inside this cell so that running it a second time
# does not append a duplicate of every client's contribution (they are otherwise
# only reset in the previous cell).
XtX_list = []
XtY_list = []
server.n_samples_per_cli = []

for c in cohorts:
    client = store_clients[c]
    # restrict the client to the server's current protein list before computing
    client.prepare_for_limma(server.stored_features)

    server.n_samples_per_cli.append(client.n_samples)
    # per-client XtX and XtY; only these are handed to the server in the next cell
    XtX, XtY = client.compute_XtX_XtY()
    XtX_list.append(XtX)
    XtY_list.append(XtY)

In [12]:
# Server-side step on the per-client XtX / XtY lists collected in the previous cell;
# the server log reports it as "computing global beta and beta stdev".
server.compute_beta_and_beta_stdev(XtX_list, XtY_list)

25-Jan-24 19:17:18 - root - INFO - Server: computing global beta and beta stdev, k = 6, n = 2346


In [13]:
### 1) Computes SSE and cov. coefficients on every client and aggregates them on the server
### 2) Collects per-client intensity sums and non-NA counts
###    (author's note: used for Ameans and log-counts and the LOWESS fit - confirm in the Server class)
SSE_list = []
cov_coef_list = []
n_measurements = []
intensities_sum = []

# Iterate over `cohorts`, like every other per-client loop in this notebook, instead of
# over store_clients.keys(): the registry may hold stale entries or a different order
# after partial re-runs, which would misalign these lists with XtX_list / XtY_list.
for c in cohorts:
    client = store_clients[c]

    # sum of squared residuals, computed against the global beta held by the server
    SSE, cov_coef = client.compute_SSE_and_cov_coef(server.beta)
    SSE_list.append(SSE)
    cov_coef_list.append(cov_coef)

    # per-protein intensity sums and non-NA counts (inputs for the mean log-intensity)
    intensities_sum.append(client.sum_intensities())
    n_measurements.append(client.get_not_na())


server.aggregate_SSE_and_cov_coef(SSE_list, cov_coef_list, intensities_sum, n_measurements)

In [14]:
# contrasts matrix
# A single contrast built from the two target classes: target_classes[0] ("Pyr") against
# target_classes[1] ("Glu").
# NOTE(review): the sign convention (which class is subtracted) is decided inside make_contrasts - confirm.
contrast_matrix = server.make_contrasts(contrasts=[([target_classes[0]], [target_classes[1]])])

In [15]:
### applies contrasts
# Passes the `.values` of the contrast matrix; note that the method name is spelled
# `fit_contasts` in the Server API.
server.fit_contasts(contrast_matrix.values)

In [16]:
# Modified eBayes(), executed entirely on the server side. Author's notes on the changes:
#   - the standard numpy mean and sum are replaced with nanmean and nansum
#   - corrected index in posterior_var()
server.eBayes()

# Preview of the limma-style result columns (first 10 protein groups).
limma_columns = ["logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B", "CI.L", "CI.R"]
server.table.loc[:, limma_columns].head(10)

25-Jan-24 19:17:21 - rpy2.situation - INFO - cffi mode is CFFI_MODE.ANY
25-Jan-24 19:17:21 - rpy2.situation - DEBUG - Looking for R home with: R RHOME
25-Jan-24 19:17:21 - rpy2.situation - INFO - R home found: /home/yuliya/anaconda3/envs/deqms_fed/lib/R
25-Jan-24 19:17:21 - rpy2.situation - DEBUG - Looking for LD_LIBRARY_PATH with: /home/yuliya/anaconda3/envs/deqms_fed/lib/R/bin/Rscript -e cat(Sys.getenv("LD_LIBRARY_PATH"))


25-Jan-24 19:17:21 - rpy2.situation - INFO - R library path: 
25-Jan-24 19:17:21 - rpy2.situation - INFO - LD_LIBRARY_PATH: 
25-Jan-24 19:17:21 - rpy2.rinterface_lib.ffi_proxy - DEBUG - cffi mode is InterfaceType.API
25-Jan-24 19:17:21 - rpy2.rinterface_lib.embedded - INFO - Default options to initialize R: rpy2, --quiet, --no-save
25-Jan-24 19:17:21 - rpy2.rinterface_lib.embedded - INFO - R is already initialized. No need to initialize.
25-Jan-24 19:17:22 - root - INFO - Calculating tail p-values


Unnamed: 0,logFC,AveExpr,t,P.Value,adj.P.Val,B,CI.L,CI.R
A5A614,1.065912,16.366291,10.651998,1.588538e-18,4.254235e-18,30.240916,0.867554,1.264269
O32583,0.828454,17.27229,9.172633,2.402397e-15,5.60241e-15,22.897657,0.649528,1.007379
P00350,-0.19593,24.301998,-10.251015,7.432977000000001e-18,1.9246980000000003e-17,28.665579,-0.233794,-0.158065
P00363,0.123964,22.200149,2.930303,0.004094106,0.005260007,-4.680319,0.040157,0.207771
P00370,-0.484948,23.721769,-14.341575,2.9677010000000003e-27,1.148882e-26,50.343895,-0.551936,-0.41796
P00393,-0.105231,21.483183,-4.043782,9.613036e-05,0.000137012,-1.150387,-0.156784,-0.053678
P00448,0.67544,24.094176,13.764063,5.835129999999999e-26,2.145645e-25,47.355367,0.578224,0.772656
P00452,0.11678,21.286681,3.777521,0.0002538009,0.0003521093,-2.075886,0.055537,0.178023
P00490,-0.820533,21.388415,-32.514674,2.190204e-59,3.402794e-58,124.729347,-0.870526,-0.770539
P00509,0.185133,24.042637,8.591871,5.232925e-14,1.14874e-13,19.827898,0.142446,0.22782


In [17]:
# server.table.loc[:, ["logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B", "CI.L", "CI.R"]].to_csv(
#     "/home/yuliya/repos/cosybio/FedDEqMS/data/04_evaluation/balanced/results.Flimma.tsv", sep="\t"
# )

## 4. DEqMS

In [18]:
# # summarize peptides from clients to unique prot-peptiides counts
# for c in cohorts:
#     client = store_clients[c]
#     server.create_pep_counts(client.pep_counts)

# server.summarize_pep_counts()

In [19]:
# The minimum peptide count among all samples is used
# (for TMT data from MaxQuant: count of unique+razor peptides used for quantification).
min_counts = [store_clients[c].get_min_count() for c in cohorts]

# Combine the per-client results column-wise and take the row-wise minimum,
# i.e. the smallest count reported by any client for each protein.
global_min_counts = pd.concat(min_counts, axis=1).min(axis=1)

# The minimum peptide count of some proteins can be 0,
# so a pseudocount of 1 is added to all proteins.
server.min_counts = global_min_counts.loc[server.stored_features] + 1

In [20]:
# removes trend between (min-counts+1) and log-variance,
# computes statistics
# (called with fit_method="loess"; the server log reports "Fitting LOWESS curve...")
server.spectral_count_ebayes(fit_method="loess")

25-Jan-24 19:17:26 - root - INFO - Fitting LOWESS curve...
25-Jan-24 19:17:26 - root - INFO - min_count: (2346,), log_var: (2346,)


In [21]:
# results: first 15 rows of the server's result table; besides the limma columns the
# displayed table carries post.df, counts, sca.t, sca.P.Value and sca.adj.pval
server.table.head(15)

Unnamed: 0,t,AveExpr,B,logFC,CI.L,CI.R,adj.P.Val,P.Value,post.df,counts,sca.t,sca.P.Value,sca.adj.pval
A5A614,10.651998,16.366291,30.240916,1.065912,0.867554,1.264269,4.254235e-18,1.588538e-18,110.7,2,10.6798,9.968684999999998e-19,2.669696e-18
O32583,9.172633,17.27229,22.897657,0.828454,0.649528,1.007379,5.60241e-15,2.402397e-15,116.7,2,9.174466,1.948955e-15,4.558572e-15
P00350,-10.251015,24.301998,28.665579,-0.19593,-0.233794,-0.158065,1.9246980000000003e-17,7.432977000000001e-18,116.7,40,-10.399045,2.536877e-18,6.702154e-18
P00363,2.930303,22.200149,-4.680319,0.123964,0.040157,0.207771,0.005260007,0.004094106,116.7,36,2.969382,0.003622639,0.004672189
P00370,-14.341575,23.721769,50.343895,-0.484948,-0.551936,-0.41796,1.148882e-26,2.9677010000000003e-27,116.7,33,-14.526643,6.23433e-28,2.449872e-27
P00393,-4.043782,21.483183,-1.150387,-0.105231,-0.156784,-0.053678,0.000137012,9.613036e-05,116.7,25,-4.084802,8.122381e-05,0.0001160481
P00448,13.764063,24.094176,47.355367,0.67544,0.578224,0.772656,2.145645e-25,5.835129999999999e-26,116.7,15,13.912949,1.54277e-26,5.74498e-26
P00452,3.777521,21.286681,-2.075886,0.11678,0.055537,0.178023,0.0003521093,0.0002538009,116.7,39,3.829193,0.0002084147,0.000290345
P00490,-32.514674,21.388415,124.729347,-0.820533,-0.870526,-0.770539,3.402794e-58,2.190204e-59,116.7,45,-32.996383,5.217764e-61,8.160583e-60
P00509,8.591871,24.042637,19.827898,0.185133,0.142446,0.22782,1.14874e-13,5.232925e-14,116.7,32,8.693378,2.579159e-14,5.74071e-14


In [70]:
# server.table.to_csv( "/home/yuliya/repos/cosybio/FedDEqMS/data/02_fedDEqMS_on_all/results.FedProt.tsv", sep="\t")

In [71]:
# server.table.to_csv("/home/yuliya/repos/cosybio/FedDEqMS/data/04_evaluation/balanced/results.FedProt.tsv", sep="\t")
# server.table.to_csv("//home/yuliya/repos/cosybio/FedDEqMS/data/04_evaluation_on_scap/balanced/results.FedProt.tsv", sep="\t")

In [72]:
#fig, ax =  server.variance_deqms_plot(main='fed-DEqMS')
#plt.show()  # to display the plot
#fig.savefig('my_plot.png')  # to save the plot as a .png file

In [73]:
# Write the complete result table as a tab-separated file.
# NOTE(review): absolute, user-specific output path - this cell will likely fail on any
# machine where the directory does not exist.
server.table.to_csv("/home/yuliya/repos/cosybio/FedDEqMS/data/04_evaluation_LABS/balanced/results.FedProt.tsv", sep="\t")

# Unbalanced

In [74]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

# import seaborn as sns
import statsmodels.api as sm
from scipy.interpolate import interp1d
from scipy import linalg
from scipy.special import digamma, polygamma
from scipy.stats import t
import sys, os

from time import time

np.set_printoptions(precision=22)

# %precision 22
%matplotlib inline

06-Nov-23 21:08:09 - matplotlib.pyplot - DEBUG - Loaded backend module://matplotlib_inline.backend_inline version unknown.


In [75]:
import logging

# Configure logging: DEBUG and above, every record prefixed with a
# day-month-year timestamp, the logger name and the level.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    datefmt="%d-%b-%y %H:%M:%S",
)

In [76]:
# --- Defined by each participant ---
# Alternative input folder, kept for reference:
# data_dir = "/home/yuliya/repos/cosybio/FedDEqMS/data/00_prot_matrices/unbalanced"  # path to data folder
data_dir = "/home/yuliya/repos/cosybio/FedDEqMS/data/00_prot_matrices_LABS/unbalanced"  # folder with the per-cohort input files
cohorts = ["lab_A", "lab_B", "lab_C", "lab_D", "lab_E"]  # one client per name; the name is also the file-name prefix of that cohort's inputs

# --- Defined by the coordinator ---
target_classes = ["Pyr", "Glu"]  # classes compared in the DE test
covariates = []  # covariates in the linear model (none in this run)
# Missing-value threshold passed to client.apply_filters().
# Author's description: for TMT, maximal fraction of TMT-plexes with NA (in the local dataset) to keep a
# feature; for DIA, maximal fraction of samples with NA in a class to keep a feature.
# NOTE(review): the client log says protein groups "detected in less than 0.8 of each target class will be
# excluded", which reads like a minimum detection fraction - confirm which meaning applies.
min_f = 0.8
remove_single_peptide_prots = False  # whether to remove protein groups supported by a single peptide
experiment_type = "DIA"  # "TMT": TMT data are expected and additional checks are done; "DIA": DIA data are expected without additional checks
# NOTE(review): presumably states whether the input intensities are ALREADY log-transformed. In this
# section the value is not passed to Client(...), so it appears to be unused here - confirm.
log_transformed = False

In [77]:
# NOTE(review): this section imports Client/Server from the `fedDEqMS` package, whereas the
# balanced section at the top of the notebook imports them from `FedProt` - confirm which
# package name is current before re-running this section.
from fedDEqMS.clientDIA import Client
from fedDEqMS.serverDIA import Server

In [78]:
# Fresh client registry (filled by the joining loop, keyed by cohort name)
# and a new coordinator for the configured classes and covariates.
store_clients = dict()
server = Server(target_classes, covariates)

In [79]:
# Clients are joining: every cohort loads its own files and registers with the server.
for cohort_name in cohorts:
    # Per-cohort inputs, all located in data_dir:
    #   - matrix of intensities
    #   - matrix of summarized protein counts (for TMT data, TMT-plex and sample_type are mandatory columns)
    #   - design matrix
    intensity_file_path = f"{data_dir}/{cohort_name}_protein_groups_matrix.tsv"
    count_file_path = f"{data_dir}/{cohort_name}_protein_counts.tsv"
    annotation_file_path = f"{data_dir}/{cohort_name}_design.tsv"
    # Precursor counts are currently not used:
    # count_pep_file_path = f"{data_dir}/{cohort_name}_precursor_counts.tsv"

    client = Client(
        cohort_name,
        intensity_file_path,
        count_file_path,
        annotation_file_path,
        experiment_type,
        # count_pep_file_path
    )

    # keep a handle to the client, then announce it to the server
    store_clients[client.cohort_name] = client
    server.join_client(client.cohort_name, client.prot_names, client.n_samples)

# TMT prints:
# print("TMT-plexes per client:", server.n_tmt_per_cli)
# Ntmt = np.sum(server.n_tmt_per_cli)
# print("TMT-plexes in total:", Ntmt)

# Summary of the server state after all clients have joined.
print("===== Clients joined =====")
for label, value in (
    ("Client names:", server.client_names),
    ("Samples per client:", server.n_samples_per_cli),
    ("Target classes:", server.target_classes),
    ("Covariates:", server.covariates),
    ("Stored protein group names:", len(server.stored_features)),
):
    print(label, value)

N = np.sum(server.n_samples_per_cli)  # total number of samples
print("Samples in total:", N)


06-Nov-23 21:08:09 - root - INFO - Client lab_A: Log2(x+1) transformed intensities.
06-Nov-23 21:08:09 - root - INFO - Client lab_A: Loaded 16 samples and 2599 proteins.
06-Nov-23 21:08:09 - root - INFO - Server: joined client  lab_A
06-Nov-23 21:08:09 - root - INFO - Client lab_B: Log2(x+1) transformed intensities.
06-Nov-23 21:08:09 - root - INFO - Client lab_B: Loaded 13 samples and 2805 proteins.
06-Nov-23 21:08:09 - root - INFO - Server: joined client  lab_B
06-Nov-23 21:08:09 - root - INFO - Client lab_C: Log2(x+1) transformed intensities.
06-Nov-23 21:08:09 - root - INFO - Client lab_C: Loaded 14 samples and 2826 proteins.
06-Nov-23 21:08:09 - root - INFO - Server: joined client  lab_C
06-Nov-23 21:08:09 - root - INFO - Client lab_D: Log2(x+1) transformed intensities.
06-Nov-23 21:08:09 - root - INFO - Client lab_D: Loaded 15 samples and 2927 proteins.
06-Nov-23 21:08:09 - root - INFO - Server: joined client  lab_D
06-Nov-23 21:08:09 - root - INFO - Client lab_E: Log2(x+1) trans

===== Clients joined =====
Client names: ['lab_A', 'lab_B', 'lab_C', 'lab_D', 'lab_E']
Samples per client: [16, 13, 14, 15, 15]
Target classes: ['Glu', 'Pyr']
Covariates: []
Stored protein group names: 2344
Samples in total: 73


In [80]:
# Cleaning inputs: each client validates its data against the server's feature and
# variable lists, then gains cohort-effect columns in its design matrix.
for cohort in cohorts:
    client = store_clients[cohort]
    client.validate_inputs(server.stored_features, server.variables)
    # the first client name is left out, i.e. one column fewer than the number of cohorts
    client.add_cohort_effects_to_design(server.client_names[1:])

    print(f"Samples in {client.cohort_name} data: {len(client.sample_names)}")
    print(f"Protein groups in {client.cohort_name} data:  {len(client.prot_names)}")

# Mirror the new cohort columns in the server-side variable list,
# printing the list before and after the extension.
print(server.variables)
server.variables = [*server.variables, *server.client_names[1:]]
print(server.variables)

06-Nov-23 21:08:09 - root - INFO - Client lab_A:	2 columns are excluded from the design matrix:
06-Nov-23 21:08:09 - root - INFO - Client lab_A: Validated 16 samples and 2344 proteins.
06-Nov-23 21:08:09 - root - INFO - Client lab_B:	2 columns are excluded from the design matrix:
06-Nov-23 21:08:09 - root - INFO - Client lab_B: Validated 13 samples and 2344 proteins.
06-Nov-23 21:08:09 - root - INFO - Client lab_C:	2 columns are excluded from the design matrix:
06-Nov-23 21:08:09 - root - INFO - Client lab_C: Validated 14 samples and 2344 proteins.
06-Nov-23 21:08:09 - root - INFO - Client lab_D:	2 columns are excluded from the design matrix:
06-Nov-23 21:08:09 - root - INFO - Client lab_D: Validated 15 samples and 2344 proteins.
06-Nov-23 21:08:09 - root - INFO - Client lab_E:	2 columns are excluded from the design matrix:
06-Nov-23 21:08:09 - root - INFO - Client lab_E: Validated 15 samples and 2344 proteins.


Samples in lab_A data: 16
Protein groups in lab_A data:  2344
Samples in lab_B data: 13
Protein groups in lab_B data:  2344
Samples in lab_C data: 14
Protein groups in lab_C data:  2344
Samples in lab_D data: 15
Protein groups in lab_D data:  2344
Samples in lab_E data: 15
Protein groups in lab_E data:  2344
['Glu', 'Pyr']
['Glu', 'Pyr', 'lab_B', 'lab_C', 'lab_D', 'lab_E']


In [81]:
# State before filtering: shared protein groups and total sample count.
N = np.sum(server.n_samples_per_cli)  # total number of samples
print("Stored protein group names:", len(server.stored_features))
print("Samples in total:", N)

Stored protein group names: 2344
Samples in total: 73


In [82]:
# Filtering round: each client filters locally and sends the surviving protein list to the server.
# NOTE(review): the call pattern here differs from the balanced section of this notebook. Here
# apply_filters() returns one object and update_prot_names() takes (cohort name, protein list);
# there apply_filters() returns two objects that go to server.create_na_df(), followed by
# server.update_prot_names(min_f). Confirm which API the installed package provides.
for c in cohorts:
    client = store_clients[c]
    # filter out proteins with too many NAs
    passed_prots = client.apply_filters(min_f=min_f, remove_single_peptide_prots=remove_single_peptide_prots)

    # send updated protein list to the server
    server.update_prot_names(client.cohort_name, passed_prots)

# status line goes to stderr, so it is interleaved with the log records in the cell output
print("Server:\tProtein groups remained:", len(server.stored_features), file=sys.stderr)

# update the order and proteins in each client using server order
for c in cohorts:
    client = store_clients[c]
    client.update_prot_names(server.stored_features)

06-Nov-23 21:08:09 - root - INFO - Client lab_A:	Protein groups detected in less than 0.8 of each target class will be excluded: 10
06-Nov-23 21:08:09 - root - INFO - Client lab_B:	Protein groups detected in less than 0.8 of each target class will be excluded: 10
06-Nov-23 21:08:09 - root - INFO - Client lab_C:	Protein groups detected in less than 0.8 of each target class will be excluded: 12
06-Nov-23 21:08:09 - root - INFO - Client lab_D:	Protein groups detected in less than 0.8 of each target class will be excluded: 5
06-Nov-23 21:08:09 - root - INFO - Client lab_E:	Protein groups detected in less than 0.8 of each target class will be excluded: 30
Server:	Protein groups remained: 2303
06-Nov-23 21:08:09 - root - INFO - Samples in lab_A data: 16, protein groups: 2303
06-Nov-23 21:08:09 - root - INFO - Samples in lab_B data: 13, protein groups: 2303
06-Nov-23 21:08:09 - root - INFO - Samples in lab_C data: 14, protein groups: 2303
06-Nov-23 21:08:09 - root - INFO - Samples in lab_D da

In [83]:
### 1) XtX, XtY, beta and stdev
# Empty per-client accumulators; the next cell appends one entry per cohort.
XtX_list, XtY_list = [], []
server.n_samples_per_cli = []

In [84]:
# Re-create the accumulators inside this cell so that running it a second time
# does not append a duplicate of every client's contribution (they are otherwise
# only reset in the previous cell).
XtX_list = []
XtY_list = []
server.n_samples_per_cli = []

for c in cohorts:
    client = store_clients[c]
    # restrict the client to the server's current protein list before computing
    client.prepare_for_limma(server.stored_features)

    server.n_samples_per_cli.append(client.n_samples)
    # per-client XtX and XtY; only these are handed to the server in the next cell
    XtX, XtY = client.compute_XtX_XtY()
    XtX_list.append(XtX)
    XtY_list.append(XtY)

In [85]:
# Server-side step on the per-client XtX / XtY lists collected in the previous cell;
# the server log reports it as "computing global beta and beta stdev".
server.compute_beta_and_beta_stdev(XtX_list, XtY_list)

06-Nov-23 21:08:09 - root - INFO - Server: computing global beta and beta stdev, k = 6, n = 2303


In [86]:
### 1) Computes SSE and cov. coefficients on every client and aggregates them on the server
### 2) Collects per-client intensity sums and non-NA counts
###    (author's note: used for Ameans and log-counts and the LOWESS fit - confirm in the Server class)
SSE_list = []
cov_coef_list = []
n_measurements = []
intensities_sum = []

# Iterate over `cohorts`, like every other per-client loop in this notebook, instead of
# over store_clients.keys(): the registry may hold stale entries or a different order
# after partial re-runs, which would misalign these lists with XtX_list / XtY_list.
for c in cohorts:
    client = store_clients[c]

    # sum of squared residuals, computed against the global beta held by the server
    SSE, cov_coef = client.compute_SSE_and_cov_coef(server.beta)
    SSE_list.append(SSE)
    cov_coef_list.append(cov_coef)

    # per-protein intensity sums and non-NA counts (inputs for the mean log-intensity)
    intensities_sum.append(client.sum_intensities())
    n_measurements.append(client.get_not_na())


server.aggregate_SSE_and_cov_coef(SSE_list, cov_coef_list, intensities_sum, n_measurements)

In [87]:
# contrasts matrix
# A single contrast built from the two target classes: target_classes[0] ("Pyr") against
# target_classes[1] ("Glu").
# NOTE(review): the sign convention (which class is subtracted) is decided inside make_contrasts - confirm.
contrast_matrix = server.make_contrasts(contrasts=[([target_classes[0]], [target_classes[1]])])

In [88]:
### applies contrasts
# Passes the `.values` of the contrast matrix; note that the method name is spelled
# `fit_contasts` in the Server API.
server.fit_contasts(contrast_matrix.values)

In [89]:
# Modified eBayes(), executed entirely on the server side. Author's notes on the changes:
#   - the standard numpy mean and sum are replaced with nanmean and nansum
#   - corrected index in posterior_var()
server.eBayes()

# Preview of the limma-style result columns (first 10 protein groups).
limma_columns = ["logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B", "CI.L", "CI.R"]
server.table.loc[:, limma_columns].head(10)

06-Nov-23 21:08:09 - root - INFO - Calculating tail p-values


Unnamed: 0,logFC,AveExpr,t,P.Value,adj.P.Val,B,CI.L,CI.R
A5A614,1.098182,16.430501,7.195423,6.405928e-10,1.530379e-09,10.95135,0.793609,1.402754
O32583,1.059081,17.33447,9.384876,5.394604e-14,1.720744e-13,20.305424,0.833996,1.284166
P00350,-0.195019,24.311221,-8.521375,2.052776e-12,5.786465e-12,16.660446,-0.240666,-0.149372
P00363,0.084287,22.180905,1.340764,0.1843467,0.2139871,-7.666913,-0.041101,0.209675
P00370,-0.511984,23.720114,-10.677997,2.538903e-16,9.355351e-16,25.685085,-0.607618,-0.41635
P00393,-0.110967,21.510265,-3.022506,0.003504379,0.004900173,-4.214382,-0.184194,-0.03774
P00448,0.731706,24.199084,10.90602,1.002322e-16,3.771811e-16,26.619248,0.597887,0.865524
P00452,0.118296,21.310524,3.189336,0.00213663,0.003043079,-3.752861,0.044316,0.192277
P00490,-0.827549,21.36752,-22.904002,3.550217e-34,4.8379580000000004e-33,67.139637,-0.899614,-0.755483
P00509,0.199446,24.068919,7.181524,5.916442e-10,1.414908e-09,11.006606,0.144053,0.254839


In [90]:
# server.table.loc[:, ["logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B", "CI.L", "CI.R"]].to_csv(
#     "/home/yuliya/repos/cosybio/FedDEqMS/data/04_evaluation/unbalanced/results.Flimma.tsv", sep="\t"
# )

In [91]:
# # summarize peptides from clients to unique prot-peptiides counts
# for c in cohorts:
#     client = store_clients[c]
#     server.create_pep_counts(client.pep_counts)

# server.summarize_pep_counts()

In [92]:
# One result of get_min_count() per client, in cohort order.
min_counts = [store_clients[c].get_min_count() for c in cohorts]

# Combine the per-client results column-wise and take the row-wise minimum,
# i.e. the smallest count reported by any client for each protein.
global_min_counts = pd.concat(min_counts, axis=1).min(axis=1)

# The minimum peptide count of some proteins can be 0,
# so a pseudocount of 1 is added to all proteins.
server.min_counts = global_min_counts.loc[server.stored_features] + 1

In [93]:
# removes trend between (min-counts+1) and log-variance,
# computes statistics
# (called with fit_method="loess"; the server log reports "Fitting LOWESS curve...")
server.spectral_count_ebayes(fit_method="loess")

06-Nov-23 21:08:10 - root - INFO - Fitting LOWESS curve...
06-Nov-23 21:08:10 - root - INFO - min_count: (2303,), log_var: (2303,)


In [94]:
# results: first 15 rows of the server's result table; besides the limma columns the
# displayed table carries post.df, counts, sca.t, sca.P.Value and sca.adj.pval
server.table.head(15)

Unnamed: 0,t,AveExpr,B,logFC,CI.L,CI.R,adj.P.Val,P.Value,post.df,counts,sca.t,sca.P.Value,sca.adj.pval
A5A614,7.195423,16.430501,10.95135,1.098182,0.793609,1.402754,1.530379e-09,6.405928e-10,70.5,2,7.238657,4.424725e-10,1.060369e-09
O32583,9.384876,17.33447,20.305424,1.059081,0.833996,1.284166,1.720744e-13,5.394604e-14,72.5,2,9.343289,4.515708e-14,1.428527e-13
P00350,-8.521375,24.311221,16.660446,-0.195019,-0.240666,-0.149372,5.786465e-12,2.052776e-12,72.5,40,-8.722584,6.509444e-13,1.88096e-12
P00363,1.340764,22.180905,-7.666913,0.084287,-0.041101,0.209675,0.2139871,0.1843467,72.5,37,1.367214,0.1757823,0.2046646
P00370,-10.677997,23.720114,25.685085,-0.511984,-0.607618,-0.41635,9.355351e-16,2.538903e-16,72.5,33,-10.883754,6.666122000000001e-17,2.50851e-16
P00393,-3.022506,21.510265,-4.214382,-0.110967,-0.184194,-0.03774,0.004900173,0.003504379,72.5,25,-3.070573,0.003006517,0.004221957
P00448,10.90602,24.199084,26.619248,0.731706,0.597887,0.865524,3.771811e-16,1.002322e-16,72.5,15,11.080034,2.948575e-17,1.145121e-16
P00452,3.189336,21.310524,-3.752861,0.118296,0.044316,0.192277,0.003043079,0.00213663,72.5,39,3.255626,0.00172135,0.002463809
P00490,-22.904002,21.36752,67.139637,-0.827549,-0.899614,-0.755483,4.8379580000000004e-33,3.550217e-34,72.5,45,-23.411456,1.564598e-35,2.1838e-34
P00509,7.181524,24.068919,11.006606,0.199446,0.144053,0.254839,1.414908e-09,5.916442e-10,72.5,32,7.313587,2.811769e-10,6.823502e-10


In [95]:
# server.table.to_csv( "/home/yuliya/repos/cosybio/FedDEqMS/data/04_evaluation/unbalanced/results.FedProt.tsv", sep="\t")
# server.table.to_csv("//home/yuliya/repos/cosybio/FedDEqMS/data/04_evaluation_on_scap/unbalanced/results.FedProt.tsv", sep="\t")

In [96]:
# Write the complete result table as a tab-separated file.
# NOTE(review): absolute, user-specific output path - this cell will likely fail on any
# machine where the directory does not exist.
server.table.to_csv("/home/yuliya/repos/cosybio/FedDEqMS/data/04_evaluation_LABS/unbalanced/results.FedProt.tsv", sep="\t")

# Subsampled

In [47]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

# import seaborn as sns
import statsmodels.api as sm
from scipy.interpolate import interp1d
from scipy import linalg
from scipy.special import digamma, polygamma
from scipy.stats import t
import sys, os

# %precision 22
%matplotlib inline

25-Okt-23 18:05:52 - matplotlib.pyplot - DEBUG - Loaded backend module://matplotlib_inline.backend_inline version unknown.


In [48]:
import logging

# Configure logging: DEBUG and above, every record prefixed with a
# day-month-year timestamp, the logger name and the level.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    datefmt="%d-%b-%y %H:%M:%S",
)

In [49]:
# --- Defined by each participant ---
data_dir = "/home/yuliya/repos/cosybio/FedDEqMS/data/00_prot_matrices/subsampled"  # folder with the per-cohort input files
cohorts = ["lab_A", "lab_B", "lab_C", "lab_D", "lab_E"]  # one client per name; the name is also the file-name prefix of that cohort's inputs

# --- Defined by the coordinator ---
target_classes = ["Pyr", "Glu"]  # classes compared in the DE test
covariates = []  # covariates in the linear model (none in this run)
# Missing-value threshold. Author's description: for TMT, maximal fraction of TMT-plexes with NA (in the
# local dataset) to keep a feature; for DIA, maximal fraction of samples with NA in a class to keep a feature.
# NOTE(review): confirm whether this is a maximum NA fraction or a minimum detection fraction.
min_f = 0.8
remove_single_peptide_prots = False  # whether to remove protein groups supported by a single peptide
experiment_type = "DIA"  # "TMT": TMT data are expected and additional checks are done; "DIA": DIA data are expected without additional checks
# NOTE(review): presumably states whether the input intensities are ALREADY log-transformed. In this
# section the value is not passed to Client(...), so it appears to be unused here - confirm.
log_transformed = False

In [50]:
# NOTE(review): this section imports Client/Server from the `fedDEqMS` package, whereas the
# balanced section at the top of the notebook imports them from `FedProt` - confirm which
# package name is current before re-running this section.
from fedDEqMS.clientDIA import Client
from fedDEqMS.serverDIA import Server

In [51]:
# registry of the joined clients, keyed by cohort name
store_clients = dict()
# the server is created from the coordinator's target classes and covariates
server = Server(target_classes, covariates)

In [52]:
# Clients are joining: build one Client per cohort from its four input files,
# keep it in the local registry and announce it to the server.
for cohort_name in cohorts:
    intensity_file_path = f"{data_dir}/{cohort_name}_protein_groups_matrix.tsv"  # matrix of intensities
    # matrix of summarized protein counts; for TMT data TMT-plex and sample_type are mandatory columns
    count_file_path = f"{data_dir}/{cohort_name}_protein_counts.tsv"
    annotation_file_path = f"{data_dir}/{cohort_name}_design.tsv"  # design matrix
    count_pep_file_path = f"{data_dir}/{cohort_name}_precursor_counts.tsv"  # precursor counts

    client = Client(
        cohort_name,
        intensity_file_path,
        count_file_path,
        annotation_file_path,
        experiment_type,
        count_pep_file_path,
    )
    store_clients[client.cohort_name] = client

    # the server receives the client's name, protein names and sample count
    server.join_client(client.cohort_name, client.prot_names, client.n_samples)

# TMT prints:
# print("TMT-plexes per client:", server.n_tmt_per_cli)
# Ntmt = np.sum(server.n_tmt_per_cli)
# print("TMT-plexes in total:", Ntmt)

# Summary of the server state after all clients have joined.
print("===== Clients joined =====")
for label, value in (
    ("Client names:", server.client_names),
    ("Samples per client:", server.n_samples_per_cli),
    ("Target classes:", server.target_classes),
    ("Covariates:", server.covariates),
    ("Stored protein group names:", len(server.stored_features)),
):
    print(label, value)

N = np.sum(server.n_samples_per_cli)  # total number of samples
print("Samples in total:", N)


25-Okt-23 18:05:52 - root - INFO - Client lab_A: Log2(x+1) transformed intensities.
25-Okt-23 18:05:52 - root - INFO - Client lab_A: Loaded 12 samples and 2566 proteins.
25-Okt-23 18:05:52 - root - INFO - Server: joined client  lab_A
25-Okt-23 18:05:52 - root - INFO - Client lab_B: Log2(x+1) transformed intensities.
25-Okt-23 18:05:52 - root - INFO - Client lab_B: Loaded 12 samples and 2805 proteins.
25-Okt-23 18:05:52 - root - INFO - Server: joined client  lab_B
25-Okt-23 18:05:52 - root - INFO - Client lab_C: Log2(x+1) transformed intensities.
25-Okt-23 18:05:52 - root - INFO - Client lab_C: Loaded 12 samples and 2795 proteins.
25-Okt-23 18:05:52 - root - INFO - Server: joined client  lab_C
25-Okt-23 18:05:52 - root - INFO - Client lab_D: Log2(x+1) transformed intensities.
25-Okt-23 18:05:52 - root - INFO - Client lab_D: Loaded 12 samples and 2824 proteins.
25-Okt-23 18:05:52 - root - INFO - Server: joined client  lab_D
25-Okt-23 18:05:52 - root - INFO - Client lab_E: Log2(x+1) trans

===== Clients joined =====
Client names: ['lab_A', 'lab_B', 'lab_C', 'lab_D', 'lab_E']
Samples per client: [12, 12, 12, 12, 12]
Target classes: ['Glu', 'Pyr']
Covariates: []
Stored protein group names: 2297
Samples in total: 60


In [53]:
# Cleaning inputs: every client validates its data against the server's feature and
# variable lists, then extends its design matrix with cohort-effect columns.
for client in (store_clients[name] for name in cohorts):
    client.validate_inputs(server.stored_features, server.variables)
    # one column fewer than the number of cohorts: the first client name is skipped
    client.add_cohort_effects_to_design(server.client_names[1:])

    print(f"Samples in {client.cohort_name} data: {len(client.sample_names)}")
    print(f"Protein groups in {client.cohort_name} data:  {len(client.prot_names)}")

# Add the same cohort columns to the server-side variable list (printed before and after).
# NOTE: this cell is not idempotent — each run appends the cohort names to server.variables again.
print(server.variables)
server.variables = server.variables + server.client_names[1:]
print(server.variables)

25-Okt-23 18:05:52 - root - INFO - Client lab_A:	2 columns are excluded from the design matrix:
25-Okt-23 18:05:52 - root - INFO - Client lab_A: Validated 12 samples and 2297 proteins.
25-Okt-23 18:05:52 - root - INFO - Client lab_B:	2 columns are excluded from the design matrix:
25-Okt-23 18:05:52 - root - INFO - Client lab_B: Validated 12 samples and 2297 proteins.
25-Okt-23 18:05:52 - root - INFO - Client lab_C:	2 columns are excluded from the design matrix:
25-Okt-23 18:05:52 - root - INFO - Client lab_C: Validated 12 samples and 2297 proteins.
25-Okt-23 18:05:52 - root - INFO - Client lab_D:	2 columns are excluded from the design matrix:
25-Okt-23 18:05:52 - root - INFO - Client lab_D: Validated 12 samples and 2297 proteins.
25-Okt-23 18:05:52 - root - INFO - Client lab_E:	2 columns are excluded from the design matrix:
25-Okt-23 18:05:52 - root - INFO - Client lab_E: Validated 12 samples and 2297 proteins.


Samples in lab_A data: 12
Protein groups in lab_A data:  2297
Samples in lab_B data: 12
Protein groups in lab_B data:  2297
Samples in lab_C data: 12
Protein groups in lab_C data:  2297
Samples in lab_D data: 12
Protein groups in lab_D data:  2297
Samples in lab_E data: 12
Protein groups in lab_E data:  2297
['Glu', 'Pyr']
['Glu', 'Pyr', 'lab_B', 'lab_C', 'lab_D', 'lab_E']


In [54]:
# Feature and sample totals as currently held by the server.
N = np.sum(server.n_samples_per_cli)  # total number of samples
print("Stored protein group names:", len(server.stored_features))
print("Samples in total:", N)

Stored protein group names: 2297
Samples in total: 60


In [55]:
# Pass 1: each client filters out protein groups with too many NAs and reports the
# surviving protein list to the server.
for client in (store_clients[name] for name in cohorts):
    passed_prots = client.apply_filters(
        min_f=min_f,
        remove_single_peptide_prots=remove_single_peptide_prots,
    )
    server.update_prot_names(client.cohort_name, passed_prots)

print("Server:\tProtein groups remained:", len(server.stored_features), file=sys.stderr)

# Pass 2: every client adopts the server's protein list (same proteins, same order).
for client in (store_clients[name] for name in cohorts):
    client.update_prot_names(server.stored_features)

25-Okt-23 18:05:52 - root - INFO - Client lab_A:	Protein groups detected in less than 0.8 of each target class will be excluded: 19
25-Okt-23 18:05:52 - root - INFO - Client lab_B:	Protein groups detected in less than 0.8 of each target class will be excluded: 11
25-Okt-23 18:05:52 - root - INFO - Client lab_C:	Protein groups detected in less than 0.8 of each target class will be excluded: 14
25-Okt-23 18:05:52 - root - INFO - Client lab_D:	Protein groups detected in less than 0.8 of each target class will be excluded: 4
25-Okt-23 18:05:52 - root - INFO - Client lab_E:	Protein groups detected in less than 0.8 of each target class will be excluded: 31
Server:	Protein groups remained: 2247
25-Okt-23 18:05:52 - root - INFO - Samples in lab_A data: 12, protein groups: 2247
25-Okt-23 18:05:52 - root - INFO - Samples in lab_B data: 12, protein groups: 2247
25-Okt-23 18:05:52 - root - INFO - Samples in lab_C data: 12, protein groups: 2247
25-Okt-23 18:05:52 - root - INFO - Samples in lab_D da

In [56]:
### Step 1: XtX, XtY, beta and stdev
# empty accumulators for the per-client XtX / XtY results
XtX_list, XtY_list = [], []

# sample counts per client are collected afresh together with XtX / XtY
server.n_samples_per_cli = []

In [57]:
# Collect XtX and XtY from every client, in the order of `cohorts`.
# The accumulators are (re)initialised here so that re-running this cell cannot append
# a second copy of each client's contribution to the lists passed on to the server.
XtX_list = []
XtY_list = []
server.n_samples_per_cli = []

for c in cohorts:
    client = store_clients[c]
    # NOTE(review): presumably aligns the client's data with the server's protein list
    # before the fit — confirm in Client.prepare_for_limma
    client.prepare_for_limma(server.stored_features)

    # the sample count is read after prepare_for_limma, as in the original cell
    server.n_samples_per_cli.append(client.n_samples)
    XtX, XtY = client.compute_XtX_XtY()
    XtX_list.append(XtX)
    XtY_list.append(XtY)

In [58]:
# Server-side step: turn the per-client XtX / XtY lists into the global beta and
# beta stdev (the server logs "computing global beta and beta stdev" here).
# The result is read later as server.beta.
server.compute_beta_and_beta_stdev(XtX_list, XtY_list)

25-Okt-23 18:05:52 - root - INFO - Server: computing global beta and beta stdev, k = 6, n = 2247


In [59]:
### Per-client SSE and covariance coefficients, plus the per-protein intensity sums and
### not-NA counts; everything is aggregated by the server at the end of the cell.
SSE_list, cov_coef_list, intensities_sum, n_measurements = [], [], [], []

for client in store_clients.values():
    # sum of squared residuals and covariance coefficients, computed against the global beta
    SSE, cov_coef = client.compute_SSE_and_cov_coef(server.beta)
    SSE_list.append(SSE)
    cov_coef_list.append(cov_coef)

    # summed intensities and not-NA counts
    # NOTE(review): presumably combined into mean log-intensities on the server — confirm
    intensities_sum.append(client.sum_intensities())
    n_measurements.append(client.get_not_na())

server.aggregate_SSE_and_cov_coef(SSE_list, cov_coef_list, intensities_sum, n_measurements)

In [60]:
# Contrast matrix for the single comparison target_classes[0] vs. target_classes[1].
contrast = ([target_classes[0]], [target_classes[1]])
contrast_matrix = server.make_contrasts(contrasts=[contrast])

In [61]:
### Apply the contrasts on the server.
# The contrast matrix is passed as a plain array via `.values`.
# `fit_contasts` (sic) is the method name as called here.
server.fit_contasts(contrast_matrix.values)

In [62]:
# Modified eBayes(); all eBayes steps run on the server side.
# Author's notes on the modifications:
#   - the standard np mean / sum are replaced with nanmean / nansum
#   - the index in posterior_var() was corrected
server.eBayes()

# result: first 10 rows of the limma-style columns
limma_columns = ["logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B", "CI.L", "CI.R"]
server.table.loc[:, limma_columns].head(10)

25-Okt-23 18:05:53 - root - INFO - Calculating tail p-values


Unnamed: 0,logFC,AveExpr,t,P.Value,adj.P.Val,B,CI.L,CI.R
A5A614,0.963529,18.050729,6.838267,7.223241e-09,1.943787e-08,8.758824,0.681098,1.24596
P00350,-0.236608,25.335092,-6.398916,3.065339e-08,7.627704e-08,7.291183,-0.310638,-0.162579
P00363,0.107915,23.243037,1.665934,0.1011644,0.1256585,-7.050401,-0.021774,0.237604
P00370,-0.55713,24.63378,-9.050648,1.177978e-12,4.404186e-12,17.460221,-0.680372,-0.433889
P00393,-0.130966,22.5274,-2.606009,0.01164378,0.01629115,-5.164666,-0.231581,-0.030351
P00448,0.706373,24.950701,9.390811,3.276172e-13,1.312221e-12,18.746346,0.555778,0.856968
P00452,0.024587,22.634703,0.416508,0.6785886,0.7145214,-8.340457,-0.093596,0.142769
P00490,-0.895161,22.529066,-19.831128,2.154209e-27,2.951529e-26,51.719423,-0.985532,-0.804789
P00509,0.126954,24.902209,2.995375,0.004038581,0.005989895,-4.189352,0.042099,0.211808
P00547,0.345631,24.22615,9.262271,5.307129e-13,2.070333e-12,18.26144,0.270921,0.42034


In [63]:
# Save only the limma-style columns of the result table as a tab-separated file.
limma_columns = ["logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B", "CI.L", "CI.R"]
server.table.loc[:, limma_columns].to_csv(
    "/home/yuliya/repos/cosybio/FedDEqMS/data/04_evaluation/subsampled/results.Flimma.tsv",
    sep="\t",
)

In [64]:
# Each client's peptide counts are handed to the server one by one; the server then
# summarizes them into unique protein-peptide counts.
for client in (store_clients[name] for name in cohorts):
    server.create_pep_counts(client.pep_counts)

server.summarize_pep_counts()

In [65]:
# DEqMS step on the server:
# removes the trend between (min-counts + 1) and log-variance, then computes the statistics.
# With fit_method="loess" the server logs "Fitting LOWESS curve...".
server.spectral_count_ebayes(fit_method="loess")

25-Okt-23 18:05:53 - root - INFO - Fitting LOWESS curve...
25-Okt-23 18:05:53 - root - INFO - min_count: (2247,), log_var: (2247,)


In [66]:
# Results: first 15 rows of the full table, which now also carries the
# post.df, counts and sca.* columns next to the limma columns.
server.table.head(15)

Unnamed: 0,t,AveExpr,B,logFC,CI.L,CI.R,adj.P.Val,P.Value,post.df,counts,sca.t,sca.P.Value,sca.adj.pval
A5A614,6.838267,18.050729,8.758824,0.963529,0.681098,1.24596,1.943787e-08,7.223241e-09,56.2,2,6.731585,9.454677e-09,2.499372e-08
P00350,-6.398916,25.335092,7.291183,-0.236608,-0.310638,-0.162579,7.627704e-08,3.065339e-08,59.2,107,-6.538338,1.606233e-08,4.1154e-08
P00363,1.665934,23.243037,-7.050401,0.107915,-0.021774,0.237604,0.1256585,0.1011644,59.2,97,1.693701,0.0955781,0.1195791
P00370,-9.050648,24.63378,17.460221,-0.55713,-0.680372,-0.433889,4.404186e-12,1.177978e-12,59.2,100,-9.20429,5.065402e-13,1.968112e-12
P00393,-2.606009,22.5274,-5.164666,-0.130966,-0.231581,-0.030351,0.01629115,0.01164378,59.2,74,-2.649323,0.01032598,0.0145197
P00448,9.390811,24.950701,18.746346,0.706373,0.555778,0.856968,1.312221e-12,3.276172e-13,59.2,38,9.498351,1.649283e-13,6.713656e-13
P00452,0.416508,22.634703,-8.340457,0.024587,-0.093596,0.142769,0.7145214,0.6785886,59.2,107,0.423735,0.6732946,0.7096121
P00490,-19.831128,22.529066,51.719423,-0.895161,-0.985532,-0.804789,2.951529e-26,2.154209e-27,59.2,121,-20.222367,2.882494e-28,3.998126e-27
P00509,2.995375,24.902209,-4.189352,0.126954,0.042099,0.211808,0.005989895,0.004038581,59.2,87,3.051871,0.00340217,0.005096451
P00547,9.262271,24.22615,18.26144,0.345631,0.270921,0.42034,2.070333e-12,5.307129e-13,59.2,43,9.326056,3.179999e-13,1.260222e-12


In [67]:
# Save the complete result table (limma columns plus post.df, counts and sca.* columns)
# as tab-separated files in two evaluation folders.
server.table.to_csv(
    "/home/yuliya/repos/cosybio/FedDEqMS/data/04_evaluation/subsampled/results.FedProt.tsv",
    sep="\t",
)
# The path previously began with "//home/...". A leading double slash is
# implementation-defined in POSIX path resolution, so it is written with a single "/".
server.table.to_csv(
    "/home/yuliya/repos/cosybio/FedDEqMS/data/04_evaluation_on_scap/subsampled/results.FedProt.tsv",
    sep="\t",
)