In [3]:
import pandas as pd

# Load the 'dBTEV to dBTWCL' worksheet of the proteomics workbook.
df = pd.read_excel('proteomics_evs.xlsx', sheet_name='dBTEV to dBTWCL')

In [4]:
# Identifier columns copied unchanged into every derived table.
gene_cols = ['Accession', 'Description', 'Gene symbol']

# Comparison 1: dBT samples 114, 116 and 120 (WCL vs. EV columns).
dBT_WCL_cols = [f'dBT_{sample}_WCL' for sample in (114, 116, 120)]
dBT_EV_cols = [f'dBT_{sample}_EV' for sample in (114, 116, 120)]

# Comparison 2: Astro samples 1, 2 and 3 (WCL vs. EV columns).
astro_WCL_cols = [f'Astro_{sample}_WCL' for sample in range(1, 4)]
astro_EV_cols = [f'Astro_{sample}_EV' for sample in range(1, 4)]



In [5]:
# Start the results table from an independent copy of the identifier columns,
# so that adding derived columns never touches the raw frame.
df2 = df[gene_cols].copy()

In [6]:
# Sanity check: per-row mean across the dBT EV columns.
df[dBT_EV_cols].mean(axis=1)

0       0.784298
1       3.767199
2       2.320155
3       0.333763
4      -0.106535
          ...   
3208   -4.798293
3209   -4.766052
3210   -4.744745
3211   -4.640128
3212   -4.814932
Length: 3213, dtype: float64

In [7]:
# Per-row difference between the mean of the EV columns and the mean of the
# WCL columns, computed separately for each comparison.
# NOTE(review): presumably the inputs are log-scale intensities, making this a
# log fold-change — confirm against the source workbook.
df2['dBTEV-WCL'] = df[dBT_EV_cols].mean(axis=1).sub(df[dBT_WCL_cols].mean(axis=1))
df2['astroEV-WCL'] = df[astro_EV_cols].mean(axis=1).sub(df[astro_WCL_cols].mean(axis=1))

In [8]:
# Preview the first five rows with the new difference columns.
df2.head(n=5)

Unnamed: 0,Accession,Description,Gene symbol,dBTEV-WCL,astroEV-WCL
0,O94955,Rho-related BTB domain-containing protein 3,RHOBTB3,5.314422,0.137433
1,P16035,Metalloproteinase inhibitor 2,TIMP2,7.80396,2.849515
2,P28072,Proteasome subunit beta type-6,PSMB6,6.663266,2.705477
3,Q9BTY2,Plasma alpha-L-fucosidase,FUCA2,4.870684,-1.358395
4,P61966,AP-1 complex subunit sigma-1A,AP1S1,4.428192,-0.261821


In [9]:
# Spot-check the raw dBT EV values of the first row as a plain Python list.
df.loc[0, dBT_EV_cols].tolist()

[0.9163991606180169, 0.6126083238001812, 0.8238857528745136]

In [10]:
from scipy.stats import ttest_ind
import pandas as pd
import numpy as np

def calc_ttest(series1: pd.Series, series2: pd.Series) -> float:
    """Return the p-value of an independent two-sample t-test.

    The test is run with ``equal_var=False`` (Welch's t-test), so the two
    samples are not assumed to share a variance.

    Args:
        series1: Observations of the first group.
        series2: Observations of the second group.

    Returns:
        The p-value reported by ``scipy.stats.ttest_ind``.
    """
    # Convert to plain lists first so scipy receives Python scalars even when
    # the Series has object dtype.
    sample_a = series1.tolist()
    sample_b = series2.tolist()
    result = ttest_ind(sample_a, sample_b, equal_var=False)
    return result.pvalue

# Significance of the EV-vs-WCL difference for every row, stored as -log2(p).
# NOTE: despite the "_ttest" suffix these columns hold -log2 of the Welch
# t-test p-value (larger = more significant), not the t statistic itself.
#
# ttest_ind is applied once per comparison along axis=1 (one test per row)
# instead of calling scipy once per row through DataFrame.apply; the result is
# the same test, without the Python-level loop over rows.
df2['dBTEV-WCL_ttest'] = pd.Series(
    -np.log2(
        ttest_ind(
            df[dBT_EV_cols].to_numpy(dtype=float),
            df[dBT_WCL_cols].to_numpy(dtype=float),
            axis=1,
            equal_var=False,
        ).pvalue
    ),
    index=df.index,  # keep index alignment with df2
)
df2['astroEV-WCL_ttest'] = pd.Series(
    -np.log2(
        ttest_ind(
            df[astro_EV_cols].to_numpy(dtype=float),
            df[astro_WCL_cols].to_numpy(dtype=float),
            axis=1,
            equal_var=False,
        ).pvalue
    ),
    index=df.index,  # keep index alignment with df2
)

In [11]:
# Combined score per comparison: EV-WCL difference multiplied by the
# -log2(p) value stored in the matching "_ttest" column.
df2['dBTEV-WCL_score'] = df2['dBTEV-WCL'].mul(df2['dBTEV-WCL_ttest'])
df2['astroEV-WCL_score'] = df2['astroEV-WCL'].mul(df2['astroEV-WCL_ttest'])

In [12]:
# Show the first five scored rows, then the (rows, columns) shape of the table.
display(df2.head(n=5))
print(df2.shape)

Unnamed: 0,Accession,Description,Gene symbol,dBTEV-WCL,astroEV-WCL,dBTEV-WCL_ttest,astroEV-WCL_ttest,dBTEV-WCL_score,astroEV-WCL_score
0,O94955,Rho-related BTB domain-containing protein 3,RHOBTB3,5.314422,0.137433,19.0547,0.080988,101.264721,0.011131
1,P16035,Metalloproteinase inhibitor 2,TIMP2,7.80396,2.849515,11.711644,2.965486,91.397203,8.450196
2,P28072,Proteasome subunit beta type-6,PSMB6,6.663266,2.705477,11.525392,4.743221,76.796758,12.832676
3,Q9BTY2,Plasma alpha-L-fucosidase,FUCA2,4.870684,-1.358395,15.485417,1.452529,75.424575,-1.973109
4,P61966,AP-1 complex subunit sigma-1A,AP1S1,4.428192,-0.261821,16.92898,6.757104,74.964771,-1.769152


(3213, 9)


In [13]:
# Keep only rows whose EV-WCL difference is positive in BOTH comparisons;
# copy so later column additions do not write into a view of df2.
df3 = df2[df2['dBTEV-WCL'].gt(0) & df2['astroEV-WCL'].gt(0)].copy()

In [14]:
# Ranking metric: dBT score minus astro score for each remaining row.
df3['dBTEV-WCL-astroEV-WCL_score'] = df3['dBTEV-WCL_score'].sub(df3['astroEV-WCL_score'])

In [15]:
# Order rows from the largest to the smallest score difference.
df3 = df3.sort_values('dBTEV-WCL-astroEV-WCL_score', ascending=False)

In [16]:
# Preview the five top-ranked rows.
df3.head(n=5)

Unnamed: 0,Accession,Description,Gene symbol,dBTEV-WCL,astroEV-WCL,dBTEV-WCL_ttest,astroEV-WCL_ttest,dBTEV-WCL_score,astroEV-WCL_score,dBTEV-WCL-astroEV-WCL_score
0,O94955,Rho-related BTB domain-containing protein 3,RHOBTB3,5.314422,0.137433,19.0547,0.080988,101.264721,0.011131,101.25359
1,P16035,Metalloproteinase inhibitor 2,TIMP2,7.80396,2.849515,11.711644,2.965486,91.397203,8.450196,82.947008
2,P28072,Proteasome subunit beta type-6,PSMB6,6.663266,2.705477,11.525392,4.743221,76.796758,12.832676,63.964082
5,Q92692,Nectin-2,NECTIN2,3.89905,0.172347,16.179525,0.251158,63.08478,0.043286,63.041494
6,Q08431,Lactadherin,MFGE8,4.95777,0.031628,12.250265,0.040876,60.733991,0.001293,60.732698


In [51]:
# Write the ranked table to Excel without the DataFrame index.
# NOTE(review): the filename is spelled "processeed"; it is left unchanged here
# because other consumers may already depend on this exact name — confirm
# before renaming.
df3.to_excel(excel_writer='proteomics_processeed.xlsx', index=False)