In [54]:
import pandas as pd
import numpy as np
import math

In [2]:
# Read in sample z-score data from 'data_mRNA_median_all_sample_Zscores.txt' into pandas dataframa
z_scores = pd.read_csv('../nbl_target_2018_pub/data_mRNA_median_all_sample_Zscores.txt',  sep='\t')

In [3]:
#This function stratified samples based on the expression of a gene of interest
#Inputs: gene of interest, lower z-score threshold, upper z-score threshold
#Outputs: 2-D array containing lists of samples in each stratified group
def stratifyDataOnGeneExpression(gene,lowerThresh,upperThresh):
    #Obtain the z-scores for the gene of interest
    gene_z_scores = z_scores.loc[z_scores['Hugo_Symbol']==gene].iloc[:,2:].transpose()
    #Find samples for which the z-scores are < lowerThresh
    lower_expression_samples = gene_z_scores.loc[gene_z_scores.iloc[:,0] < lowerThresh].index
    #Find samples for which the z-scores are > upperThresh
    higher_expression_samples = gene_z_scores.loc[gene_z_scores.iloc[:,0] > upperThresh].index
    return [lower_expression_samples, higher_expression_samples]

In [4]:
#Stratify data on ACE2 expression
ACE2_groups = stratifyDataOnGeneExpression('ACE2',-0.5,0.5)

# High ACE2 Expression

In [118]:
#Specify samples with high ACE2 expression (z-score > 0.5) and those without high ACE2 expression
high_ACE2_columns = ACE2_groups[1].tolist()
high_ACE2_df = z_scores[high_ACE2_columns]

remaining_columns = set(z_scores.columns)-set(ACE2_groups[1])-set(['Hugo_Symbol','Entrez_Gene_Id'])
remaining_df = z_scores[remaining_columns]

In [122]:
#Find genes up and downregulated by high ACE2 expression
genes_upregulated_by_high_ACE2 = pd.DataFrame(columns=['Hugo_Symbol','Entrez_Gene_Id','High ACE2 mean z-score','Remaining mean z-score'])
genes_downregulated_by_high_ACE2 = pd.DataFrame(columns=['Hugo_Symbol','Entrez_Gend_Id','High ACE2 mean z-score','Remaining mean z-score'])

upregulated_index = 0
downregulated_index = 0
for i in z_scores.index:
    high_ACE2_mean = np.mean(high_ACE2_df.iloc[i,2:].values)
    remaining_mean = np.mean(remaining_df.iloc[i,2:].values)
    
    if(high_ACE2_mean - remaining_mean > 0.5):
        genes_upregulated_by_high_ACE2.loc[upregulated_index] = [z_scores.loc[i,'Hugo_Symbol'],z_scores.loc[i,'Entrez_Gene_Id'],high_ACE2_mean,remaining_mean]
        upregulated_index += 1
    elif(high_ACE2_mean - remaining_mean < -0.5):
        genes_downregulated_by_high_ACE2.loc[downregulated_index] = [z_scores.loc[i,'Hugo_Symbol'],z_scores.loc[i,'Entrez_Gene_Id'],high_ACE2_mean,remaining_mean]
        downregulated_index += 1






In [126]:
genes_upregulated_by_high_ACE2['z-score difference'] = genes_upregulated_by_high_ACE2['High ACE2 mean z-score'] - genes_upregulated_by_high_ACE2['Remaining mean z-score']
genes_downregulated_by_high_ACE2['z-score difference'] = genes_downregulated_by_high_ACE2['High ACE2 mean z-score'] - genes_downregulated_by_high_ACE2['Remaining mean z-score']




In [127]:
genes_upregulated_by_high_ACE2

Unnamed: 0,Hugo_Symbol,Entrez_Gene_Id,High ACE2 mean z-score,Remaining mean z-score,z-score difference
0,A1CF,29974.0,0.485934,-0.195613,0.681547
1,A2ML1,144568.0,0.537275,-0.217111,0.754386
2,AADAC,13.0,0.406575,-0.167360,0.573934
3,ABAT,18.0,0.520000,-0.201443,0.721443
4,ABCA13,154664.0,0.432939,-0.170161,0.603100
...,...,...,...,...,...
1373,ZNF843,283933.0,0.372792,-0.133518,0.506309
1374,ZNF865,100507290.0,0.384254,-0.169682,0.553936
1375,ZNRF4,148066.0,0.391754,-0.153451,0.545204
1376,ZSCAN4,201516.0,0.432361,-0.169713,0.602073


In [128]:
genes_downregulated_by_high_ACE2

Unnamed: 0,Hugo_Symbol,Entrez_Gend_Id,High ACE2 mean z-score,Remaining mean z-score,z-score difference
0,ACAP2,23527.0,-0.446462,0.187310,-0.633772
1,ACTR8,93973.0,-0.382458,0.157426,-0.539884
2,ADAM1,8759.0,-0.425646,0.176382,-0.602028
3,ADNP,23394.0,-0.383234,0.160827,-0.544061
4,AGAP10,653234.0,-0.545049,0.205333,-0.750382
...,...,...,...,...,...
778,ZNF789,285989.0,-0.371610,0.140479,-0.512089
779,ZNF891,101060200.0,-0.454892,0.182181,-0.637073
780,ZNF91,7644.0,-0.417620,0.167111,-0.584731
781,ZRANB1,54764.0,-0.359052,0.157879,-0.516931


In [129]:
genes_upregulated_by_high_ACE2.to_csv('../DEG_Files/genes_upregulated_by_high_ACE2.csv',index=False)
genes_downregulated_by_high_ACE2.to_csv('../DEG_Files/genes_downregulated_by_high_ACE2.csv',index=False)

In [135]:
genes_upregulated_by_high_ACE2_sorted = genes_upregulated_by_high_ACE2.sort_values('z-score difference',ascending=False)
genes_downregulated_by_high_ACE2_sorted = genes_downregulated_by_high_ACE2.sort_values('z-score difference',ascending=True)



In [136]:
genes_upregulated_by_high_ACE2_sorted

Unnamed: 0,Hugo_Symbol,Entrez_Gene_Id,High ACE2 mean z-score,Remaining mean z-score,z-score difference
15,ACE2,59272.0,1.203037,-0.494899,1.697935
440,FAM55A,120400.0,0.672941,-0.271577,0.944518
845,MIA2,4253.0,0.627072,-0.255607,0.882679
252,CCDC83,220047.0,0.617203,-0.240202,0.857405
263,CDY1,9085.0,0.613451,-0.242729,0.856180
...,...,...,...,...,...
1299,TUBAL3,79861.0,0.361608,-0.138829,0.500438
86,ASPG,374569.0,0.357418,-0.142875,0.500294
412,EXOC3L2,90332.0,0.360593,-0.139668,0.500261
1016,PDZK1P1,100034743.0,0.358272,-0.141968,0.500240


In [137]:
genes_downregulated_by_high_ACE2_sorted

Unnamed: 0,Hugo_Symbol,Entrez_Gend_Id,High ACE2 mean z-score,Remaining mean z-score,z-score difference
336,LOC100294337,,-0.630866,0.261594,-0.892460
341,LOC100505643,,-0.611014,0.248523,-0.859537
386,LOC440434,440434.0,-0.584577,0.237829,-0.822406
294,LOC100128729,,-0.580315,0.230749,-0.811064
408,LOC731275,,-0.577283,0.231608,-0.808891
...,...,...,...,...,...
463,NBPF14,25832.0,-0.360215,0.140967,-0.501182
70,C1orf112,55732.0,-0.360315,0.140475,-0.500791
398,LOC727820,,-0.349455,0.150952,-0.500407
525,PPHLN1,51535.0,-0.369003,0.131397,-0.500400


In [138]:
genes_upregulated_by_high_ACE2_sorted.to_csv('../DEG_Files/genes_upregulated_by_high_ACE2_sorted.csv',index=False)
genes_downregulated_by_high_ACE2_sorted.to_csv('../DEG_Files/genes_downregulated_by_high_ACE2_sorted.csv',index=False)

# Low ACE2 Expression

In [141]:
#Specify samples with low ACE2 expression (z-score < -0.5) and those without low ACE2 expression
low_ACE2_columns = ACE2_groups[0].tolist()
low_ACE2_df = z_scores[low_ACE2_columns]

remaining_columns = set(z_scores.columns)-set(ACE2_groups[0])-set(['Hugo_Symbol','Entrez_Gene_Id'])
remaining_df = z_scores[remaining_columns]

In [144]:
#Find genes up and downregulated by low ACE2 expression
genes_upregulated_by_low_ACE2 = pd.DataFrame(columns=['Hugo_Symbol','Entrez_Gene_Id','Low ACE2 mean z-score','Remaining mean z-score'])
genes_downregulated_by_low_ACE2 = pd.DataFrame(columns=['Hugo_Symbol','Entrez_Gend_Id','Low ACE2 mean z-score','Remaining mean z-score'])

upregulated_index = 0
downregulated_index = 0
for i in z_scores.index:
    low_ACE2_mean = np.mean(low_ACE2_df.iloc[i,2:].values)
    remaining_mean = np.mean(remaining_df.iloc[i,2:].values)
    
    if(low_ACE2_mean - remaining_mean > 0.5):
        genes_upregulated_by_low_ACE2.loc[upregulated_index] = [z_scores.loc[i,'Hugo_Symbol'],z_scores.loc[i,'Entrez_Gene_Id'],low_ACE2_mean,remaining_mean]
        upregulated_index += 1
    elif(low_ACE2_mean - remaining_mean < -0.5):
        genes_downregulated_by_low_ACE2.loc[downregulated_index] = [z_scores.loc[i,'Hugo_Symbol'],z_scores.loc[i,'Entrez_Gene_Id'],low_ACE2_mean,remaining_mean]
        downregulated_index += 1






In [146]:
genes_upregulated_by_low_ACE2['z-score difference'] = genes_upregulated_by_low_ACE2['Low ACE2 mean z-score'] - genes_upregulated_by_low_ACE2['Remaining mean z-score']
genes_downregulated_by_low_ACE2['z-score difference'] = genes_downregulated_by_low_ACE2['Low ACE2 mean z-score'] - genes_downregulated_by_low_ACE2['Remaining mean z-score']




In [147]:
genes_upregulated_by_low_ACE2

Unnamed: 0,Hugo_Symbol,Entrez_Gene_Id,Low ACE2 mean z-score,Remaining mean z-score,z-score difference
0,AADAT,51166.0,0.360045,-0.169104,0.529149
1,AAK1,22848.0,0.350744,-0.171976,0.522720
2,ACAD9,28976.0,0.379556,-0.166367,0.545923
3,ACAP2,23527.0,0.429532,-0.191846,0.621379
4,ADNP,23394.0,0.391404,-0.177438,0.568841
...,...,...,...,...,...
709,ZNF789,285989.0,0.449149,-0.198733,0.647882
710,ZNF880,400713.0,0.357909,-0.158606,0.516515
711,ZNF891,101060200.0,0.379804,-0.177792,0.557596
712,ZNF91,7644.0,0.478578,-0.211955,0.690533


In [148]:
genes_downregulated_by_low_ACE2

Unnamed: 0,Hugo_Symbol,Entrez_Gend_Id,Low ACE2 mean z-score,Remaining mean z-score,z-score difference
0,A1CF,29974.0,-0.396208,0.183862,-0.580070
1,A2ML1,144568.0,-0.490884,0.221014,-0.711899
2,AADACL4,343066.0,-0.365843,0.161221,-0.527064
3,AANAT,15.0,-0.348266,0.163640,-0.511907
4,ABAT,18.0,-0.390551,0.181142,-0.571693
...,...,...,...,...,...
1809,ZNHIT2,741.0,-0.344481,0.164622,-0.509103
1810,ZP1,22917.0,-0.482875,0.224227,-0.707102
1811,ZP2,7783.0,-0.396016,0.164484,-0.560500
1812,ZPBP2,124626.0,-0.403186,0.191413,-0.594599


In [149]:
genes_upregulated_by_low_ACE2.to_csv('../DEG_Files/genes_upregulated_by_low_ACE2.csv',index=False)
genes_downregulated_by_low_ACE2.to_csv('../DEG_Files/genes_downregulated_by_low_ACE2.csv',index=False)