In [1]:
import seaborn as sns
import pandas as pd
import os
import numpy as np
from openpyxl import load_workbook
import matplotlib.pyplot as plt
import matplotlib

In [2]:
from matplotlib import rcParams

# Hide the top and right spines on every axes.
custom_params = {"axes.spines.right": False, "axes.spines.top": False}

# Apply the seaborn theme FIRST: set_theme() rewrites font.family and
# font.sans-serif as part of its style, so font settings made before this
# call would be overwritten by it.
sns.set_theme(style="ticks", rc=custom_params)

# Explicit overrides, applied after the theme so that they stick.
rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial'],
    # Embed fonts as TrueType (Type 42) in PDF / PostScript output.
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})

In [3]:
# The relative 'output/...' and 'figures/...' paths used in this notebook are
# resolved against this working directory.
PROJECT_DIR = '/mnt/data/hong/2022/DHJ1_human_obesity_placenta/'
os.chdir(PROJECT_DIR)

In [6]:
def plot_scores(sheetname):
    """Plot the ranked, non-zero scores of one worksheet and save them as a PDF.

    Rows are read from the worksheet ``sheetname`` of
    ``output/DEGs/final_negbinom_all/DEG_score_pvalues_shuffle_tail.xlsx``.
    The sheet must provide the columns ``score``, ``p`` and ``convergence``.
    Rows with ``score == 0`` are dropped, the rest are sorted by ascending
    score and drawn as a scatter of rank (x) against score (y).

    Point colours are categorical:

    * grey      -- ``p > 0.05``
    * ``#C93235`` -- ``p <= 0.05`` and ``convergence == 'high'``
    * ``#33498C`` -- every other row

    The figure is written to ``figures/DEGs/score/<sheetname>_logp_v4.pdf``.

    Note: this notebook defines ``plot_scores`` more than once; a call
    resolves to whichever definition was executed most recently.

    Parameters
    ----------
    sheetname : str
        Worksheet to read; also the prefix of the output file name.
    """
    scores = pd.read_excel(
        'output/DEGs/final_negbinom_all/DEG_score_pvalues_shuffle_tail.xlsx',
        sheet_name=sheetname,
    )
    # Drop zero scores, then rank ascending so the new 0..n-1 index is the rank.
    scores = scores[scores['score'] != 0]
    scores = scores.sort_values('score', ascending=True).reset_index(drop=True)
    scores['color'] = np.where(
        scores['p'] > 0.05,
        'grey',
        np.where(scores['convergence'] == 'high', '#C93235', '#33498C'),
    )

    f, ax = plt.subplots(figsize=(1.2, 1.8))
    try:
        # No colorbar is drawn: the colours are explicit categorical strings,
        # so the scatter has no scalar data mapped through a colormap and a
        # colorbar could not describe these colours.
        ax.scatter(scores.index, scores['score'], c=scores['color'], s=1)
        ax.set_ylim(-1, 3.5)
        f.savefig('figures/DEGs/score/' + sheetname + '_logp_v4.pdf')
    finally:
        # Release the figure even when plotting or saving fails, so repeated
        # calls in a loop do not accumulate open figures.
        plt.close(f)

In [30]:
def plot_scores(cell_type, highlight):
    """Plot the ranked, non-zero scores of one cell type and label selected rows.

    Reads ``output/DEGs/final_negbinom_all/score_tsv/<cell_type>.tsv``
    (tab-separated), drops rows whose ``score`` is 0, sorts the remainder by
    ascending score and draws rank (x) against score (y). Points are grey
    when ``p > 0.05``; otherwise ``#C93235`` when ``convergence`` is
    ``'high'`` and ``#33498C`` for anything else.

    Rows whose ``Unnamed: 0`` value is contained in ``highlight`` get an
    italic text label connected to the point by an arrow. The figure is
    saved to ``figures/DEGs/score/<cell_type>_logp_v5.pdf``.

    Note: this notebook defines ``plot_scores`` more than once; a call
    resolves to whichever definition was executed most recently.

    Parameters
    ----------
    cell_type : str
        Base name of the TSV file and prefix of the output PDF.
    highlight : list-like
        Values of the ``Unnamed: 0`` column to annotate (presumably gene
        symbols -- confirm against the TSV files).
    """
    table = pd.read_csv(
        f'output/DEGs/final_negbinom_all/score_tsv/{cell_type}.tsv', sep='\t'
    )
    nonzero = table[table['score'] != 0]
    # After reset_index the index doubles as the ascending rank of each row.
    ranked = nonzero.sort_values('score', ascending=True).reset_index(drop=True)

    not_significant = ranked['p'] > 0.05
    high_convergence = ranked['convergence'] == 'high'
    ranked['color'] = np.where(
        not_significant, 'grey', np.where(high_convergence, '#C93235', '#33498C')
    )

    f, ax = plt.subplots(figsize=(1.2, 1.8))
    ax.scatter(ranked.index, ranked['score'], c=ranked['color'], s=1)

    # Boolean selection keeps the rank index, so it can be used as the x position.
    labelled = ranked.loc[ranked['Unnamed: 0'].isin(highlight)]
    for rank, label, score in zip(labelled.index, labelled['Unnamed: 0'], labelled['score']):
        ax.annotate(
            label,
            (rank, score),
            xytext=(5, -0.1),
            textcoords='offset points',
            arrowprops=dict(arrowstyle='<-', linewidth=2),
            style='italic',
            fontsize=2,
        )

    ax.set_ylim(-1, 2)
    f.savefig('figures/DEGs/score/' + cell_type + '_logp_v5.pdf')
    plt.close(f)

In [79]:
def plot_scores(df, highlight, label_col='mgi_symbol_comp1', ylim=(-2.5, 6),
                out_path='figures/DEGs/score/GSE_bottom6_norm.pdf'):
    """Plot the ranked, non-zero scores of ``df`` and label selected rows.

    Rows with ``score == 0`` are dropped and the rest are sorted by ascending
    score; the scatter shows rank (x) against score (y). Points are grey when
    ``p > 0.05``; otherwise ``#C93235`` when ``convergence`` is ``'high'``
    and ``#33498C`` for anything else. Rows whose ``label_col`` value is in
    ``highlight`` get an italic text label connected to the point by an arrow.

    The input frame is not modified; filtering and sorting produce new frames.

    Note: this notebook defines ``plot_scores`` more than once; a call
    resolves to whichever definition was executed most recently.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``score``, ``p``, ``convergence`` and
        ``label_col``.
    highlight : list-like
        Values of ``label_col`` to annotate.
    label_col : str, optional
        Column matched against ``highlight`` and used as label text.
        Defaults to the previously hard-coded ``'mgi_symbol_comp1'``.
    ylim : tuple of (float, float), optional
        Lower and upper y-axis limits. Defaults to the previously hard-coded
        ``(-2.5, 6)``.
    out_path : str, optional
        Destination of the saved figure. Defaults to the previously
        hard-coded ``'figures/DEGs/score/GSE_bottom6_norm.pdf'``.
    """
    ranked = df[df['score'] != 0]
    # After reset_index the index doubles as the ascending rank of each row.
    ranked = ranked.sort_values('score', ascending=True).reset_index(drop=True)
    ranked['color'] = np.where(
        ranked['p'] > 0.05,
        'grey',
        np.where(ranked['convergence'] == 'high', '#C93235', '#33498C'),
    )

    f, ax = plt.subplots(figsize=(1.2, 1.8))
    try:
        ax.scatter(ranked.index, ranked['score'], c=ranked['color'], s=1)

        # Boolean selection keeps the rank index, so it is the x position.
        labelled = ranked.loc[ranked[label_col].isin(highlight)]
        for rank, label, score in zip(labelled.index, labelled[label_col], labelled['score']):
            ax.annotate(
                label,
                (rank, score),
                xytext=(5, -0.1),
                textcoords='offset points',
                arrowprops=dict(arrowstyle='<-', linewidth=2),
                style='italic',
                fontsize=2,
            )

        ax.set_ylim(*ylim)
        f.savefig(out_path)
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(f)

In [76]:
# Tab-separated score table for the GSE237099 test data set.
fname = '/mnt/data/hong/my_github/cscore/testdata/GSE237099_1_unloading_reloading_cscore_sign.txt'
df = pd.read_csv(fname, sep='\t')
# Symbols of the six lowest-scoring rows; passed to plot_scores as labels.
bottom6 = df.sort_values('score', ascending=True).head(6)['mgi_symbol_comp1']
# Complete-case table (rows with any missing value removed). It is defined
# here because the cells that follow (the bottom-6 preview and the p < 0.05
# filter) read `df_c` before the later cell that assigns it, which raises
# NameError when the notebook is run top to bottom on a fresh kernel.
df_c = df.dropna()

In [87]:
# Preview the six rows with the lowest score (ascending order is the default).
df_c.sort_values(by='score').iloc[:6]

Unnamed: 0,Unnamed: 0_comp1,ensembl_gene_id,mgi_symbol_comp1,chromosome_name_comp1,start_position_comp1,end_position_comp1,strand_comp1,gene_biotype_comp1,baseMean_comp1,log2FoldChange_comp1,...,gene_biotype_comp2,baseMean_comp2,log2FoldChange_comp2,lfcSE_comp2,pvalue_comp2,padj_comp2,isTF_comp2,score,p,convergence
6146,6015,ENSMUSG00000028341,Nr4a3,4,48045153.0,48086447.0,1.0,protein_coding,159.7871,-0.086802,...,protein_coding,159.78706,3.013929,0.178051,0.0,0.0,Yes,-2.33937,0.00035,low
8345,11832,ENSMUSG00000032515,Csrnp1,9,119800229.0,119813724.0,-1.0,protein_coding,125.2781,-0.005366,...,protein_coding,125.278112,2.797598,0.208112,0.0,0.0,Yes,-2.064902,0.0003,low
13235,5680,ENSMUSG00000050064,Zfp697,3,98289777.0,98661128.0,1.0,protein_coding,44.07816,-0.078603,...,protein_coding,44.078157,2.497531,0.320141,0.0,3e-16,Yes,-1.850289,0.000675,low
95,11256,ENSMUSG00000000567,Sox9,11,112673050.0,112678586.0,1.0,protein_coding,46.81155,-0.011159,...,protein_coding,46.811548,2.159638,0.295305,0.0,1.9e-15,Yes,-1.483873,0.000725,low
55,7043,ENSMUSG00000000317,Bcl6b,11,70114954.0,70120624.0,-1.0,protein_coding,106.9957,-0.067172,...,protein_coding,106.995703,1.992955,0.243465,0.0,0.0,Yes,-1.377358,0.0011,low
10265,1945,ENSMUSG00000038418,Egr1,18,34992876.0,34998037.0,1.0,protein_coding,241.435274,-0.165115,...,protein_coding,241.435274,1.494251,0.58363,8e-06,6.235203e-05,Yes,-1.070816,0.001275,low


In [85]:
# Keep only the rows whose p-value is below 0.05.
df_sig = df_c.loc[df_c['p'].lt(0.05)]

In [86]:
# Count how many of the p < 0.05 rows fall into each convergence class.
df_sig.loc[:, 'convergence'].value_counts()

high    191
low     148
Name: convergence, dtype: int64

In [77]:
# Complete-case table: drop every row that has at least one missing value.
df_c = df.dropna(axis=0, how='any')

In [80]:
# Plot the complete-case table and annotate the six lowest-scoring symbols.
# This call shape matches the plot_scores(df, highlight) definition; because
# the notebook defines plot_scores several times, that definition must be the
# one executed most recently before this cell runs.
plot_scores(df_c, bottom6)

In [5]:
## apply the function to all sheets
# NOTE(review): `sheetnames` is not assigned anywhere in the visible notebook;
# it has to be defined before this cell runs -- confirm where it comes from.
# The one-argument call matches the plot_scores(sheetname) definition.
for sheetname in sheetnames:
    try:
        plot_scores(sheetname)
    except Exception as err:
        # Best-effort batch: report which sheet failed and keep going, instead
        # of silently hiding every error (a bare `except:` would also swallow
        # KeyboardInterrupt).
        print(f'plot_scores failed for sheet {sheetname!r}: {err!r}')

In [6]:
# Plot 'STB_a' and annotate the listed names. The (name, list) call shape
# matches the plot_scores(cell_type, highlight) definition; because the
# notebook defines plot_scores several times, that definition must be the one
# executed most recently before this cell runs.
plot_scores('STB_a', ['FN1', 'FBLN1'])

  plt.savefig('figures/DEGs/score/' + sheetname + '_logp_v5.pdf')


In [8]:
# Plot the three STB sheets. The one-argument call matches the
# plot_scores(sheetname) definition.
for sheetname in ['STB_a', 'STB_b', 'STB_c']:
    try:
        plot_scores(sheetname)
    except Exception as err:
        # Best-effort batch: report which sheet failed and keep going, instead
        # of silently hiding every error (a bare `except:` would also swallow
        # KeyboardInterrupt).
        print(f'plot_scores failed for sheet {sheetname!r}: {err!r}')

In [31]:
# Plot 'CTB' and annotate the listed names. The (name, list) call shape
# matches the plot_scores(cell_type, highlight) definition; because the
# notebook defines plot_scores several times, that definition must be the one
# executed most recently before this cell runs.
plot_scores('CTB', ['ITGB4', 'SLC38A1', 'DST', 'FOXO3', 'RBPJ'])