In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from matplotlib.patches import Patch

In [None]:
import matplotlib
# Font setup for exported figures: pdf.fonttype 42 selects TrueType fonts in
# the PDF backend, and Arial is the default font family.
matplotlib.rcParams.update({
    'pdf.fonttype': 42,
    'font.family': ['arial'],
})

# Seaborn defaults shared by every figure in this notebook.
sns.set_theme(
    style='white',
    context='paper',
    font='arial',
    palette="Paired",
)

## Global proteomics

In [None]:
# "dec only" experiment (exp23): tab-separated intensity crosstab and the
# matching sample metadata.
dec_only_global_id = 'exp23_global_raw_intensity_crosstab.txt'
dec_meta_id = 'exp23_global_raw_metadata.txt'

do_table = pd.read_csv(dec_only_global_id, sep='\t')
dec_only_meta = pd.read_csv(dec_meta_id, sep='\t')

In [None]:
# Display the exp23 sample metadata as loaded from disk.
dec_only_meta

In [None]:

# exp18 (gv, gvd, gd samples): tab-separated intensity crosstab and the
# matching sample metadata.
exp_18_meta_id = 'exp18_global_raw_metadata 1.txt'
exp_18_id = 'exp18_global_raw_intensity_crosstab 1.txt'

exp_18 = pd.read_csv(exp_18_id, sep='\t')
exp_18_meta = pd.read_csv(exp_18_meta_id, sep='\t')
# Show (rows, columns) of the crosstab as the cell output.
exp_18.shape

In [None]:
# Build a "<Treatment>_<State>-<Plex>" label per sample, expose the metadata
# index as a 'name_index' column, and drop samples that have no Ligand entry.
exp_18_meta = (
    exp_18_meta
    .assign(
        TS_=exp_18_meta.Treatment + '_' + exp_18_meta.State + '-' + exp_18_meta.Plex.astype(str)
    )
    .reset_index(names='name_index')
    .dropna(subset=['Ligand'])
)
# Mapping from crosstab column label (the metadata index value) to sample label.
col_dict = exp_18_meta.set_index('name_index')['TS_'].to_dict()

# Keep only the columns described by the remaining metadata rows and rename them.
exp_18 = exp_18[list(col_dict)].rename(columns=col_dict)

In [None]:
# Preview the relabelled exp18 table.
exp_18.head()

In [None]:
# Median-centre the exp18 samples. Each column label is first prefixed with its
# position ("<position>_<label>") so that every label is unique.
exp_18_for_plot = exp_18.copy()
exp_18_for_plot.columns = [f'{pos}_{label}' for pos, label in enumerate(exp_18.columns)]

# Subtract every column's own median from that column.
exp_18_for_plot = exp_18_for_plot.sub(exp_18_for_plot.median(), axis='columns')

In [None]:
# Display the median-centred exp18 table.
exp_18_for_plot

In [None]:
# Per-sample intensity distributions of the median-centred exp18 table.
f, ax = plt.subplots(figsize=(11, 6))
g = sns.violinplot(
    data=exp_18_for_plot,
    palette="Set3",
    linewidth=1,
    bw_adjust=.5,
    cut=1,
)

# Fixed y-range, vertical sample labels, no left/bottom spines.
ax.set_ylim(-11., 7.5)
ax.tick_params(axis='x', labelrotation=90)
sns.despine(left=True, bottom=True)

In [None]:
# Expose the metadata index as a 'name_index' column; its values are used
# below as the crosstab column labels to rename.
dec_only_meta = dec_only_meta.reset_index(names='name_index')

# Assign a "<drug>_<state>" label from the prefix of each sample name.
# Samples matching none of the prefixes keep a missing label.
for prefix, label in (
    ('L', 'GD_late'),
    ('molm14', 'none_Parental'),
    ('molm/dec', 'D_only'),
):
    dec_only_meta.loc[dec_only_meta.sample_name.str.startswith(prefix), 'TS_'] = label

# Keep the sixteen sample columns (sample_1 … sample_16) and rename them.
sample_columns = [f'sample_{i}' for i in range(1, 17)]
col_dict2 = dec_only_meta.set_index('name_index')['TS_'].to_dict()
do_table = do_table[sample_columns].rename(columns=col_dict2)

In [None]:
# Display the relabelled exp23 table.
do_table

In [None]:
# Median-centre the exp23 samples. Labels become "00<position>_<label>-4".
# The "00" is a literal prefix, not zero padding: position 10 yields "0010_…".
do_table_for_plot = do_table.copy()
do_table_for_plot.columns = [f'00{pos}_{label}-4' for pos, label in enumerate(do_table.columns)]
do_table_for_plot = do_table_for_plot.sub(do_table_for_plot.median(), axis='columns')


In [None]:
# Per-sample intensity distributions of the median-centred exp23 table.
f, ax = plt.subplots(figsize=(11, 6))

# Column labels start with "00<position>_" without zero padding, so a plain
# lexicographic sort places "0010_…" before "001_…". Sort on the numeric
# position instead so the violins follow the sample order.
violin_order = sorted(
    do_table_for_plot.columns,
    key=lambda col: int(col.split('_', 1)[0]),
)
sns.violinplot(data=do_table_for_plot, bw_adjust=.5, cut=1, linewidth=1, palette="Set3",
               order=violin_order)

# Finalize the figure
ax.set(ylim=(-11., 7.5))
ax.xaxis.set_tick_params(rotation=90)
sns.despine(left=True, bottom=True)

In [None]:
# Put the exp23 columns first, then the exp18 columns, side by side.
# NOTE(review): concat(axis=1) aligns rows on the index; this assumes both
# tables are indexed by the same protein identifiers — TODO confirm.
merged = pd.concat([do_table_for_plot, exp_18_for_plot], axis=1)

# Drop every sample whose label mentions 'Spontaneous'.
kept_columns = [col for col in merged.columns if 'Spontaneous' not in col]
merged = merged[kept_columns]
merged.head()

In [None]:
# Write the merged, median-centred table (missing values still present) to disk.
merged.to_csv('merged_data.csv')

In [None]:
# Replace missing intensities with 0, then subtract each column's median again.
merged_no_na = merged.fillna(0)
merged_no_na = merged_no_na.sub(merged_no_na.median(), axis='columns')

In [None]:
# Annotation table for the exp18 samples only: one row per column of
# exp_18_for_plot whose label does not mention 'Spontaneous'.
exp_18_kept = [col for col in exp_18_for_plot.columns if 'Spontaneous' not in col]

styles_18 = pd.DataFrame()
# Labels look like "<position>_<Treatment>_<State>-<Plex>": drop the position
# prefix and the plex suffix to obtain "<Treatment>_<State>".
styles_18['name'] = [col.partition('_')[2].partition('-')[0] for col in exp_18_kept]
styles_18['batch'] = [col.split('-')[1] for col in exp_18_kept]
styles_18['state'] = [name.split('_')[-1] for name in styles_18['name']]
styles_18['drug'] = [name.split('_')[0] for name in styles_18['name']]

# Normalise the state labels; 'only' is grouped with 'late'.
state_labels = {'only': 'late', 'Early': 'early', 'Late': 'late', 'Parental': 'parental'}
styles_18['state'] = styles_18['state'].map(lambda state: state_labels.get(state, state))
styles_18.loc[styles_18.name == 'Parental', 'state'] = 'parental'
styles_18.loc[styles_18.name == 'parental', 'drug'] = 'none'

# Every row of this table is an exp18 sample. The former
# `.loc[len(do_table_for_plot.columns):, 'experiment']` offset only makes sense
# for the merged table and left the first rows of this table unlabelled.
styles_18['experiment'] = 'exp_18'

In [None]:
# Annotation table with one row per column of `merged`, in column order.
merged_columns = list(merged.columns)

styles = pd.DataFrame()
# Strip the "<position>_" prefix and the "-<batch>" suffix from each label.
styles['name'] = [col.partition('_')[2].partition('-')[0] for col in merged_columns]
styles['name'] = styles['name'].str.replace('late', 'Late')
styles['batch'] = [col.split('-')[1] for col in merged_columns]
styles['state'] = [name.split('_')[-1] for name in styles['name']]
styles['drug'] = [name.split('_')[0] for name in styles['name']]

# Normalise the state labels; 'only' is grouped with 'late'.
state_labels = {'only': 'late', 'Early': 'early', 'Late': 'late', 'Parental': 'parental'}
styles['state'] = styles['state'].map(lambda state: state_labels.get(state, state))
styles.loc[styles['name'] == 'Parental', 'state'] = 'parental'
styles.loc[styles['name'] == 'parental', 'drug'] = 'none'

# The exp23 columns come first in `merged`; every later column is exp18.
n_exp_23 = len(do_table_for_plot.columns)
styles['experiment'] = [
    'exp_23' if position < n_exp_23 else 'exp_18'
    for position in range(len(styles))
]

styles.to_csv('data/meta.csv', index=False)

In [None]:
# Per-experiment views of the annotation table (original row labels are kept).
styles_18 = styles[styles['experiment'].eq('exp_18')]
dec_only_styles = styles[styles['experiment'].eq('exp_23')]

In [None]:

from combat import pycombat    

In [None]:
# ComBat on the merged table: batch labels from `styles.batch`, with the drug
# label passed as the `mod` covariate.
df_corrected_drug_mod = pycombat.pycombat(
    merged_no_na,
    batch=styles['batch'].tolist(),
    mod=styles['drug'].tolist(),
)

In [None]:
# Per-sample distributions after ComBat correction (drug passed as covariate).
f, ax = plt.subplots(figsize=(13, 6))
sns.violinplot(
    data=df_corrected_drug_mod,
    palette="Set3",
    linewidth=1,
    bw_adjust=.5,
    cut=1,
)

# Fixed y-range, vertical sample labels, no left/bottom spines; then save.
ax.set_ylim(-11., 7.5)
ax.tick_params(axis='x', labelrotation=90)
sns.despine(left=True, bottom=True)
plt.savefig('violin_plot_median_centered.png', bbox_inches='tight')

In [None]:
# ComBat on the merged table using the batch labels only (no covariate).
df_corrected_no_mod = pycombat.pycombat(merged_no_na, styles['batch'].tolist())

In [None]:
# Restrict exp18 to the non-'Spontaneous' samples and replace missing values by 0.
non_spontaneous = [col for col in exp_18_for_plot.columns if 'Spontaneous' not in col]
exp_18_for_plot = exp_18_for_plot[non_spontaneous].fillna(0)

# Step 1 of the two-step correction: ComBat within exp18.
exp_18_corrected = pycombat.pycombat(
    exp_18_for_plot,
    batch=styles_18['batch'].tolist(),
    mod=styles_18['drug'].tolist(),
)

In [None]:
# Step 2 of the two-step correction: join exp23 with the corrected exp18 table,
# fill missing values with 0, and run ComBat with the experiment as batch.
two_step_merge = pd.concat([do_table_for_plot, exp_18_corrected], axis=1).fillna(0)
two_step_merge_df = pycombat.pycombat(
    two_step_merge,
    batch=styles['experiment'].tolist(),
    mod=styles['drug'].tolist(),
)

In [None]:
def _shrink_and_add_side_legend(ax):
    """Shrink `ax` to 80 % of its width and put its legend to the right of it."""
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    ax.legend(loc='center left', bbox_to_anchor=(1.05, 0.5))


def create_pca(df, save_name, title, styles=styles):
    """Plot a two-panel, two-component PCA of `df` and save it as PNG and PDF.

    The columns of `df` are treated as samples and its rows as features. The
    top panel colours the samples by `drug` and marks them by `state`; the
    bottom panel colours them by `experiment`.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with one column per sample.
    save_name : str
        Inserted into the output file names ``pca_<save_name>.png`` and
        ``pca_<save_name>.pdf``.
    title : str
        Text appended to "PCA of " in the figure title.
    styles : pandas.DataFrame
        One row per column of `df`, in the same order, with the columns
        `drug`, `state` and `experiment`. The default is the module-level
        `styles` table as it exists when this function is defined.

    Raises
    ------
    ValueError
        If `styles` does not have exactly one row per column of `df`.
    """
    if df.shape[1] != len(styles):
        raise ValueError(
            f'styles has {len(styles)} rows but df has {df.shape[1]} columns; '
            'both must describe the same samples in the same order'
        )

    X = df.values.T  # samples as rows, as scikit-learn expects
    # Fixed seed so the projection is reproducible between runs in case
    # scikit-learn selects its randomized SVD solver for this input.
    pca = PCA(n_components=2, random_state=0)
    X_r = pca.fit(X).transform(X)
    pc1_label = f'PCA 1 variance = {pca.explained_variance_ratio_[0]*100:.2f} %'
    pc2_label = f'PCA 2 variance = {pca.explained_variance_ratio_[1]*100:.2f} %'

    # Work on a copy so the caller's annotation table is not modified.
    current_styles = styles.copy()
    current_styles['x'] = X_r[:, 0]
    current_styles['y'] = X_r[:, 1]

    f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(2, 4))

    # Top panel: drug as colour, state as marker.
    # NOTE(review): only the six drug labels below are listed in hue_order —
    # confirm no other label occurs in `styles.drug`.
    sns.scatterplot(
        ax=ax1,
        data=current_styles, x='x', y='y',
        hue='drug', hue_order=['none', 'D', 'G', 'GV', 'GD', 'GVD'],
        style='state', style_order=['early', 'late', 'parental'],
        palette='Set2',
    )
    _shrink_and_add_side_legend(ax1)
    ax1.set_xlabel(pc1_label)
    ax1.set_ylabel(pc2_label)

    # Bottom panel: experiment as colour.
    sns.scatterplot(
        ax=ax2,
        data=current_styles, x='x', y='y',
        hue='experiment',
        palette='Dark2',
    )
    _shrink_and_add_side_legend(ax2)
    ax2.set_xlabel(pc1_label)
    ax2.set_ylabel(pc2_label)

    f.suptitle(f"PCA of {title}")
    f.savefig(f'pca_{save_name}.png', bbox_inches='tight', dpi=300)
    f.savefig(f'pca_{save_name}.pdf', bbox_inches='tight', dpi=300)

In [None]:
# PCA of the merged table before any batch correction (saved as pca_uncorrected.*).
create_pca(merged_no_na, 'uncorrected', 'uncorrected')

In [None]:
# exp18 before and after within-experiment ComBat. Both calls used to save as
# pca_exp18.*, so the first figure was overwritten by the second. The corrected
# figure keeps that file name; the uncorrected one now gets its own.
create_pca(exp_18_for_plot, 'exp18_uncorrected', 'exp18 uncorrected', styles=styles_18)
create_pca(exp_18_corrected, 'exp18', 'exp18 corrected', styles=styles_18)

In [None]:
# PCA of the exp23 samples alone, annotated with the exp23 rows of `styles`.
create_pca(do_table_for_plot, 'dec_samples', 'dec_samples', styles=dec_only_styles)

In [None]:
# PCA after the two-step correction (ComBat within exp18, then across experiments).
create_pca(two_step_merge_df, 'corrected_two_step_merge_df', 'corrected_two_step_merge_df')

In [None]:
# PCA after one-step ComBat with the drug label passed as covariate.
create_pca(df_corrected_drug_mod, 'corrected_drug_mod', 'corrected_drug_mod')

In [None]:
# PCA after one-step ComBat without a covariate.
create_pca(df_corrected_no_mod, 'corrected_no_mod', 'corrected_no_mod')

In [None]:
# Column-annotation colours for the clustered heatmaps, indexed by the column
# labels of merged_no_na.

# One 'Set3' colour per distinct drug label.
drug_levels = styles.drug.unique()
network_pal = sns.color_palette('Set3', len(drug_levels), as_cmap=False)
network_lut = dict(zip(drug_levels, network_pal))
network_colors = styles.drug.map(network_lut).set_axis(merged_no_na.columns)

# One 'Dark2' colour per distinct experiment label.
experiment_levels = styles.experiment.unique()
network_pal2 = sns.color_palette('Dark2', len(experiment_levels), as_cmap=False)
network_lut2 = dict(zip(experiment_levels, network_pal2))
network_colors2 = styles.experiment.map(network_lut2).set_axis(merged_no_na.columns)

# Two rows (experiment, drug); transposed where it is passed as col_colors.
colors = pd.DataFrame([network_colors2, network_colors])

In [None]:
# Clustered heatmap (row z-scores, Ward linkage) of the two-step corrected
# global-proteomics matrix, annotated per column with experiment and drug.
sns.clustermap(
    two_step_merge_df,
    xticklabels=False, 
    yticklabels=False, 
    row_cluster=True,
    cmap='coolwarm',
    z_score=0, 
    col_colors=colors.T,
    method='ward',
    figsize=(8,8)
)

# Drug legend: one patch per entry of the drug colour lookup.
handles = [Patch(facecolor=network_lut[name]) for name in network_lut]
legend1= plt.legend(handles, list(network_lut), title='Drug',
           bbox_to_anchor=(1.01, .7), bbox_transform=plt.gcf().transFigure, loc='upper right')

# Experiment legend. The labels are derived from the lookup keys so that each
# label stays attached to its own colour; the former hard-coded ['18', '23']
# did not follow the key order of network_lut2 and swapped the two labels.
handles2 = [Patch(facecolor=network_lut2[name]) for name in network_lut2]
experiment_labels = [str(name).replace('exp_', '') for name in network_lut2]
legend2 = plt.legend(
    handles2, 
    experiment_labels,
    title='Experiment',
    bbox_to_anchor=(1., 1.),
    bbox_transform=plt.gcf().transFigure,
    loc='upper right'
)
# The second plt.legend call replaced legend1 on the axes, so add it back.
# legend2 is already attached and does not need to be added again.
plt.gca().add_artist(legend1)
plt.savefig('heatmap_corrected_clustered.png', bbox_inches='tight', dpi=300, bbox_extra_artists=(legend1, legend2))
plt.savefig('heatmap_corrected_clustered.pdf', bbox_inches='tight', dpi=300, bbox_extra_artists=(legend1, legend2))

In [None]:
# Clustered heatmap (row z-scores, Ward linkage) of the one-step corrected
# matrix without covariate; sample labels are shown on the x axis.
sns.clustermap(
    df_corrected_no_mod,
    method='ward',
    z_score=0,
    row_cluster=True,
    cmap='coolwarm',
    col_colors=colors.T,
    xticklabels=True,
    yticklabels=False,
    figsize=(8, 8),
)

# Drug legend in the upper-right corner of the figure (figure is not saved here).
handles = [Patch(facecolor=color) for color in network_lut.values()]
plt.legend(
    handles,
    network_lut,
    title='Drug',
    loc='upper right',
    bbox_to_anchor=(1, 1),
    bbox_transform=plt.gcf().transFigure,
)

In [None]:
# Save the one-step ComBat-corrected matrix (drug passed as covariate).
df_corrected_drug_mod.to_csv('global_proteomics_matrix.csv')

### Load differential expression data

In [None]:
import pandas as pd

## Phosphoproteomics

In [None]:
# exp23 phosphoproteomics: tab-separated intensity crosstab and sample metadata.
dec_only_phospho_id = 'data/exp23_phospho_raw_intensity_crosstab.txt'
dec_phospho_meta_id = 'data/exp23_phospho_raw_metadata.txt'

do_phospho_table = pd.read_csv(dec_only_phospho_id, sep='\t')
do_phospho_meta = pd.read_csv(dec_phospho_meta_id, sep='\t').reset_index(names='name_index')

# Assign a "<drug>_<state>" label from the prefix of each sample name.
for prefix, label in (
    ('L', 'GD_late'),
    ('molm14', 'none_Parental'),
    ('molm/dec', 'D_only'),
):
    do_phospho_meta.loc[do_phospho_meta.sample_name.str.startswith(prefix), 'TS_'] = label
col_dict2 = do_phospho_meta.set_index('name_index')['TS_'].to_dict()

# Rename the sample columns, then prefix every column with its position.
# NOTE(review): unlike the global-proteomics cell, the table is not restricted
# to the sample_<n> columns here — confirm it holds sample columns only.
do_phospho_table = do_phospho_table.rename(columns=col_dict2)
do_phospho_table.columns = [f'{pos}_{name}' for pos, name in enumerate(do_phospho_table.columns)]

In [None]:
# exp18 phosphoproteomics (gv, gvd, gd samples): crosstab and sample metadata.
exp_18_meta_id_phospho = 'data/exp18_phospho_raw_metadata.txt'
exp_18_id_phospho = 'data/exp18_phospho_raw_intensity_crosstab.txt'

exp_18_phospho = pd.read_csv(exp_18_id_phospho, sep='\t')
exp_18_meta_phospho = pd.read_csv(exp_18_meta_id_phospho, sep='\t')

# Build a "<Treatment>_<State>-<Plex>" label per sample, expose the metadata
# index as 'name_index', and drop samples that have no Ligand entry.
exp_18_meta_phospho = (
    exp_18_meta_phospho
    .assign(
        TS_=exp_18_meta_phospho.Treatment + '_' + exp_18_meta_phospho.State
        + '-' + exp_18_meta_phospho.Plex.astype(str)
    )
    .reset_index(names='name_index')
    .dropna(subset=['Ligand'])
)
# Mapping from crosstab column label to sample label.
col_dict = exp_18_meta_phospho.set_index('name_index')['TS_'].to_dict()

# Keep the described columns, rename them, and prefix each with its position.
exp_18_phospho = exp_18_phospho[list(col_dict)].rename(columns=col_dict)
exp_18_phospho.columns = [f'{pos}_{name}' for pos, name in enumerate(exp_18_phospho.columns)]

In [None]:
# Display the relabelled exp18 phospho table.
exp_18_phospho

In [None]:
# Displays the same exp18 phospho table as the preceding cell.
exp_18_phospho

In [None]:
# exp18 phospho columns whose label does not mention 'Spontaneous'.
cols = [label for label in exp_18_phospho.columns if not ('Spontaneous' in label)]
# Show how many samples remain.
len(cols)

In [None]:
# Median-centre the retained exp18 phospho samples (column-wise).
e18_phospho = exp_18_phospho.loc[:, cols]
e18_phospho = e18_phospho.sub(e18_phospho.median(), axis='columns')
e18_phospho

In [None]:

# ComBat within exp18 phospho data, reusing the exp18 annotation rows built for
# the global-proteomics samples (batch labels, drug as covariate).
e18_phospho_corrected = pycombat.pycombat(
    e18_phospho,
    batch=styles_18['batch'].tolist(),
    mod=styles_18['drug'].tolist(),
)

In [None]:
# Median-centre the exp23 phospho samples (column-wise).
do_phospho_table_for_plot = do_phospho_table.sub(do_phospho_table.median(), axis='columns')

# Join exp23 with the ComBat-corrected exp18 table, then keep only the rows
# that have at least 29 non-missing values.
two_step_merge_phopsho = pd.concat([do_phospho_table_for_plot, e18_phospho_corrected], axis=1)
print(two_step_merge_phopsho.shape)
two_step_merge_phopsho = two_step_merge_phopsho.dropna(thresh=29, axis=0)
print(two_step_merge_phopsho.shape)

# Second ComBat pass across experiments; remaining gaps are filled with 0.
two_step_merge_phospho_df = pycombat.pycombat(
    two_step_merge_phopsho.fillna(0),
    batch=styles['experiment'].tolist(),
    mod=styles['drug'].tolist(),
)

In [None]:


# One-step variant: join the exp23 table with the median-centred exp18 table
# and keep only the rows that have at least 29 non-missing values.
one_step_merge_phopsho = pd.concat([do_phospho_table, e18_phospho], axis=1)
print(one_step_merge_phopsho.shape)
one_step_merge_phopsho = one_step_merge_phopsho.dropna(thresh=29, axis=0)
print(one_step_merge_phopsho.shape)

# Median-centre every column, then fill the remaining gaps with 0.
one_step_merge_phopsho = one_step_merge_phopsho.sub(
    one_step_merge_phopsho.median(), axis='columns'
)
one_step_merge_phopsho = one_step_merge_phopsho.fillna(0)

# Single ComBat pass over all samples (batch labels, drug as covariate).
one_step_merge_phospho_df = pycombat.pycombat(
    one_step_merge_phopsho,
    batch=styles['batch'].tolist(),
    mod=styles['drug'].tolist(),
)

In [0]:
# save results used
# NOTE(review): this writes one_step_merge_phopsho, i.e. the median-centred,
# zero-filled matrix that is passed INTO pycombat, not the corrected output
# one_step_merge_phospho_df. The global-proteomics section saves its corrected
# matrix (df_corrected_drug_mod) — confirm which one is intended here.
one_step_merge_phopsho.to_csv('data/phospho_proteomics_matrix.csv')

In [None]:
# PCA of the exp23 phospho samples as loaded (no centring or batch correction).
# create_pca takes (df, save_name, title); the two strings used to be passed in
# the opposite order, so the descriptive sentence became the file name and all
# phospho PCA cells wrote to the same file.
create_pca(
    do_phospho_table,
    'phospho_dec_samples',
    'phosphoproteomics exp23 samples',
    styles=dec_only_styles,
)

In [None]:
# PCA of the exp18 phospho samples after within-experiment ComBat.
# create_pca takes (df, save_name, title); the two strings used to be passed in
# the opposite order, so the descriptive sentence became the file name and all
# phospho PCA cells wrote to the same file.
create_pca(
    e18_phospho_corrected,
    'phospho_exp18_corrected',
    'phosphoproteomics exp18 after batch correction',
    styles=styles_18,
)

In [None]:
# PCA of all phospho samples after the one-step ComBat correction.
# create_pca takes (df, save_name, title); the two strings used to be passed in
# the opposite order, so the descriptive sentence became the file name and all
# phospho PCA cells wrote to the same file.
create_pca(
    one_step_merge_phospho_df,
    'one_step_phospho',
    'phosphoproteomics after one-step batch correction',
)

In [None]:
# PCA of all phospho samples after the two-step ComBat correction.
# create_pca takes (df, save_name, title); the two strings used to be passed in
# the opposite order, so the descriptive sentence became the file name and all
# phospho PCA cells wrote to the same file.
create_pca(
    two_step_merge_phospho_df,
    'phospho_corrected_two_step_merge_df',
    'phosphoproteomics after two-step batch correction',
)

In [None]:
# Column-annotation colours for the clustered heatmap, indexed by the column
# labels of two_step_merge.

# One 'Set3' colour per distinct drug label.
drug_levels = styles.drug.unique()
network_pal = sns.color_palette('Set3', len(drug_levels), as_cmap=False)
network_lut = dict(zip(drug_levels, network_pal))
network_colors = styles.drug.map(network_lut).set_axis(two_step_merge.columns)

# One 'Dark2' colour per distinct experiment label.
experiment_levels = styles.experiment.unique()
network_pal2 = sns.color_palette('Dark2', len(experiment_levels), as_cmap=False)
network_lut2 = dict(zip(experiment_levels, network_pal2))
network_colors2 = styles.experiment.map(network_lut2).set_axis(two_step_merge.columns)

# Two rows (experiment, drug); transposed where it is passed as col_colors.
colors = pd.DataFrame([network_colors2, network_colors])

In [None]:
# Clustered heatmap (row z-scores, Ward linkage) of the two-step corrected
# PHOSPHO matrix. This cell used to plot the global-proteomics matrix
# two_step_merge_df while saving under a "phospho" file name.
#
# `colors` is indexed by the global-proteomics column labels, and seaborn
# matches col_colors to the data by label, so re-label it positionally with the
# phospho column labels. This relies on the phospho samples being in the same
# order as the rows of `styles`, which the pycombat calls on the phospho data
# already assume.
phospho_col_colors = colors.T.set_axis(two_step_merge_phospho_df.columns, axis=0)

sns.clustermap(
    two_step_merge_phospho_df,
    xticklabels=False, 
    yticklabels=False, 
    row_cluster=True,
    cmap='coolwarm',
    z_score=0, 
    col_colors=phospho_col_colors,
    method='ward',
    figsize=(6,6)
)

# Drug legend: one patch per entry of the drug colour lookup.
handles = [Patch(facecolor=network_lut[name]) for name in network_lut]
legend1= plt.legend(handles, list(network_lut), title='Drug',
           bbox_to_anchor=(1.01, .7), bbox_transform=plt.gcf().transFigure, loc='upper right')

# Experiment legend. The labels are derived from the lookup keys so that each
# label stays attached to its own colour; the former hard-coded ['18', '23']
# did not follow the key order of network_lut2 and swapped the two labels.
handles2 = [Patch(facecolor=network_lut2[name]) for name in network_lut2]
experiment_labels = [str(name).replace('exp_', '') for name in network_lut2]
legend2 = plt.legend(
    handles2, 
    experiment_labels,
    title='Experiment',
    bbox_to_anchor=(1., 1.),
    bbox_transform=plt.gcf().transFigure,
    loc='upper right'
)
# The second plt.legend call replaced legend1 on the axes, so add it back.
# legend2 is already attached and does not need to be added again.
plt.gca().add_artist(legend1)
plt.savefig('heatmap_phospho_corrected_clustered.png', bbox_inches='tight', dpi=300, bbox_extra_artists=(legend1, legend2))