In [None]:
import pandas as pd
from magine.data.experimental_data import ExperimentalData
import numpy as np
import seaborn as sns

In [None]:
import matplotlib

# Global matplotlib settings: pdf.fonttype 42 embeds fonts as TrueType in PDF
# output (see matplotlib's rcParams documentation); Arial is the font family.
matplotlib.rcParams.update({
    'pdf.fonttype': 42,
    'font.family': ['arial'],
})

# Seaborn theme applied to every figure produced by this notebook.
sns.set_theme(context='paper', style='white', palette="Paired", font='arial')

## Merge differential results into a single file

In [0]:
# Experimental designs; one global and one phospho differential-expression
# table is read for each of them.
designs = [
    'D_only',
    'G_Early', 'G_Late',
    'GD_Early', 'GD_Late',
    'GV_Early', 'GV_Late',
    'GVD_Early', 'GVD_Late',
]
global_results = []
phospho_results = []

# NOTE(review): the global path has no 'data/' prefix and no underscore after
# 'diff_ex', unlike the phospho path -- confirm both filenames are intended.
for design in designs:
    # Space-delimited table; tag every row with the design it came from.
    prot = pd.read_csv(f'diff_ex{design}-none_Parental.csv', sep=' ').assign(sample_id=design)
    global_results.append(prot)

    phos = pd.read_csv(f'data/diff_ex_phospho_{design}-none_Parental.csv', sep=' ').assign(sample_id=design)
    phospho_results.append(phos)


In [None]:
# Stack the per-design global tables into one long frame. The former row index
# is exposed as the 'identifier' column and the 'Var.1' column is discarded.
global_prot = (
    pd.concat(global_results)
    .reset_index()
    .rename(columns={'index': 'identifier'})
    .drop(columns=['Var.1'])
)

# Signed fold change from the base-2 log fold change:
#   logFC >= 0 ->  2 ** logFC         (>= 1)
#   logFC <  0 -> -1 / (2 ** logFC)   (<= -1)
# Rows with logFC == 0 get a fold change of 1.0 instead of being left as NaN;
# rows with a missing logFC stay NaN.
log_fc = global_prot['logFC']
global_prot['fold_change'] = np.where(log_fc >= 0, 2 ** log_fc, -1 / (2 ** log_fc))

# A measurement is significant when the adjusted p-value is at most 0.05 and
# the absolute fold change is at least 1.5 (NaN comparisons evaluate to False).
sig_crit = global_prot['adj.P.Val'] <= 0.05
fold_change_crit = np.abs(global_prot['fold_change']) >= 1.5
global_prot['significant'] = sig_crit & fold_change_crit

# Remaining columns added for the MAGINE input table.
global_prot['p_value'] = global_prot['adj.P.Val']
global_prot['species_type'] = 'protein'
global_prot['source'] = 'global_proteomics'
global_prot['label'] = global_prot['identifier'] + '_prot'
global_prot.to_csv('global_proteomics_for_magine.csv.gz')

In [None]:
# Stack the per-design phospho tables into one long frame. The former row index
# is exposed as the 'label' column and the 'Var.1' column is discarded.
phospho_data = (
    pd.concat(phospho_results)
    .reset_index()
    .rename(columns={'index': 'label'})
    .drop(columns=['Var.1'])
)

# The identifier is the part of the label before the first '-'.
# NOTE(review): a gene symbol that itself contains '-' would be truncated
# here -- confirm against the label format of the input files.
phospho_data['identifier'] = phospho_data['label'].str.split('-').str.get(0)

# Signed fold change from the base-2 log fold change:
#   logFC >= 0 ->  2 ** logFC         (>= 1)
#   logFC <  0 -> -1 / (2 ** logFC)   (<= -1)
# Rows with logFC == 0 get a fold change of 1.0 instead of being left as NaN;
# rows with a missing logFC stay NaN.
log_fc = phospho_data['logFC']
phospho_data['fold_change'] = np.where(log_fc >= 0, 2 ** log_fc, -1 / (2 ** log_fc))

# A measurement is significant when the adjusted p-value is at most 0.05 and
# the absolute fold change is at least 1.5 (NaN comparisons evaluate to False).
sig_crit = phospho_data['adj.P.Val'] <= 0.05
fold_change_crit = np.abs(phospho_data['fold_change']) >= 1.5
phospho_data['significant'] = sig_crit & fold_change_crit

# Remaining columns added for the MAGINE input table.
phospho_data['p_value'] = phospho_data['adj.P.Val']
phospho_data['species_type'] = 'protein'
phospho_data['source'] = 'phospho_proteomics'

# Discard rows whose label contains 'NULL' before writing the table.
phospho_data = phospho_data.loc[~phospho_data.label.str.contains('NULL'), :]
phospho_data.to_csv('phospho_proteomics_for_magine.csv.gz')

In [None]:
# Row-wise combination of the global and phospho tables, saved as one file.
merged = pd.concat([global_prot, phospho_data], axis=0)
merged.to_csv('both_proteomics_for_magine.csv.gz')

In [None]:
# Build an ExperimentalData object (imported from magine.data.experimental_data)
# from the merged proteomics file written by the previous cell.
exp_data = ExperimentalData('both_proteomics_for_magine.csv.gz')