# Compare outcomes from differential analysis based on different imputation methods

- load scores based on `16_ald_diff_analysis`

In [None]:
from pathlib import Path

import pandas as pd
import seaborn as sns

import vaep
# configure notebook logging through the project helper; its return value is kept as `logger`
logger = vaep.logging.setup_nb_logger()

In [None]:
# catch passed parameters
# `args` is bound first on purpose: the name then already exists when the
# snapshot on the next line is taken, so `args` itself is part of it.
args = None
# keys of a copy of the current globals, i.e. the names defined up to here;
# presumably `vaep.nb.get_params` compares against this snapshot to find the
# names defined afterwards -- TODO confirm against the helper
args = dict(globals()).keys()

## Parameters

In [None]:
# root folder of the experiment; all other paths are built beneath it
folder_experiment = 'runs/appl_ald_data/proteinGroups'
# key of the model whose results are compared
model_key = 'vae'
# target for which the scores are selected
target = 'kleiner'
# sub-folder of the experiment folder that holds the differential analysis results
out_folder = 'diff_analysis'

In [None]:
# collect the notebook parameters with the project helper, handing it the
# earlier snapshot of global names (`args`) and the current globals
params = vaep.nb.get_params(args, globals=globals())
params

In [None]:
# Build the run configuration: the experiment folder first, then the default
# paths rooted at <folder_experiment>/<out_folder>/<target>/<model_key>.
args = vaep.nb.Config()
args.folder_experiment = Path(params["folder_experiment"])
args = vaep.nb.add_default_paths(
    args,
    out_root=(
        args.folder_experiment
        / params["out_folder"]
        / params["target"]
        / params["model_key"]
    ),
)
# NOTE(review): `params` also holds 'folder_experiment' and 'out_folder' as
# plain strings; later cells use both attributes as Path objects, so confirm
# that `update_from_dict` does not override the values set above.
args.update_from_dict(params)
args

# Load scores 

In [None]:
# entries of the output folder whose path contains 'scores'
[
    path
    for path in args.out_folder.iterdir()
    if 'scores' in str(path)
]

In [None]:
# path of the score table inside the output folder
# (plain string literal: the name has no placeholders, so no f-string is needed)
fname = args.out_folder / 'diff_analysis_scores.pkl'
fname

In [None]:
# NOTE(review): unpickling can execute arbitrary code, so `fname` must point
# to a file from a trusted source
scores = pd.read_pickle(fname)
scores

In [None]:
# NOTE(review): `omegaconf` is not referenced in the visible code -- confirm
# whether this import is still needed
import omegaconf
# Config built from the first level of the scores' column index; the exact
# key/value layout produced by `index_to_dict` is not visible here (later
# cells use the values to address columns and the keys as display labels)
models = vaep.nb.Config.from_dict(vaep.pandas.index_to_dict(scores.columns.levels[0]))
vars(models)

In [None]:
# stop here if the requested model key is not among the keys of `models`
assert args.model_key in models.keys(), f"Missing model key which was expected: {args.model_key}"

In [None]:
# descriptive statistics of the score table as loaded
scores.describe()

In [None]:
# keep only the rows whose second index level equals the target; all columns are kept
scores = scores.loc[pd.IndexSlice[:, args.target], :]
scores

In [None]:
# descriptive statistics of the current `scores` table
scores.describe()

## Load frequencies of observed features

In [None]:
# frequencies of observed features, read with the first CSV column as index
# (a later plot refers to a column named 'frequency' -- presumably provided by
# this table; confirm)
fname = args.folder_experiment / 'freq_features_observed.csv'
freq_feat = pd.read_csv(fname, index_col=0)
freq_feat

# Compare shared features

In [None]:
# rows without any missing value, with the last index level dropped
scores_common = (
    scores
    .dropna()
    .reset_index(level=-1, drop=True)
)
scores_common

In [None]:
def annotate_decision(scores, model):
    """Turn the boolean 'rejected' column of ``model`` into readable labels.

    Parameters
    ----------
    scores
        Table with two-level columns; the column ``(model, 'rejected')`` is read.
    model
        First-level column key. The same value is used as prefix of the labels.

    Returns
    -------
    The selected column with ``False`` replaced by ``'<model> ->  no'`` and
    ``True`` replaced by ``'<model> -> yes'``.
    """
    labels = {False: f'{model} ->  no', True: f'{model} -> yes'}
    rejected = scores[(model, 'rejected')]
    return rejected.replace(labels)

# Build one label per feature by joining the decisions of all models with ' - '.
annotations = None
for model, model_column in models.items():
    # the key is used in the label text, the value addresses the score columns
    decision = scores_common[(model_column, 'rejected')].replace(
        {False: f'{model} ->  no', True: f'{model} -> yes'}
    )
    if annotations is None:
        # first model: start the label with its decision
        annotations = decision
    else:
        annotations = annotations + ' - ' + decision
# the name becomes the column name used as `hue` in the plots
annotations.name = 'Differential Analysis Comparison'
annotations.value_counts()

In [None]:
# the 'rejected' column of every model
rejected = scores_common.loc[:, pd.IndexSlice[:, 'rejected']]
# True where at least one, but not every, model rejected
mask_different = rejected.any(axis=1) & ~rejected.all(axis=1)

scores_common.loc[mask_different]

In [None]:
# write the features on which the models disagree to an Excel file in the
# output folder (plain string literal: the name has no placeholders)
fname = args.out_folder / 'diff_analysis_differences.xlsx'
scores_common.loc[mask_different].to_excel(fname)
fname

In [None]:
# Assemble the plotting table: one column of q-values per model, followed by
# the feature frequencies and the combined decision labels.
var = 'qvalue'
# each series is named after its model key with underscores replaced by
# spaces; naming it while iterating over items() keeps key and column paired
to_plot = [
    scores_common[model_column][var].rename(model.replace('_', ' '))
    for model, model_column in models.items()
]
to_plot.append(freq_feat.loc[scores_common.index])
to_plot.append(annotations)
to_plot = pd.concat(to_plot, axis=1)
to_plot

## Differences plotted

- first, coloured only by the annotations created above

In [None]:
# scatter the first two columns of `to_plot` against each other,
# coloured by the combined decision label
ax = sns.scatterplot(
    data=to_plot,
    x=to_plot.columns[0],
    y=to_plot.columns[1],
    hue='Differential Analysis Comparison',
)
fig = ax.get_figure()
fname = args.out_folder / f'diff_analysis_comparision_1_{args.model_key}'
vaep.savefig(fig, name=fname)

- additionally showing, as marker size, how frequently each feature was measured ("observed")

In [None]:
# same scatter plot, with the marker size taken from the 'frequency' column
ax = sns.scatterplot(
    data=to_plot,
    x=to_plot.columns[0],
    y=to_plot.columns[1],
    size='frequency',
    hue='Differential Analysis Comparison',
)
fig = ax.get_figure()
fname = args.out_folder / f'diff_analysis_comparision_2_{args.model_key}'
vaep.savefig(fig, name=fname)

# Features not shared by all methods: scores of the selected model only

In [None]:
# Scores of the selected model for the features that are in `scores` but not
# in `scores_common`, i.e. rows with at least one missing value.
scores_model_only = scores.reset_index(level=-1, drop=True)
idx_model_only = scores_model_only.index.difference(scores_common.index)
scores_model_only = scores_model_only.loc[idx_model_only, args.model_key]
# smallest q-values first, then attach the feature frequencies
scores_model_only = scores_model_only.sort_values(by='qvalue', ascending=True)
scores_model_only = scores_model_only.join(freq_feat)
scores_model_only

In [None]:
# how many of these features were rejected / not rejected
scores_model_only['rejected'].value_counts()

In [None]:
# write the model-only scores to an Excel file in the output folder and show its path
fname = args.out_folder / 'diff_analysis_only_model.xlsx'
scores_model_only.to_excel(fname)
fname

In [None]:
# `display` is imported explicitly so the cell does not depend on the name
# being injected into the namespace by the interactive shell
from IPython.display import IFrame, display

# embed the UniProt website in the notebook output
display(IFrame('https://www.uniprot.org/', width=900, height=500))

In [None]:
%%html
<!-- embeds the search page of diseases.jensenlab.org in a 600x600 px frame scaled by 1.5 -->
<iframe 
  style="transform-origin: 0px 0px 0px; transform: scale(1.5); width: 600px; height: 600px;" 
  src='https://diseases.jensenlab.org/Search'
  name="iFrame"
  scrolling="no">
</iframe>