# Fig 2B: Proportion of non-linearities of GDSC-to-PDXE consensus features
This notebook supports Fig_2B of the manuscript.

In [None]:
# All imports are listed in module_import.py
from module_import import *
from compute_proportion import compute_proportion

In [None]:
# All data settings are in data_settings.py
from data_settings import *

## Read data

In [None]:
# Read the datasets selected by the settings module, looking under '../data/'.
# `data_types` is copied into a fresh list before being handed to the reader.
data_df = read_data(
    tissues=tissues,
    data_types=list(data_types),
    projects=projects,
    data_sources=data_sources,
    folder_basis='../data/',
)

# Resolve the keys used for the source and target datasets.
# NOTE(review): reformat_df may also modify data_df in place -- not visible here.
source_data_key, target_data_key = reformat_df(data_df, source, target)

In [None]:
# Library size normalization
# Every dataset is TMM-normalized, rescaled so that the mean of its per-row
# totals equals `average_depth_global`, then log(x + 1)-transformed. The
# result replaces the original frame, keeping its index and columns.
average_depth_global = 10**5
for ds in list(data_df.keys()):
    raw_df = data_df[ds]
    GE_normalized = np.array(
        library_size_normalization.TMM_normalization(raw_df.values.astype(float))
    )
    # Mean, over rows, of the row sums (sum along axis 1).
    average_depths = GE_normalized.sum(axis=1).mean()
    GE_normalized = np.log(GE_normalized / average_depths * average_depth_global + 1)
    data_df[ds] = pd.DataFrame(
        GE_normalized,
        columns=raw_df.columns,
        index=raw_df.index,
    )

In [None]:
# Normalization
# Each dataset gets its own freshly fitted StandardScaler; the two flags
# below are forwarded as the scaler's `with_mean` / `with_std` options.
with_mean = True
with_std = True

normalized_data_df = {}
for ds in data_df:
    scaler = StandardScaler(with_mean=with_mean, with_std=with_std)
    normalized_data_df[ds] = scaler.fit_transform(data_df[ds])

## Computing contribution of each component

In [None]:
# Settings forwarded to compute_proportion.
# NOTE(review): presumably the number of principal components kept per
# domain and the number of principal vectors -- confirm in compute_proportion.
number_pc = dict(source=70, target=50)

n_pv = 20

In [None]:
# Sweep of gamma: 20 values log-spaced between 1e-6 and 1e-2.
gamma_values = np.logspace(-6, -2, num=20, base=10)

# compute_proportion is called once per gamma; element 0 of its result is
# stored in `clf` and element 1 in `contribution`, both keyed by gamma.
clf = {}
contribution = {}
for gamma in gamma_values:
    result = compute_proportion(
        gamma,
        number_pc,
        n_pv,
        normalized_data_df,
        source_data_key,
        target_data_key,
    )
    clf[gamma] = result[0]
    contribution[gamma] = result[1]

## Plot

In [None]:
# Format the results
# For every gamma and every feature type, average the per-component
# contributions separately for the source, target and consensus entries.
features = ['offset', 'linear', 'interaction']
global_contribution_source = {}
global_contribution_target = {}
global_contribution_consensus = {}

collectors = (
    ('source', global_contribution_source),
    ('target', global_contribution_target),
    ('consensus', global_contribution_consensus),
)

for gamma in gamma_values:
    per_feature = contribution[gamma]
    for domain, collected in collectors:
        collected[gamma] = {f: np.mean(per_feature[f][domain]) for f in features}

# One row per gamma, one column per feature type; whatever is not accounted
# for by the listed features is reported as 'higher order'.
global_contribution_consensus_df = pd.DataFrame(global_contribution_consensus).T
listed_share = global_contribution_consensus_df.sum(axis=1)
global_contribution_consensus_df['higher order'] = (1 - listed_share).values

In [None]:
# Gamma values at which reference lines and tick labels are placed:
# 10**-5 up to 10**-2 in half-decade steps.
xticks_lines = [10**exponent for exponent in (-5, -4.5, -4, -3.5, -3, -2.5, -2)]

# Tick labels in zero-decimal scientific notation, each preceded by a newline.
xticks_lines_labels = [f'\n{x:.0E}' for x in xticks_lines]

In [None]:
# Stacked-area plot of the consensus contributions (one band per column of
# global_contribution_consensus_df) against gamma on a log-scaled x axis.
xticks = np.array(xticks_lines)
yticks = np.linspace(0, 1, 6)
yticks_labels = ['%s%%'%(int(100*(y))) for y in yticks]

plt.figure(figsize=(17,8.2))

plt.stackplot(global_contribution_consensus_df.index.astype(float),
              global_contribution_consensus_df.values.T,
              labels=global_contribution_consensus_df.columns,
              alpha=0.75)

# Full-height vertical reference line at each annotated gamma value.
for x in xticks_lines:
    plt.vlines(x, 0, 1, linewidth=3)

plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1), fontsize=15, ncol=2)
plt.xscale('log')
# Ticks are placed at 0.8 * gamma, i.e. slightly left of each reference line.
# NOTE(review): presumably so the vertical labels do not sit on the lines.
plt.xticks(0.8*xticks, xticks_lines_labels, fontsize=30, rotation='vertical', color='black')
plt.grid(True)
plt.yticks(yticks, yticks_labels, fontsize=25)
plt.xlim(min(xticks)*.3, xticks[-1]*1.2)
plt.ylim(0,1.01)
plt.ylabel('Geometric proportion \n of linear and non-linear terms \n in consensus features',
           fontsize=30,
           color='black')
# Raw string: '\g' is not a valid Python escape sequence, so the non-raw
# literal triggers an invalid-escape warning. The runtime text is unchanged.
plt.xlabel(r'$\gamma$', fontsize=35, color='black')
plt.tight_layout()

# The 'figures/' directory must already exist for savefig to succeed.
plt.savefig('figures/stacked_figure_contribution_annotated_n_pv_%s.png'%(n_pv), dpi=300)
plt.show()