# Processed data

The properties of the combined processed data.

In [1]:
## logging functions
import logging
## data functions
import numpy as np
## system functions
from os.path import dirname
from os.path import splitext
import sys
## visualization functions
import matplotlib.pyplot as plt
## visualization functions from roux
from roux.viz.io import begin_plot
## data functions from roux
import roux.lib.dfs as rd # attributes
sys.path.append('..')

In [2]:
## parameters
# Path of the metadata file that `read_metadata` loads in the next cell.
metadata_path='../config/metadata.yaml'
# Kernel name; when left as None it is replaced by metadata['kernels']['default'] in the next cell.
kernel=None
# When True, the output directory is backed up first; the flag is also passed on to `run_task`.
force=False
# When True, the metadata is read with the version number overridden to 'test'.
test=True

In [3]:
## inferred parameters
# In test mode the version number is overridden with 'test'; otherwise no overrides are passed.
metadata = read_metadata(
    metadata_path,
    inputs={'version':{'number':'test'}} if test else None,
)
# Dataset-level configuration, seeded with the species name and the dataset path.
metadata['dataset'] = read_metadata(
    metadata['dataset_config_path'],
    config_base=dict(
        species_name=metadata['species_name'],
        path=metadata['dataset_path'],
    ),
)
### output
output_dir_path = metadata['processed']['merged']
logging.info(f"Output directory: {output_dir_path}")
## backup old files if overwriting (force is True)
if force:
    backup(
        output_dir_path,
        dirname(output_dir_path),
        test=not force,
    )
## misc.
if kernel is None:
    kernel = metadata['kernels']['default']

## Merging the analysed data

In [4]:
# IPython magic: delete, without a confirmation prompt (-f), every variable whose name matches ^df.*
%reset_selective -f "^df.*"

In [5]:
from roux.workflow.task import run_task
# Execute the merging notebook with the paths below passed as its parameters.
run_task(
    parameters=dict(
        input_path=metadata['merged']['input']['redistribution'],
        output_path=metadata['merged']['pairs'],
        abundance_change_path=metadata['merged']['input']['abundance_change'],
        relocalization_paths=metadata['merged']['input']['relocalization'],
        genes_path=metadata['ids']['genes'],
    ),
    # Use the kernel resolved in the "inferred parameters" cell (the `kernel`
    # parameter, falling back to metadata['kernels']['default']) rather than a
    # hard-coded name, so that the notebook-level setting actually takes effect.
    kernel=kernel,
    input_notebook_path='51_script_merge_analyses.ipynb',
    # Passed through from the notebook's `force` parameter.
    force=force,
)

## Plots

### Genes classification stats

In [6]:
# Plot directory named after the genes table: its extension is dropped and '_plots/' is appended.
output_plots_dir_path = f"{splitext(metadata['merged']['genes'])[0]}_plots/"

In [7]:
# Load the table stored at metadata['merged']['genes']; the cells below select its columns by name.
df01=read_table(metadata['merged']['genes'])
# Show the first row as a quick preview.
df01.head(1)

#### Relative abundance change

In [8]:
# Source data of the plot: gene symbol and its 'protein abundance change' value, as an independent copy.
data = df01[['gene symbol', 'protein abundance change']].copy()

In [9]:
from roux.viz.colors import saturate_color, to_hex

# Pie colours listed in the order 'ns', 'decrease', 'increase'.
kws_plot = {
    'pie': {
        'colors': [
            # 'ns'
            metadata['colors']['none'],
            # 'decrease'
            to_hex(saturate_color(metadata['colors']['dependency'], alpha=0.05)),
            # 'increase'
            to_hex(saturate_color(metadata['colors']['compensation'], alpha=0.05)),
        ],
    },
}

In [10]:
# Pie chart of the number of genes per 'protein abundance change' value.
# NOTE(review): the code between begin_plot() and to_plot() reads only `data` and `kws_plot`;
# presumably it is saved together with them so the plot can be regenerated - confirm before refactoring.
begin_plot()
fig,ax=plt.subplots(figsize=[2.3,2.3])
# Count the genes per class.
ds_=data['protein abundance change'].value_counts().sort_index(ascending=False)
# Fix the wedge order; it has to line up with `explode` and with kws_plot['pie'].
# NOTE(review): selecting a label that is absent from the counts is expected to raise KeyError.
ds_=ds_.loc[['ns','decrease','increase']]
pie=ds_.plot(
    kind='pie',
    # `p` is the wedge percentage; the count is recovered from it and the total.
    autopct=lambda p: '{:.0f}%({:.0f})'.format(p,(p/100)*ds_.sum()), 
    # Only the first wedge ('ns') is offset from the centre.
    explode=(0.1,0,0),
    pctdistance=0.8, 
    labeldistance=1.25,
    startangle=90,
    ax=ax,
    **kws_plot['pie'],    
)
ax.axis('off')
# Manual label placement.
first=True
for child in pie.get_children():
    if isinstance(child,plt.Text):
        if child.get_text()=='decrease':
            # Move the 'decrease' label right and down.
            child.set_x(child.get_position()[0]+0.25)    
            child.set_y(child.get_position()[1]-0.2)    
        # NOTE(review): '9%(15)' is a literal percentage/count label, so this branch only
        # fires when the data produce exactly that text; `first` limits it to the first match.
        elif child.get_text()=='9%(15)' and first:
            child.set_y(child.get_position()[1]-0.25)    
            first=False

ax.set_title('Individual paralogs',
             y=1,
            )
# Save the figure together with its source data and plotting parameters.
to_plot(
    plotp=f"{output_plots_dir_path}/relative_abundance_change.png",
    data=data,
    kws_plot=kws_plot,
    validate=True,
)

#### Redistribution

In [11]:
# Source data of the plot: gene symbol and its 'redistribution' flag, as an independent copy.
data = df01[['gene symbol', 'redistribution']].copy()

In [12]:
# Two pie colours: the neutral colour first, then the redistribution colour.
kws_plot = {
    'pie': {
        'colors': [
            metadata['colors']['none'],
            metadata['colors']['redistribution'],
        ],
    },
}

In [13]:
# Pie chart of the number of genes with and without redistribution.
# NOTE(review): the code between begin_plot() and to_plot() reads only `data` and `kws_plot`;
# presumably it is saved together with them so the plot can be regenerated - confirm before refactoring.
begin_plot()
fig,ax=plt.subplots(figsize=[2,2])
# Count the genes per value of the 'redistribution' column.
ds_=data['redistribution'].value_counts().sort_index(ascending=False)
# Replace the boolean index values by the labels shown on the plot.
ds_.index=ds_.index.map({True:'redistribution',False: 'non\nredistribution'})
# The largest class is drawn first; `explode` and the colour list are positional,
# so they apply to the wedges in that size order.
ds_.sort_values(ascending=False).plot(
    kind='pie',
    # `p` is the wedge percentage; the count is recovered from it and the total.
    autopct=lambda p: '{:.1f}%\n({:.0f})'.format(p,(p/100)*ds_.sum()), 
    explode=(0,0.1),
    labeldistance=1.25,
    startangle=90,
    ax=ax,
    **kws_plot['pie'],
    )
ax.axis('off')
ax.set_title('Individual paralogs',
             y=1,
            )
# Save the figure together with its source data and plotting parameters.
to_plot(
    plotp=f"{output_plots_dir_path}/redistribution.png",
    data=data,
    kws_plot=kws_plot,
    validate=True,
)

#### Overlap of relative abundance change with redistribution

In [14]:
# Source data of the Venn diagram: gene symbol, abundance-change value and redistribution flag.
data = df01[['gene symbol', 'protein abundance change', 'redistribution']].copy()

In [15]:
from roux.viz.colors import saturate_color, to_hex

# Plotting parameters of the Venn diagram: the set colours, the mapping of abundance-change
# classes taken from the metadata, and the full colour table used for recolouring the labels.
kws_plot = {
    'venn': {
        'set_colors': (
            metadata['colors']['compensation'],
            metadata['colors']['dependency'],
            metadata['colors']['redistribution'],
        ),
    },
    'abundance_change_classes': metadata['abundance_change']['classes'],
    'colors': metadata['colors'],
}

In [16]:
# Venn diagram of genes with an abundance increase/decrease and genes with redistribution.
# NOTE(review): the code between begin_plot() and to_plot() reads only `data` and `kws_plot`
# and re-imports its helpers; presumably it is saved as a standalone plot script - confirm before refactoring.
begin_plot()
fig,ax=plt.subplots(figsize=[3,3])
# Only genes whose abundance change is 'decrease' or 'increase' are used here.
# NOTE(review): `.rd.to_dict` is a roux accessor; presumably it maps each class to its gene symbols - confirm.
d_=data.loc[data['protein abundance change'].isin(['decrease', 'increase']),:].rd.to_dict(['protein abundance change','gene symbol'])
# Rename the keys to the set labels shown on the plot ("Abundance\n<class name>").
d_={"Abundance\n"+kws_plot['abundance_change_classes'][k]: v for k,v in d_.items()}
# Third set: gene symbols of the rows where 'redistribution' is True.
d_['Redistribution']=data.loc[data['redistribution'],'gene symbol'].tolist()

d_={k:list(v) for k,v in d_.items()}
from roux.lib.df import to_map_binary, dict2df
# NOTE(review): presumably a gene-by-set membership table - confirm against roux.
df_=to_map_binary(dict2df(d_).explode('value'),colgroupby='key',colvalue='value')
# Number of rows for every combination of values over all columns of `df_`.
ds_=df_.groupby(df_.columns.tolist()).size()

logging.info(ds_)
from roux.viz.sets import plot_venn
plot_venn(
    ds_.copy(),figsize=[3.5,3.5],
    ax=ax,
    **kws_plot['venn'],
    )

## adjust positions
import matplotlib
# First text artist of the axes whose text starts with 'Redistr'.
text=list(filter( lambda y : y.get_text().startswith('Redistr'), filter(lambda x: isinstance(x,matplotlib.text.Text),ax.get_children())))[0]
text.set_y(0.75)
text.set_va('top')
## recolor
text.set_color(kws_plot['colors']['redistribution'])
# Colour every "Abundance\n<class name>" label with the colour stored under the class name.
for k in kws_plot['abundance_change_classes'].values():
    text=list(filter( lambda y : y.get_text().startswith(f'Abundance\n{k}'), filter(lambda x: isinstance(x,matplotlib.text.Text),ax.get_children())))[0]
    ## recolor
    text.set_color(kws_plot['colors'][k])
# Save the figure together with its source data and plotting parameters.
to_plot(
    plotp=f"{output_plots_dir_path}/venn_relative_abundance_change_redistribution.png",
    data=data,
    kws_plot=kws_plot,
    validate=True,
)

#### Relocalized genes

In [17]:
# Keep the columns needed for the relocalization plots and drop the genes
# that have no 'relocalization type' (via the `.log` accessor).
df1 = (
    df01[
        [
            "gene symbol",
            'redistribution',
            'protein abundance change',
            "relocalization type",
            "relocalization notes",
        ]
    ]
    .log.dropna(subset=["relocalization type"])
)
# Sanity check: every relocalized gene is expected to be flagged as redistributed.
assert df1['redistribution'].all(), 'found relocalized genes that are not redistributed.'
df1.head(1)

In [18]:
from roux.viz.colors import saturate_color, to_hex

# Background colour per abundance-change class: each base colour is passed through
# saturate_color(alpha=1.5) and then through to_hex.
abundance_change2color = {
    change: to_hex(saturate_color(base_color, alpha=1.5))
    for change, base_color in {
        'decrease': "#99E5EB",
        'ns': "#99D8C7",
        'increase': "#D4DFB9",
    }.items()
}
abundance_change2color

In [19]:
from roux.viz.colors import mix_colors

# Table used for the relocalization plots, sorted by gene symbol, with the columns
# below replaced or added.
df2 = df1.sort_values('gene symbol').assign(
    **{
        # Displayed name: capitalized gene symbol followed by 'p'.
        'gene symbol': lambda df: df['gene symbol'].str.capitalize() + 'p',
        # Keep the text before the first '(' and put a down arrow, on its own line, in place of ' to '.
        'relocalization notes': lambda df: (
            df['relocalization notes']
            .apply(lambda x: x.split('(')[0])
            .str.replace(' to ', '\n↓\n')
        ),
        # Horizontal position of the gene on the plot, by abundance change.
        'x': lambda df: df['protein abundance change'].map(
            {'increase': 0, 'ns': 0.5, 'decrease': 1}
        ),
        'abundance compensation/dependency': lambda df: df['protein abundance change'].map(
            {'decrease': 'dependency', 'ns': np.nan, 'increase': 'compensation'}
        ),
        # Text colour looked up by relocalization type.
        'color': lambda df: df['relocalization type'].map(metadata['colors']),
    }
)
df2.head(1)

In [20]:
# Plotting parameters shared by the relocalization plots: the background colour per abundance-change class.
kws_plot = dict(
    colors=dict(
        abundance_change2color=abundance_change2color,
    ),
)

In [21]:
# One figure per relocalization type: gene names placed in three columns (x = 0, 0.5, 1
# for increase, ns, decrease) with the relocalization notes printed below each name.
for label,data in dict(tuple(df2.groupby('relocalization type'))).items():
    # Largest number of genes found in a single abundance-change class of this group.
    genes_per_category_max=data.groupby(['relocalization type','protein abundance change'])['gene symbol'].size().max()
    # Title parameters are stored in `kws_plot` so that they are passed to to_plot() with the rest.
    kws_plot['ax_set_title']=dict(label=f"Relocalization {label} ({data['gene symbol'].nunique()})",#label.capitalize(),
                              color=metadata['colors'][label],
                              loc='left',
                       )
    # Vertical position: rank of the gene symbol within its x column, flipped so that
    # lower ranks get larger y values, and scaled by 1/3.
    data=data.assign(
            **{'y': lambda df : df.groupby('x')['gene symbol'].transform('rank').apply(lambda x: (genes_per_category_max-x)/3)},
        )
    
    # NOTE(review): from here to to_plot() only `data` and `kws_plot` are read; presumably this
    # part is saved with them so the plot can be regenerated - confirm before refactoring.
    begin_plot()
    fig,ax=plt.subplots()
    # Gene name, anchored at its bottom edge, in the colour of the relocalization type.
    data.apply(lambda x: ax.text(x=x['x'],y=x['y'],s=x['gene symbol'],color=x['color'],ha='center',va='bottom',
                               size=10,clip_on=False,
                               ),axis=1)
    # Relocalization notes, anchored at their top edge at the same point, i.e. below the gene name.
    data.apply(lambda x: ax.text(x=x['x'],y=x['y'],s=x['relocalization notes'],color='k',ha='center',va='top',
                               fontfamily='dejavusans',
                                size=8,clip_on=False,
                                linespacing=0.8,
                               ),axis=1)
    # Background bands for the increase, ns and decrease columns; `pad` sets the outer margins
    # and the band boundaries.
    pad=0.24
    ax.axvspan(xmin=-1*pad,xmax=pad,color=kws_plot['colors']['abundance_change2color']['increase'],alpha=0.5,zorder=0,lw=0)
    ax.axvspan(xmin=pad,xmax=1-pad,color=kws_plot['colors']['abundance_change2color']['ns'],alpha=0.5,zorder=0,lw=0)
    ax.axvspan(xmin=1-pad,xmax=1+pad,color=kws_plot['colors']['abundance_change2color']['decrease'],alpha=0.5,zorder=0,lw=0)
    ax.set(xlim=(-1*pad,1+pad),
           ylim=(data['y'].min()-(pad*0.8),
                 data['y'].max()+(pad*0.5)),
          )
    ax.set_title(**kws_plot['ax_set_title'])
    ax.axis(False)
    # NOTE(review): unlike the other to_plot() calls in this notebook, `plotp` has no file extension here.
    to_plot(
       plotp=f"{output_plots_dir_path}/relocalized_genes_{label}",# filename
       data=data, #source data
       kws_plot=kws_plot,# plotting parameters 
        validate=True,
        )

### Pair-wise

In [4]:
# Pair-level table and the plot directory named after it (extension dropped, '_plots/' appended).
input_path = metadata['merged']['pairs']
output_plots_dir_path = f"{splitext(input_path)[0]}_plots/"

In [5]:
# Load the table stored at metadata['merged']['pairs']; this rebinds `df01`, which held the genes table above.
df01=read_table(metadata['merged']['pairs'])
# Show the first row as a quick preview.
df01.head(1)

#### Relative abundance change

In [6]:
# Source data of the plot: pair identifier and its 'protein abundance change paired' value.
data = df01[['pairs', 'protein abundance change paired']].copy()

In [10]:
from roux.viz.colors import to_hex,saturate_color

In [11]:
# Plotting parameters of the paired abundance-change pie: no extra pie keywords,
# and the three base colours from which the wedge colours are derived.
kws_plot = {
    'pie': {},
    'colors': {
        'ns': metadata['colors']['none'],
        'both increase': to_hex(saturate_color(metadata['colors']['compensation'], alpha=0.05)),
        'both decrease': to_hex(saturate_color(metadata['colors']['dependency'], alpha=0.05)),
    },
}

In [14]:
# Pie chart of the number of paralog pairs per paired abundance-change class.
# NOTE(review): the code between begin_plot() and to_plot() reads only `data` and `kws_plot`
# and re-imports its helpers; presumably it is saved as a standalone plot script - confirm before refactoring.
begin_plot()
fig,ax=plt.subplots(figsize=[1.5,1.5])
from roux.viz.colors import to_hex,saturate_color
colors=kws_plot['colors']
from roux.viz.colors import saturate_color,mix_colors
# Count the pairs per class.
ds_=data['protein abundance change paired'].value_counts().sort_index(ascending=False)
# Display names for the two classes in which both paralogs change in the same direction.
ds_=ds_.rename(index={'increase':'both increase','decrease':'both decrease'})
# Fix the wedge order; it has to line up with `explode` and with the colour list below.
# NOTE(review): selecting a label that is absent from the counts is expected to raise KeyError.
ds_=ds_.loc[[
         'ns', 
         'both decrease',
         'decrease & ns',
         'decrease & increase', 
         'increase & ns',
         'both increase', 
        ]]
pie=ds_.plot(kind='pie',
        # `p` is the wedge percentage; the count is recovered from it and the total.
        autopct=lambda p: '{:.0f}%({:.0f})'.format(p,(p/100)*ds_.sum()), 
        # Only the first wedge ('ns') is offset from the centre.
        explode=(0.1,0,0,0,0,0),
        pctdistance=0.85, 
        labeldistance=1.4,
        startangle=90,
         # One colour per wedge, in the order of `ds_`: variants of the 'both decrease' and
         # 'both increase' colours, and a mix of the two for 'decrease & increase'.
         colors=[
                 # mix_colors([get_colors_default()[0],get_colors_default()[2]]),
                 colors['ns'],
                 saturate_color(color=colors['both decrease'],alpha=1.2),
                 saturate_color(color=colors['both decrease'],alpha=0.4),
                 saturate_color(mix_colors([
                                      colors['both decrease'],
                                      colors['both increase'],
                                ]),alpha=0.7),
                 saturate_color(color=colors['both increase'],alpha=0.8),
                 saturate_color(color=colors['both increase'],alpha=1.2),
         ],
        ax=ax)
ax.axis('off')

# Manual label placement.
for child in pie.get_children():
    if isinstance(child,plt.Text):
        # NOTE(review): '1%(1)' is a literal percentage/count label, so this branch only
        # fires when the data produce exactly that text.
        if child.get_text()=='1%(1)':
            child.set_y(child.get_position()[1]+0.25)
        elif child.get_text()=='both increase':
            child.set_x(child.get_position()[0]-0.5)
            child.set_y(child.get_position()[1]+0.1)
        elif child.get_text()=='increase & ns':
            child.set_y(child.get_position()[1]-0.1)
        elif child.get_text()=='decrease & increase':
            child.set_y(child.get_position()[1]-0.1)
ax.set_title('Paralog pairs',
             y=1,
            )
# Save the figure together with its source data and plotting parameters.
to_plot(
    plotp=f"{output_plots_dir_path}/protein_abundance_change.png",
    data=data,
    kws_plot=kws_plot,
    validate=True,
)

In [13]:
f"{output_plots_dir_path}/protein_abundance_change.png"

#### Redistribution

In [27]:
# Source data of the plot: pair identifier and its 'redistribution' value, as an independent copy.
data = df01[['pairs', 'redistribution']].copy()

In [28]:
# Two pie colours: the neutral colour first, then the redistribution colour.
kws_plot = {
    'pie': {
        'colors': [
            metadata['colors']['none'],
            metadata['colors']['redistribution'],
        ],
    },
}

In [29]:
# Pie chart of the number of paralog pairs per redistribution category.
# NOTE(review): the code between begin_plot() and to_plot() reads only `data` and `kws_plot`;
# presumably it is saved together with them so the plot can be regenerated - confirm before refactoring.
begin_plot()
fig,ax=plt.subplots(figsize=[2,2])
# Count the pairs per value of the 'redistribution' column, index sorted in descending order.
ds_=data['redistribution'].value_counts().sort_index(ascending=False)
# `out` keeps the object returned by the plot call, as before.
out=ds_.plot(
    kind='pie',
    # `p` is the wedge percentage; the count is recovered from it and the total.
    autopct=lambda p: '{:.1f}%\n({:.0f})'.format(p,(p/100)*ds_.sum()),
    pctdistance=0.9,
    labeldistance=1.45,
    # NOTE(review): three entries, so three redistribution categories are expected for pairs,
    # while kws_plot['pie'] provides two colours - confirm that this is intended.
    explode=(0.1,0,0),
    startangle=90,
    **kws_plot['pie'],
    ax=ax,
)
ax.axis('off')
# The title is set exactly once; a separate ax.set(title=...) before this call would be
# overwritten by it, because both target the same (centre) title.
ax.set_title('Paralog pairs',
             y=1.2,
            )
# Save the figure together with its source data and plotting parameters.
# NOTE(review): `validate` follows the `test` parameter here, whereas the other to_plot()
# calls in this notebook pass validate=True - confirm that this is intended.
to_plot(
    plotp=f"{output_plots_dir_path}/redistribution.png",
    data=data,
    kws_plot=kws_plot,
    validate=test,
)