# Explanatory features

Analysis of the explanatory features, including the network features.

In [1]:
## logging functions
from icecream import ic as info
import logging
## system functions
from os.path import dirname
from os.path import splitext
import sys
## visualization functions
import matplotlib.pyplot as plt
import seaborn as sns
## visualization functions from roux
from roux.viz.io import begin_plot
## data functions from roux
import roux.lib.dfs as rd # attributes
sys.path.append('..')

In [2]:
## parameters
# path of the project-level metadata file (YAML)
metadata_path = '../config/metadata.yaml'
# the backup of old outputs in the next cell runs only when this is True
force = False
# test-mode switch; read by later cells (metadata inputs, plot validation)
test = True

In [3]:
## inferred parameters
# NOTE(review): `read_metadata` and `backup` are not imported in the visible
# cells -- confirm they are brought into scope elsewhere.
# in test mode, the version number 'test' is passed as an input override
metadata = read_metadata(
    metadata_path,
    inputs={'version': {'number': 'test'}} if test else None,
)
# dataset-specific configuration, read with the species name and the dataset path as the base
metadata['dataset'] = read_metadata(
    metadata['dataset_config_path'],
    config_base={
        'species_name': metadata['species_name'],
        'path': metadata['dataset_path'],
    },
)
### output
output_dir_path = metadata['processed']['predictors']
logging.info(f"Output directory: {output_dir_path}")
## backup old files if overwriting (force is True)
if force:
    backup(
        output_dir_path,
        dirname(output_dir_path),
        test=not force,  # always False inside this branch
    )

## Paired features

In [7]:
%reset_selective -f "^df.*"
id_type='pairs'
df01=(
    read_table(metadata['merged'][id_type])
    .merge(
        right=read_table(metadata['predictors']['mapped']['predictors'][id_type]),
        how='left',
        on='pairs',
        validate="1:1",
    )
    )
df01.head(1)

In [8]:
# prefix for the plots: the processed predictors path, minus its extension, plus '_plots/'
_output_plots_dir_path = f"{splitext(metadata['predictors']['processed']['predictors'][id_type])[0]}_plots/"
_output_plots_dir_path

### Functional similarity

#### Shortest path length

In [13]:
# names of the columns that contain 'shortest path '
(
    df01
    .filter(like='shortest path ')
    .columns
    .tolist()
)

In [14]:
from roux.viz.dist import plot_dists

# one plot per interaction network: (column used on the x-axis, x-axis label)
for col, xlabel in [
    ('GIs shortest path length', 'Genetic\ninteractions'),
    ('PPIs shortest path length', 'Protein-protein\ninteractions'),
]:
    kws_plot = {
        # arguments forwarded to `plot_dists`
        'dists': {
            'x': col,
            'y': 'redistribution score min',
            'order': ['1', '>1'],
            'colindex': 'genes id',
        },
        # arguments forwarded to `ax.set`
        'ax_set': {
            'title': 'Shortest path length',
            'xlabel': xlabel,
            'ylabel': 'redistribution score',
        },
    }

    # keep the rows in which the x variable is available
    # (`.log` accessor: presumably reports the change in shape -- confirm)
    data = df01.log.dropna(subset=[kws_plot['dists']['x']])

    ## plot
    begin_plot()
    fig, ax = plt.subplots(1, 1)
    ax = plot_dists(
        data,
        kind=['box', 'strip'],
        ax=ax,
        **kws_plot['dists'],
    )
    ## formatting
    ax.set(**kws_plot['ax_set'])

    # NOTE(review): the other plotting cells of this notebook pass
    # `validate=test` to `to_plot`; this call does not -- confirm it is intended.
    to_plot(
        ax,
        prefix=_output_plots_dir_path,  # filename
        data=data,  # source data
        kws_plot=kws_plot,  # plotting parameters
    )

#### Shared interactors, applicable to PPIs

In [15]:
from roux.viz.dist import plot_dists

# (column used on the x-axis, x-axis label); only the PPIs are plotted here
for col, xlabel in [
    ("PPIs interactors intersection", 'Protein-protein\ninteractions'),
]:
    kws_plot = {
        # arguments forwarded to `plot_dists`
        'dists': {
            'x': col,
            'y': 'redistribution score min',
            'order': ['low', 'high'],
            'colindex': 'genes id',
        },
        # arguments forwarded to `ax.set`
        'ax_set': {
            'title': 'interactors sharing',
            'xlabel': xlabel,
            'ylabel': 'redistribution score',
        },
    }

    # keep the rows in which the x variable is available
    data = df01.log.dropna(subset=[kws_plot['dists']['x']])

    ## plot
    begin_plot()
    fig, ax = plt.subplots(1, 1)
    ax = plot_dists(
        data,
        kind=['box', 'strip'],
        ax=ax,
        **kws_plot['dists'],
    )
    ## formatting
    ax.set(**kws_plot['ax_set'])
    to_plot(
        ax,
        prefix=_output_plots_dir_path,  # filename
        data=data,  # source data
        kws_plot=kws_plot,  # plotting parameters
        validate=test,
    )

## Gene features

In [4]:
%reset_selective -f "^df.*"
id_type='genes'
df01=(
    read_table(metadata['merged'][id_type])
    .merge(
        right=read_table(metadata['predictors']['mapped']['predictors'][id_type]).drop(['gene symbol'],axis=1),
        how='left',
        on=['gene id'],
        validate="1:1",
    )
    )
df01.head(1)

In [5]:
# sanity check: at least one row pairs the symbol CUE1 with the id YMR264W;
# on failure, the gene columns of the rows for that id are shown
assert (
    (df01['gene symbol'] == 'CUE1') & (df01['gene id'] == 'YMR264W')
).any(), df01.query(expr="`gene id`=='YMR264W'").filter(like='gene')

In [6]:
# prefix for the plots: the processed predictors path, minus its extension, plus '_plots/'
_output_plots_dir_path = f"{splitext(metadata['predictors']['processed']['predictors'][id_type])[0]}_plots/"
_output_plots_dir_path

#### Functional redundancy: Negative GIs

In [19]:
from roux.viz.dist import plot_dists

# work on a copy of the gene-level table
data = df01.copy()
# print the number of genes in each class
info(data['negative interactions'].value_counts())
kws_plot = {
    # arguments forwarded to `plot_dists`
    'dists': {
        'x': 'negative interactions',
        'y': 'redistribution score',
        'order': ['no', 'yes'],
        'colindex': 'gene id',
    },
    # no extra axis settings for this plot
    'ax_set': {},
}

## plot
begin_plot()
fig, ax = plt.subplots(1, 1)
ax = plot_dists(
    data,
    kind=['box', 'strip'],
    ax=ax,
    **kws_plot['dists'],
)
## formatting
ax.set(**kws_plot['ax_set'])

to_plot(
    ax,
    prefix=_output_plots_dir_path,
    data=data,  # source data
    kws_plot=kws_plot,  # plotting parameters
    validate=test,
)

#### Trigenic interaction fraction class

In [20]:
from roux.viz.dist import plot_dists

# (column used on the x-axis, order of its classes)
for x, order in [
    ('Trigenic interaction\nfraction', ['Low', 'High']),
]:
    kws_plot = {
        # arguments forwarded to `plot_dists`
        'dists': {
            'x': x,
            'y': 'redistribution score',
            'order': order,
            'colindex': 'gene id',
        },
        # arguments forwarded to `ax.set`
        'ax_set': {
            'xlabel': x,  # None,
        },
    }
    # keep the genes for which the x variable is available
    data = df01.dropna(subset=[x]).copy()

    ## plot
    begin_plot()
    fig, ax = plt.subplots(1, 1)
    ax = plot_dists(
        data,
        kind=['box', 'strip'],
        ax=ax,
        **kws_plot['dists'],
    )
    ## formatting
    ax.set(**kws_plot['ax_set'])

    to_plot(
        ax,
        prefix=_output_plots_dir_path,  # filename
        data=data,  # source data
        kws_plot=kws_plot,  # plotting parameters
        validate=test,
    )

### Co-localization of private interactors with the partner

In [10]:
from roux.viz.sets import plot_intersection_counts

# one plot per interaction network
for k in ['GIs', 'PPIs']:
    kws_plot = {
        # arguments forwarded to `plot_intersection_counts`
        'crosstab': {
            'cols': [
                'redistribution',
                f'locations shared with private {k} interactors(>=50%)',
            ],
        },
        'ax_set': {
            # text before the first newline of k (k itself here, as it has no newline)
            'title': k.split('\n')[0],
        },
        # arguments forwarded to `ax.legend`
        'ax_legend': {
            'title': 'Co-localization with private interactors',
        },
    }
    # gene identifiers plus the two crossed columns; rows with a missing value in any of them are removed
    data = df01.loc[
        :,
        ['gene id', 'gene symbol', *kws_plot['crosstab']['cols']],
    ].dropna()
    ## plot
    begin_plot()
    fig, ax = plt.subplots(1, 1)
    plot_intersection_counts(
        data,
        **kws_plot['crosstab'],
        kind='bar',
        order_x=[True, False],
        ax=ax,
    )
    # the current y-limits are reversed, which flips the y-axis
    ax.set(xlabel='paralogs', ylim=ax.get_ylim()[::-1])
    # legend anchored below the axes, centered
    ax.legend(
        ncol=2,
        loc='upper center',
        bbox_to_anchor=[0.5, -0.2],
        **kws_plot['ax_legend'],
    )
    sns.despine(trim=False, ax=ax)
    to_plot(
        ax,
        prefix=f'{_output_plots_dir_path}',  # filename
        data=data,  # source data
        kws_plot=kws_plot,  # plotting parameters
        validate=test,
    )