## Notebook to visualize colocalization results as a heatmap

In [None]:
# shell escape: print the current date and time when this cell is executed
!date

#### import libraries

In [None]:
from pandas import read_csv, concat, pivot
import matplotlib.pyplot as plt
from matplotlib.pyplot import rc_context
from seaborn import heatmap
from os.path import exists
from math import ceil
import numpy as np

%matplotlib inline
# for white background of figures (only for docs rendering)
%config InlineBackend.print_figure_kwargs={'facecolor' : "w"}
%config InlineBackend.figure_format='retina'

#### set notebook variables

In [None]:
# Notebook configuration: everything a reader might tune lives in this cell.
# Plain variables are assigned first so that every derived path below can
# interpolate them on a fresh kernel (figure_file depends on dx).

# naming
cohort = 'foundin'
dx = 'PD'

# variables
DEBUG = True
days = ['da0', 'da25', 'da65']
modalities = ['ATAC', 'METH', 'PDUI', 'RNAB', 'RNAS', 'SCRN-DA', 'SCRN-ElC',
              'SCRN-eNP', 'SCRN-iDA', 'SCRN-lNP', 'SCRN-NlC', 'SCRN-PFPP']
dpi_value = 100

# directories
wrk_dir = '/home/gibbsr/working/foundin/foundin_qtl'
results_dir = f'{wrk_dir}/results'
figures_dir = f'{wrk_dir}/figures'

# out files
figure_file = f'{figures_dir}/{cohort}.colocalization.{dx}.png'

### for each day and modality load the colocalization results

In [None]:
# Load the colocalization results for every day/modality pair that has a
# results file, tag each frame with its day and modality, and combine them
# into the long-format coloc_df used by the rest of the notebook.
coloc_frames = []
for day in days:
    for modality in modalities:
        in_file = f'{results_dir}/{cohort}_{day}_{modality}_{dx}.coloc.pp.csv'
        if not exists(in_file):
            # a missing file is skipped; log it on its own line so the
            # per-pair messages do not run together
            print(f'{day} {modality}:no results file found')
            continue
        this_df = read_csv(in_file)
        print(f'{day} {modality}:loaded {this_df.shape[0]} results')
        # add day and modality
        this_df['day'] = day
        this_df['modality'] = modality
        coloc_frames.append(this_df)

# fail with a clear message instead of an AttributeError on None further down
if not coloc_frames:
    raise FileNotFoundError(
        f'no colocalization results found in {results_dir} for {cohort} {dx}')

# concatenate once rather than growing the frame inside the loop
coloc_df = concat(coloc_frames)
print(f'\ntotal results loaded {coloc_df.shape[0]}')
if DEBUG:
    # sample() raises when asked for more rows than exist, so cap the request
    display(coloc_df.sample(min(5, coloc_df.shape[0])))
    display(coloc_df.day.value_counts())
    display(coloc_df.modality.value_counts())

### reshape the dataframe from long to wide

In [None]:
# Reshape the long results into a feature x (day, modality) table of H4 values.
# Built as a single chain from coloc_df so re-running the cell always gives the
# same wcoloc_df (no in-place mutation of an earlier result).
wcoloc_df = (
    coloc_df
    # drop the cols we don't need
    .drop(columns=['H0', 'H1', 'H2', 'H3', 'h4_supported'])
    # sorting kept from the original cell; column order may still need to be
    # set explicitly when plotting
    .sort_values(by=['modality', 'day'])
    # one row per feature, one column per (day, modality) pair
    .pivot(index='feature', columns=['day', 'modality'], values='H4')
    # set precision
    .round(2)
    # replace all zeros with NaN values so they are left blank downstream
    .replace(0, np.nan)
    # drop rows that are all null
    .dropna(how='all')
)
print(f'shape of wide reformated results {wcoloc_df.shape}')
if DEBUG:
    display(wcoloc_df)

### visualize the reformatted data as a heatmap

In [None]:

# Draw the wide H4 table as an annotated heatmap and save it to figure_file.

# grow the figure height with the number of rows so the annotations stay
# readable; tables of 9 rows or fewer use the base height
n_features = wcoloc_df.shape[0]
height = 9 + ceil(n_features / 6) if n_features > 9 else 9
print(dx, height)

# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases removed the old names, so use whichever spelling this
# installation provides (and skip styling if neither is available)
style_name = next((name for name in ('seaborn-v0_8-bright', 'seaborn-bright')
                   if name in plt.style.available), None)

with rc_context({'figure.figsize': (9, height), 'figure.dpi': dpi_value}):
    if style_name is not None:
        plt.style.use(style_name)
    heatmap(wcoloc_df, annot=True, linecolor='grey',
            annot_kws={"fontsize":10}, linewidths=0.05, cmap='Blues')
    plt.title(f'Colocalization H4 for {dx} and QTL')
    plt.savefig(figure_file, dpi=dpi_value, bbox_inches='tight',
                transparent=True, pad_inches=1)
    plt.show()

In [None]:
# shell escape: print the current date and time when this cell is executed
!date