## Notebook for plotting feature-specific QTL results

In [None]:
!date

#### import libraries

In [None]:
from pandas import read_parquet, DataFrame
import numpy as np
from seaborn import relplot, lmplot
import statsmodels.formula.api as smf

import matplotlib.pyplot as plt
from matplotlib.pyplot import rc_context

%matplotlib inline
# for white background of figures (only for docs rendering)
%config InlineBackend.print_figure_kwargs={'facecolor' : "w"}
%config InlineBackend.figure_format='retina'

#### set notebook variables

In [None]:
# parameters
# NOTE(review): the `# parameters` marker suggests these defaults are overridden
# by a notebook runner (e.g. papermill) — confirm before changing the layout.
day = 'daNA'  # day label; becomes part of set_name
modality = 'DAn-meta'  # modality label; becomes part of set_name
feature = 'CCAR2'  # feature ID used to subset the results table
feature_name = 'CCAR2'  # display name used in plot titles and the figure file name

In [None]:
# naming
cohort = 'foundin'
dx = 'PD'
# prefix shared by the input and output file names of this analysis set
set_name = f'{cohort}_{day}_{modality}'

# directories
wrk_dir = '/home/jupyter/foundin_qtl'
results_dir = f'{wrk_dir}/results'
figures_dir = f'{wrk_dir}/figures'

# input files
# NOTE(review): 'casuals' (rather than 'causals') is presumably how the upstream
# step names the file on disk — confirm before correcting the spelling.
coloc_casuals_files = f'{results_dir}/{set_name}_{dx}.casuals.pp.parquet'

# output files
# target of the plt.savefig call in plot_gwas_qtl
figure_file = f'{figures_dir}/{set_name}.{feature_name}.png'

# variables
DEBUG = False  # when True, cells also display the head of the loaded tables
dpi_value = 100  # used as 'figure.dpi' while plotting and as the savefig dpi

#### load input data

In [None]:
# read the full results table for this analysis set from parquet
results_df = read_parquet(coloc_casuals_files)
print(f'full results shape {results_df.shape}')
# optional peek at the first rows (display is provided by IPython)
if DEBUG:
    display(results_df.head())

#### subset results for specified feature

In [None]:
# Subset the full results table to the rows belonging to `feature`; which
# column identifies the feature depends on the layout of the results table.
if 'MarkerName' in results_df.columns:
    # keep rows whose MarkerName starts with '<feature>:'
    # NOTE(review): no column renaming happens in this branch; later cells read
    # p_value_risk / p_value_qtl — confirm this layout already provides them.
    feature_results_df = results_df.loc[results_df.MarkerName.str.startswith(f'{feature}:')]
elif 'trait' in results_df.columns:
    feature_results_df = results_df.loc[results_df.trait == feature]
    # rename p-values columns for plotting later
    feature_results_df = feature_results_df.rename(columns={'p_value': 'p_value_risk', 'pvalue': 'p_value_qtl'})
else:
    # stop here: continuing would hit an undefined feature_results_df, or
    # silently reuse a stale one left over from an earlier run of this cell
    raise ValueError('results type for QTL is not tensorQTL or Metal+')
print(f'{feature} results shape {feature_results_df.shape}')
if feature_results_df.empty:
    # nothing to plot; fail with a clear message instead of an indexing error
    raise ValueError(f'no results found for feature {feature}')
# get region
# the subset keeps the row labels of the full table, so label 0 is usually not
# present; take the first row by position instead
chrom = feature_results_df.chromosome.iloc[0]
start = feature_results_df.base_pair_location.min()
end = feature_results_df.base_pair_location.max()
print(f'feature {feature} region {chrom} from {start} to {end}')
if DEBUG:
    display(feature_results_df.head())

In [None]:
# list the column names of the full results table
print(results_df.columns.values)

#### now do the plotting

In [None]:
from seaborn import scatterplot
#plot local manhattan for feature qtl
def plot_manhattan(df: DataFrame, name: str, chrom: str, signal: str):
    """Scatter one signal column against base-pair position and show the plot.

    Args:
        df: table holding a 'base_pair_location' column and the `signal` column.
        name: feature name used in the plot title.
        chrom: chromosome label used in the x-axis label.
        signal: name of the column in `df` plotted on the y-axis.

    The y-axis label is always 'logABF'. The figure DPI comes from the
    notebook-level `dpi_value`.
    """
    with rc_context({'figure.figsize': (12, 9), 'figure.dpi': dpi_value}):
        # matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*'
        # and 3.8 removed the old names, so use whichever one is installed
        for style_name in ('seaborn-v0_8-bright', 'seaborn-bright'):
            if style_name in plt.style.available:
                plt.style.use(style_name)
                break
        scatterplot(x='base_pair_location', y=signal, data=df)

        plt.title(f'{name} qtl', fontsize='large')
        plt.xlabel(f'Bp on chromosome {chrom}')
        plt.ylabel('logABF')
        plt.show()

In [None]:
plot_manhattan(feature_results_df, feature_name, chrom, 'logABF_qtl')

In [None]:
plot_manhattan(feature_results_df, feature_name, chrom, 'logABF_risk')

In [None]:
# plot PD GWAS risk against the feature QTL signal (p-values and logABF)
def plot_gwas_qtl(df: DataFrame, name: str, chrom: str):
    """Plot the risk signal against the QTL signal for one feature.

    Three figures are shown in order: a scatter of -log10 p-values, a scatter
    of logABF values, and a regression plot (lmplot) of the logABF values.
    Only the last figure is written to the notebook-level `figure_file`.

    Args:
        df: table with 'p_value_risk', 'p_value_qtl', 'logABF_risk' and
            'logABF_qtl' columns.
        name: feature name used in titles and y-axis labels.
        chrom: not used by this function; kept so existing calls keep working.
    """
    # NOTE(review): relplot and lmplot are seaborn figure-level functions that
    # size their own figures, so the figsize below likely has no effect — confirm.
    with rc_context({'figure.figsize': (12, 9), 'figure.dpi': dpi_value}):
        # matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*'
        # and 3.8 removed the old names, so use whichever one is installed
        for style_name in ('seaborn-v0_8-bright', 'seaborn-bright'):
            if style_name in plt.style.available:
                plt.style.use(style_name)
                break

        # p-values on the -log10 scale
        relplot(x=-np.log10(df['p_value_risk']),
                y=-np.log10(df['p_value_qtl']),
                alpha=.5, data=df)
        plt.title(f'PD risk and {name} QTL', fontsize='large')
        plt.xlabel('PD GWAS risk -log10(p-value)')
        plt.ylabel(f'{name} QTL -log10(p-value)')
        plt.show()

        # logABF values
        relplot(x='logABF_risk',
                y='logABF_qtl',
                alpha=.5, data=df)
        plt.title(f'PD risk and {name} QTL', fontsize='large')
        plt.xlabel('PD GWAS logABF')
        plt.ylabel(f'{name} QTL logABF')
        plt.show()

        # logABF values with a fitted regression line; this is the saved figure
        lmplot(x='logABF_risk',
               y='logABF_qtl', data=df)
        plt.title(f'PD risk and {name} QTL', fontsize='large')
        plt.xlabel('PD GWAS logABF')
        plt.ylabel(f'{name} QTL logABF')
        # save before show(), while the figure is still the current one
        plt.savefig(figure_file, dpi=dpi_value, bbox_inches='tight',
                    transparent=True, pad_inches=1)
        plt.show()

In [None]:
plot_gwas_qtl(feature_results_df, feature_name, chrom)

In [None]:
# ordinary least squares fit of the risk p-values on the QTL p-values over the
# rows of feature_results_df; prints the statsmodels summary table
this_formula = 'p_value_risk ~ p_value_qtl'
reg_model = smf.ols(this_formula, data=feature_results_df).fit()
print(reg_model.summary())

In [None]:
# ordinary least squares fit of the risk logABF on the QTL logABF over the rows
# of feature_results_df; prints the statsmodels summary table
this_formula = 'logABF_risk ~ logABF_qtl'
reg_model = smf.ols(this_formula, data=feature_results_df).fit()
print(reg_model.summary())

In [None]:
!date