## Notebook to visualize omics tracks for GBA, SNCA, and LRRK2 for the FOUNDIN-PD Epigenetics project

Start with a single figure of localized tracks for each region, including GWAS, gene annotations, 'risk' ATAC peaks, <i>cis</i> correlations between the gene and ATAC peaks, and then DNA methylation sites. Also include ENCODE CREs, then try to incorporate the Hi-C data and SCAT co-accessibility.

using pyGenomeTracks to generate the figure
- [pyGenomeTracks github](https://github.com/deeptools/pyGenomeTracks)
- [pyGenomeTracks docs](https://pygenometracks.readthedocs.io/en/latest/index.html)

In [None]:
# print the system date/time at the start of the run
!date

#### import libraries

In [None]:
from pandas import read_parquet, read_csv, DataFrame, read_hdf, concat
from math import ceil, floor
from numpy import log10
from IPython.display import Image
from os.path import exists
import math

#### set notebook variables

In [None]:
# parameters
# per-locus inputs for the figure
# NOTE(review): presumably overridden per gene by a notebook runner — confirm
gene = 'SNCA' # 'GBA1', 'SNCA', or 'LRRK2'
gene_id = 'ENSG00000145335.15' # 'ENSG00000177628.15', 'ENSG00000145335.15', or 'ENSG00000188906.15'
# integer chromosome: compared directly against the GWAS and LD chromosome columns
# and interpolated as 'chr{chrom}' for peak/feature names and file paths
chrom = 4
# region bounds are given as strings here and cast to int in the region-info cell
start_bp = '89650345'
stop_bp = '89938315'

In [None]:
# naming
cohort = 'foundin'
dx = 'PD'

# directories
# NOTE: absolute path; every other location below is derived from it
wrk_dir = '/labshare/raph/datasets/foundin_qtl'
quants_dir = f'{wrk_dir}/quants'
results_dir = f'{wrk_dir}/results'
public_dir = f'{wrk_dir}/public'
figures_dir = f'{wrk_dir}/figures'
tensorqtl_dir = f'{wrk_dir}/tensorqtl'
# info_dir is not referenced by the cells visible in this notebook
info_dir = f'{wrk_dir}/sample_info'

# in files
gwas_sum_stats_file = f'{public_dir}/nalls_pd_gwas/pd_sumstats_23andme_buildGRCh38.tsv.gz'
ld_variants_file = f'{public_dir}/ld_info/{cohort}_{dx}.ld_prime.csv'
gene_annot_file = f'{public_dir}/hg38.ncbiRefSeq.gtf.gz'
# risk_peaks_bed is not referenced by the cells visible in this notebook
risk_peaks_bed = f'{quants_dir}/{cohort}_risk_peaks.bed'
encode_bb_file = f'{public_dir}/encodeCcreCombined.bb'
fracs_atac_corr_file = f'{results_dir}/{cohort}_ATAC_{gene}_cis_features_cell_fractions.csv'
fracs_meth_corr_file = f'{results_dir}/{cohort}_METH_{gene}_cis_features_cell_fractions.csv'

# out files
# everything under {figures_dir}/tracks is an intermediate input for pyGenomeTracks
figure_file = f'{figures_dir}/{cohort}.{gene}.epigenetics.png'
config_file = f'{figures_dir}/tracks/{cohort}.{gene}.epigenetics.ini'
gwas_bedgraph_file = f'{figures_dir}/tracks/{dx}.gwas.{gene}.bedgraph'
gwas_ld_bedgraph_file = f'{figures_dir}/tracks/{dx}.gwas.{gene}.ld.bedgraph'
# risk_peaks_bedgraph_file is not referenced by the cells visible in this notebook
risk_peaks_bedgraph_file  = f'{figures_dir}/tracks/{dx}.risk_peaks.{gene}.bedgraph'
encode_bed_file = f'{figures_dir}/tracks/{dx}.{gene}.encodeCcreCombined.bed'

# variables
# DEBUG gates the display() previews and command echoing throughout the notebook
DEBUG = False
# accumulator for the pyGenomeTracks .ini text; the config cells append to it in track order
config_content = ''
# newline held in a variable for use inside the f-string config templates
nl = '\n'
# FLANK_PAD is not referenced by the cells visible in this notebook
FLANK_PAD = 5000
# GWAS p-value cutoff used to pick the 'risk' variants and the LD overlay points
SUG_THRESHOLD = 1.00e-05
# co-accessibility scores must be strictly greater than this to be drawn as arcs
MIN_CICERO = 0.2
days = ['da0', 'da25', 'da65']
day_colors = {'da0': 'black', 'da25': 'green', 'da65': 'blue'}

#### utility functions

In [None]:
# run a command line process with bash magic
# make this a small function so it can be target function for parallelization if needed
def run_bash_cmd(this_cmd: str, verbose: bool=False) -> None:
    """Run a shell command through IPython's `!` shell escape.

    Args:
        this_cmd: the complete command line to execute.
        verbose: when True, print the command before running it.
    """
    if verbose:
        print(this_cmd)
    # IPython-only syntax: `{this_cmd}` is expanded from the Python variable
    !{this_cmd}

### set the region info

In [None]:
# the region bounds arrive as strings from the parameters cell; cast both to
# integers so they can be compared against genomic positions (re-running is safe)
start_bp, stop_bp = int(start_bp), int(stop_bp)
print(gene, chrom, start_bp, stop_bp)

### GWAS track

#### load the summary stats

In [None]:
%%time
# read the full tab-separated GWAS summary statistics table; it is subset to
# the region of interest in a later cell
gwas_df = read_csv(gwas_sum_stats_file, sep='\t')
print(gwas_df.shape)
if DEBUG:
    display(gwas_df.sample(5))

#### load the LD info

In [None]:
# read the LD table and report its full size
ld_df = read_csv(ld_variants_file)
print(ld_df.shape)
# keep only the pairs whose B variant lies on the target chromosome and inside
# the plotted interval (both bounds inclusive)
ld_on_chrom = ld_df['CHR_B'] == chrom
ld_in_window = ld_df['BP_B'].between(start_bp, stop_bp)
ld_df = ld_df.loc[ld_on_chrom & ld_in_window]
print(ld_df.shape)
if DEBUG:
    display(ld_df.head())

#### subset the GWAS results to the region and identify the suggestive-significance risk variants

In [None]:
# restrict the summary statistics to the target chromosome and the plotted
# interval (both bounds inclusive)
gwas_on_chrom = gwas_df['chromosome'] == chrom
gwas_in_window = gwas_df['base_pair_location'].between(start_bp, stop_bp)
gwas_df = gwas_df.loc[gwas_on_chrom & gwas_in_window]
# variants at or below the suggestive p-value threshold within the region
risk_df = gwas_df.loc[gwas_df['p_value'].le(SUG_THRESHOLD)]
print(f'shape of GWAS risk {risk_df.shape}')
if DEBUG:
    display(risk_df.head())

#### convert gwas df to bedgraph

In [None]:
# apply the region filter again; the bounds are the same ones used in the
# previous cell, so this only matters if that cell was skipped
gwas_df = gwas_df.loc[(gwas_df['chromosome'] == chrom) & 
                      gwas_df['base_pair_location'].between(start_bp, stop_bp)]
# build the five bedgraph columns directly: each variant is a 1 bp interval and
# the signal is -log10 of its p-value
gwas_bed = DataFrame({'chr': gwas_df['chromosome'],
                      'position': gwas_df['base_pair_location'],
                      'end': gwas_df['base_pair_location'] + 1,
                      'signal': -log10(gwas_df['p_value']),
                      'id': gwas_df['variant_id']})
print(gwas_bed.shape)
if DEBUG:
    display(gwas_bed.sample(5))
# save the gwas bedgraph, tab separated with no header or index
gwas_bed.to_csv(gwas_bedgraph_file, sep='\t', header=False, index=False)

#### subset gwas bed to create bedgraph for LD variants to overlay

In [None]:
# a variant is overlaid when it appears on either side of an LD pair and its
# signal reaches the suggestive threshold on the -log10 scale
in_ld_pairs = gwas_bed['id'].isin(ld_df['SNP_A']) | gwas_bed['id'].isin(ld_df['SNP_B'])
is_suggestive = gwas_bed['signal'] >= -log10(SUG_THRESHOLD)
ld_gwas_bed = gwas_bed.loc[in_ld_pairs & is_suggestive]
print(ld_gwas_bed.shape)
if DEBUG:
    display(ld_gwas_bed.head())

#### save the gwas ld bedgraph

In [None]:
# write the LD overlay points as a tab-separated file with no header or index
ld_gwas_bed.to_csv(gwas_ld_bedgraph_file, sep='\t', header=False, index=False)

### cis RNAB ~ ATAC correlation tracks

#### load the chromosome file containing the genes cis ATAC correlation results, convert correlation df to bedgraph, and save correlation bedgraph

In [None]:
def format_correlation_bedgraph(gene: str, chrom: str, 
                                in_file: str, out_file: str, alpha: float=1.0,
                                verbose: bool=True) -> tuple:
    """Write one gene's cis correlation results to a bedgraph-style file.

    Reads the parquet results in `in_file`, keeps the rows whose `phenotype_id`
    equals `gene` and whose `pval_nominal` is at most `alpha`, and writes
    tab-separated chrom/start/stop/signal/id rows (no header, no index) to
    `out_file`. Coordinates are parsed from `variant_id`, which is split on
    '_' into chrom, start and stop; the signal is the `slope` column.

    Args:
        gene: value matched against the `phenotype_id` column.
        chrom: kept for interface compatibility; not used inside the function.
        in_file: path of the parquet results to read.
        out_file: path of the bedgraph file to write.
        alpha: largest nominal p-value to keep (default keeps everything <= 1.0).
        verbose: when True, display the head of the filtered and formatted tables.

    Returns:
        (min, max) of the written signal column. When nothing passes the filter
        an empty file is written and (nan, nan) is returned, so callers that
        compare with `<` / `>` leave their running range untouched.
    """
    bed_columns = ['chrom', 'start', 'stop', 'signal', 'id']
    corr_df = read_parquet(in_file)
    print(corr_df.shape)
    # get qtl results for gene of interest
    corr_df = corr_df.loc[(corr_df.phenotype_id == gene) & 
                          (corr_df.pval_nominal <= alpha)]
    print(corr_df.shape)
    if verbose:
        display(corr_df.head())

    if corr_df.shape[0] == 0:
        corr_bed = DataFrame(data=None, columns=bed_columns)
    else:
        # these variant IDs are position named (chrom_start_stop) so split and use
        peak_info = corr_df.variant_id.str.split('_', expand=True)
        # corr_bed['signal'] = corr_df.slope/corr_df.slope_se
        corr_bed = DataFrame(data={'chrom': peak_info[0],
                                   'start': peak_info[1].astype(int),
                                   'stop': peak_info[2].astype(int),
                                   'signal': corr_df.slope,
                                   'id': corr_df.variant_id})
    print(corr_bed.shape)
    if verbose:
        display(corr_bed.head())
    corr_bed.to_csv(out_file, index=False, header=False, sep='\t')
    if corr_bed.shape[0] == 0:
        # make the no-data result explicit rather than relying on what an
        # empty column's min()/max() happens to return
        return float('nan'), float('nan')
    return corr_bed.signal.min(), corr_bed.signal.max()

In [None]:
%%time
cis_atac_min_value = 0
cis_atac_max_value = 1
for day in days:
    corr_file = f'{tensorqtl_dir}/{cohort}_{day}_RNAB-ATAC.cis_qtl_pairs.chr{chrom}.parquet'
    corr_bedgraph_file = f'{figures_dir}/tracks/{dx}.{day}_ciscorr.{gene}.bedgraph'
    this_min, this_max = format_correlation_bedgraph(gene_id, chrom, corr_file, 
                                                     corr_bedgraph_file, verbose=DEBUG)
    if this_min < cis_atac_min_value:
        cis_atac_min_value = this_min
    if this_max > cis_atac_max_value:
        cis_atac_max_value = this_max

### <i>cis</i> ATAC correlated with cell-type fractions

#### load the cell fraction correlation results

In [None]:
fracs_df = read_csv(fracs_atac_corr_file, index_col=0)
print(f'shape of full cell fraction correlations {fracs_df.shape}')

# the exo IDs are position named (chrom_start_stop) so split and use
peak_info = fracs_df.exo.str.split('_', expand=True)
features_df = DataFrame(data={'chrom': peak_info[0], 
                              'start': peak_info[1].astype(int), 
                              'stop': peak_info[2].astype(int), 
                              'feature': fracs_df.exo}).drop_duplicates()
print(f'features full shape {features_df.shape}')
# subset to features that lie entirely inside the interval
features_df = features_df.loc[(features_df.chrom == f'chr{chrom}') & 
                              (features_df.start >= start_bp) & 
                              (features_df.stop <= stop_bp)]
print(f'features region shape {features_df.shape}')

# subset to peaks in the region
fracs_df = fracs_df.loc[fracs_df.exo.isin(features_df.feature)]
print(f'shape of cell fraction correlations in region {fracs_df.shape}')

# now merge the genomic position info into the results
fracs_df = fracs_df.merge(features_df, how='left', 
                          left_on='exo', right_on='feature')
print(f'shape of merge in region {fracs_df.shape}')

# drop rows with a missing z-score, as the METH cell-fraction cell does, so the
# per-cell-type bedgraphs are never written with an empty signal field
fracs_df = fracs_df.loc[fracs_df.z.notna()]
print(f'shape in region after dropping null scores {fracs_df.shape}')

# set min max values; both are NaN when no rows remain, so fall back to [0, 1]
cis_frac_atac_min_value = fracs_df.z.min()
cis_frac_atac_max_value = fracs_df.z.max()

if math.isnan(cis_frac_atac_min_value):
    cis_frac_atac_min_value = 0
if math.isnan(cis_frac_atac_max_value):
    cis_frac_atac_max_value = 1

if DEBUG:
    display(fracs_df.head())

#### create bedgraph per cell-type

In [None]:
# cell-type names as they appear in the `endo` column, mapped to the short
# labels used in file names and track titles
cell_abbreviations = {'DopaminergicNeurons': 'DA', 
                      'ImmatureDopaminergicNeurons': 'iDA', 
                      'TH_Pel-Freez_ICC': 'TH-ICC'}
atac_bed_columns = ['chrom', 'start', 'stop', 'z', 'feature']
for cell_type, cell_abbrv in cell_abbreviations.items():
    print(cell_abbrv, cell_type)
    # rows of the regional results that belong to this cell type
    cell_fracs_df = fracs_df[fracs_df['endo'].eq(cell_type)]
    print(f'shape of cell fraction correlations in region for {cell_abbrv} is {cell_fracs_df.shape}')
    fracs_bed = cell_fracs_df.loc[:, atac_bed_columns].copy()
    print(fracs_bed.shape)
    if DEBUG:
        display(fracs_bed.head())
    # write the bedgraph, tab separated with no header or index
    fracs_corr_bedgraph_file = f'{figures_dir}/tracks/{dx}.{cell_abbrv}.ATAC.corr_cell_fracs.{gene}.bedgraph'
    fracs_bed.to_csv(fracs_corr_bedgraph_file, sep='\t', header=False, index=False)

### ATAC co-accessibility

In [None]:
def format_cicero_arcs(cicero_df: DataFrame, chrom: str, verbose: bool=False,
                       region_start: int=None, region_stop: int=None,
                       min_coaccess: float=None) -> DataFrame:
    """Convert Cicero connections into an arcs table for one region.

    Column 0 and column 1 of `cicero_df` hold the two peaks of each connection
    as text that is split on ':' or '-' into chrom, start and stop; column 2
    holds the co-accessibility score. A connection is kept when its first peak
    is on `chr{chrom}`, lies entirely inside the region, and its score is
    present and strictly greater than the threshold. The second peak is not
    checked against the region.

    Args:
        cicero_df: connections table with integer column labels 0, 1 and 2.
        chrom: chromosome without the 'chr' prefix.
        verbose: kept for interface compatibility; not used inside the function.
        region_start: lower bound for the first peak's start; defaults to the
            notebook-level `start_bp`.
        region_stop: upper bound for the first peak's stop; defaults to the
            notebook-level `stop_bp`.
        min_coaccess: exclusive lower bound for the score; defaults to the
            notebook-level `MIN_CICERO`.

    Returns:
        DataFrame with columns chrom1, start1, stop1, chrom2, start2, stop2,
        coaccess; empty (with those columns) when no connection is on the
        chromosome.
    """
    # fall back to the notebook-level region and threshold when not supplied
    if region_start is None:
        region_start = start_bp
    if region_stop is None:
        region_stop = stop_bp
    if min_coaccess is None:
        min_coaccess = MIN_CICERO

    arc_columns = ['chrom1', 'start1', 'stop1', 'chrom2', 
                   'start2', 'stop2', 'coaccess']
    position_columns = ['start1', 'stop1', 'start2', 'stop2']

    # to speed up a bit do initial subset by chrom
    cicero_df = cicero_df.loc[cicero_df[0].str.startswith(f'chr{chrom}-')]
    if cicero_df.shape[0] == 0:
        # splitting an empty column yields no columns at all, which would make
        # the column assignment below fail; return an empty arcs table instead
        cicero_arcs_df = DataFrame(columns=arc_columns)
        print(cicero_arcs_df.shape)
        print(cicero_arcs_df.shape)
        return cicero_arcs_df

    cicero_score_df = concat([cicero_df[0].str.split(r':|-', expand=True), 
                              cicero_df[1].str.split(r':|-', expand=True), 
                              cicero_df[[2]]], axis=1)
    cicero_score_df.columns = arc_columns
    cicero_score_df[position_columns] = cicero_score_df[position_columns].astype('int')
    print(cicero_score_df.shape) 
    # convert coaccess df to arcs
    cicero_arcs_df = cicero_score_df.loc[(cicero_score_df.chrom1 == f'chr{chrom}') & 
                          (cicero_score_df.start1 >= region_start) & 
                          (cicero_score_df.stop1 <= region_stop) & 
                          (cicero_score_df['coaccess'].notna()) &
                          (cicero_score_df.coaccess > min_coaccess)]
    print(cicero_arcs_df.shape)
    return cicero_arcs_df

#### load and format the Cicero results by cell-type

In [None]:
%%time
# here not including all but most relevant
scat_cell_types = ['Dopaminergic_Neurons', 'Immature_Dopaminergic_Neurons', 'Late_neuron_Progenitor']
max_cicero_value = 0
for cell_type in scat_cell_types:
    print(cell_type)
    cicero_in_file = f'{quants_dir}/SCAT.{cell_type}.cicero.conns.txt.gz'
    cicero_out_file = f'{figures_dir}/tracks/{dx}.{cell_type}.cicero.{gene}.arcs'
    cicero_df = read_csv(cicero_in_file, sep='\t',skiprows=1, header=None)
    print(cicero_df.shape)
    cicero_arcs_df = format_cicero_arcs(cicero_df, chrom, verbose=DEBUG)
    this_max = cicero_arcs_df.coaccess.max()
    print(this_max)
    if this_max > max_cicero_value:
        max_cicero_value = this_max 
    cicero_arcs_df.to_csv(cicero_out_file, index=False, header=False, sep='\t')        
    if DEBUG:
        display(cicero_df.sample(5))    

### cis RNAB ~ METH correlation tracks

#### load the METH feature info

In [None]:
features_file = f'{quants_dir}/EPIC_annotation_hg38.txt'
# headerless tab-separated annotation; name the four columns explicitly
features_df = read_csv(features_file, sep='\t', header=None)
features_df.columns = ['chrom', 'start', 'stop', 'feature']
print(features_df.shape)
# keep the features on the target chromosome that fall inside the region and
# index them by feature ID for the later lookups and merges
feature_in_region = ((features_df['chrom'] == f'chr{chrom}') & 
                     (features_df['start'] >= start_bp) & 
                     (features_df['stop'] <= stop_bp))
features_df = features_df.loc[feature_in_region].set_index('feature')
# a bed interval needs start != stop; widen each site a little so it is visible
features_df['stop'] = features_df['start'] + 4
print(f'features region shape {features_df.shape}')
if DEBUG:
    display(features_df.head())

#### load the chromosome file containing the genes cis METH correlation results, convert correlation df to bedgraph, and save correlation bedgraph

In [None]:
def format_meth_corr_bedgraph(gene: str, chrom: str, 
                              in_file: str, out_file: str, 
                              feats_df: DataFrame, alpha: float=1.0,
                              verbose: bool=True) -> tuple:
    """Write one gene's cis methylation correlations to a bedgraph-style file.

    Reads the parquet results in `in_file`, keeps the rows whose `phenotype_id`
    equals `gene` and whose `pval_nominal` is at most `alpha`, attaches genomic
    coordinates by inner-joining `variant_id` to the index of `feats_df`, and
    writes tab-separated chrom/start/stop/signal/id rows (no header, no index)
    to `out_file`. The signal is the `slope` column.

    Args:
        gene: value matched against the `phenotype_id` column.
        chrom: kept for interface compatibility; not used inside the function.
        in_file: path of the parquet results to read.
        out_file: path of the bedgraph file to write.
        feats_df: feature annotations indexed by feature ID with `chrom`,
            `start` and `stop` columns; results without a match are dropped.
        alpha: largest nominal p-value to keep (default keeps everything <= 1.0).
        verbose: when True, display the intermediate tables and print how many
            annotated features have no result.

    Returns:
        (min, max) of the written signal column. When no row survives an empty
        file is written and (nan, nan) is returned, so callers that compare
        with `<` / `>` leave their running range untouched.
    """
    bed_columns = ['chrom', 'start', 'stop', 'signal', 'id']
    corr_df = read_parquet(in_file)
    print(f'shape of full results{corr_df.shape}')
    # get qtl results for gene of interest
    corr_df = corr_df.loc[(corr_df.phenotype_id == gene) & 
                          (corr_df.pval_nominal <= alpha)]
    print(f'shape of results for gene with signal {corr_df.shape}')
    if verbose:
        display(corr_df.head())

    if corr_df.shape[0] == 0:
        corr_bed = DataFrame(data=None, columns=bed_columns)
    else:
        # merge the feature info into the results
        corr_df = corr_df.merge(feats_df, how='inner', left_on='variant_id', right_index=True)
        corr_bed = (corr_df[['chrom', 'start', 'stop', 'slope', 'variant_id']]
                    .rename(columns={'slope': 'signal', 'variant_id': 'id'}))
    print(f'shape of results for gene with signal merge with feature annots {corr_bed.shape}')
    if verbose:
        display(corr_bed.head())
        print(len(set(feats_df.index) - set(corr_bed.id)))
    corr_bed.to_csv(out_file, index=False, header=False, sep='\t')
    if corr_bed.shape[0] == 0:
        # make the no-data result explicit rather than relying on what an
        # empty column's min()/max() happens to return
        return float('nan'), float('nan')
    return corr_bed.signal.min(), corr_bed.signal.max()

In [None]:
%%time
cis_meth_min_value = 0
cis_meth_max_value = 1
for day in days:
    corr_file = f'{tensorqtl_dir}/{cohort}_{day}_RNAB-METH.cis_qtl_pairs.chr{chrom}.parquet'
    corr_bedgraph_file = f'{figures_dir}/tracks/{dx}.{day}_ciscorr_METH.{gene}.bedgraph'
    if exists(corr_file):
        this_min, this_max = format_meth_corr_bedgraph(gene_id, chrom, corr_file, 
                                                       corr_bedgraph_file, features_df, 
                                                       verbose=DEBUG)
        if this_min < cis_meth_min_value:
            cis_meth_min_value = this_min
        if this_max > cis_meth_max_value:
            cis_meth_max_value = this_max

### <i>cis</i> METH correlated with cell-type fractions

#### load the cell fraction correlation results

In [None]:
fracs_df = read_csv(fracs_meth_corr_file, index_col=0)
# keep only the rows that have a z-score
fracs_df = fracs_df.loc[fracs_df['z'].notna()]
print(f'shape of full cell fraction correlations {fracs_df.shape}')

# keep the results whose feature is one of the regional METH features
exo_in_region = fracs_df['exo'].isin(features_df.index)
fracs_df = fracs_df.loc[exo_in_region]
print(f'shape of cell fraction correlations in region {fracs_df.shape}')

# attach the genomic position of each feature to its results
fracs_df = fracs_df.merge(features_df, how='inner', 
                          left_on='exo', right_on='feature')
print(f'shape of merge in region {fracs_df.shape}')

# y-axis range for the tracks; min()/max() are NaN when no rows remain, in
# which case fall back to [0, 1]
cis_frac_meth_min_value = fracs_df['z'].min()
cis_frac_meth_max_value = fracs_df['z'].max()
cis_frac_meth_min_value = 0 if math.isnan(cis_frac_meth_min_value) else cis_frac_meth_min_value
cis_frac_meth_max_value = 1 if math.isnan(cis_frac_meth_max_value) else cis_frac_meth_max_value

if DEBUG:
    display(fracs_df.head())

#### create bedgraph per cell-type

In [None]:
# cell-type names as they appear in the `endo` column, mapped to the short
# labels used in file names and track titles
cell_abbreviations = {'DopaminergicNeurons': 'DA', 
                      'ImmatureDopaminergicNeurons': 'iDA', 
                      'TH_Pel-Freez_ICC': 'TH-ICC'}
meth_bed_columns = ['chrom', 'start', 'stop', 'z', 'exo']
for cell_type, cell_abbrv in cell_abbreviations.items():
    print(cell_abbrv, cell_type)
    # rows of the regional results that belong to this cell type
    cell_fracs_df = fracs_df[fracs_df['endo'].eq(cell_type)]
    print(f'shape of cell fraction correlations in region for {cell_abbrv} is {cell_fracs_df.shape}')
    fracs_bed = cell_fracs_df.loc[:, meth_bed_columns].copy()
    print(fracs_bed.shape)
    if DEBUG:
        display(fracs_bed.head())
    # write the bedgraph, tab separated with no header or index
    fracs_corr_bedgraph_file = f'{figures_dir}/tracks/{dx}.{cell_abbrv}.METH.corr_cell_fracs.{gene}.bedgraph'
    fracs_bed.to_csv(fracs_corr_bedgraph_file, sep='\t', header=False, index=False)

### create the Encode CRE bed

In [None]:
# extract the ENCODE records for the plotted region from the bigBed file into a
# plain bed file; the command is assembled from adjacent f-strings
this_cmd = (f'bigBedToBed {encode_bb_file} -chrom=chr{chrom} -start={start_bp} '
            f'-end={stop_bp} {encode_bed_file}')
run_bash_cmd(this_cmd, DEBUG)

### format gwas track config string

In [None]:
# GWAS points track drawn from the GWAS bedgraph, followed by a dotted grey
# guide line at y = 7.3 overlaid on the same y-axis
# NOTE(review): 7.3 is presumably -log10(5e-8), the genome-wide significance level — confirm
# NOTE: ceil() raises if the signal maximum is NaN or infinite (e.g. an empty
# region or a p-value of exactly 0)
this_content = f'[spacer]{nl}[GWAS]{nl}\
title = {dx} GWAS{nl}\
file = {gwas_bedgraph_file}{nl}\
file_type = bedgraph{nl}\
type = points:6{nl}\
color = black{nl}\
use_middle = true{nl}\
min_value = 0{nl}\
max_value = {ceil(gwas_bed.signal.max())}{nl}\
height = 5{nl}{nl}\
[hlines ovelayed]{nl}\
color = grey{nl}\
line_width = 1{nl}\
line_style = dotted{nl}\
y_values = 7.3{nl}\
file_type = hlines{nl}\
overlay_previous = share-y{nl}{nl}'

# appending makes this cell non-idempotent: re-running it without resetting
# config_content adds the track a second time
config_content = config_content + this_content

if DEBUG:
    print(this_content)

### format gwas LD track config string

In [None]:
# larger red points for the LD variants, overlaid on the GWAS track with a
# shared y-axis, followed by an x-axis section
this_content = f'[LD GWAS]{nl}\
file = {gwas_ld_bedgraph_file}{nl}\
file_type = bedgraph{nl}\
type = points:10{nl}\
color = red{nl}\
use_middle = true{nl}\
min_value = 0{nl}\
height = 5{nl}\
overlay_previous = share-y{nl}\
[x-axis]{nl}{nl}'

# appending makes this cell non-idempotent on re-run
config_content = config_content + this_content

if DEBUG:
    print(this_content)

### format the genes annot config string

In [None]:
# gene annotation track read from the GTF file, with transcripts merged per
# gene and labelled by gene_name
this_content = f'{nl}{nl}\
[genes]{nl}\
title = Genes{nl}\
file = {gene_annot_file}{nl}\
height = 2{nl}\
color = darkblue{nl}\
merge_transcripts = true{nl}\
prefered_name = gene_name{nl}\
fontsize = 12{nl}\
style = UCSC{nl}\
max_labels = 200{nl}\
file_type = gtf{nl}{nl}'

# appending makes this cell non-idempotent on re-run
config_content = config_content + this_content

if DEBUG:
    print(this_content)

### format RNAB ~ ATAC tracks config string

In [None]:
# one RNAB ~ ATAC bedgraph track per day, all sharing the y-range computed when
# the bedgraphs were written, each with guide lines at 0 and +/-1.96
# NOTE(review): the bedgraph signal written by format_correlation_bedgraph is the
# raw slope (the slope/slope_se form is commented out there) — confirm that the
# +/-1.96 guide lines are still intended
# NOTE: the continuation lines inside the f-string are indented, so that leading
# whitespace is part of the generated config text
for day in days:
    day_color = day_colors.get(day)
    corr_bedgraph_file = f'{figures_dir}/tracks/{dx}.{day}_ciscorr.{gene}.bedgraph'
    this_content = f'[{day} ATAC cis_corr]{nl}\
    title = {day} {gene} ~ ATAC{nl}\
    file = {corr_bedgraph_file}{nl}\
    file_type = bedgraph{nl}\
    color = {day_color}{nl}\
    height = 2{nl}\
    min_value = {floor(cis_atac_min_value)}{nl}\
    max_value = {ceil(cis_atac_max_value)}{nl}\
    [hlines ovelayed]{nl}\
    color = black{nl}\
    line_width = 0.5{nl}\
    line_style = solid{nl}\
    y_values = 0{nl}\
    file_type = hlines{nl}\
    overlay_previous = share-y{nl}\
    [hlines ovelayed]{nl}\
    color = grey{nl}\
    line_width = 0.5{nl}\
    line_style = dotted{nl}\
    y_values = 1.96{nl}\
    file_type = hlines{nl}\
    overlay_previous = share-y{nl}\
    [hlines ovelayed]{nl}\
    color = grey{nl}\
    line_width = 0.5{nl}\
    line_style = dotted{nl}\
    y_values = -1.96{nl}\
    file_type = hlines{nl}\
    overlay_previous = share-y{nl}{nl}'    

    # appending makes this cell non-idempotent on re-run
    config_content = config_content + this_content

    if DEBUG:
        print(this_content)

### format cell-type frac ~ ATAC results

In [None]:
# one cell-fraction ~ ATAC z-score track per cell type, with a colour per
# abbreviation and guide lines at 0 and +/-1.96
# relies on cell_abbreviations from the bedgraph cells and on the
# cis_frac_atac_* range computed when the ATAC results were loaded
# NOTE: the continuation lines inside the f-string are indented, so that leading
# whitespace is part of the generated config text
cell_colors = {'DA': '#FFD700', 'iDA': '#EE5C42', 'TH-ICC': 'black'}
for cell_type, cell_abbrv in cell_abbreviations.items():
    cell_color = cell_colors.get(cell_abbrv)
    fracs_corr_bedgraph_file = f'{figures_dir}/tracks/{dx}.{cell_abbrv}.ATAC.corr_cell_fracs.{gene}.bedgraph'
    this_content = f'[{cell_abbrv} ATAC cis_corr]{nl}\
    title = {cell_abbrv} ~ ATAC{nl}\
    file = {fracs_corr_bedgraph_file}{nl}\
    file_type = bedgraph{nl}\
    color = {cell_color}{nl}\
    height = 2{nl}\
    min_value = {floor(cis_frac_atac_min_value)}{nl}\
    max_value = {ceil(cis_frac_atac_max_value)}{nl}\
    [hlines ovelayed]{nl}\
    color = black{nl}\
    line_width = 0.5{nl}\
    line_style = solid{nl}\
    y_values = 0{nl}\
    file_type = hlines{nl}\
    overlay_previous = share-y{nl}\
    [hlines ovelayed]{nl}\
    color = grey{nl}\
    line_width = 0.5{nl}\
    line_style = dotted{nl}\
    y_values = 1.96{nl}\
    file_type = hlines{nl}\
    overlay_previous = share-y{nl}\
    [hlines ovelayed]{nl}\
    color = grey{nl}\
    line_width = 0.5{nl}\
    line_style = dotted{nl}\
    y_values = -1.96{nl}\
    file_type = hlines{nl}\
    overlay_previous = share-y{nl}{nl}'    

    # appending makes this cell non-idempotent on re-run
    config_content = config_content + this_content

    if DEBUG:
        print(this_content)

### format the SCAT peaks

In [None]:
# this is from Frank's formatting and files
this_content = f'[Dopaminergic_Neurons]{nl}\
file = {quants_dir}/foundin_da65_SCAT-DA.means.bedgraph{nl}\
color = #FFD700{nl}\
height = 1.5{nl}\
title = DA{nl}\
min_value = 0.0{nl}\
max_value = 0.70{nl}{nl}\
[Immature_Dopaminergic_Neurons]{nl}\
file = {quants_dir}/foundin_da65_SCAT-iDA.means.bedgraph{nl}\
color = #EE5C42{nl}\
height = 1.5{nl}\
title = iDA{nl}\
min_value = 0.0{nl}\
max_value = 0.70{nl}{nl}\
[Late_neuron_Progenitor]{nl}\
file = {quants_dir}/foundin_da65_SCAT-lNP.means.bedgraph{nl}\
color = #2E8B57{nl}\
height = 1.5{nl}\
title = LNP{nl}\
min_value = 0.0{nl}\
max_value = 0.70{nl}{nl}\
[Early_neuron_Progenitor]{nl}\
file = {quants_dir}/foundin_da65_SCAT-eNP.means.bedgraph{nl}\
color = #4682B4{nl}\
height = 1.5{nl}\
title = ENP{nl}\
min_value = 0.0{nl}\
max_value = 0.70{nl}{nl}\
[Proliferating_Floor_Plate_Progenitors]{nl}\
file = {quants_dir}/foundin_da65_SCAT-PFPP.means.bedgraph{nl}\
color = #8B8B00{nl}\
height = 1.5{nl}\
title = PFPP{nl}\
min_value = 0.0{nl}\
max_value = 0.70{nl}{nl}'

config_content = config_content + this_content

if DEBUG:
    print(this_content)

### format cicero SCAT track config string

In [None]:
# scat_colors = {'Dopaminergic_Neurons': '#FFD700', 'Immature_Dopaminergic_Neurons': '#EE5C42', 
#                'Late_neuron_Progenitor': '#2E8B57', 'Early_neuron_Progenitor': '#4682B4', 
#                'Proliferating_Floor_Plate_Progenitors': '#8B8B00'}
# a colormap name per cell type, written to the `color` key of its arcs track
scat_colors = {'Dopaminergic_Neurons': 'Blues', 
               'Immature_Dopaminergic_Neurons': 'Reds', 
               'Late_neuron_Progenitor': 'Greens'}
for cell_type in scat_cell_types:
    cicero_arcs_file = f'{figures_dir}/tracks/{dx}.{cell_type}.cicero.{gene}.arcs'
    this_color = scat_colors.get(cell_type)
    # inverted arcs track for this cell type; all cell types share the upper
    # limit taken from the largest co-accessibility score that was written.
    # The arc style key is `links_type`, the same key the HiC loop tracks use.
    # The indented `co-accessibility` line continues the title onto a second line.
    this_content = f'[Cierco]{nl}\
title = {cell_type}{nl}\
    co-accessibility{nl}\
file = {cicero_arcs_file}{nl}\
file_type = links{nl}\
color = {this_color}{nl}\
min_value = -0.2{nl}\
max_value = {ceil(max_cicero_value)}{nl}\
height = 4{nl}\
links_type = arcs{nl}\
line_width = 0.5{nl}\
orientation = inverted{nl}\
compact_arcs_level = 1{nl}{nl}'

    # appending makes this cell non-idempotent on re-run
    config_content = config_content + this_content

    if DEBUG:
        print(this_content)

### format RNAB ~ METH tracks config string

In [None]:
# one RNAB ~ METH bedgraph track per day, added only when that day's bedgraph
# file exists on disk; all share the y-range computed when the bedgraphs were
# written, with guide lines at 0 and +/-1.96
# NOTE(review): the bedgraph signal written by format_meth_corr_bedgraph is the
# raw slope — confirm that the +/-1.96 guide lines are still intended
# NOTE(review): a bedgraph left over from an earlier run also passes the
# exists() check — confirm stale files cannot be picked up
# NOTE: the continuation lines inside the f-string are indented, so that leading
# whitespace is part of the generated config text
for day in days:
    day_color = day_colors.get(day)
    corr_bedgraph_file = f'{figures_dir}/tracks/{dx}.{day}_ciscorr_METH.{gene}.bedgraph'
    if exists(corr_bedgraph_file):
        this_content = f'[{day} METH cis_corr]{nl}\
        title = {day} {gene} ~ METH{nl}\
        file = {corr_bedgraph_file}{nl}\
        file_type = bedgraph{nl}\
        color = {day_color}{nl}\
        height = 2{nl}\
        min_value = {floor(cis_meth_min_value)}{nl}\
        max_value = {ceil(cis_meth_max_value)}{nl}\
        [hlines ovelayed]{nl}\
        color = black{nl}\
        line_width = 0.5{nl}\
        line_style = solid{nl}\
        y_values = 0{nl}\
        file_type = hlines{nl}\
        overlay_previous = share-y{nl}\
        [hlines ovelayed]{nl}\
        color = grey{nl}\
        line_width = 0.5{nl}\
        line_style = dotted{nl}\
        y_values = 1.96{nl}\
        file_type = hlines{nl}\
        overlay_previous = share-y{nl}\
        [hlines ovelayed]{nl}\
        color = grey{nl}\
        line_width = 0.5{nl}\
        line_style = dotted{nl}\
        y_values = -1.96{nl}\
        file_type = hlines{nl}\
        overlay_previous = share-y{nl}{nl}'

        # appending makes this cell non-idempotent on re-run
        config_content = config_content + this_content

        if DEBUG:
            print(this_content)

### format cell-type frac ~ METH results

In [None]:
# one cell-fraction ~ METH z-score track per cell type (all drawn in black),
# with guide lines at 0 and +/-1.96
# relies on cell_abbreviations from the bedgraph cells and on the
# cis_frac_meth_* range computed when the METH results were loaded
# NOTE: the continuation lines inside the f-string are indented, so that leading
# whitespace is part of the generated config text
cell_colors = {'DA': 'black', 'iDA': 'black', 'TH-ICC': 'black'}
for cell_type, cell_abbrv in cell_abbreviations.items():
    cell_color = cell_colors.get(cell_abbrv)
    fracs_corr_bedgraph_file = f'{figures_dir}/tracks/{dx}.{cell_abbrv}.METH.corr_cell_fracs.{gene}.bedgraph'
    this_content = f'[{cell_abbrv} METH cis_corr]{nl}\
    title = {cell_abbrv} ~ METH{nl}\
    file = {fracs_corr_bedgraph_file}{nl}\
    file_type = bedgraph{nl}\
    color = {cell_color}{nl}\
    height = 2{nl}\
    min_value = {floor(cis_frac_meth_min_value)}{nl}\
    max_value = {ceil(cis_frac_meth_max_value)}{nl}\
    [hlines ovelayed]{nl}\
    color = black{nl}\
    line_width = 0.5{nl}\
    line_style = solid{nl}\
    y_values = 0{nl}\
    file_type = hlines{nl}\
    overlay_previous = share-y{nl}\
    [hlines ovelayed]{nl}\
    color = grey{nl}\
    line_width = 0.5{nl}\
    line_style = dotted{nl}\
    y_values = 1.96{nl}\
    file_type = hlines{nl}\
    overlay_previous = share-y{nl}\
    [hlines ovelayed]{nl}\
    color = grey{nl}\
    line_width = 0.5{nl}\
    line_style = dotted{nl}\
    y_values = -1.96{nl}\
    file_type = hlines{nl}\
    overlay_previous = share-y{nl}{nl}'    

    # appending makes this cell non-idempotent on re-run
    config_content = config_content + this_content

    if DEBUG:
        print(this_content)

### format the HiC data tracks

In [None]:
# this is from Frank's formatting and files
this_content = f'[HiC regions da0]{nl}\
file = {quants_dir}/hic_da0_regions.bed{nl}\
height = 0.25{nl}\
title = da0 Regions{nl}\
file_type = bed{nl}\
display = collapsed{nl}\
color = #00AFBB{nl}\
labels = false{nl}{nl}\
[HiC da0]{nl}\
file = {quants_dir}/hic_da0.bedpe{nl}\
title = da0 Loops{nl}\
height = 2{nl}\
line_width = 2{nl}\
links_type = arcs{nl}\
color = #00AFBB{nl}\
line_style = solid{nl}\
file_type = links{nl}\
orientation = inverted{nl}\
use_middle = true{nl}{nl}\
[HiC regions da65]{nl}\
file = {quants_dir}/hic_da65_regions.bed{nl}\
height = 0.25{nl}\
title = da65 Regions{nl}\
file_type = bed{nl}\
display = collapsed{nl}\
color = #FC4E07{nl}\
labels = false{nl}{nl}\
[HiC da65]{nl}\
file = {quants_dir}/hic_da65.bedpe{nl}\
title = da65 Loops{nl}\
height = 2{nl}\
line_width = 2{nl}\
links_type = arcs{nl}\
color = #FC4E07{nl}\
line_style = solid{nl}\
file_type = links{nl}\
orientation = inverted{nl}\
use_middle = true{nl}{nl}'

config_content = config_content + this_content

if DEBUG:
    print(this_content)

### add encode track to config

In [None]:
# collapsed, unlabelled bed track of the ENCODE records extracted for this
# region, followed by a closing x-axis section
this_content = f'{nl}{nl}\
[CRE]{nl}\
title = ENCODE CRE{nl}\
file = {encode_bed_file}{nl}\
height = 2{nl}\
fontsize = 8{nl}\
file_type = bed{nl}\
color = Blues{nl}\
display = collapsed{nl}\
labels = false{nl}\
border_color = black{nl}\
show_data_range = false{nl}\
[x-axis]{nl}{nl}'

# appending makes this cell non-idempotent on re-run
config_content = config_content + this_content

if DEBUG:
    print(this_content)

### save the config file content

In [None]:
# persist the accumulated track configuration, replacing any existing file
with open(config_file, mode='w') as config_handle:
    config_handle.write(config_content)

### generate the figure

In [None]:
# render the configured tracks for the region into the figure file; the command
# line is assembled from adjacent f-strings, each ending in a separating space
this_cmd = (f'pyGenomeTracks --tracks {config_file} '
            f'--region chr{chrom}:{start_bp}-{stop_bp} --dpi 200 '
            f'--title "{dx} {gene}" '
            f'--outFileName {figure_file}')

run_bash_cmd(this_cmd, DEBUG)

### view the generated figure

In [None]:
# shown inline because it is the last expression of the cell
Image(figure_file)

In [None]:
# print the system date/time at the end of the run
!date