In [None]:
# HOME
import os
import matplotlib as mpl
import matplotlib.pyplot as plt
# use seaborn plotting defaults
import seaborn as sns; sns.set()

# get the code
import sys
sys.path.append('../code')
from script_utils import show_output
from plot import plot_genomic
from plot import plot_snp
from rollingCNV import rollingCNV


# Resolve the home directory of whoever runs the notebook instead of
# hard-coding (and manually switching between) machine-specific user folders.
home = os.path.expanduser("~")
# standard paths
testdata = os.path.join(home, "Dropbox/Icke/Work/somVar/testdata")
static_path = os.path.join(home, "Dropbox/Icke/Work/static")
cluster_path = os.path.join(home, "mount")
tooldata = os.path.join(home, "Dropbox/Icke/Work/somVar/tooldata")

# tool-specific paths
shell_path = "../shell"
cnvdata = os.path.join(tooldata, "myCNVdata")
output_path = os.path.join(cnvdata, "output")

# the path to the input data
# local copies below tooldata, kept under their own names so they are not
# silently clobbered (the former chained assignment pointed cnv_path at chromCov)
local_cnv_path = os.path.join(cnvdata, "cnv")
local_cnvPON_path = os.path.join(cnvdata, "chromCov")
# copies on the mounted cluster; these are the values cnv_path / cnvPON_path hold
cnv_path = os.path.join(cluster_path, "scratch/develop/PONcoverage/cnv")
cnvPON_path = os.path.join(cluster_path, "scratch/develop/PONcoverage/chromCov")

In [None]:
# show the resolved PON coverage path as the cell output
cnvPON_path

In [None]:
# Settings for the rolling computations, written as plain nested dict literals.
config = {
    # also export the left and right rolling window values (L/R)
    'debug': False,
    # the exponent for the diff function --> higher values make steeper peaks
    'diff_exp': 4,
    'coverage': {
        'filter': {
            'min_cov': 30,
            'min_PON_cov': 50,
            'max_PON_std': 100,
        },
        'normalize': False,
        'center': True,
        # after interpolation of rolling data from filtered df into full df,
        # interpolate missing data within this fraction of window size,
        # set 0 if no interpolation is wanted
        'expand': 0.2,
        'data': {
            'log2ratio': {'mean': 100},
        },
    },
    'heteroSNP': {
        'filter': {
            'VAF': (0.05, 0.95),
            'minDepth': 30,
            'minEB': 0.5,
        },
        'normalize': True,
        'center': False,
        'expand': 0.5,
        'data': {
            'absVAF': {'mean': 20},
            'VAF': {'std': 20},
            'deltaVAF': {'var': 20, 'std': 20},
        },
    },
}

# sample identifier handed to rollingCNV
sample = "01_A"

# run the complete pipeline for the sample with the config defined above
rCov_df, rCovSNP_df = rollingCNV(
    sample,
    sample_cnv_path=cnv_path,
    PON_cnv_path=cnvPON_path,
    config=config,
)

In [None]:
# display the second value returned by the rollingCNV call above
rCovSNP_df

### Step by step: running the stages of `rollingCNV` individually

In [None]:
from combineCNVdata import get_covNsnp
from rollingCNV import apply_rolling_coverage
from rollingCNV import apply_rolling_SNP


def rollingCNV(sample, sample_cnv_path, PON_cnv_path, config):
    '''
    combines all the hetSNP and coverage data per sample and
    performs rolling computations for clustering

    Parameters:
        sample: sample identifier, forwarded to get_covNsnp and used in the progress messages
        sample_cnv_path: forwarded to get_covNsnp as sample_cnv_path
        PON_cnv_path: forwarded to get_covNsnp as PON_cnv_path
        config: settings dict; config['debug'] is passed to get_covNsnp as verbose and the
            whole dict is handed to apply_rolling_coverage and apply_rolling_SNP

    Returns:
        tuple (rolling_cov_df, rolling_snpcov_df): the second output of
        apply_rolling_coverage and the output of apply_rolling_SNP
    '''

    # combine the chromosome data and associate coverage data with pon coverage
    # (use the function arguments here, not the notebook-level cnv_path / cnvPON_path,
    # so that the paths given by the caller are actually honored)
    snp_df, cov_df = get_covNsnp(
        sample,
        sample_cnv_path=sample_cnv_path,
        PON_cnv_path=PON_cnv_path,
        verbose=config['debug']
    )

    # apply rolling coverage
    show_output(f"Performing rolling coverage computation for sample {sample}.")
    snpcov_df, rolling_cov_df = apply_rolling_coverage(snp_df, cov_df, config)

    # apply rolling SNP
    show_output(f"Performing rolling computation for hetSNP data of sample {sample}.")
    rolling_snpcov_df = apply_rolling_SNP(snpcov_df, config)
    show_output(f"Finished computations for sample {sample}.")
    return rolling_cov_df, rolling_snpcov_df

In [None]:
# load the combined SNP and coverage data for one sample (non-verbose)
sample = "01_A"
snp_df, cov_df = get_covNsnp(sample, sample_cnv_path=cnv_path, PON_cnv_path=cnvPON_path, verbose=False)

In [None]:
# Same settings as a keyword-style dict; debug is switched on here.
config = dict(
    debug=True,  # also export the left and right rolling window values (L/R)
    diff_exp=4,  # the exponent for the diff function --> higher values make steeper peaks
    coverage=dict(
        filter=dict(
            min_cov=30,
            min_PON_cov=50,
            max_PON_std=100,
        ),
        normalize=False,
        center=True,
        # after interpolation of rolling data from filtered df into full df,
        # interpolate missing data within this fraction of window size,
        # set 0 if no interpolation is wanted
        expand=0.2,
        data=dict(
            log2ratio=dict(mean=100),
        ),
    ),
    heteroSNP=dict(
        filter=dict(
            VAF=(0.05, 0.95),
            minDepth=30,
            minEB=0.5,
        ),
        normalize=True,
        center=False,
        expand=0.5,
        data=dict(
            absVAF=dict(mean=20),
            VAF=dict(std=20),
            deltaVAF=dict(var=20, std=20),
        ),
    ),
)

# rolling coverage step on the loaded SNP / coverage data with the debug config
snpcov_df, rolling_cov_df = apply_rolling_coverage(
    snp_df,
    cov_df,
    config,
)

In [None]:
# figure-level keyword arguments handed to plot_genomic
fig_params = {
    'figsize': (50, 8),
    'colormap': 'coolwarm_r',
    'color_chroms': True,
    'ylim': (-1.5, 2.5),
}

# --- plot specs; plot_type is one of ['line', 'scatter'] ---

# raw log2ratio values as a scatter plot
log2 = {
    'title': 'log2ratio',
    'plot_type': 'scatter',
    'data': 'log2ratio',
    'plot_args': {'linewidth': 0.3, 'color': 'black', 's': 0.2, 'alpha': .7},
}

# line plot of the log2ratiomean column
log2mean = {
    'title': 'rollinglog2ratio',
    'plot_type': 'line',
    'data': 'log2ratiomean',
    'plot_args': {'linewidth': 1, 'color': 'yellow', 'alpha': .7},
}

# line plot of the log2ratiomeanDiff column
log2diff = {
    'title': 'rollinglog2ratio',
    'plot_type': 'line',
    'data': 'log2ratiomeanDiff',
    'plot_args': {'linewidth': 1, 'color': 'blue', 'alpha': .7},
}

# line plot of the log2ratiomeanL column
log2L = {
    'title': 'rollinglog2ratio',
    'plot_type': 'line',
    'data': 'log2ratiomeanL',
    'plot_args': {'linewidth': 1, 'color': 'white', 'alpha': .7},
}

# line plot of the log2ratiomeanR column
log2R = {
    'title': 'rollinglog2ratio',
    'plot_type': 'line',
    'data': 'log2ratiomeanR',
    'plot_args': {'linewidth': 1, 'color': 'black', 'alpha': .7},
}

# chromosome selection and region string for zoomed plots
chroms = ['chr5', 'chr7', 'chr8', 'chr11', 'chr17']
r1 = 'chr17:3Mb-9Mb'

# overview over all chromosomes: raw log2ratio plus the log2ratiomean line
_, _, _, _ = plot_genomic(
    snpcov_df,
    plots=[log2, log2mean],
    chroms='all',
    region='',
    **fig_params
)

In [None]:
# scatter spec for the zoomed view (s=2, alpha=1); plot_type is one of ['line', 'scatter']
log2 = {
    'title': 'log2ratio',
    'plot_type': 'scatter',
    'data': 'log2ratio',
    'plot_args': {'linewidth': 0.3, 'color': 'black', 's': 2, 'alpha': 1},
}

# raw data, the mean line, the Diff line and the L / R columns
plots = [log2, log2mean, log2diff, log2L, log2R]

# plot the selected chromosomes, with region r1 passed as the region argument
_, _, _, _ = plot_genomic(
    snpcov_df,
    plots=plots,
    chroms=chroms,
    region=r1,
    **fig_params
)

In [None]:
# rolling hetSNP step on the output of apply_rolling_coverage, using the current config
rolling_snpcov_df = apply_rolling_SNP(
    snpcov_df,
    config,
)

In [None]:
# figure-level keyword arguments handed to plot_snp
fig_params = {
    'figsize': (24, 8),
    'colormap': 'coolwarm_r',
    'color_chroms': True,
    'ylim': (-0.02, 1.05),
    'cov_offset': .1,  # how much log2ratio=0 is shifted above SNP-data
    'cov_height': .5,
    'label_size': 13,
}

# --- plot specs; plot_type is one of ['line', 'scatter'] ---

############### VAF #################################
vaf = {
    'title': 'VAF',
    'plot_type': 'scatter',
    'data': 'VAF',
    'plot_args': {'linewidth': 0.5, 'color': 'black', 's': 1, 'alpha': .5},
}

vafstd = {
    'title': 'VAFstd',
    'plot_type': 'line',
    'data': 'VAFstd',
    'plot_args': {'linewidth': 1, 'color': 'yellow', 'alpha': .7},
}

vafstddiff = {
    'title': 'VAFstdDiff',
    'plot_type': 'line',
    'data': 'VAFstdDiff',
    'plot_args': {'linewidth': 1, 'color': 'blue', 'alpha': .7},
}

############### absVAF #################################
absvaf = {
    'title': 'absVAF',
    'plot_type': 'scatter',
    'data': 'absVAF',
    'plot_args': {'linewidth': 0.5, 'color': 'blue', 's': 1, 'alpha': 1},
}

absvafmean = {
    'title': 'absVAFmean',
    'plot_type': 'line',
    'data': 'absVAFmean',
    'plot_args': {'linewidth': 1, 'color': 'blue', 'alpha': .7},
}

absvafmeandiff = {
    'title': 'absVAFdiff',
    'plot_type': 'line',
    'data': 'absVAFmeanDiff',
    'plot_args': {'linewidth': 1, 'color': 'blue', 'alpha': .7},
}

############### deltaVAF #################################
deltavaf = {
    'title': 'deltaVAF',
    'plot_type': 'scatter',
    'data': 'deltaVAF',
    'plot_args': {'linewidth': 0.5, 'color': 'green', 's': 5, 'alpha': 1},
}

deltavafstd = {
    'title': 'deltaVAFstd',
    'plot_type': 'line',
    'data': 'deltaVAFstd',
    'plot_args': {'linewidth': 1, 'color': 'yellow', 'alpha': .7},
}

deltavafstddiff = {
    'title': 'deltaVAFstddiff',
    'plot_type': 'line',
    'data': 'deltaVAFstdDiff',
    'plot_args': {'linewidth': 1, 'color': 'blue', 'alpha': .7},
}

deltavafvar = {
    'title': 'deltaVAFvar',
    'plot_type': 'line',
    'data': 'deltaVAFvar',
    'plot_args': {'linewidth': 1, 'color': 'green', 'alpha': .7},
}

deltavafvardiff = {
    'title': 'deltaVAFvardiff',
    'plot_type': 'line',
    'data': 'deltaVAFvarDiff',
    'plot_args': {'linewidth': 1, 'color': 'blue', 'alpha': .7},
}

# chromosome selection and region string for zoomed plots
chroms = ['chr5', 'chr7', 'chr8', 'chr11', 'chr17']
r1 = 'chr17:3Mb-9Mb'

# plot groups to choose from
std_plots = [vaf, vafstd, vafstddiff]
# No vafvar / vafvardiff spec exists in this notebook (referencing them raised a
# NameError and stopped the cell before plotting); the only variance specs
# defined are the deltaVAF ones, so the variance group is built from those.
var_plots = [deltavaf, deltavafvar, deltavafvardiff]
selects = [vaf, absvafmean, vafstd, deltavafvar]

# plot the selected SNP tracks over all chromosomes
fig, ax, df, chrom_df = plot_snp(rolling_snpcov_df, snp_plots=selects, chroms='all', region='', **fig_params)

In [None]:

# same SNP tracks, restricted to the chromosomes listed in chroms
fig, ax, df, chrom_df = plot_snp(
    rolling_snpcov_df,
    snp_plots=selects,
    chroms=chroms,
    region='',
    **fig_params
)