In [None]:
# get the code
import os
import re
import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# use seaborn plotting defaults
import seaborn as sns; sns.set()
from matplotlib.collections import PatchCollection
from matplotlib.patches import Rectangle

# import package functions
sys.path.append('../code')
from script_utils_CNV import get_CNVconfig, show_output
from rollingCNV import interpolate, one_col_rolling, llh, rolling_data
from rollingCov import rolling_coverage
from combineCNV import filter_snp
from plot import plot_cov, plot_snp

######## need to find the chained assignment!!!
# TODO: locate the chained assignment in the code below; until then the
# pandas chained-assignment warning is switched off for the whole notebook.
pd.set_option('mode.chained_assignment', None)

# HOME
# Resolved from the running user instead of hard-coding one absolute path per
# machine (previously '/Users/mahtin' was assigned and immediately overwritten
# by '/Users/martinscience').
home = os.path.expanduser("~")


# standard paths (all relative to the Dropbox work folder below `home`)
static = os.path.join(home, "Dropbox/Icke/Work/static")
tooldata = os.path.join(home, "Dropbox/Icke/Work/somVar/tooldata")
testdata = os.path.join(home,"Dropbox/Icke/Work/somVar/testdata")
PON_path = os.path.join(static, "PON/HAEv7_hg38_NovaSeq")


# CNV-specific folders used by the cells below
cnvdata = os.path.join(tooldata, "myCNVdata")
output_path = os.path.join(cnvdata, "output")
plot_path = os.path.join(cnvdata, "plot")
fig_path = os.path.join(cnvdata, "figures")


## visualize coverage data

In [None]:
# Read the two gzipped, tab-separated tables for this sample from
# <output_path>/CNV: "<sample>.cnv.full.gz" and "<sample>.cnv.snp.gz".
sample = "03_A-B"
cnv_df = pd.read_csv(os.path.join(output_path, f"CNV/{sample}.cnv.full.gz"), sep="\t", compression="gzip")
rsnp_df = pd.read_csv(os.path.join(output_path, f"CNV/{sample}.cnv.snp.gz"), sep="\t", compression="gzip")
# NOTE(review): a notebook cell only renders its last expression, so this
# first preview is presumably never shown - confirm whether it is still wanted.
cnv_df[:3]
rsnp_df[:3]

### load helpers

In [None]:
def sort_df(df):
    '''
    Return a copy of df ordered by chromosome and then by FullExonPos.

    The 'Chr' column of the copy is turned into a categorical with the fixed
    order chr1 .. chr22, chrX so that sorting follows chromosome order rather
    than string order. The input frame itself is left untouched.
    '''
    ordered_chroms = [f"chr{number}" for number in range(1, 23)]
    ordered_chroms.append('chrX')

    result = df.copy()
    result['Chr'] = pd.Categorical(result['Chr'], ordered_chroms)
    return result.sort_values(['Chr', 'FullExonPos'])


def get_chrom_df(df):
    '''
    Build the per-chromosome layout table used to collapse the x-axis.

    For every chromosome present in df the mean / min / max of FullExonPos is
    computed and then shifted by 'dif', so that the chromosomes follow each
    other without gaps: the shifted 'max' equals the cumulative sum of the
    chromosome widths (max - min).
    Returns a frame indexed by Chr with integer columns mean, min, max, dif.
    '''
    # dropna is needed because grouping on a categorical can return all categories
    stats = df.groupby('Chr')['FullExonPos'].agg(['mean', 'min', 'max']).dropna()
    stat_cols = list(stats.columns)

    # width of every chromosome and the offset that stacks them end to end
    width = stats['max'] - stats['min']
    shift = (stats['max'] - width.cumsum()).astype(int)

    collapsed = stats[stat_cols].sub(shift, axis=0).astype(int)
    collapsed['dif'] = shift
    return collapsed


def make_color_chroms(
    ax, chrom_df, color_chroms, ylimits=(-10, 10), colormap="coolwarm_r"
):
    """
    Draw one background rectangle per chromosome found in chrom_df.

    ax           -- axes the rectangle collection is added to
    chrom_df     -- frame indexed by chromosome name with 'min' and 'max'
                    columns (x-extent of each chromosome)
    color_chroms -- True: translucent rectangles filled from the colormap;
                    False: unfilled rectangles with a dark gray outline
    ylimits      -- (low, high) y-range the rectangles should cover
    colormap     -- name of the matplotlib colormap, sampled at 23 levels

    Returns the value of ax.add_collection for the created PatchCollection.

    Only chromosomes that are present get a rectangle, and the color index is
    collected in the same loop. Previously a placeholder rectangle was added
    for every missing chromosome while the color array only covered the
    present ones, so rectangles and colors fell out of step whenever a subset
    of chromosomes was plotted.
    """

    chrom_list = [f"chr{i+1}" for i in range(22)] + ["chrX"]
    # pyplot.get_cmap is used because plt.cm.get_cmap is no longer available
    # in recent matplotlib releases
    cmap = plt.get_cmap(colormap, len(chrom_list))

    # set the height and ymin beyond the ylimits so borders are not seen
    ymin = ylimits[0] * 1.1
    height = (ylimits[1] - ymin) * 1.1

    rects = []
    color_index = []
    # chr1 -> 1 ... chr22 -> 22, chrX -> 23 (same numbering as before)
    for number, chrom in enumerate(chrom_list, start=1):
        if chrom not in chrom_df.index:
            continue
        row = chrom_df.loc[chrom]
        rects.append(
            Rectangle(
                (row["min"], ymin), width=row["max"] - row["min"], height=height
            )
        )
        color_index.append(number)

    if color_chroms:
        rect_kwargs = dict(alpha=0.4, ec="none")
    else:
        rect_kwargs = dict(alpha=1, fc="none", ec="darkgray", lw=1, ls="-")

    # set the rectangle collection with colormap
    rect_collection = PatchCollection(rects, cmap=cmap, **rect_kwargs)
    # one color value per rectangle, in the same order as `rects`
    rect_collection.set_array(np.asarray(color_index))

    # setting clim allows fixing the color for individual chroms
    # https://stackoverflow.com/questions/6028675/setting-color-range-in-matplotlib-patchcollection
    rect_collection.set_clim([0, 23])
    return ax.add_collection(rect_collection)


def add_chrom_labels(ax, chrom_df, ylimits=(-10,10)):
    '''
    Write the chromosome name at the 'mean' x-position of every row of
    chrom_df, horizontally centered.

    The font size grows with the narrowest chromosome's share of the total
    width and is kept between 15 and 25. When more than 12 chromosomes are
    shown, the "chr" prefix is removed from the labels.
    '''
    # relative y-position of the labels within ylimits (1 = upper limit)
    YOFFSET = 1
    label_y = ylimits[0] + YOFFSET * (ylimits[1] - ylimits[0])

    # narrowest chromosome relative to the whole stretch drives the font size
    widths = chrom_df['max'] - chrom_df['min']
    narrowest_fraction = widths.min() / chrom_df['max'].max()
    font_size = min(25, max(15, 200*narrowest_fraction))

    drop_prefix = len(chrom_df.index) > 12
    for name, stats in chrom_df.iterrows():
        label = name.replace('chr', '') if drop_prefix else name
        ax.text(
            stats['mean'], label_y, label,
            ha='center', size=font_size, color='#2f3832'
        )
    

def make_nice(position):
    '''
    Snap position to a "nice" number: one of 1, 2, 2.5, 5 or 10 times the
    power of ten at or below position, whichever has the smallest relative
    deviation from position.
    '''
    candidates = np.array([1,2,2.5,5,10])
    # power of ten at or below position
    exponent = np.floor(np.log10(position))
    scale = np.power(10, exponent)
    # position rescaled into the range 1..10
    mantissa = position / scale
    # relative deviation of every candidate from the mantissa
    deviation = np.abs(candidates / mantissa - 1)
    best = candidates[np.argmin(deviation)]
    return best * scale

def get_tick_pos(tick_dist, chrom_df):
    '''
    Collect tick positions for all chromosomes in chrom_df.

    Within each chromosome ticks are placed every tick_dist units, starting
    one tick_dist after the chromosome's 'min' and stopping before its 'max'.
    Returns a flat list in chromosome order.
    '''
    positions = []
    for _, chrom in chrom_df.iterrows():
        first_tick = chrom['min'] + tick_dist
        positions.extend(range(first_tick, chrom['max'], tick_dist))
    return positions


def str_pos(pos, df, precision=1):
    """
    Return a short label ("12b", "2kb", "45.3Mb") for a plot position.

    pos       -- x-position in plot coordinates
    df        -- frame with columns "PlotPos" and "Pos"; the row whose
                 PlotPos is closest to pos supplies the genomic position
    precision -- number of decimals kept for kb / Mb values; a trailing
                 ".0" is removed

    The unit is chosen from the magnitude of the genomic position and is
    limited to b, kb and Mb, so larger positions are expressed in Mb.
    Non-positive positions are returned in bases, because no magnitude can
    be derived for them (log10 is undefined there).
    """
    # map the plot coordinate to the genomic coordinate of the nearest row
    nearest = np.argmin(np.abs(df["PlotPos"] - pos))
    pos = df.iloc[nearest]["Pos"]

    if pos <= 0:
        return f"{int(pos)}b"

    # closest power of ten that is a multiple of three ...
    power10 = int(np.round(np.log10(pos) / 3) * 3)
    # ... restricted to the units that have a suffix (b, kb, Mb)
    power10 = min(max(power10, 0), 6)

    if power10 == 0:
        return f"{int(pos)}b"

    suff = "Mb" if power10 == 6 else "kb"
    base = pos / np.power(10, power10)
    base = re.sub(r"\.0$", "", str(round(base, precision)))
    return f"{base}{suff}"


def get_precision(pos_list):
    """
    Derive the number of decimals for major tick labels from the spread of
    the tick positions: 7 minus the rounded log10 of (max - min), never
    below zero.
    """
    spread = max(pos_list) - min(pos_list)
    # order of magnitude of the covered range
    magnitude = int(np.round(np.log10(spread)))
    decimals = 7 - magnitude
    if decimals < 0:
        return 0
    return decimals


def set_ticks(ax, df, chrom_df, ticks=20, label_size=12):
    '''
    Place evenly spread major and minor x-ticks on ax and style the grid.

    ax         -- axes to modify (also returned)
    df         -- plot data with "PlotPos" and "Pos" columns, used to
                  translate tick positions into genomic labels
    chrom_df   -- per-chromosome layout with 'min' and 'max' columns
    ticks      -- approximate number of major ticks over the whole stretch
    label_size -- font size of the tick labels; the x-axis label uses
                  1.25 times this size

    Genomic coordinate labels are only written when the plotted stretch is
    below 2e7; otherwise the major ticks stay unlabeled.
    '''

    # total plotted width = right edge of the last chromosome;
    # .iloc because chrom_df is indexed by chromosome label, not by position
    stretch = chrom_df['max'].iloc[-1]
    # a tick distance of 0 would be an invalid range step in get_tick_pos
    major_tick_dist = max(1, int(stretch / (ticks + 1)))
    minor_tick_dist = int(stretch / ((ticks * 2) + 1))

    # feed tick distance into chrom_df to get chrom-specific coords
    major_pos = get_tick_pos(major_tick_dist, chrom_df)
    # one minor tick before every major tick
    minor_pos = [pos - minor_tick_dist for pos in major_pos]

    ax.xaxis.set_major_locator(plt.FixedLocator(major_pos))
    # only print the genomic coords below a certain base total
    if stretch < 2e7:
        # the precision is derived from the tick range, which needs at least
        # two ticks; fall back to one decimal otherwise
        precision = get_precision(major_pos) if len(major_pos) > 1 else 1
        major_labels = [
            str_pos(pos, df, precision=precision)
            for pos in major_pos
        ]
        ax.xaxis.set_major_formatter(plt.FixedFormatter(major_labels))
        # set the axis labels
        ax.set_xlabel("genomic coords", fontsize=1.25 * label_size)
    else:
        ax.xaxis.set_major_formatter(plt.NullFormatter())
    ax.xaxis.set_minor_locator(plt.FixedLocator(minor_pos))
    ax.xaxis.grid(which='major', linestyle='-', linewidth=2)
    ax.xaxis.grid(which='minor', linestyle='--', linewidth=1)
    ax.xaxis.set_tick_params(which='major', length=20, labelsize=label_size)
    ax.yaxis.set_tick_params(which='major', length=20, labelsize=label_size)
    # set the tick labels
    for tick in ax.xaxis.get_majorticklabels():
        tick.set_verticalalignment("bottom")
    return ax


def extract_pos(region):
    """
    Split a region string into (chrom, start, end).

    Accepted forms are "chr7" (whole chromosome) and "chr7:START-END", where
    START and END are plain integers or numbers with a "kb" / "Mb" suffix,
    e.g. "chr7:95.8Mb-111Mb". Without a START-END part the result is
    start = 0 and end = 1e10.
    """

    def convert(pos):
        # round instead of truncating: products such as 1.001 * 1000 can land
        # just below the intended integer in floating point
        for suffix, factor in (("Mb", 1e6), ("kb", 1000)):
            if pos.endswith(suffix):
                return int(round(float(pos.replace(suffix, "")) * factor))
        return int(pos)

    split = region.split(":")
    chrom = split[0]

    # if start and end are used
    if len(split) > 1 and "-" in split[1]:
        se = split[1].split("-")
        start = convert(se[0])
        end = convert(se[1])
    else:
        start = 0
        end = 1e10
    return chrom, start, end

## coverage plotting

In [None]:
def plot_cov(
    df,
    plots,
    chroms="all",
    color_chroms=True,
    colormap="coolwarm_r",
    region="",
    figsize=(20, 4),
    ylim=(-1, 1),
    label_size=12,
):
    """
    Plot coverage tracks over collapsed chromosome coordinates.

    df           -- data with columns Chr, FullExonPos, Pos and every column
                    named as "data" in plots
    plots        -- list of dicts with keys "title", "plot_type" ("line" or
                    "scatter"), "data" (column name) and "plot_args" (kwargs
                    passed to the matplotlib call)
    chroms       -- "all" or a collection of chromosome names to keep
    region       -- e.g. "chr7:95.8Mb-110Mb"; takes precedence over chroms
    color_chroms, colormap -- forwarded to make_color_chroms
    figsize, ylim, label_size -- figure size, y-limits and base font size

    Returns (fig, ax, df, chrom_df); the returned df is the sorted and
    filtered data with a "PlotPos" column inserted after the first four
    input columns.

    NOTE: this definition replaces the plot_cov imported from `plot` in the
    first cell.
    """

    # ### DATA MANGLING ##########
    # remember the incoming column order for the returned frame
    input_cols = list(df.columns)

    df = sort_df(df)
    # narrow the data down to a region or to the selected chromosomes
    if region:
        chrom, start, end = extract_pos(region)
        df = df.query("Chr == @chrom and @start <= Pos <= @end")
    elif chroms != "all":
        df = df.query("Chr in @chroms")

    # per-chromosome offsets that collapse the x-axis
    chrom_df = get_chrom_df(df)

    df = df.merge(chrom_df.loc[:, "dif"], on="Chr")
    df["PlotPos"] = df["FullExonPos"] - df["dif"]

    # PlotPos goes right after the first four original columns
    df = df.loc[:, input_cols[:4] + ["PlotPos"] + input_cols[4:]]

    # ####### PLOTTING #######
    fig, ax = plt.subplots(figsize=figsize)
    ax.set_xlim(0, df["PlotPos"].max())

    # one matplotlib call per plot type; other types are skipped
    painters = {"line": ax.plot, "scatter": ax.scatter}
    for spec in plots:
        painter = painters.get(spec["plot_type"])
        if painter is not None:
            painter(df["PlotPos"], df[spec["data"]], **spec["plot_args"])

    ax.set_ylim(ylim)
    # chromosome background rectangles
    make_color_chroms(
        ax, chrom_df, color_chroms, ylimits=ax.get_ylim(), colormap=colormap
    )

    # ####### LABELS ###################
    # quick fix for one y-label: all titles joined
    ax.set_ylabel(
        " / ".join(spec["title"] for spec in plots), fontsize=1.25 * label_size
    )

    # ####### CHROM LABELS #############
    add_chrom_labels(ax, chrom_df, ax.get_ylim())

    # vertical border at the start of every chromosome but the first
    for border in chrom_df["min"].iloc[1:]:
        ax.axvline(x=border, c="k", lw=0.5, alpha=0.5, ls="-")

    # ###### X-AXIS ####################
    # major ticks and grid
    ax = set_ticks(ax, df, chrom_df, label_size=label_size)

    # fig and ax for further plotting, plus the edited dataframe
    return fig, ax, df, chrom_df

In [None]:
# scatter track for the 'log2ratio2' column
log2 = {
    "title": "L2R",
    "plot_type": "scatter",  # ['line', 'scatter']
    "data": "log2ratio2",
    "plot_args": {"linewidth": 0.3, "color": "black", "s": 0.8, "alpha": 0.7},
}

# line track for the 'log2ratio2_mean' column
log2mean = {
    "title": "L2R_mean",
    "plot_type": "line",  # ['line', 'scatter']
    "data": "log2ratio2_mean",
    "plot_args": {"linewidth": 1, "color": "yellow", "alpha": 0.7},
}

# superseded by the second `chroms` assignment further down
chroms = ['chr5', 'chr7','chr8', 'chr11', 'chr17']

fig_params = {
    "figsize": (34, 4),
    "colormap": "coolwarm_r",
    "color_chroms": True,
    "ylim": (-1.5, 2.5),
    "label_size": 15,
}
chroms = ['chr3', 'chr4', 'chr5', 'chr6','chr7', 'chr9', 'chr12', 'chr17']
# region presets; the call below passes its own literal region string
r20 = 'chr20:20.7Mb-34.5Mb'
r7 = 'chr7:95.8Mb-111Mb'
r17 = 'chr17:18.2Mb-25Mb'
fig, ax, df, chrom_df = plot_cov(
    cnv_df,
    plots=[log2, log2mean],
    chroms="all",
    region='chr7:95.8Mb-110Mb',
    **fig_params
)

In [None]:
# Genome-wide coverage plot (no region, all chromosomes). The query keeps
# rows whose log2ratio2 equals itself, which drops rows where it is NaN.
fig, ax, df, chrom_df = plot_cov(cnv_df.query('log2ratio2 == log2ratio2'), plots=[log2, log2mean], chroms="all", region='', **fig_params)

# visualize heteroSNP

sample = "03_A-B"
snp_df = pd.read_csv(os.path.join(output_path, f"snp/{sample}.snp.gz"), sep="\t", compression="gzip")

In [None]:
# Keep only rows of cnv_df that have a VAF2 value: NaN never equals itself,
# so the self-comparison removes rows with missing VAF2.
snp_df = cnv_df.query("VAF2 == VAF2")
snp_df[:3]

### for hetSNP visualization:
    + snp_vis is specified by snp_plots

In [None]:
def plot_snp(
    df,
    plots=[],
    chroms="all",
    color_chroms=True,
    colormap="coolwarm_r",
    region="",
    label_size=12,
    figsize=(20, 4),
    ylim=(-1, 1),
):
    """
    Plot SNP tracks (e.g. VAF) over collapsed chromosome coordinates.

    df           -- data with columns Chr, FullExonPos, Pos and every column
                    referenced by the plot specs
    plots        -- list of dicts with keys "title", "plot_type" ("line" or
                    "scatter"), "data" (column name) and "plot_args".
                    For scatter plots a "c" entry in plot_args is read as a
                    column name of df, and an "s" entry given as a string is
                    read as a column name whose values are scaled to
                    value * 20 + 1.
    chroms       -- "all" or a collection of chromosome names to keep
    region       -- e.g. "chr7:95.8Mb-110Mb"; takes precedence over chroms
    color_chroms, colormap -- forwarded to make_color_chroms
    label_size, figsize, ylim -- base font size, figure size and y-limits

    Returns (fig, ax, df, chrom_df); the returned df is the sorted and
    filtered data with a "PlotPos" column inserted after the first four
    input columns.

    The plot specs passed in are not modified: column substitution for
    scatter plots happens on a copy of plot_args, so the same spec can be
    used for several calls.
    """

    # ### DATA MANGLING ##########
    # get cols for rearranging
    org_cols = list(df.columns)

    # sort the df
    df = sort_df(df)
    # reduce the df to the selected chromosomes
    if region:
        chrom, start, end = extract_pos(region)
        df = df.query("Chr == @chrom and @start <= Pos <= @end")
    elif chroms != "all":
        df = df.query("Chr in @chroms")

    # get the chrom_df for collapsing
    chrom_df = get_chrom_df(df)
    df = df.merge(chrom_df.loc[:, "dif"], on="Chr")
    df["PlotPos"] = df["FullExonPos"] - df["dif"]

    # rearrange the df as return value
    new_cols = org_cols[:4] + ["PlotPos"] + org_cols[4:]
    df = df.loc[:, new_cols]

    # ########################
    # ####### PLOTTING #######
    # plot the figure
    fig, ax = plt.subplots(figsize=figsize)

    # set the x-axis limits
    _ = ax.set_xlim(0, df["PlotPos"].max())

    # ####### plot the SNP graphs #######
    for plot in plots:
        if plot["plot_type"] == "line":
            _ = ax.plot(df["PlotPos"], df[plot["data"]], **plot["plot_args"])
        elif plot["plot_type"] == "scatter":
            # hijack plot_args on a copy, so the caller's dict keeps its
            # column names and stays reusable
            pa = dict(plot["plot_args"])
            if "c" in pa:
                pa["c"] = df[pa["c"]]
            if isinstance(pa.get("s"), str):
                pa["s"] = df[pa["s"]] * 20 + 1
            _ = ax.scatter(df["PlotPos"], df[plot["data"]], **pa)

    _ = ax.set_ylim(ylim)
    # add the color chroms
    _ = make_color_chroms(
        ax, chrom_df, color_chroms, ylimits=ax.get_ylim(), colormap=colormap
    )

    # ####### LABELS ###################

    # quick fix for one y-label
    _ = ax.set_ylabel(
        " / ".join([plot["title"] for plot in plots]), fontsize=1.25 * label_size
    )

    # ####### CHROM LABELS #############
    add_chrom_labels(ax, chrom_df, ax.get_ylim())

    # ###### X-AXIS ####################
    # set major ticks and grid for chrom

    ax = set_ticks(ax, df, chrom_df, label_size=label_size)
    # set helper line at 0.5
    _ = ax.axhline(y=0.5, c="k", lw=1.5, alpha=0.5, ls="--")

    # set chrom borders
    for m in chrom_df['min'][1:]:
        _ = ax.axvline(x=m, c="k", lw=0.5, alpha=0.5, ls="-")
    # return fig and ax for further plotting and return edited dataframe
    return fig, ax, df, chrom_df

In [None]:
# scatter track for the 'VAF2' column
vaf = {
    "title": "VAF",
    "plot_type": "scatter",  # ['line', 'scatter']
    "data": "VAF2",
    "plot_args": {"s": 2, "color": "black", "alpha": 1},
}

fig_params = {
    "figsize": (34, 4),
    "colormap": "coolwarm_r",
    "color_chroms": True,
    "ylim": (0, 1),
    "label_size": 13,
}

# genome-wide SNP plot (no region, all chromosomes)
fig, ax, df, chrom_df = plot_snp(
    snp_df,
    plots=[vaf],
    chroms="all",
    region='',
    **fig_params
)

# combine SNP and coverage figs

In [None]:
def plot_CNV(
    df,
    snp_plots=[],
    cov_plots=[],
    chroms="all",
    color_chroms=True,
    colormap="coolwarm_r",
    region="",
    label_size=12,
    figsize=(20, 4),
    ylims=dict(cov=(-1, 2.5), snp=(0, 1)),
):
    """
    Plot coverage (upper panel) and SNP data (lower panel) on a shared,
    collapsed chromosome x-axis.

    df           -- data with columns Chr, FullExonPos, Pos and every column
                    referenced by the plot specs
    snp_plots, cov_plots -- lists of dicts with keys "title", "plot_type"
                    ("line" or "scatter"), "data" (column name) and
                    "plot_args". For scatter plots a "c" entry in plot_args
                    is read as a column name of df, and an "s" entry given as
                    a string is read as a column name whose values are scaled
                    to value * 20 + 1.
    chroms       -- "all" or a collection of chromosome names to keep
    region       -- e.g. "chr7:95.8Mb-110Mb"; takes precedence over chroms
    color_chroms, colormap -- forwarded to make_color_chroms
    label_size, figsize -- base font size and figure size
    ylims        -- dict with the y-limits under the keys "cov" and "snp"

    Returns (fig, axes, df, chrom_df); axes[0] is the coverage panel and
    axes[1] the SNP panel. The returned df is the sorted and filtered data
    with a "PlotPos" column inserted after the first four input columns.

    The plot specs passed in are not modified: column substitution for
    scatter plots happens on a copy of plot_args.
    """

    def scatter_args(plot_args, data):
        # resolve column names in a copy of plot_args against `data`
        pa = dict(plot_args)
        if "c" in pa:
            pa["c"] = data[pa["c"]]
        if isinstance(pa.get("s"), str):
            pa["s"] = data[pa["s"]] * 20 + 1
        return pa

    # ### DATA MANGLING ##########
    # get cols for rearranging
    org_cols = list(df.columns)
    # sort the df
    df = sort_df(df)
    # reduce the df to the selected chromosomes
    if region:
        chrom, start, end = extract_pos(region)
        df = df.query("Chr == @chrom and @start <= Pos <= @end")
    elif chroms != "all":
        df = df.query("Chr in @chroms")

    # get the chrom_df for collapsing
    chrom_df = get_chrom_df(df)
    df = df.merge(chrom_df.loc[:, "dif"], on="Chr")
    df["PlotPos"] = df["FullExonPos"] - df["dif"]

    # rearrange the df as return value
    new_cols = org_cols[:4] + ["PlotPos"] + org_cols[4:]
    df = df.loc[:, new_cols]

    # ########################
    # ####### PLOTTING #######
    # two stacked panels, the lower one twice as high as the upper one
    fig, axes = plt.subplots(2, figsize=figsize, gridspec_kw={"height_ratios": [1, 2]})

    for ax in axes:
        # set the x-axis limits
        _ = ax.set_xlim(0, df["PlotPos"].max())

    # ######## plot COVERAGE
    for plot in cov_plots:
        if plot["plot_type"] == "line":
            _ = axes[0].plot(df["PlotPos"], df[plot["data"]], **plot["plot_args"])
        elif plot["plot_type"] == "scatter":
            _ = axes[0].scatter(
                df["PlotPos"], df[plot["data"]], **scatter_args(plot["plot_args"], df)
            )

    _ = axes[0].set_ylim(ylims["cov"])

    # ####### plot the SNP graphs #######
    for plot in snp_plots:
        if plot["plot_type"] == "line":
            _ = axes[1].plot(df["PlotPos"], df[plot["data"]], **plot["plot_args"])
        elif plot["plot_type"] == "scatter":
            _ = axes[1].scatter(
                df["PlotPos"], df[plot["data"]], **scatter_args(plot["plot_args"], df)
            )

    _ = axes[1].set_ylim(ylims["snp"])

    # add the color chroms
    for ax in axes:
        _ = make_color_chroms(
            ax, chrom_df, color_chroms, ylimits=ax.get_ylim(), colormap=colormap
        )

    # ####### LABELS ###################

    # fixed y-labels for the two panels
    _ = axes[0].set_ylabel("COV [log2r]", fontsize=1.25 * label_size)
    _ = axes[1].set_ylabel("BAF", fontsize=1.25 * label_size)

    # ###### X-AXIS ####################
    # chrom labels go on the SNP panel, positioned from that panel's y-limits
    add_chrom_labels(axes[1], chrom_df, axes[1].get_ylim())
    # set major ticks and grid for chrom
    axes[1] = set_ticks(axes[1], df, chrom_df, label_size=label_size)
    # remove tick labels for coverage plot
    axes[0].xaxis.set_tick_params(which="both", labelbottom=False)
    # set helper lines
    # cov_plot
    for line_pos in [-1, 0, 1]:
        _ = axes[0].axhline(y=line_pos, c="k", lw=1.5, alpha=0.5, ls="--")
    #  VAF plot
    _ = axes[1].axhline(y=0.5, c="k", lw=1.5, alpha=0.5, ls="--")

    # set chrom borders
    for m in chrom_df["min"][1:]:
        for ax in axes:
            _ = ax.axvline(x=m, c="k", lw=0.5, alpha=0.5, ls="-")
    # return fig and axes for further plotting and return edited dataframe
    return fig, axes, df, chrom_df

In [None]:
# scatter track for the 'log2ratio2' column
log2 = {
    "title": "L2R",
    "plot_type": "scatter",  # ['line', 'scatter']
    "data": "log2ratio2",
    "plot_args": {"linewidth": 0.3, "color": "black", "s": 1.8, "alpha": 1},
}

# line track for the 'log2ratio2_mean' column
log2mean = {
    "title": "L2R_mean",
    "plot_type": "line",  # ['line', 'scatter']
    "data": "log2ratio2_mean",
    "plot_args": {"linewidth": 1, "color": "yellow", "alpha": 0.7},
}

# scatter track for the 'VAF2' column
vaf = {
    "title": "VAF",
    "plot_type": "scatter",  # ['line', 'scatter']
    "data": "VAF2",
    "plot_args": {"s": 2, "color": "black", "alpha": 1},
}

fig_params = {
    "figsize": (34, 6),
    "colormap": "coolwarm_r",
    "color_chroms": True,
    "label_size": 13,
    "ylims": {"cov": (-1.2, 2), "snp": (0, 1)},
}
chroms = ['chr3', 'chr4', 'chr5', 'chr6','chr7', 'chr9', 'chr12', 'chr17']
# region presets; the call below passes its own literal region string
r20 = 'chr20:20.7Mb-34.5Mb'
r7 = 'chr7:95.8Mb-111Mb'
r17 = 'chr17:18.2Mb-25Mb'

# combined coverage / SNP figure for the whole of chr20
fig, axes, df, chrom_df = plot_CNV(
    cnv_df,
    cov_plots=[log2, log2mean],
    snp_plots=[vaf],
    chroms="all",
    region="chr20",
    **fig_params
)

# plot for SNP matching

In [1]:
def make_SNP_plot(sample, cnv_df):
    """
    Scatter VAF1 against VAF2 and report how often the two disagree.

    sample -- sample name, used in the figure title only
    cnv_df -- frame containing (at least) the columns VAF1 and VAF2; all
              columns whose name starts with "VAF" are considered

    The off rate is the percentage of rows with |VAF1 - VAF2| > 0.25 among
    the rows where at least one VAF column exceeds 0.1. If no row passes
    that filter the rate is undefined and NaN is returned instead of
    raising ZeroDivisionError.

    Returns (fig, off_ratio).
    """

    df = cnv_df.loc[:, [c for c in cnv_df.columns if c.startswith("VAF")]]
    fig, ax = plt.subplots(figsize=(10, 10))
    _ = ax.scatter(df['VAF1'], df['VAF2'], s=0.25, alpha=0.4)
    _ = ax.set_xlabel("NVAF", fontsize=20)
    _ = ax.set_ylabel("TVAF", fontsize=20)

    # calculate offRate
    # rows with a VAF above 0.1 in any VAF column
    df0 = df[(df > 0.1).any(axis=1)]
    n = len(df0.index)
    # rows where the two VAFs differ by more than 0.25
    df1 = df0[np.abs(df0["VAF1"] - df0["VAF2"]) > 0.25]
    m = len(df1.index)
    off_ratio = m / n * 100 if n else float("nan")
    _ = ax.set_title(
        f"{sample} |  Tumor vs Normal - offRate {round(off_ratio, 1)}", fontsize=30
    )
    return fig, off_ratio

In [2]:
# Reload the SNP table of the sample and draw the VAF1-vs-VAF2 scatter.
# Requires `output_path` from the setup cell at the top of the notebook.
sample = "03_A-B"
cnv_df = pd.read_csv(os.path.join(output_path, f"CNV/{sample}.cnv.snp.gz"), sep="\t", compression="gzip")
_ = make_SNP_plot(sample, cnv_df)

NameError: name 'output_path' is not defined

# visualize clusterings

### load example clustered file from DBscan clustering

In [None]:
# Read the tab-separated DBSCAN result "<sample>.dbscan.csv" from
# <output_path>/cluster.
sample = "01_A"
df = pd.read_csv(os.path.join(output_path, f'cluster/{sample}.dbscan.csv'), sep='\t')

In [None]:
# display the loaded cluster table
df

In [None]:
# scatter track for the 'log2ratio' column
log2 = {
    "title": "log2ratio",
    "plot_type": "scatter",  # ['line', 'scatter']
    "data": "log2ratio",
    "plot_args": {"linewidth": 0.3, "color": "black", "s": 0.2, "alpha": 0.7},
}

# line track for the 'log2ratiomean' column
log2mean = {
    "title": "rollinglog2ratio",
    "plot_type": "line",  # ['line', 'scatter']
    "data": "log2ratiomean",
    "plot_args": {"linewidth": 1, "color": "yellow", "alpha": 0.7},
}

# scatter track for the 'absVAF' column, colored by the 'dbscan' column
absvaf = {
    "title": "absVAF",
    "plot_type": "scatter",  # ['line', 'scatter']
    "data": "absVAF",
    "plot_args": {"s": 1, "c": "dbscan", "alpha": 0.7},
}

# line track for the 'deltaVAFvar' column
absvafmean = {
    "title": "deltaVAFvar",
    "plot_type": "line",  # ['line', 'scatter']
    "data": "deltaVAFvar",
    "plot_args": {"linewidth": 1, "color": "blue", "alpha": 0.7},
}

fig_params = {
    "figsize": (24, 8),
    "colormap": "coolwarm_r",
    "color_chroms": False,
    "ylim": (0, 1),
    "cov_offset": 0.1,  # how much log2ratio=0 is shifted above SNP-data
    "cov_height": 0.5,
    "label_size": 13,
}


### expand plot_snp functionality to showing clusters

In [None]:
def plot_snp(df, snp_plots=[], cov_plots=[], chroms='all', cov_offset=.25, cov_height=0.5, color_chroms=True, colormap='coolwarm_r', region='', label_size=12, figsize=(20,4), ylim=(-1,1)):
    '''
    Plot SNP tracks and, optionally, rescaled coverage tracks above them in
    one axes over collapsed chromosome coordinates.

    df           -- data with columns Chr, FullExonPos, Pos and every column
                    referenced by the plot specs
    snp_plots, cov_plots -- lists of dicts with keys "title", "plot_type"
                    ("line" or "scatter"), "data" (column name) and
                    "plot_args". For scatter plots a "c" entry in plot_args
                    is read as a column name of df, and an "s" entry given as
                    a string is read as a column name whose values are scaled
                    to value * 20 + 1.
    chroms       -- "all" or a collection of chromosome names to keep
    cov_offset, cov_height -- vertical offset and height of the coverage
                    band; when cov_plots is given, the upper y-limit is
                    raised by cov_offset + cov_height
    color_chroms, colormap -- forwarded to make_color_chroms
    region       -- e.g. "chr7:95.8Mb-110Mb"; takes precedence over chroms
    label_size, figsize, ylim -- base font size, figure size and y-limits

    Returns (fig, ax, df, chrom_df). In the returned df the columns named by
    cov_plots hold the rescaled values (value * scale_factor + offset), not
    the input values.

    NOTE: this redefines the plot_snp from the earlier cell; the keyword
    `plots` of that version is called `snp_plots` here.

    The plot specs passed in are not modified: column substitution for
    scatter plots happens on a copy of plot_args.
    '''

    # approx. max log2ratio (LOH to 8N), used to scale the coverage band
    MAXLOG2RATIO = 2.5
    #### DATA MANGLING ##########
    # get cols for rearranging
    org_cols = list(df.columns)

    # sort the df
    df = sort_df(df)
    # reduce the df to the selected chromosomes
    if region:
        chrom, start, end = extract_pos(region)
        df = df.query('Chr == @chrom and @start <= Pos <= @end')
    elif chroms != 'all':
        df = df.query('Chr in @chroms')

    # get the chrom_df for collapsing
    chrom_df = get_chrom_df(df)

    df = df.merge(chrom_df.loc[:, 'dif'], on='Chr')
    df['PlotPos'] = df['FullExonPos'] - df['dif']

    # rearrange the df as return value
    new_cols = org_cols[:4] + ['PlotPos'] + org_cols[4:]
    df = df.loc[:, new_cols]

    #########################
    ######## PLOTTING #######
    # plot the figure
    fig, ax = plt.subplots(figsize=figsize)

    # set the x-axis limits
    _ = ax.set_xlim(0, df['PlotPos'].max())


    ######## PLOT COV Data

    if len(cov_plots):
        scale_factor = cov_height / (MAXLOG2RATIO + 1)
        offset = 1 + scale_factor + cov_offset

        # make room for the coverage band above the SNP data
        ylim = (ylim[0], ylim[1] + cov_offset + cov_height)

        for plot in cov_plots:
            # normalize the coverage data into the band above the SNP data
            df[plot['data']] = df[plot['data']] * scale_factor + offset
            if plot['plot_type'] == 'line':
                _ = ax.plot(df['PlotPos'],df[plot['data']], **plot['plot_args'])
            elif plot['plot_type'] == 'scatter':
                # hijack plot_args on a copy, so the caller's dict keeps its
                # column names and stays reusable
                pa = dict(plot['plot_args'])
                if "c" in pa:
                    pa['c'] = df[pa['c']]
                if isinstance(pa.get('s'), str):
                    pa['s'] = df[pa['s']] * 20 + 1
                _ = ax.scatter(df['PlotPos'],df[plot['data']], **pa)

    ######## plot the SNP graphs #######
    for plot in snp_plots:
        if plot['plot_type'] == 'line':
            _ = ax.plot(df['PlotPos'],df[plot['data']], **plot['plot_args'])
        elif plot['plot_type'] == 'scatter':
            # hijack plot_args on a copy (see above)
            pa = dict(plot['plot_args'])
            if "c" in pa:
                pa['c'] = df[pa['c']]
            if isinstance(pa.get('s'), str):
                pa['s'] = df[pa['s']] * 20 + 1
            _ = ax.scatter(df['PlotPos'],df[plot['data']], **pa)

    _ = ax.set_ylim(ylim)
    # add the color chroms
    _ = make_color_chroms(ax, chrom_df, color_chroms, ylimits=ax.get_ylim(), colormap=colormap)


    ######## LABELS ###################
    # set the axis labels
    _ = ax.set_xlabel('genomic coords', fontsize=1.25*label_size);
    # quick fix for one y-label
    _ = ax.set_ylabel(' / '.join([plot['title'] for plot in snp_plots]), fontsize=1.25*label_size)

    ######## CHROM LABELS #############
    add_chrom_labels(ax, chrom_df, ax.get_ylim())

    ####### X-AXIS ####################
    # set major ticks and grid for chrom

    ax = set_ticks(ax, df, chrom_df, label_size=label_size)

    # return fig and ax for further plotting and return edited dataframe
    return fig, ax, df, chrom_df

In [None]:
# scatter track for 'absVAF'; color and marker size both come from the
# 'dbscan' column
absvaf = {
    "title": "absVAF",
    "plot_type": "scatter",  # ['line', 'scatter']
    "data": "absVAF",
    "plot_args": {"c": "dbscan", "s": "dbscan", "alpha": 0.5, "cmap": "rainbow"},
}

# SNP tracks with the coverage tracks drawn above them, restricted to chr17
_ = plot_snp(
    df,
    snp_plots=[absvaf, absvafmean],
    cov_plots=[log2, log2mean],
    chroms=chroms,
    region='chr17',
    **fig_params
)