In [None]:
from typing import Tuple, Dict, List

import numpy as np
import pandas as pd
import pathlib as pl
import scanpy as sc
import seaborn as sns

In [None]:
def get_high_cnv_metasig(diffCNV: pd.DataFrame, metasig: str, qval: float=0.05, threshold_perc: float=0.2) -> Tuple[pd.Series, pd.Series]:
    """
    Pick the regions that are gained / lost markedly more often inside one
    meta-signature than in the rest.

    Parameters
    ----------
    diffCNV : pandas.DataFrame
        Differential-CNV table. For the requested meta-signature it must hold
        the columns ``<metasig>_qvalues``, ``<metasig>_patients_gain``,
        ``<metasig>_patients_loss``, ``<metasig>_perc_gains``,
        ``<metasig>_rest_gains``, ``<metasig>_perc_losses`` and
        ``<metasig>_rest_losses``.
    metasig : str
        Column prefix identifying the meta-signature.
    qval : float
        Only rows whose q-value is strictly below this cut-off are kept.
    threshold_perc : float
        Minimum (inclusive) value of ``perc - rest`` for a region to be reported.

    Returns
    -------
    (high_gains, high_losses) : tuple of pandas.Series
        ``perc - rest`` differences for the retained regions, sorted in
        descending order and indexed by the row labels of ``diffCNV``.
    """
    # Restrict to the columns of this meta-signature, then to significant rows.
    belongs_to_metasig = diffCNV.columns.str.startswith(metasig)
    significant = diffCNV.loc[:, belongs_to_metasig]
    significant = significant.loc[significant[f"{metasig}_qvalues"] < qval]

    # Keep rows whose patient count for the event type is greater than one.
    recurrent_gain = significant.loc[significant[f"{metasig}_patients_gain"] > 1]
    recurrent_loss = significant.loc[significant[f"{metasig}_patients_loss"] > 1]

    # Excess frequency of the event in the meta-signature compared to the rest.
    gain_excess = recurrent_gain[f"{metasig}_perc_gains"].sub(recurrent_gain[f"{metasig}_rest_gains"])
    loss_excess = recurrent_loss[f"{metasig}_perc_losses"].sub(recurrent_loss[f"{metasig}_rest_losses"])

    gain_excess = gain_excess.sort_values(ascending=False)
    loss_excess = loss_excess.sort_values(ascending=False)

    return gain_excess.loc[gain_excess >= threshold_perc], loss_excess.loc[loss_excess >= threshold_perc]

In [None]:
def get_high_cnv(diffCNV: pd.DataFrame, metasigs: np.ndarray, qval: float = 0.05, threshold_perc: float = 0.25) -> Dict[str, List[pd.Series]]:
    """
    Run `get_high_cnv_metasig` for every meta-signature of one cancer type.

    Parameters
    ----------
    diffCNV : pandas.DataFrame
        Differential-CNV table, passed unchanged to `get_high_cnv_metasig`.
    metasigs : iterable of str
        Meta-signature names (column prefixes of ``diffCNV``).
    qval : float
        q-value cut-off forwarded to `get_high_cnv_metasig`.
    threshold_perc : float
        Minimum ``perc - rest`` difference forwarded to `get_high_cnv_metasig`.
        The default (0.25) is the value that used to be hard-coded here.

    Returns
    -------
    dict
        Maps each meta-signature to ``[high_gains, high_losses]``.
    """
    high_cnvs = {}
    for metasig in metasigs:
        high_gains, high_losses = get_high_cnv_metasig(
            diffCNV, metasig, qval=qval, threshold_perc=threshold_perc
        )
        # Position 0 holds the gains and position 1 the losses.
        high_cnvs[metasig] = [high_gains, high_losses]
    return high_cnvs

In [None]:
from matplotlib import pyplot as plt
from matplotlib.collections import BrokenBarHCollection
import pandas

##### CODE INSPIRED/TAKEN FROM https://www.biostars.org/p/147364/#147637

# Here's the function that we'll call for each dataframe of genomic regions
# (once per figure panel, see `create_chrom_fig`).  The rest of this cell
# preps the data and the layout for input to this function
#
def chromosome_collections(df, y_positions, height,  **kwargs):
    """
    Yields BrokenBarHCollection of features that can be added to an Axes
    object.

    Parameters
    ----------
    df : pandas.DataFrame
        Must at least have columns ['chrom', 'start', 'end', 'colors']. If no
        column 'width', it is calculated from start/end on a temporary copy;
        `df` itself is never modified.
    y_positions : dict
        Keys are chromosomes, values are y-value at which to anchor the
        BrokenBarHCollection. Chromosomes present in `df` but missing from
        this dict are skipped.
    height : float
        Height of each BrokenBarHCollection
    Additional kwargs are passed to BrokenBarHCollection

    Yields
    ------
    One BrokenBarHCollection per chromosome, in the order produced by
    ``df.groupby('chrom')``.

    NOTE(review): BrokenBarHCollection appears to be deprecated in recent
    Matplotlib releases -- confirm against the installed version.
    """
    # Work on a derived frame so the caller's DataFrame never gains a 'width'
    # column, even when this generator is only partially consumed.
    if 'width' in df.columns:
        regions = df
    else:
        regions = df.assign(width=df['end'] - df['start'])

    for chrom, group in regions.groupby('chrom'):
        if chrom not in y_positions:
            # No row reserved for this chromosome in the layout: nothing to draw.
            continue
        yrange = (y_positions[chrom], height)
        xranges = group[['start', 'width']].values

        yield BrokenBarHCollection(
            xranges, yrange, facecolors=group['colors'].tolist(), **kwargs)

from matplotlib.lines import Line2D

def get_chrom_df(cmetasigs: np.ndarray, diffCNV: pd.DataFrame, high_cnv: Dict[str, List[pd.Series]], gain_or_loss: str = "gain") -> Tuple[pd.DataFrame, list, list]:
    """
    Build the per-region colour table and legend handles for one figure panel.

    Parameters
    ----------
    cmetasigs : sequence of str
        Meta-signature names; the i-th name is drawn with the i-th colour of
        the current seaborn palette.
    diffCNV : pandas.DataFrame
        Table whose index labels have the form ``"<chrom>:<start>-<end>"``.
        Only the index is read; the frame is not modified.
    high_cnv : dict
        Maps each meta-signature to ``[high_gains, high_losses]``; the index of
        each Series lists the regions to highlight.
    gain_or_loss : {"gain", "loss"}
        Which of the two Series to use.

    Returns
    -------
    df : pandas.DataFrame
        Columns ``chrom``, ``start``, ``end``, ``colors`` with one row per row
        of ``diffCNV``. Regions not highlighted by any meta-signature are
        black; a region highlighted by several gets the colour of the last one
        in ``cmetasigs``.
    sel_metasig : list
        Meta-signatures with at least one highlighted region.
    custom_lines : list
        One ``Line2D`` legend handle per entry of ``sel_metasig``.
    """
    assert gain_or_loss in ["gain","loss"]

    gain_loss_map = {"gain": 0, "loss": 1}
    palette = sns.color_palette()
    colors = {cmetasigs[i]: palette[i] for i in range(len(cmetasigs))}
    background = (0, 0, 0)

    # Track the owning meta-signature per region in an object-dtype Series.
    # Writing string labels into a float column of zeros (the previous
    # approach) relies on a silent dtype change that newer pandas rejects.
    owner = pd.Series([None] * diffCNV.shape[0], index=diffCNV.index, dtype=object)
    sel_metasig = []
    custom_lines = []
    for metasig in cmetasigs:
        cnv = high_cnv[metasig][gain_loss_map[gain_or_loss]]
        if len(cnv) > 0:
            owner.loc[cnv.index] = metasig
            sel_metasig.append(metasig)
            custom_lines.append(Line2D([0], [0], color=colors[metasig], lw=4))

    region_colors = [background if label is None else colors[label] for label in owner]

    # Index labels look like "<chrom>:<start>-<end>".
    label_parts = diffCNV.index.str.split(":")
    chromosome = pd.Series(label_parts.str[0])
    positions = pd.Series(label_parts.str[1])
    bounds = positions.str.split("-")
    starts = bounds.str[0].astype(int)
    ends = bounds.str[1].astype(int)

    df = pd.DataFrame({
        "chrom": chromosome,
        "start": starts,
        "end": ends,
        "colors": pd.Series(region_colors, dtype=object),
    })

    return df, sel_metasig, custom_lines

def create_chrom_fig_info(chrom_height=1, chrom_spacing=1, chromosomes=None):
    """
    Compute the vertical layout of the chromosome ideograms.

    Parameters
    ----------
    chrom_height : number
        Height of each ideogram (default 1).
    chrom_spacing : number
        Spacing between consecutive ideograms (default 1).
    chromosomes : iterable of str, optional
        Chromosomes to lay out, listed top to bottom. Defaults to
        ``chr1`` ... ``chr22``.

    Returns
    -------
    chrom_ybase : dict
        Chromosome -> y-value of the bottom of its ideogram.
    chrom_height : number
        The ideogram height, returned unchanged.
    chrom_centers : dict
        Chromosome -> y-value of the centre of its ideogram (where the ytick
        labels go).
    chromosome_list : list of str
        The chromosomes in top-to-bottom order.
    """
    # Decide which chromosomes to use
    if chromosomes is None:
        chromosome_list = ['chr%s' % i for i in range(1, 23)]
    else:
        chromosome_list = list(chromosomes)

    chrom_ybase = {}
    chrom_centers = {}

    # Iterate in reverse so that items in the beginning of `chromosome_list`
    # get the largest y-values and therefore appear at the top of the plot
    ybase = 0
    for chrom in reversed(chromosome_list):
        chrom_ybase[chrom] = ybase
        chrom_centers[chrom] = ybase + chrom_height / 2.
        ybase += chrom_height + chrom_spacing

    return chrom_ybase, chrom_height, chrom_centers, chromosome_list

def create_chrom_fig(chrom_df: Dict[str, pd.DataFrame],
                     sel_metasigs: Dict[str, list],
                     custom_lines: Dict[str, list],
                     n_plots: Tuple[int, int]= (1, 4),
                     figsize: Tuple[int, int]=(20, 5),
                     layout=None):
    """
    Draw one chromosome-ideogram panel per entry of `chrom_df`.

    Parameters
    ----------
    chrom_df : dict
        Panel title -> region table with columns ``chrom``, ``start``, ``end``
        and ``colors`` (as consumed by `chromosome_collections`).
    sel_metasigs : dict
        Panel title -> meta-signature names to list in the legend.
    custom_lines : dict
        Panel title -> legend handles, parallel to ``sel_metasigs``.
    n_plots : (int, int)
        Number of subplot rows and columns.
    figsize : (int, int)
        Figure size passed to ``plt.subplots``.
    layout : tuple, optional
        ``(chrom_ybase, chrom_height, chrom_centers, chromosome_list)`` as
        returned by `create_chrom_fig_info`. When omitted, the layout is
        computed with ``create_chrom_fig_info()`` so the function no longer
        depends on module-level variables defined in a later cell.

    Returns
    -------
    matplotlib.figure.Figure
        The figure; axes without a panel are switched off.
    """
    if layout is None:
        layout = create_chrom_fig_info()
    chrom_ybase, chrom_height, chrom_centers, chromosome_list = layout

    # squeeze=False always returns a 2-D array of Axes, so flattening also
    # works for a single row, a single column or a single subplot.
    fig, ax = plt.subplots(n_plots[0], n_plots[1], figsize=figsize, squeeze=False)
    flatax = ax.flatten()

    ytick_positions = [chrom_centers[chrom] for chrom in chromosome_list]

    for i, k in enumerate(chrom_df):
        panel = flatax[i]
        for collection in chromosome_collections(chrom_df[k], chrom_ybase, chrom_height):
            panel.add_collection(collection)

        panel.set_yticks(ytick_positions)
        panel.set_yticklabels(chromosome_list)
        panel.axis('tight')
        if len(sel_metasigs[k])>0:
            panel.legend(custom_lines[k], np.char.replace(sel_metasigs[k], "metasig", "Meta-sig. "), frameon=False, loc="right")
        panel.spines.right.set_visible(False)
        panel.spines.top.set_visible(False)
        panel.set_title(k)

    # Hide the axes of the grid that did not receive a panel.
    for j in range(len(chrom_df),len(flatax)):
        flatax[j].axis("off")

    return fig


In [None]:
# Cohort identifiers analysed in this notebook (the keys of `cancer_type_mapping`).
cancer_types = [
    "crc",
    "crc_icms",
    "hgg",
    "gbm",
    "esophag",
    "scc_red",
    "breast",
]

In [None]:
# Root folder of the CanSig results (placeholder path -- adjust before running).
# The notebook imports the module as `import pathlib as pl`, so the bare name
# `pathlib` is undefined; the alias has to be used.
basedir = pl.Path("/path/to/where/all/the/cansig/results/are")

In [None]:
# One differential-CNV table per cancer type, read from
# <basedir>/<cancer type>/metasignatures/diff-cnvs.csv with the first column as index.
# Iterates `cancer_types` (the list defined above); `cancer_type` was never defined.
diffCNVs = {
    c: pd.read_csv(basedir / c / "metasignatures" / "diff-cnvs.csv", index_col=0)
    for c in cancer_types
}

In [None]:
# Meta-signature names per cancer type: the part of each column name before
# the first underscore, de-duplicated, with the "outlier" entry removed.
# Iterates `cancer_types`; the previously used name `cancer_type` was undefined.
metasigs = {}
for c in cancer_types:
    column_prefixes = diffCNVs[c].columns.str.split("_").str[0]
    metasigs[c] = np.setdiff1d(np.unique(column_prefixes), ["outlier"])

In [None]:
# Highlighted gains/losses per cancer type and meta-signature.
# Iterates `cancer_types`; the previously used name `cancer_type` was undefined.
high_cnv_pc = {}
for cancer in cancer_types:
    high_cnv_pc[cancer] = get_high_cnv(diffCNVs[cancer], metasigs[cancer])

In [None]:
# Display label for each cohort identifier.
cancer_type_mapping = {
    "crc": "CRC",
    "crc_icms": "CRC/iCMS",
    "hgg": "HGG",
    "gbm": "GBM",
    "scc_red": "SCC",
    "breast": "BRCA",
    "esophag": "ESCC",
}

# Panel keys of the form "<label> <gain|loss>": all gain panels first, then
# all loss panels, each in the order of `cancer_types`.
unique_combinations = []
for typ in ("gain", "loss"):
    unique_combinations.extend([f"{cancer_type_mapping[s]} {typ}" for s in cancer_types])

In [None]:
# Inverse lookup: display label -> cohort identifier.
reverse_mapping = {label: identifier for identifier, label in cancer_type_mapping.items()}

In [None]:
# For every "<label> <gain|loss>" key, build the region table, the list of
# meta-signatures shown and the matching legend handles.
chrom_dfs = {}
for c in unique_combinations:
    # Keys have the form "<label> <gain|loss>".
    ct, cnvt = c.split(" ")
    cancer_key = reverse_mapping[ct]
    cdf, sm, cl = get_chrom_df(
        cmetasigs=metasigs[cancer_key],
        diffCNV=diffCNVs[cancer_key],
        high_cnv=high_cnv_pc[cancer_key],
        gain_or_loss=cnvt,
    )
    chrom_dfs[c] = dict(chrom_df=cdf, sel_metasigs=sm, custom_lines=cl)

In [None]:
# Ideogram layout: base y-value per chromosome, ideogram height, centre
# y-value per chromosome and the ordered chromosome list.
(
    chrom_ybase,
    chrom_height,
    chrom_centers,
    chromosome_list,
) = create_chrom_fig_info()

In [None]:
# Gain panels: the entries of `chrom_dfs` whose key contains "gain".
gain_keys = [k for k in chrom_dfs if "gain" in k]
gain_cdfs = {k: chrom_dfs[k]["chrom_df"] for k in gain_keys}
gain_sms = {k: chrom_dfs[k]["sel_metasigs"] for k in gain_keys}
gain_cls = {k: chrom_dfs[k]["custom_lines"] for k in gain_keys}
fig = create_chrom_fig(chrom_df=gain_cdfs, sel_metasigs=gain_sms, custom_lines=gain_cls, n_plots=(3,3), figsize=(15,12))
# savefig does not create missing folders, so make sure the output directory exists.
pl.Path("figures").mkdir(parents=True, exist_ok=True)
fig.savefig("figures/diff_CNV_gain.svg",bbox_inches="tight")

In [None]:
# Loss panels: the entries of `chrom_dfs` whose key contains "loss".
loss_keys = [k for k in chrom_dfs if "loss" in k]
loss_cdfs = {k: chrom_dfs[k]["chrom_df"] for k in loss_keys}
loss_sms = {k: chrom_dfs[k]["sel_metasigs"] for k in loss_keys}
loss_cls = {k: chrom_dfs[k]["custom_lines"] for k in loss_keys}
fig = create_chrom_fig(chrom_df=loss_cdfs, sel_metasigs=loss_sms, custom_lines=loss_cls, n_plots=(3,3), figsize=(15,12))
# savefig does not create missing folders, so make sure the output directory exists.
pl.Path("figures").mkdir(parents=True, exist_ok=True)
fig.savefig("figures/diff_CNV_loss.svg",bbox_inches="tight")