# Cis effects dotplot

## Load input files

In [1]:
import cnvutils as ut
import pandas as pd
import numpy as np
import altair as alt
import os
from toolz.curried import pipe

In [2]:
# Altair options
# Turn off Altair's max-rows check on the data passed to charts.
alt.data_transformers.disable_max_rows()

# Disabled alternative: a custom "json_dir" data transformer that would create
# `data_dir` if needed and write chart data to JSON files inside it. It is left
# commented out, so it is neither registered nor enabled.
# def json_dir(data, data_dir):
#     os.makedirs(data_dir, exist_ok=True)
#     return pipe(data, alt.to_json(filename=os.path.join(data_dir, "{prefix}-{hash}.{extension}")) )

# alt.data_transformers.register("json_dir", json_dir)
# alt.data_transformers.enable("json_dir", data_dir="plot_data")

DataTransformerRegistry.enable('default')

In [3]:
# Analysis parameters. CHROMOSOME and TRANS_OR_CIS are passed to
# ut.get_reshaped_ttest_results when the t-test tables are loaded.
CHROMOSOME = "8"
TRANS_OR_CIS = "cis"

# Cancer type labels.
cancer_types = ["brca", "colon", "hnscc", "lscc", "luad", "ovarian"]

### Get t-test proteomics results

In [4]:
# Load the t-test results for each arm of the chromosome, drop the
# Database_ID column, and tag every row with the CNV event it belongs to.
p = (
    ut.get_reshaped_ttest_results(CHROMOSOME, "p", TRANS_OR_CIS)
    .drop(columns="Database_ID")
    .assign(cnv_event="8p_loss")
)

q = (
    ut.get_reshaped_ttest_results(CHROMOSOME, "q", TRANS_OR_CIS)
    .drop(columns="Database_ID")
    .assign(cnv_event="8q_gain")
)

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
# pd.concat with ignore_index=True stacks the two tables and renumbers the
# rows 0..n-1, which is what append(...).reset_index(drop=True) produced.
prot = pd.concat([p, q], ignore_index=True)

prot

Unnamed: 0,cancer_type,protein,adj_p,change,cnv_event
0,brca,ADAMDEC1,6.978531e-01,-0.298616,8p_loss
1,brca,AGPAT5,2.144297e-03,-0.849746,8p_loss
2,brca,ANGPT2,5.032264e-01,0.258756,8p_loss
3,brca,ARHGEF10,4.226186e-04,-0.728021,8p_loss
4,brca,ASAH1,2.771828e-02,-0.711515,8p_loss
5,brca,ATP6V1B2,1.433931e-04,-0.408102,8p_loss
6,brca,BIN3,3.055856e-02,-0.556700,8p_loss
7,brca,BLK,1.155079e-01,0.441371,8p_loss
8,brca,BMP1,1.624506e-01,-0.489983,8p_loss
9,brca,BNIP3L,1.985276e-02,-0.891026,8p_loss


### Add locations

In [5]:
# Gene location table: move the index into columns, drop Database_ID, discard
# rows with any missing value and exact duplicate rows (keeping the first
# occurrence), then order by gene name.
locs = (
    ut.get_gene_locations()
    .reset_index()
    .drop(columns=["Database_ID"])
    .dropna()            # default how="any"
    .drop_duplicates()   # default keep="first"
    .sort_values("Name")
)

locs

Unnamed: 0,Name,chromosome,start_bp,end_bp,arm
25370,A1BG,19,58345178.0,58353492.0,q
71458,A1BG-AS1,19,58347718.0,58355455.0,q
49644,A1CF,10,50799409.0,50885675.0,q
2990,A2M,12,9067664.0,9116229.0,p
52644,A2M-AS1,12,9065163.0,9068689.0,p
14408,A2ML1,12,8822621.0,8887001.0,p
88822,A2ML1-AS1,12,8776219.0,8830947.0,p
76697,A2ML1-AS2,12,8819816.0,8820713.0,p
29096,A2MP1,12,9228533.0,9275817.0,p
9213,A3GALT2,1,33306766.0,33321098.0,p


In [6]:
# Attach gene locations to the t-test results. The inner join keeps only
# proteins whose name appears in `locs`. Rows are then ordered by cancer type
# and position, and the chromosome label is prefixed with "chr".
# NOTE: this rebinds `prot`, so the cell is meant to be run once per kernel.
prot = (
    prot
    .merge(locs, how="inner", left_on=["protein"], right_on=["Name"])
    .sort_values(by=["cancer_type", "chromosome", "arm", "start_bp"])
    .assign(chromosome=lambda merged: "chr" + merged["chromosome"])
)

In [7]:
# Display the merged table (t-test results joined with gene locations).
prot

Unnamed: 0,cancer_type,protein,adj_p,change,cnv_event,Name,chromosome,start_bp,end_bp,arm
343,brca,TDRP,0.556640,-0.210114,8p_loss,TDRP,chr8,489792.0,545781.0,p
123,brca,ERICH1,0.002312,-0.759474,8p_loss,ERICH1,chr8,614746.0,738106.0,p
17,brca,ARHGEF10,0.000423,-0.728021,8p_loss,ARHGEF10,chr8,1823926.0,1958641.0,p
151,brca,KBTBD11,0.729737,-0.097163,8p_loss,KBTBD11,chr8,1973677.0,2006936.0,p
213,brca,MYOM2,0.096691,-1.938151,8p_loss,MYOM2,chr8,2045046.0,2165552.0,p
12,brca,ANGPT2,0.503226,0.258756,8p_loss,ANGPT2,chr8,6499651.0,6563409.0,p
6,brca,AGPAT5,0.002144,-0.849746,8p_loss,AGPAT5,chr8,6708642.0,6761503.0,p
94,brca,DEFA4,0.410099,0.477351,8p_loss,DEFA4,chr8,6935820.0,6938306.0,p
89,brca,DEFA3,0.836135,0.135719,8p_loss,DEFA3,chr8,7015869.0,7018297.0,p
185,brca,MFHAS1,0.316798,0.274803,8p_loss,MFHAS1,chr8,8783354.0,8893630.0,p


In [8]:
# Per (cancer type, CNV event): how many proteins have adj_p <= 0.05, how many
# proteins there are in total, and the ratio of the two.
summ = (
    prot
    .groupby(["cancer_type", "cnv_event"])
    .agg(
        sig_ct=("adj_p", lambda pvals: (pvals <= 0.05).sum()),
        tot_ct=("adj_p", lambda pvals: pvals.size),
    )
    .assign(sig_prop=lambda counts: counts["sig_ct"] / counts["tot_ct"])
)
summ

Unnamed: 0_level_0,Unnamed: 1_level_0,sig_ct,tot_ct,sig_prop
cancer_type,cnv_event,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
brca,8p_loss,38.0,70.0,0.542857
brca,8q_gain,45.0,91.0,0.494505
colon,8p_loss,21.0,46.0,0.456522
colon,8q_gain,28.0,68.0,0.411765
hnscc,8p_loss,27.0,65.0,0.415385
hnscc,8q_gain,27.0,83.0,0.325301
lscc,8p_loss,30.0,67.0,0.447761
lscc,8q_gain,30.0,89.0,0.337079
luad,8p_loss,40.0,64.0,0.625
luad,8q_gain,37.0,89.0,0.41573


In [9]:
# Sanity check: recount the significant hnscc / 8p_loss proteins directly.
is_hnscc_8p_loss = (prot["cancer_type"] == "hnscc") & (prot["cnv_event"] == "8p_loss")
prot.loc[is_hnscc_8p_loss, "adj_p"].le(0.05).sum()

27

In [10]:
# Sanity check: recount the significant hnscc / 8q_gain proteins directly.
is_hnscc_8q_gain = (prot["cancer_type"] == "hnscc") & (prot["cnv_event"] == "8q_gain")
prot.loc[is_hnscc_8q_gain, "adj_p"].le(0.05).sum()

27

## Make plot

In [11]:
# Display the table that feeds the plot; `prot` has not been reassigned since
# the merge with gene locations.
prot

Unnamed: 0,cancer_type,protein,adj_p,change,cnv_event,Name,chromosome,start_bp,end_bp,arm
343,brca,TDRP,0.556640,-0.210114,8p_loss,TDRP,chr8,489792.0,545781.0,p
123,brca,ERICH1,0.002312,-0.759474,8p_loss,ERICH1,chr8,614746.0,738106.0,p
17,brca,ARHGEF10,0.000423,-0.728021,8p_loss,ARHGEF10,chr8,1823926.0,1958641.0,p
151,brca,KBTBD11,0.729737,-0.097163,8p_loss,KBTBD11,chr8,1973677.0,2006936.0,p
213,brca,MYOM2,0.096691,-1.938151,8p_loss,MYOM2,chr8,2045046.0,2165552.0,p
12,brca,ANGPT2,0.503226,0.258756,8p_loss,ANGPT2,chr8,6499651.0,6563409.0,p
6,brca,AGPAT5,0.002144,-0.849746,8p_loss,AGPAT5,chr8,6708642.0,6761503.0,p
94,brca,DEFA4,0.410099,0.477351,8p_loss,DEFA4,chr8,6935820.0,6938306.0,p
89,brca,DEFA3,0.836135,0.135719,8p_loss,DEFA3,chr8,7015869.0,7018297.0,p
185,brca,MFHAS1,0.316798,0.274803,8p_loss,MFHAS1,chr8,8783354.0,8893630.0,p


In [12]:
def pval_plot(df, title, group_col, val_col, color_col, dx, y=True, sig=0.05):
    """Build a dot plot of -log10 p-values with a significance cutoff line.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "chromosome", "arm", "start_bp" and "end_bp"
        (used to order the rows) as well as `group_col`, `val_col` and
        `color_col`.
    title : str
        Title applied to the returned chart.
    group_col : str
        Column placed on the x axis. The axis title is always "Protein".
    val_col : str
        Column holding the p-values; -log10 of it goes on the y axis.
    color_col : str
        Passed through as the colour encoding of the points.
    dx : number
        Horizontal offset of the cutoff label text.
    y : bool, default True
        If True, show the y axis and add the text label for the cutoff line.
        If False, hide the y-axis labels, ticks and title and omit the label.
    sig : float, default 0.05
        Significance cutoff; a rule is drawn at -log10(sig).

    Returns
    -------
    The layered Altair chart (points + cutoff rule, plus the label if `y`).
    """
    val_log_col = "neg_log_p"
    log_cutoff = -np.log10(sig)

    # Order rows by position so the x axis follows chromosome location.
    df = df.sort_values(by=["chromosome", "arm", "start_bp", "end_bp"])
    df = df.assign(**{val_log_col: -np.log10(df[val_col])})

    # The x-axis order must come from the column that is actually plotted
    # (previously hard-coded to "Name"). Each value is listed once, in order
    # of first appearance, instead of once per row.
    x_order = df[group_col].drop_duplicates().tolist()

    if y:
        chart_y = alt.Y(val_log_col, title=val_log_col)
    else:
        chart_y = alt.Y(
            val_log_col,
            axis=alt.Axis(labels=False, ticks=False, title=None),
        )

    chart = alt.Chart(df).mark_point().encode(
        x=alt.X(group_col, title="Protein", sort=x_order),
        y=chart_y,
        color=color_col,
    )

    # One-row frame that carries the cutoff height and its label.
    cutoff_df = pd.DataFrame({
        "y": [log_cutoff],
        "label": [f"-log({sig})"],
    })
    line = alt.Chart(cutoff_df).mark_rule(color="crimson").encode(y="y")

    layered = chart + line
    if y:
        # The label reuses the rule's data and y position.
        layered = layered + line.mark_text(align="right", dx=dx).encode(text="label")

    return layered.properties(title=title)

# One panel per CNV event, stacked vertically with a shared y scale. The dx
# values position each panel's cutoff label.
loss_panel = pval_plot(
    df=prot[prot["cnv_event"] == "8p_loss"],
    title="8p loss",
    group_col="Name",
    val_col="adj_p",
    color_col="cancer_type",
    dx=-755,
)
gain_panel = pval_plot(
    df=prot[prot["cnv_event"] == "8q_gain"],
    title="8q gain",
    group_col="Name",
    val_col="adj_p",
    color_col="cancer_type",
    dx=-955,
)

(
    alt.vconcat(loss_panel, gain_panel)
    .resolve_scale(y="shared")
    .configure_title(anchor="start", dx=70)
)