In [1]:
import pandas as pd
import pcprutils as ut
import numpy as np
import altair as alt

In [2]:
# Read the per-gene regression table from a tab-separated file. The
# `Gene`, `Tissue`, `m` and `b` columns are the ones `get_resid` consumes
# further down. The bare name on the last line displays the frame.
hn_reg = pd.read_csv("hnscc_regression.tsv", sep="\t")
hn_reg

Unnamed: 0,Tissue,Gene,m,b
0,Normal,AADAC,0.599738,17.448712
1,Normal,AAK1,-0.004513,24.396185
2,Normal,AARS2,0.045875,23.694298
3,Normal,AARSD1,1.581359,10.203151
4,Normal,AASDHPPT,0.166157,23.169844
...,...,...,...,...
6083,Tumor,ZSWIM8,0.342667,20.922590
6084,Tumor,ZW10,0.313533,21.579339
6085,Tumor,ZWINT,0.992880,8.764980
6086,Tumor,ZXDC,2.057887,-2.979253


In [3]:
# Load proteomics/transcriptomics data for the listed cancer types through
# the project helper. The result is indexed by cancer-type name in the next
# cell (`prot_trans["hnscc"]`); presumably it is a mapping keyed by the
# names passed here -- TODO confirm against `pcprutils.load_prot_trans`.
prot_trans = ut.load_prot_trans([
    "ccrcc",
    "endometrial",
    "hnscc",
    "lscc",
    "luad",
])

                                          



                                                



                                         



In [4]:
# Keep only the HNSCC entry; it is the frame paired with `hn_reg` below.
hn_prot_trans = prot_trans["hnscc"]

In [5]:
def get_resid(reg_df, prot_trans_df):
    """Attach orthogonal residuals to every protein/transcript observation.

    Each row of ``prot_trans_df`` is paired (inner join on ``Gene`` and
    ``Tissue``) with the regression line ``y = m * x + b`` from ``reg_df``.
    The observed point ``(Transcriptomics, Proteomics)`` is then projected
    perpendicularly onto that line.

    Parameters
    ----------
    reg_df : pandas.DataFrame
        Must contain ``Gene``, ``Tissue``, ``m`` (slope) and ``b``
        (intercept).
    prot_trans_df : pandas.DataFrame
        Must contain ``Gene``, ``Tissue``, ``Transcriptomics`` (used as x)
        and ``Proteomics`` (used as y).

    Returns
    -------
    pandas.DataFrame
        The merged frame with three extra columns: ``intersect_x`` and
        ``intersect_y`` (foot of the perpendicular on the regression line)
        and ``orth_resid`` (unsigned distance from the observation to that
        foot). Rows without a matching ``Gene``/``Tissue`` pair in both
        inputs are dropped by the inner join.
    """
    merged = prot_trans_df.merge(
        reg_df,
        on=["Gene", "Tissue"],
        how="inner",
    )

    x = merged["Transcriptomics"]
    y = merged["Proteomics"]
    m = merged["m"]
    b = merged["b"]

    # Closed-form foot of the perpendicular from (x, y) onto y = m*x + b.
    # Solving m*X + b == y - (X - x)/m for X and multiplying through by m
    # gives the expression below, which never divides by the slope, so a
    # flat regression line (m == 0) yields finite values instead of NaN.
    int_x = (x + m * (y - b)) / (1 + m ** 2)
    int_y = m * int_x + b

    # Euclidean distance between the observation and its projection.
    d = np.sqrt((int_x - x) ** 2 + (int_y - y) ** 2)

    return merged.assign(
        orth_resid=d,
        intersect_x=int_x,
        intersect_y=int_y,
    )

# Compute orthogonal residuals for every HNSCC observation and display them.
hn_resid = get_resid(hn_reg, hn_prot_trans)
hn_resid

Unnamed: 0,Patient_ID,Gene,Proteomics,Tissue,Transcriptomics,m,b,orth_resid,intersect_x,intersect_y
0,C3L-00977,AADAC,21.413714,Tumor,5.66,0.306844,19.847916,0.163416,5.612063,21.569941
1,C3L-00987,AADAC,24.530248,Tumor,11.21,0.306844,19.847916,1.187949,11.558479,23.394561
2,C3L-00994,AADAC,21.295667,Tumor,2.52,0.306844,19.847916,0.644832,2.709158,20.679203
3,C3L-00995,AADAC,19.727526,Tumor,3.11,0.306844,19.847916,1.027395,2.808619,20.709722
4,C3L-00997,AADAC,21.636534,Tumor,5.61,0.306844,19.847916,0.064268,5.628853,21.575093
...,...,...,...,...,...,...,...,...,...,...
442689,C3N-03876.N,ZYG11B,22.725237,Normal,11.62,0.614547,15.486016,0.083652,11.663799,22.653967
442690,C3N-03878.N,ZYG11B,22.773351,Normal,11.71,0.614547,15.486016,0.077523,11.750589,22.707304
442691,C3N-03888.N,ZYG11B,22.733180,Normal,11.51,0.614547,15.486016,0.148014,11.587497,22.607076
442692,C3N-03928.N,ZYG11B,22.732640,Normal,11.97,0.614547,15.486016,0.093293,11.921153,22.812123


In [6]:
def plot_tissue_residuals(resid_df, gene, tissue):
    """Draw one tissue's regression line with per-patient residual segments.

    Parameters
    ----------
    resid_df : pandas.DataFrame
        Frame with the columns read here: ``Gene``, ``Tissue``,
        ``Patient_ID``, ``Transcriptomics``, ``Proteomics``, ``m``, ``b``,
        ``orth_resid``, ``intersect_x`` and ``intersect_y``.
    gene : str
        Value matched against the ``Gene`` column.
    tissue : str
        Value matched against the ``Tissue`` column. ``"Normal"`` is drawn
        in blue (``#5778a4``); anything else in orange (``#e49444``).

    Returns
    -------
    tuple
        ``(chart, tran_rng, prot_rng)`` where ``chart`` layers the
        regression line with, for each patient, a segment from the observed
        point to its projection on the line plus point marks at both ends.
        ``tran_rng`` and ``prot_rng`` are ``max - min`` of the selected
        ``Transcriptomics`` and ``Proteomics`` values.

    Raises
    ------
    ValueError
        If no row matches ``gene`` and ``tissue``.
    AssertionError
        If the selection carries more than one distinct ``m`` or ``b``.
    """
    df = resid_df[(resid_df["Gene"] == gene) & (resid_df["Tissue"] == tissue)]
    if df.empty:
        # Fail with a readable message instead of a NaN-range error below.
        raise ValueError(
            f"no residual rows for gene {gene!r} in tissue {tissue!r}"
        )

    tran_min = df["Transcriptomics"].min()
    tran_max = df["Transcriptomics"].max()
    tran_rng = tran_max - tran_min

    prot_min = df["Proteomics"].min()
    prot_max = df["Proteomics"].max()
    prot_rng = prot_max - prot_min

    color_hex = "#5778a4" if tissue == "Normal" else "#e49444"

    # A single gene/tissue selection must map to exactly one regression line.
    all_m = df["m"].drop_duplicates(keep="first")
    assert all_m.shape[0] == 1, "expected exactly one slope for the selection"
    m = all_m.iloc[0]

    all_b = df["b"].drop_duplicates(keep="first")
    assert all_b.shape[0] == 1, "expected exactly one intercept for the selection"
    b = all_b.iloc[0]

    x_scale = alt.Scale(domain=[tran_min, tran_max])
    y_scale = alt.Scale(domain=[prot_min, prot_max])

    # A straight line needs only its two endpoints; using the exact domain
    # bounds keeps it from running past the x-axis domain.
    line_x = np.array([tran_min, tran_max])
    reg_line_df = pd.DataFrame({
        "x": line_x,
        "y": m * line_x + b,
    })

    reg_line = alt.Chart(reg_line_df).mark_line(color=color_hex).encode(
        x=alt.X("x", scale=x_scale),
        y=alt.Y("y", scale=y_scale),
    )

    # One record per patient (first occurrence), taken in a single pass
    # instead of re-filtering the frame for every field of every patient.
    needed = [
        "Patient_ID",
        "Transcriptomics",
        "Proteomics",
        "intersect_x",
        "intersect_y",
        "orth_resid",
    ]
    first_rows = df.drop_duplicates(subset="Patient_ID", keep="first")[needed]

    layers = [reg_line]
    for rec in first_rows.to_dict("records"):
        # Two rows: the observed point, then its projection on the line.
        pt_df = pd.DataFrame({
            "x": [rec["Transcriptomics"], rec["intersect_x"]],
            "y": [rec["Proteomics"], rec["intersect_y"]],
            "resid": [rec["orth_resid"]] * 2,
            "Patient_ID": [rec["Patient_ID"]] * 2,
        })

        pt_base = alt.Chart(pt_df).encode(
            x=alt.X("x", scale=x_scale),
            y=alt.Y("y", scale=y_scale),
            tooltip=["Patient_ID", "resid"],
        )

        layers.append(pt_base.mark_line(color=color_hex))
        layers.append(pt_base.mark_point(color=color_hex))

    return alt.layer(*layers), tran_rng, prot_rng
    

def plot_residuals(resid_df, gene, px_per_unit=75):
    """Overlay the Tumor and Normal residual plots for one gene.

    Parameters
    ----------
    resid_df : pandas.DataFrame
        Residual frame passed through to ``plot_tissue_residuals``.
    gene : str
        Gene to plot; both the ``"Tumor"`` and ``"Normal"`` tissues are drawn.
    px_per_unit : float, optional
        Pixels allotted per unit of data range on each axis (default 75).

    Returns
    -------
    altair.LayerChart
        The two tissue charts layered with shared x and y scales. Width and
        height are ``px_per_unit`` times the larger of the two tissues'
        ranges on the corresponding axis.
    """
    tchart, txr, tyr = plot_tissue_residuals(resid_df, gene, "Tumor")
    nchart, nxr, nyr = plot_tissue_residuals(resid_df, gene, "Normal")

    # Size each axis by whichever tissue spans more of it, so neither
    # tissue's data is squeezed into a canvas sized only for the other.
    x_rng = max(txr, nxr)
    y_rng = max(tyr, nyr)

    return alt.layer(tchart, nchart).properties(
        width=x_rng * px_per_unit,
        height=y_rng * px_per_unit,
    ).resolve_scale(
        x="shared",
        y="shared"
    )

# Render the combined Tumor/Normal residual plot for the AADAC gene.
plot_residuals(hn_resid, "AADAC")