In [1]:
import pandas as pd
import pcprutils as ut
import numpy as np
import altair as alt

In [2]:
# Load the HNSCC regression table from a tab-separated file. The displayed
# columns are Tissue, Gene, m and b; m and b are later used as the slope and
# intercept of a per-gene, per-tissue line (see get_resid).
hn_reg = pd.read_csv("hnscc_regression.tsv", sep="\t")
hn_reg

Unnamed: 0,Tissue,Gene,m,b
0,Normal,AADAC,0.599738,17.448712
1,Normal,AAK1,-0.004513,24.396185
2,Normal,AARS2,0.045875,23.694298
3,Normal,AARSD1,1.581359,10.203151
4,Normal,AASDHPPT,0.166157,23.169844
5,Normal,AASS,1.021847,13.293912
6,Normal,ABCA12,0.271436,17.487236
7,Normal,ABCA8,0.524418,17.742629
8,Normal,ABCC5,0.288910,19.161421
9,Normal,ABCD1,0.532212,19.230806


In [3]:
# Load the proteomics/transcriptomics tables for five datasets through the
# project helper. Judging by the lookup in the following cell, the result can
# be indexed by dataset name -- NOTE(review): confirm against pcprutils.
prot_trans = ut.load_prot_trans([
    "ccrcc",
    "endometrial",
    "hnscc",
    "lscc",
    "luad",
])

                                          



                                                



                                         



In [4]:
# Pick out the HNSCC entry from the loaded tables.
hn_prot_trans = prot_trans["hnscc"]

In [12]:
def get_resid(reg_df, prot_trans_df):
    """Attach orthogonal residuals to every sample that has a regression fit.

    Each row of ``prot_trans_df`` is matched (inner join on ``Gene`` and
    ``Tissue``) with the line ``y = m * x + b`` stored in ``reg_df``, where
    ``x`` is ``Transcriptomics`` and ``y`` is ``Proteomics``. The sample is
    then projected perpendicularly onto that line.

    Parameters
    ----------
    reg_df : pandas.DataFrame
        Must contain the columns ``Gene``, ``Tissue``, ``m`` and ``b``.
    prot_trans_df : pandas.DataFrame
        Must contain the columns ``Gene``, ``Tissue``, ``Transcriptomics``
        and ``Proteomics``.

    Returns
    -------
    pandas.DataFrame
        The merged frame with three added columns:
        ``intersect_x`` / ``intersect_y`` (foot of the perpendicular on the
        regression line) and ``orth_resid`` (non-negative distance from the
        sample to that point). Rows without a match in ``reg_df`` are dropped
        by the inner join.
    """
    merged = prot_trans_df.merge(
        reg_df,
        on=["Gene", "Tissue"],
        how="inner",
    )

    x = merged["Transcriptomics"]
    y = merged["Proteomics"]
    m = merged["m"]
    b = merged["b"]

    # Closed-form foot of the perpendicular from (x, y) onto y = m*x + b.
    # This equals intersecting with the line of slope -1/m through the sample,
    # but never divides by m, so a flat fit (m == 0) yields a finite result
    # instead of NaN.
    int_x = (x + m * (y - b)) / (m ** 2 + 1)
    int_y = m * int_x + b

    d = np.sqrt((int_x - x) ** 2 + (int_y - y) ** 2)

    return merged.assign(
        orth_resid=d,
        intersect_x=int_x,
        intersect_y=int_y,
    )

# Compute orthogonal residuals for the HNSCC samples that have a matching
# regression row, then display the result.
hn_resid = get_resid(hn_reg, hn_prot_trans)
hn_resid

Unnamed: 0,Patient_ID,Gene,Proteomics,Tissue,Transcriptomics,m,b,orth_resid,intersect_x,intersect_y
0,C3L-00977,AADAC,21.413714,Tumor,5.66,0.306844,19.847916,0.163416,5.612063,21.569941
1,C3L-00987,AADAC,24.530248,Tumor,11.21,0.306844,19.847916,1.187949,11.558479,23.394561
2,C3L-00994,AADAC,21.295667,Tumor,2.52,0.306844,19.847916,0.644832,2.709158,20.679203
3,C3L-00995,AADAC,19.727526,Tumor,3.11,0.306844,19.847916,1.027395,2.808619,20.709722
4,C3L-00997,AADAC,21.636534,Tumor,5.61,0.306844,19.847916,0.064268,5.628853,21.575093
5,C3L-00999,AADAC,20.236512,Tumor,6.08,0.306844,19.847916,1.412034,5.665788,21.586426
6,C3L-01138,AADAC,22.001161,Tumor,3.85,0.306844,19.847916,0.929141,4.122558,21.112896
7,C3L-01237,AADAC,22.549388,Tumor,7.43,0.306844,19.847916,0.403076,7.548240,22.164045
8,C3L-02617,AADAC,21.912340,Tumor,4.26,0.306844,19.847916,0.723956,4.472369,21.220233
9,C3L-02621,AADAC,22.075945,Tumor,3.96,0.306844,19.847916,0.968367,4.244065,21.150180


In [23]:
def plot_residuals(resid_df, gene, tissue):
    """Plot one gene's samples, its regression line and the orthogonal residuals.

    Parameters
    ----------
    resid_df : pandas.DataFrame
        Frame with the columns ``Gene``, ``Tissue``, ``Patient_ID``,
        ``Transcriptomics``, ``Proteomics``, ``m``, ``b``, ``intersect_x``
        and ``intersect_y`` (the shape produced by ``get_resid``).
    gene : str
        Value of ``Gene`` to select.
    tissue : str
        Value of ``Tissue`` to select.

    Returns
    -------
    altair.LayerChart
        A 500x500 chart with three layers: the regression line, one segment
        per patient joining the observed point to its projection on the
        line, and point marks at both ends of every segment.

    Raises
    ------
    ValueError
        If no row matches ``gene`` and ``tissue``.
    AssertionError
        If the selected rows carry more than one distinct ``m`` or ``b``.
    """
    df = resid_df[(resid_df["Gene"] == gene) & (resid_df["Tissue"] == tissue)]
    if df.empty:
        raise ValueError(
            f"no rows for gene {gene!r} in tissue {tissue!r}"
        )

    # Both axes share one domain so that a perpendicular segment is drawn
    # perpendicular on screen as well.
    all_min = min(df["Transcriptomics"].min(), df["Proteomics"].min())
    all_max = max(df["Transcriptomics"].max(), df["Proteomics"].max())

    all_m = df["m"].drop_duplicates(keep="first")
    assert all_m.shape[0] == 1, "expected a single slope for one gene/tissue"
    m = all_m.iloc[0]

    all_b = df["b"].drop_duplicates(keep="first")
    assert all_b.shape[0] == 1, "expected a single intercept for one gene/tissue"
    b = all_b.iloc[0]

    x_enc = alt.X(
        "x",
        title="Transcriptomics",
        scale=alt.Scale(domain=[all_min, all_max]),
    )
    y_enc = alt.Y(
        "y",
        title="Proteomics",
        scale=alt.Scale(domain=[all_min, all_max]),
    )

    # A straight line only needs its two endpoints. linspace includes the
    # upper bound, whereas arange with a computed step stops short of it.
    xs = np.linspace(all_min, all_max, 2)
    reg_line_df = pd.DataFrame({
        "x": xs,
        "y": m * xs + b,
    })
    reg_line = alt.Chart(reg_line_df).mark_line().encode(x=x_enc, y=y_enc)

    # One row per patient (first occurrence), reshaped to long form:
    # two (x, y) rows per patient -- the observed point and its projection.
    per_patient = df.drop_duplicates(subset="Patient_ID", keep="first")
    observed = per_patient[["Patient_ID", "Transcriptomics", "Proteomics"]].rename(
        columns={"Transcriptomics": "x", "Proteomics": "y"}
    )
    projected = per_patient[["Patient_ID", "intersect_x", "intersect_y"]].rename(
        columns={"intersect_x": "x", "intersect_y": "y"}
    )
    segments_df = pd.concat([observed, projected], ignore_index=True)

    seg_base = alt.Chart(segments_df).encode(x=x_enc, y=y_enc)
    # `detail` groups the rows by patient without mapping to a visual
    # property, so each patient gets its own two-point line in one layer.
    segments = seg_base.mark_line().encode(detail="Patient_ID:N")
    points = seg_base.mark_point()

    return alt.layer(
        reg_line,
        segments,
        points,
    ).properties(
        width=500,
        height=500,
        title=f"{gene} ({tissue})",
    )
    
# Example: residual plot for the AADAC gene in tumor samples.
plot_residuals(hn_resid, "AADAC", "Tumor")