In [10]:
import pandas as pd
import pcprutils as ut
import numpy as np
import altair as alt

In [11]:
def get_resid(reg_df, prot_trans_df):
    """Compute the orthogonal distance of each point to its regression line.

    The two frames are inner-joined on ``["Gene", "Tissue"]``. For every
    merged row, the point ``(Transcriptomics, Proteomics)`` is projected
    perpendicularly onto the line ``y = m * x + b`` taken from that row.

    Parameters
    ----------
    reg_df : pandas.DataFrame
        Must contain the columns ``Gene``, ``Tissue``, ``m`` (slope) and
        ``b`` (intercept).
    prot_trans_df : pandas.DataFrame
        Must contain the columns ``Gene``, ``Tissue``, ``Transcriptomics``
        (used as x) and ``Proteomics`` (used as y).

    Returns
    -------
    pandas.DataFrame
        The merged frame with four added columns:

        * ``orth_resid`` -- unsigned perpendicular distance to the line,
        * ``intersect_x`` / ``intersect_y`` -- foot of the perpendicular,
        * ``above_reg_line`` -- True where the point lies strictly above
          the line (rows with missing values evaluate to False).
    """
    merged = prot_trans_df.merge(
        reg_df,
        on=["Gene", "Tissue"],
        how="inner"
    )

    x = merged["Transcriptomics"]
    y = merged["Proteomics"]
    m = merged["m"]
    b = merged["b"]

    # Closed-form projection onto y = m*x + b. Constructing the perpendicular
    # line explicitly needs its slope -1/m, which is infinite for a horizontal
    # regression line (m == 0) and turns every derived column into NaN. The
    # projection below is algebraically identical for m != 0 and stays finite
    # for m == 0 because the denominator m**2 + 1 is never zero.
    slope_norm_sq = m ** 2 + 1
    vert_resid = y - (m * x + b)  # signed vertical offset from the line

    int_x = x + m * vert_resid / slope_norm_sq
    int_y = m * int_x + b

    d = np.abs(vert_resid) / np.sqrt(slope_norm_sq)

    # y - int_y equals vert_resid / (m**2 + 1), so the sign of the vertical
    # offset decides which side of the line the point is on.
    above_line = vert_resid > 0

    merged = merged.assign(
        orth_resid=d,
        intersect_x=int_x,
        intersect_y=int_y,
        above_reg_line=above_line
    )

    return merged



In [12]:
def calculate_residuals(cancer_types):
    """Write one gzipped residuals table per cancer type.

    ``ut.load_prot_trans(cancer_types)`` is called once; its result is
    indexed by cancer type to obtain the table passed to ``get_resid``.
    For each entry, the regression parameters are read from
    ``<cancer_type>_regression.tsv`` in the working directory and the
    output of ``get_resid`` is written to ``<cancer_type>_residuals.tsv.gz``
    (tab-separated, gzip-compressed, without the index).

    Parameters
    ----------
    cancer_types : iterable of str
        Names used both as lookup keys and as file-name prefixes.
    """
    tables_by_cancer = ut.load_prot_trans(cancer_types)

    for cancer_type in cancer_types:
        regression_path = f'{cancer_type}_regression.tsv'
        residuals_path = f'{cancer_type}_residuals.tsv.gz'

        fits = pd.read_csv(regression_path, sep="\t")
        residuals = get_resid(fits, tables_by_cancer[cancer_type])
        residuals.to_csv(
            residuals_path,
            sep='\t',
            compression='gzip',
            index=False
        )

In [13]:
# Produce `<cancer_type>_residuals.tsv.gz` for each cancer type listed here;
# a matching `<cancer_type>_regression.tsv` must exist in the working directory.
calculate_residuals(["ccrcc", "endometrial", "hnscc", "lscc", "luad"])

                                          



                                                



                                         

