# Find proteins outside the event (trans) that are commonly different between samples with and without the event

## Filtered: only look at proteins that interact with cis-affected proteins

## Setup

In [1]:
import cnvutils
import pandas as pd
import numpy as np
import os
import altair as alt

In [2]:
# Chromosome and arm are read from params.json; this notebook is fixed to trans effects
params = cnvutils.load_params(os.path.join("..", "data", "params.json"))
CHROMOSOME, ARM = params["CHROMOSOME"], params["ARM"]
CIS_OR_TRANS = "trans"

# Path to the filtered t-test results for this chromosome arm
read_path = os.path.join(
    "..",
    "data",
    f"{CHROMOSOME}{ARM}_{CIS_OR_TRANS}_ttest_filtered.tsv",
)

# Load the tab-separated table, then rename the "Name" column to "protein"
ttest_results = pd.read_csv(read_path, sep="\t")
ttest_results = ttest_results.rename(columns={"Name": "protein"})

In [3]:
# Display the loaded t-test results table
ttest_results

Unnamed: 0,cancer_type,protein,Database_ID,adj_p,change
0,brca,AAR2,NP_001258803.1,0.657920,0.219442
1,brca,ABCE1,NP_001035809.1,0.986577,0.042210
2,brca,ABCF3,NP_060828.2,0.888344,0.123245
3,brca,ACE2,NP_068576.1,0.963489,-0.433630
4,brca,ACTB,NP_001092.1,0.988513,-0.027390
5,brca,AGAP1,NP_001032208.1|NP_055729.2,0.990524,-0.012605
6,brca,AGPS,NP_003650.1,0.986577,0.063421
7,brca,AHCYL1,NP_006612.2|NP_001229602.1,0.957679,-0.095484
8,brca,AKAP1,NP_003479.1,0.878254,0.299219
9,brca,ALG2,NP_149078.1,0.957625,0.101727


## Select the proteins with a significant change, and take a detour to make some plots

In [4]:
# Adjusted p-value cutoff separating significant from non-significant results
P_CUTOFF = 0.05

# Rows at or below the cutoff: the significant hits, counted per cancer type
prots = ttest_results[ttest_results["adj_p"] <= P_CUTOFF].reset_index(drop=True)
prots_cts = prots.groupby("cancer_type").count()[["protein"]]

# Rows strictly above the cutoff, counted the same way for comparison
fail_prots = ttest_results[ttest_results["adj_p"] > P_CUTOFF].reset_index(drop=True)
fail_cts = fail_prots.groupby("cancer_type").count()[["protein"]]

# Tag each count table so the two groups can be told apart once stacked
prots_cts.insert(0, "count_type", "Significant difference")
fail_cts.insert(0, "count_type", "No significant difference")

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the tables the same way.
# After stacking, sort by cancer type and move it from the index into a column.
counts = pd.concat([prots_cts, fail_cts]).sort_index().reset_index(drop=False)

# Bar chart of protein counts, one facet column per cancer type,
# with the significant group drawn first and colored blue
sig_label = "Significant difference"
nonsig_label = "No significant difference"

# x only separates the two bars within a facet, so its title and labels are hidden
x_encoding = alt.X(
    "count_type",
    axis=alt.Axis(title=None, labels=False),
    sort=[sig_label],
)
y_encoding = alt.Y(
    "protein",
    axis=alt.Axis(title="Number of proteins"),
)
color_encoding = alt.Color(
    "count_type",
    title=None,
    sort=[sig_label],
    scale=alt.Scale(
        domain=[sig_label, nonsig_label],
        range=["#2d3da4", "#d1d1d1"],
    ),
)

bars = alt.Chart(counts).mark_bar().encode(
    x=x_encoding,
    y=y_encoding,
    color=color_encoding,
)
faceted = bars.facet(column=alt.Column("cancer_type", title=None))

# Final expression of the cell, so the notebook renders the chart
faceted.properties(
    title=f"Chr {CHROMOSOME}{ARM} {CIS_OR_TRANS} effects"
).configure_title(
    anchor="middle"
)

## Count the number of cancer types in which each protein was significantly different

In [5]:
def make_simple_change(change_val):
    """Reduce a numeric change to its direction.

    Returns 1 for a positive value, -1 for a negative value, and 0 for
    exactly zero. A value that compares as none of these (for example
    NaN) yields None.
    """
    if change_val > 0:
        return 1
    if change_val < 0:
        return -1
    if change_val == 0:
        return 0
    # Nothing matched (e.g. NaN): same result as falling off the end
    return None

# Add a column holding only the direction (1, 0, -1) of each protein's change
prots = prots.assign(
    simplified_change=lambda frame: frame["change"].apply(make_simple_change)
)

In [6]:
# Display the significant proteins with the added simplified_change column
prots

Unnamed: 0,cancer_type,protein,Database_ID,adj_p,change,simplified_change
0,brca,ATP6V1A,NP_001681.2,0.021451,-0.393656,-1
1,brca,ATP6V1D,NP_057078.1,0.007969,-0.456358,-1
2,colon,ATP6V1A,,0.001439,-0.216519,-1
3,colon,ATP6V1E1,,0.000379,-0.256006,-1
4,colon,ATP6V1F,,0.007178,-0.242232,-1
5,colon,ATP6V1G1,,0.000408,-0.287800,-1
6,colon,ATP6V1H,,0.001529,-0.215079,-1
7,colon,HSPA4L,,0.008031,-0.547894,-1
8,colon,NARS,,0.025721,-0.244841,-1
9,colon,RPS6KB2,,0.010908,-0.351681,-1


In [7]:
# One row per protein: the sorted, de-duplicated list of cancer types it appears in,
# and the mean of its simplified change direction across those rows
prots_summary = prots.groupby("protein").agg(
    cancers=("cancer_type", lambda x: x.sort_values().drop_duplicates(keep="first").tolist()),
    # The string "mean" selects pandas' built-in groupby mean. Passing np.mean gives the
    # same result but emits a FutureWarning on pandas >= 2.1.
    mean_simp_change=("simplified_change", "mean"),
)

# Order by number of cancers (descending), breaking ties by the concatenated
# cancer names (ascending). tmp_sort exists only as that tie-break key.
prots_summary = (
    prots_summary
    .assign(
        num_cancers=prots_summary["cancers"].apply(len),
        tmp_sort=prots_summary["cancers"].apply(lambda x: "".join(x)),
    )
    .sort_values(by=["num_cancers", "tmp_sort"], ascending=[False, True])
    .drop(columns="tmp_sort")
)

# Final expression of the cell, so the notebook displays the summary
prots_summary

Unnamed: 0_level_0,cancers,mean_simp_change,num_cancers
protein,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ATP6V1A,"[brca, colon, luad]",-1.0,3
ATP6V1E1,"[colon, lscc, luad]",-1.0,3
ATP6V1G1,"[colon, lscc, luad]",-1.0,3
ATP6V1H,"[colon, lscc, luad]",-1.0,3
ATP6V1D,"[brca, luad]",-1.0,2
ATP6V1F,"[colon, luad]",-1.0,2
ALG2,"[lscc, luad]",-1.0,2
ERGIC1,"[lscc, luad]",0.0,2
PTPN7,"[lscc, luad]",-1.0,2
HSPA4L,[colon],-1.0,1


In [8]:
# Number of proteins for each num_cancers value, highest value first
num_cancers_tally = prots_summary["num_cancers"].value_counts()
num_cancers_tally.sort_index(ascending=False)

3     4
2     5
1    72
Name: num_cancers, dtype: int64

## Save results

In [9]:
# Destination file for the per-protein summary
output_file = os.path.join(
    "..",
    "data",
    f"{CHROMOSOME}{ARM}_{CIS_OR_TRANS}_common_diff_filtered.tsv",
)

# Collapse each list of cancer names into one underscore-delimited string
cancers_joined = prots_summary["cancers"].apply("_".join)
prots_summary = prots_summary.assign(cancers=cancers_joined)

# Write the table as tab-separated text
prots_summary.to_csv(output_file, sep="\t")