In the TCGA survival analysis we obtained a significant correlation between T1236C and decreased overall survival. 
We cannot know if this is causation or simply correlation, but we can examine mutations correlated with T1236C and see if they could be the actual MDR1 regulators. 

In this notebook we check which TCGA mutations are highly correlated with T1236C. Are they located in MDR1 as well? 
Are they located in genes related to MDR1? 


## Imports

In [2]:
import pandas as pd
from Utils_MDR1 import get_muts_single_patient, variant_info
from tqdm import tqdm
import pickle
import os

## Main

In [None]:
''' For each patient with T1236C, get all its mutations and cache them on disk (one pickle per patient) '''

mut_id = 1 #T1236C

#get the list of patients with T1236C
patients_dict = pd.read_pickle("../Data/mutated_patients_dict.pickle")
patients_with_mut = patients_dict[variant_info[mut_id]["variant_name"]]

#a df that holds (at least) the columns "case_id" and "Cancer_Type"
patients_and_types = pd.read_pickle("../Data/TCGA_patients_and_cancer_types_df.pickle")

#build the patient -> cancer type lookup once, instead of scanning the whole df for every patient.
#keep="first" mirrors the previous ".values[0]" behaviour when a case_id appears more than once.
#NOTE: a patient missing from the table now raises KeyError (previously IndexError).
cancer_type_by_patient = (
    patients_and_types.drop_duplicates(subset="case_id", keep="first")
    .set_index("case_id")["Cancer_Type"]
    .to_dict()
)

#make sure the output folder exists, otherwise the first open(..., 'wb') fails on a fresh checkout
output_dir = "../Data/T1236C_patients_muts"
os.makedirs(output_dir, exist_ok=True)

for patient in tqdm(patients_with_mut): #iterate over patients with the current mutation
    cancer_type = cancer_type_by_patient[patient] #needed for the path for the rest of the mutations
    cur_patient_muts = get_muts_single_patient(patient, cancer_type) #all of the mutations of this patient
    #this process takes a long time so we save the results in a pickle, to only run it once.
    with open(f"{output_dir}/{patient}.pickle", 'wb') as f:
        pickle.dump(cur_patient_muts, f)
        

In [64]:
''' Concatenate mutations from all T1236C carriers '''

# folder holding one pickle per T1236C carrier, with all of that patient's TCGA mutations
path = "../Data/T1236C_patients_muts/"

# Read every per-patient pickle in the folder.
# - only "*.pickle" entries are read, so stray entries (e.g. ".ipynb_checkpoints", ".DS_Store") are ignored
# - sorted() makes the row order reproducible (os.listdir returns entries in arbitrary order)
# NOTE: these pickles are expected to be locally generated; do not point this at untrusted files.
per_patient_frames = [
    pd.read_pickle(f"{path}{filename}")
    for filename in sorted(os.listdir(path))
    if filename.endswith(".pickle")
]

# Concatenate once: calling pd.concat inside the loop re-copies the accumulated frame on every iteration.
# pd.concat raises on an empty list, so fall back to an empty frame when the folder has no pickles.
df_all_muts = pd.concat(per_patient_frames) if per_patient_frames else pd.DataFrame()


In [61]:
''' Build a per-mutation identifier, then count how many rows (T1236C carriers) share each mutation '''

# columns whose values, joined by ":", identify a mutation
cols_mut_id = ["Gene", "Chromosome", "Start_Position", "End_Position", "Reference_Allele", "Tumor_Seq_Allele2"]


def _row_to_mut_id(row):
    # stringify each identifying field of this row and join them with ":"
    return ":".join(str(row[col]) for col in cols_mut_id)


df_all_muts["mut_id"] = df_all_muts.apply(_row_to_mut_id, axis=1)

# occurrences of every mutation id, most frequent first
mut_id_counts = df_all_muts["mut_id"].value_counts()

# Drop the first (most frequent) entry, which is expected to be T1236C itself.
# NOTE(review): this relies on T1236C being strictly the most common mut_id; if another
# mutation ties with it, a different entry could be the one removed -- confirm.
shared_muts_value_counts = mut_id_counts.iloc[1:]

with open("../Data/muts_correlated_with_T1236C.pickle", "wb") as f:
    pickle.dump(shared_muts_value_counts, f)
