# ARID1A Mutation Trans Effect on Phosphoproteomics

This notebook analyzes the trans effect of ARID1A mutation on the phosphoproteomics of its interacting proteins, and of all other proteins, in endometrial, colon, and ovarian cancer.

### Library Imports

In [1]:
import pandas as pd
import numpy as np
import scipy.stats

import warnings
warnings.filterwarnings("ignore")

import cptac
import cptac.algorithms as al

# One cptac dataset object per cancer type. The analysis cells below call
# `join_omics_to_mutations` / `get_proteomics` on these three handles.
en = cptac.Endometrial()
co = cptac.Colon()
ov = cptac.Ovarian()

                                    

### Select Gene

In [2]:
# Gene whose mutation status splits samples into the Mutated / Wildtype groups
# in every comparison below; also used as the prefix of the results CSV name.
gene = "ARID1A"

### Investigate Proteomics, Phosphoproteomics, Acetylproteomics, or Transcriptomics

In [3]:
# Name of the omics table to test; passed as `omics_df_name` to
# `join_omics_to_mutations` in every analysis cell. Leave exactly one
# assignment uncommented.
#omics = "proteomics"
#omics = "transcriptomics"
omics = "phosphoproteomics"
#omics = "acetylproteomics"

### Track all significant comparisons in a DataFrame

In [4]:
# Running table of every significant comparison found in this notebook.
# It starts empty; each analysis cell appends to it through
# `add_to_all_significant_comparisons`, and the last cell writes it to CSV.
all_significant_comparisons = pd.DataFrame(columns=['Cancer_Type', 'Gene', 'Comparison','Interacting_Protein','P_Value'])

In [5]:
def add_to_all_significant_comparisons(df, cancer, interacting, all_sig_comp, gene_name=None):
    """Append one batch of significant comparisons to the running results table.

    Parameters
    ----------
    df : pandas.DataFrame
        Batch of results to add (in this notebook, the frame returned by
        ``al.wrap_ttest``). It is NOT modified; the annotation columns are
        added to a copy.
    cancer : str
        Value written to the ``Cancer_Type`` column for every row of ``df``.
    interacting : bool
        Value written to the ``Interacting_Protein`` column for every row.
    all_sig_comp : pandas.DataFrame
        Table accumulated so far. It is not modified either.
    gene_name : str, optional
        Value written to the ``Gene`` column. When omitted, the notebook-level
        ``gene`` variable is used, which is what the original function always did.

    Returns
    -------
    pandas.DataFrame
        ``all_sig_comp`` followed by the annotated rows of ``df``. Row index
        labels of both inputs are kept as they are.
    """
    if gene_name is None:
        # Fall back to the notebook-wide gene selection.
        gene_name = gene

    # `assign` returns a new frame, so the caller's `df` keeps its original columns.
    annotated = df.assign(Gene=gene_name, Cancer_Type=cancer, Interacting_Protein=interacting)

    return pd.concat([all_sig_comp, annotated], sort=False)

# Interacting Proteins: Phosphoproteomics

### Generate interacting protein list

In [6]:
# Look up the proteins that cptac reports as interacting with the selected gene.
interacting_proteins = al.get_interacting_proteins(gene)

# Echo the list, one protein per line, under a header.
print("Interacting Proteins:")
for protein_name in interacting_proteins:
    print(protein_name)

Interacting Proteins:
SMARCC1
SUPT16H
CCND1
SMARCD3
CREBBP
SMARCB1
KAT2B
DNMT3A
SMARCA2
SMARCC2
KMT2D
ARID1A
SMARCE1
PHF10
BAZ1B
ARID1B
NF1
CDC5L
SMARCD1
SMARCD2
SMARCA4
ACTL6A
BCL7B
SS18
DPF2
DPF3
KLF1
GATA1
ING1
PGR
BCL7C
HIC1


## Endometrial

### Test for significant comparisons in any of the interacting proteins

In [7]:
# Join the gene's mutation calls with the selected omics columns for the
# interacting proteins and keep tumor samples only. `.copy()` detaches the
# filtered frame so the column added below is not a write onto a slice.
protdf = en.join_omics_to_mutations(mutations_genes=[gene], omics_df_name=omics, omics_genes=interacting_proteins)
protdf = protdf.loc[protdf['Sample_Status'] == 'Tumor'].copy()

# Binary grouping column for the t-tests: every status other than
# 'Wildtype_Tumor' counts as 'Mutated'. Vectorized instead of a row-by-row loop.
mutation_status_col = gene + "_Mutation_Status"
protdf['Label'] = np.where(protdf[mutation_status_col] != 'Wildtype_Tumor', 'Mutated', 'Wildtype')

# Drop the mutation/sample metadata so only omics columns plus 'Label' remain.
protdf = protdf.drop(columns=[gene + "_Mutation", gene + "_Location", mutation_status_col, "Sample_Status"])

# Every remaining column except the grouping column is compared.
col_list = list(protdf.columns)
col_list.remove('Label')

print("Doing t-test comparisons\n")

# The `is not None` check below is how this notebook detects that
# wrap_ttest found something significant.
wrap_results = al.wrap_ttest(protdf, 'Label', col_list)

# Print the hits and record them in the notebook-wide results table.
if wrap_results is not None:
    print(wrap_results)
    print("\n\n")

    all_significant_comparisons = add_to_all_significant_comparisons(wrap_results, "Endometrial", True, all_significant_comparisons)


SMARCD3 did not match any columns in phosphoproteomics dataframe. SMARCD3_phosphoproteomics column inserted, but filled with NaN.
SMARCB1 did not match any columns in phosphoproteomics dataframe. SMARCB1_phosphoproteomics column inserted, but filled with NaN.
SMARCD1 did not match any columns in phosphoproteomics dataframe. SMARCD1_phosphoproteomics column inserted, but filled with NaN.
KLF1 did not match any columns in phosphoproteomics dataframe. KLF1_phosphoproteomics column inserted, but filled with NaN.
GATA1 did not match any columns in phosphoproteomics dataframe. GATA1_phosphoproteomics column inserted, but filled with NaN.
Doing t-test comparisons

                       Comparison       P_Value
0  ARID1A-S1755_phosphoproteomics  7.963747e-10
1  ARID1A-S1184_phosphoproteomics  4.338936e-07
2  ARID1A-S1604_phosphoproteomics  1.501846e-06
3   ARID1A-S363_phosphoproteomics  3.402219e-05





## Colon

### Test for significant comparisons in any of the interacting proteins

In [8]:
# Join the gene's mutation calls with the selected omics columns for the
# interacting proteins and keep tumor samples only. `.copy()` detaches the
# filtered frame so the column added below is not a write onto a slice.
protdf = co.join_omics_to_mutations(mutations_genes=[gene], omics_df_name=omics, omics_genes=interacting_proteins)
protdf = protdf.loc[protdf['Sample_Status'] == 'Tumor'].copy()

# Binary grouping column for the t-tests: every status other than
# 'Wildtype_Tumor' counts as 'Mutated'. Vectorized instead of a row-by-row loop.
mutation_status_col = gene + "_Mutation_Status"
protdf['Label'] = np.where(protdf[mutation_status_col] != 'Wildtype_Tumor', 'Mutated', 'Wildtype')

# Drop the mutation/sample metadata so only omics columns plus 'Label' remain.
protdf = protdf.drop(columns=[gene + "_Mutation", gene + "_Location", mutation_status_col, "Sample_Status"])

# Every remaining column except the grouping column is compared.
col_list = list(protdf.columns)
col_list.remove('Label')

print("Doing t-test comparisons\n")

# The `is not None` check below is how this notebook detects that
# wrap_ttest found something significant.
wrap_results = al.wrap_ttest(protdf, 'Label', col_list)

# Print the hits and record them in the notebook-wide results table.
if wrap_results is not None:
    print(wrap_results)
    print("\n\n")

    all_significant_comparisons = add_to_all_significant_comparisons(wrap_results, "Colon", True, all_significant_comparisons)


SMARCC1 did not match any columns in phosphoproteomics dataframe. SMARCC1_phosphoproteomics column inserted, but filled with NaN.
SUPT16H did not match any columns in phosphoproteomics dataframe. SUPT16H_phosphoproteomics column inserted, but filled with NaN.
CCND1 did not match any columns in phosphoproteomics dataframe. CCND1_phosphoproteomics column inserted, but filled with NaN.
SMARCD3 did not match any columns in phosphoproteomics dataframe. SMARCD3_phosphoproteomics column inserted, but filled with NaN.
CREBBP did not match any columns in phosphoproteomics dataframe. CREBBP_phosphoproteomics column inserted, but filled with NaN.
SMARCB1 did not match any columns in phosphoproteomics dataframe. SMARCB1_phosphoproteomics column inserted, but filled with NaN.
KAT2B did not match any columns in phosphoproteomics dataframe. KAT2B_phosphoproteomics column inserted, but filled with NaN.
DNMT3A did not match any columns in phosphoproteomics dataframe. DNMT3A_phosphoproteomics column ins

## Ovarian

In [9]:
# Join the gene's mutation calls with the selected omics columns for the
# interacting proteins and keep tumor samples only. `.copy()` detaches the
# filtered frame so the column added below is not a write onto a slice.
protdf = ov.join_omics_to_mutations(mutations_genes=[gene], omics_df_name=omics, omics_genes=interacting_proteins)
protdf = protdf.loc[protdf['Sample_Status'] == 'Tumor'].copy()

# Binary grouping column for the t-tests: every status other than
# 'Wildtype_Tumor' counts as 'Mutated'. Vectorized instead of a row-by-row loop.
mutation_status_col = gene + "_Mutation_Status"
protdf['Label'] = np.where(protdf[mutation_status_col] != 'Wildtype_Tumor', 'Mutated', 'Wildtype')

# Drop the mutation/sample metadata so only omics columns plus 'Label' remain.
protdf = protdf.drop(columns=[gene + "_Mutation", gene + "_Location", mutation_status_col, "Sample_Status"])

# Keep only the first occurrence of any repeated column name.
protdf = protdf.loc[:, ~protdf.columns.duplicated()]

# Every remaining column except the grouping column is compared.
col_list = list(protdf.columns)
col_list.remove('Label')

print("Doing t-test comparisons\n")

# The `is not None` check below is how this notebook detects that
# wrap_ttest found something significant.
wrap_results = al.wrap_ttest(protdf, 'Label', col_list)

# Print the hits and record them in the notebook-wide results table.
if wrap_results is not None:
    print(wrap_results)
    print("\n\n")

    all_significant_comparisons = add_to_all_significant_comparisons(wrap_results, "Ovarian", True, all_significant_comparisons)


CCND1 did not match any columns in phosphoproteomics dataframe. CCND1_phosphoproteomics column inserted, but filled with NaN.
SMARCD3 did not match any columns in phosphoproteomics dataframe. SMARCD3_phosphoproteomics column inserted, but filled with NaN.
SMARCD1 did not match any columns in phosphoproteomics dataframe. SMARCD1_phosphoproteomics column inserted, but filled with NaN.
SS18 did not match any columns in phosphoproteomics dataframe. SS18_phosphoproteomics column inserted, but filled with NaN.
KLF1 did not match any columns in phosphoproteomics dataframe. KLF1_phosphoproteomics column inserted, but filled with NaN.
GATA1 did not match any columns in phosphoproteomics dataframe. GATA1_phosphoproteomics column inserted, but filled with NaN.
Doing t-test comparisons

No significant comparisons.


# All Proteins: Phosphoproteomics

## Endometrial

In [10]:
try:
    print("\nGene: ", gene)

    # NOTE(review): `proteomics` / `all_proteins` are not read by any cell in
    # this notebook. They are kept only so the cell still defines the same
    # names; confirm nothing else needs them, then remove.
    proteomics = en.get_proteomics()
    all_proteins = proteomics.columns

    # Join the gene's mutation calls with every column of the selected omics
    # table (no `omics_genes` filter) and keep tumor samples only. `.copy()`
    # detaches the filtered frame so the column added below is not a write
    # onto a slice.
    protdf = en.join_omics_to_mutations(mutations_genes=[gene], omics_df_name=omics)
    protdf = protdf.loc[protdf['Sample_Status'] == 'Tumor'].copy()

    # Binary grouping column for the t-tests: every status other than
    # 'Wildtype_Tumor' counts as 'Mutated'. Vectorized instead of a row loop.
    mutation_status_col = gene + "_Mutation_Status"
    protdf['Label'] = np.where(protdf[mutation_status_col] != 'Wildtype_Tumor', 'Mutated', 'Wildtype')

    # Drop the mutation/sample metadata so only omics columns plus 'Label' remain.
    protdf = protdf.drop(columns=[gene + "_Mutation", gene + "_Location", mutation_status_col, "Sample_Status"])

    # Every remaining column except the grouping column is compared.
    col_list = list(protdf.columns)
    col_list.remove('Label')

    print("Doing t-test comparisons\n")

    # The `is not None` check below is how this notebook detects that
    # wrap_ttest found something significant.
    wrap_results = al.wrap_ttest(protdf, 'Label', col_list)

    # Print the hits and record them in the notebook-wide results table.
    if wrap_results is not None:
        print(wrap_results)
        print("\n\n")

        all_significant_comparisons = add_to_all_significant_comparisons(wrap_results, "Endometrial", False, all_significant_comparisons)

except Exception as e:
    # Best-effort cell: report the failure and let the rest of the notebook run.
    print("Error in Comparison")
    print(e)


Gene:  ARID1A
Doing t-test comparisons

                       Comparison       P_Value
0  ARID1A-S1755_phosphoproteomics  7.963747e-10
1  ARID1A-S1184_phosphoproteomics  4.338936e-07





## Colon

In [11]:
try:
    print("\nGene: ", gene)

    # NOTE(review): `proteomics` / `all_proteins` are not read by any cell in
    # this notebook. They are kept only so the cell still defines the same
    # names; confirm nothing else needs them, then remove.
    proteomics = co.get_proteomics()
    all_proteins = proteomics.columns

    # Join the gene's mutation calls with every column of the selected omics
    # table (no `omics_genes` filter) and keep tumor samples only. `.copy()`
    # detaches the filtered frame so the column added below is not a write
    # onto a slice.
    protdf = co.join_omics_to_mutations(mutations_genes=[gene], omics_df_name=omics)
    protdf = protdf.loc[protdf['Sample_Status'] == 'Tumor'].copy()

    # Binary grouping column for the t-tests: every status other than
    # 'Wildtype_Tumor' counts as 'Mutated'. Vectorized instead of a row loop.
    mutation_status_col = gene + "_Mutation_Status"
    protdf['Label'] = np.where(protdf[mutation_status_col] != 'Wildtype_Tumor', 'Mutated', 'Wildtype')

    # Drop the mutation/sample metadata so only omics columns plus 'Label' remain.
    protdf = protdf.drop(columns=[gene + "_Mutation", gene + "_Location", mutation_status_col, "Sample_Status"])

    # Every remaining column except the grouping column is compared.
    col_list = list(protdf.columns)
    col_list.remove('Label')

    print("Doing t-test comparisons\n")

    # The `is not None` check below is how this notebook detects that
    # wrap_ttest found something significant.
    wrap_results = al.wrap_ttest(protdf, 'Label', col_list)

    # Print the hits and record them in the notebook-wide results table.
    if wrap_results is not None:
        print(wrap_results)
        print("\n\n")

        all_significant_comparisons = add_to_all_significant_comparisons(wrap_results, "Colon", False, all_significant_comparisons)

except Exception as e:
    # Best-effort cell: report the failure and let the rest of the notebook run.
    print("Error in Comparison")
    print(e)


Gene:  ARID1A
Doing t-test comparisons

No significant comparisons.


## Ovarian

In [12]:

print("\nGene: ", gene)

# NOTE(review): `proteomics` / `all_proteins` are not read by any cell in this
# notebook. They are kept only so the cell still defines the same names;
# confirm nothing else needs them, then remove.
proteomics = ov.get_proteomics()
all_proteins = list(set(proteomics.columns))

# Join the gene's mutation calls with every column of the selected omics table
# (no `omics_genes` filter), keep tumor samples only, and keep only the first
# occurrence of any repeated column name. `.copy()` detaches the result so the
# column added below is not a write onto a slice.
protdf = ov.join_omics_to_mutations(mutations_genes=[gene], omics_df_name=omics)
protdf = protdf.loc[protdf['Sample_Status'] == 'Tumor']
protdf = protdf.loc[:, ~protdf.columns.duplicated()].copy()

# Binary grouping column for the t-tests: every status other than
# 'Wildtype_Tumor' counts as 'Mutated'. Vectorized instead of a row-by-row loop.
mutation_status_col = gene + "_Mutation_Status"
protdf['Label'] = np.where(protdf[mutation_status_col] != 'Wildtype_Tumor', 'Mutated', 'Wildtype')

# Drop the mutation/sample metadata so only omics columns plus 'Label' remain.
protdf = protdf.drop(columns=[gene + "_Mutation", gene + "_Location", mutation_status_col, "Sample_Status"])

# Every remaining column except the grouping column is compared.
col_list = list(protdf.columns)
col_list.remove('Label')

print("Doing t-test comparisons\n")

# The `is not None` check below is how this notebook detects that
# wrap_ttest found something significant.
wrap_results = al.wrap_ttest(protdf, 'Label', col_list)

# Print the hits and record them in the notebook-wide results table.
if wrap_results is not None:
    print(wrap_results)
    print("\n\n")

    all_significant_comparisons = add_to_all_significant_comparisons(wrap_results, "Ovarian", False, all_significant_comparisons)




Gene:  ARID1A
Doing t-test comparisons

                     Comparison       P_Value
0  STMN1-S38s_phosphoproteomics  1.498115e-07





### Print all significant comparisons

In [13]:
# Show the accumulated table, or a short message when nothing was recorded.
if len(all_significant_comparisons) == 0:
    print('No Significant Comparisons!')
else:
    display(all_significant_comparisons)

Unnamed: 0,Cancer_Type,Gene,Comparison,Interacting_Protein,P_Value
0,Endometrial,ARID1A,ARID1A-S1755_phosphoproteomics,True,7.963747e-10
1,Endometrial,ARID1A,ARID1A-S1184_phosphoproteomics,True,4.338936e-07
2,Endometrial,ARID1A,ARID1A-S1604_phosphoproteomics,True,1.501846e-06
3,Endometrial,ARID1A,ARID1A-S363_phosphoproteomics,True,3.402219e-05
0,Endometrial,ARID1A,ARID1A-S1755_phosphoproteomics,False,7.963747e-10
1,Endometrial,ARID1A,ARID1A-S1184_phosphoproteomics,False,4.338936e-07
0,Ovarian,ARID1A,STMN1-S38s_phosphoproteomics,False,1.498115e-07


### Write significant comparisons (if any) to shared CSV file

In [14]:
# Per-gene results file that this cell reads, extends and rewrites.
results_path = gene + '_Trans_Results.csv'

try:
    # 'round_trip' parsing returns P_Value exactly as it was written, so rows
    # saved by an earlier run compare equal to the in-memory ones below.
    existing_results = pd.read_csv(results_path, float_precision='round_trip')
except (FileNotFoundError, pd.errors.EmptyDataError):
    # First run for this gene (or an empty file): start from an empty table
    # instead of failing the notebook.
    existing_results = pd.DataFrame(columns=all_significant_comparisons.columns)

updated_results = pd.concat([existing_results, all_significant_comparisons], sort=False)

# Re-running the notebook would otherwise append the same rows again;
# only exact duplicate rows are removed.
updated_results = updated_results.drop_duplicates()

updated_results.to_csv(path_or_buf=results_path, index=False)