# RB1 Mutation Trans Effect on Transcriptomics

### Library Imports

In [1]:
import pandas as pd
import numpy as np
import scipy.stats

import warnings
warnings.filterwarnings("ignore")

import cptac
import cptac.utils as al

brain = cptac.Gbm()

                                    

### Select Gene

In [2]:
gene = "RB1"

### Investigate Proteomics, Phosphoproteomics, Acetylproteomics, or Transcriptomics

In [13]:
#omics = "proteomics"
omics = "transcriptomics"
#omics = "phosphoproteomics"
#omics = "acetylproteomics"

### Track all significant comparisons in Dataframe

In [14]:
all_significant_comparisons = pd.DataFrame(columns=['Cancer_Type', 'Gene', 'Comparison','Interacting_Protein','P_Value'])

In [15]:
def add_to_all_significant_comparisons(df, cancer, interacting, all_sig_comp):
    expanded = df
    expanded['Gene'] = gene
    expanded['Cancer_Type'] = cancer
    expanded['Interacting_Protein'] = interacting
    
    updated_all_comparisons = pd.concat([all_sig_comp, expanded], sort=False)
    
    return updated_all_comparisons

# Interacting Proteins: Transcriptomics

### Generate interacting protein list

Make a call to cptac.utils to get the interacting proteins method, which interacts with the uniprot and string databases to generate a list of known interacting partners with the given gene.

In [16]:
# Use get interacting proteins method to generate list of interacting proteins
interacting_proteins = al.get_interacting_proteins(gene)

# Show interacting protein list
print("Interacting Proteins:")
for interacting_protein in interacting_proteins:
    print(interacting_protein)

Interacting Proteins:
CDK4
CCND1
CCND2
CCNE1
E2F3
CDK6
CDK2
RB1
TP53
CCNA2
PPP1CA
E2F1
DNMT1
E2F2
CCND3
RBBP4
HDAC1
TFDP1
E2F4
TP53BP1
CDK1
CDKN1A
E2F5
CDKN2A
DP2
HMGA2
MCM7
HBP1
AATF
LIN9
BRCA1
ABL1
BCR
NCOA6
MORF4L2
SERPINB2


### Test for significant comparisons in any of interacting proteins

In [17]:
# Create dataframe in order to do comparisons with wrap_ttest
protdf = brain.join_omics_to_mutations(mutations_genes=[gene], omics_df_name=omics, omics_genes=interacting_proteins)
protdf = brain.reduce_multiindex(protdf, flatten=True)
protdf = protdf[protdf.Sample_Status == 'Tumor'] # drop Normal samples

# Create the binary valued column needed to do the comparison
for ind, row in protdf.iterrows():
    if row[gene+"_Mutation_Status"] != 'Wildtype_Tumor':
        protdf.at[ind,'Label'] = 'Mutated'
    else:
        protdf.at[ind,'Label'] = 'Wildtype'

# Format the dataframe correctly
protdf = protdf.drop(gene+"_Mutation",axis=1)
protdf = protdf.drop(gene+"_Location",axis=1)
protdf = protdf.drop(gene+"_Mutation_Status", axis=1)
protdf = protdf.drop("Sample_Status",axis=1)


# Make list of columns to be compared using t-tests
col_list = list(protdf.columns)
col_list.remove('Label')

print("Doing t-test comparisons\n")

# Call wrap_ttest, pass in formatted dataframe
wrap_results = al.wrap_ttest(protdf, 'Label', col_list)

# Print results, if anything significant was found
if wrap_results is not None:
        print(wrap_results)
        print("\n\n")
        
        all_significant_comparisons = add_to_all_significant_comparisons(wrap_results, "Gbm", True, all_significant_comparisons)
        

Doing t-test comparisons

                                  Comparison       P_Value
0   CCNE1_transcriptomics_ENSG00000105173.12  1.579781e-07
1   TFDP1_transcriptomics_ENSG00000198176.11  6.867372e-07
2     CDK2_transcriptomics_ENSG00000123374.9  1.224552e-06
3  CDKN2A_transcriptomics_ENSG00000147889.15  1.255831e-06
4     RB1_transcriptomics_ENSG00000139687.12  1.583221e-06
5   DNMT1_transcriptomics_ENSG00000130816.13  1.166545e-04
6   BRCA1_transcriptomics_ENSG00000012048.18  2.545444e-04
7    E2F1_transcriptomics_ENSG00000101412.12  4.630799e-04
8     CDK6_transcriptomics_ENSG00000105810.8  1.343566e-03







In [18]:
display(wrap_results) 

Unnamed: 0,Comparison,P_Value,Gene,Cancer_Type,Interacting_Protein
0,CCNE1_transcriptomics_ENSG00000105173.12,1.579781e-07,RB1,Gbm,True
1,TFDP1_transcriptomics_ENSG00000198176.11,6.867372e-07,RB1,Gbm,True
2,CDK2_transcriptomics_ENSG00000123374.9,1.224552e-06,RB1,Gbm,True
3,CDKN2A_transcriptomics_ENSG00000147889.15,1.255831e-06,RB1,Gbm,True
4,RB1_transcriptomics_ENSG00000139687.12,1.583221e-06,RB1,Gbm,True
5,DNMT1_transcriptomics_ENSG00000130816.13,0.0001166545,RB1,Gbm,True
6,BRCA1_transcriptomics_ENSG00000012048.18,0.0002545444,RB1,Gbm,True
7,E2F1_transcriptomics_ENSG00000101412.12,0.0004630799,RB1,Gbm,True
8,CDK6_transcriptomics_ENSG00000105810.8,0.001343566,RB1,Gbm,True


# All Proteins: Transcriptomics 

In [8]:
try:
    print("\nGene: ", gene)

    # Use all proteins

    # Create dataframe in order to do comparisons with wrap_ttest
    protdf = brain.join_omics_to_mutations(mutations_genes=[gene], omics_df_name=omics)
    protdf = brain.reduce_multiindex(protdf, flatten=True) # flatten for specific column names
    protdf = protdf.loc[protdf['Sample_Status'] == 'Tumor']
    
    # Create binary column 
    protdf['Label'] = np.where(
                protdf[gene+'_Mutation_Status'] == 'Wildtype_Tumor', 'Wildtype', 'Mutated')

    # Format the dataframe correctly'''
    protdf = protdf.drop(gene+"_Mutation",axis=1)
    protdf = protdf.drop(gene+"_Location",axis=1)
    protdf = protdf.drop(gene+"_Mutation_Status", axis=1)
    protdf = protdf.drop("Sample_Status",axis=1)

    # Make list of columns to be compared using t-tests
    col_list = list(protdf.columns)
    col_list.remove('Label')

    print("Doing t-test comparisons\n")
    
    # Call wrap_ttest, pass in formatted dataframe
    wrap_results = al.wrap_ttest(protdf, 'Label', col_list)

    # Print results, if anything significant was found
    if wrap_results is not None:
            print(wrap_results)
            print("\n\n")
            
            all_significant_comparisons = add_to_all_significant_comparisons(wrap_results, "Gbm", False, all_significant_comparisons)


except Exception as e:
    print("Error in Comparison")
    print(e)


Gene:  RB1




Doing t-test comparisons

                                         Comparison       P_Value
0      AC106786.1_transcriptomics_ENSG00000223652.2  5.592300e-15
1          RBBP8_transcriptomics_ENSG00000101773.15  2.069821e-13
2         UBE2CP2_transcriptomics_ENSG00000265939.1  5.321912e-12
3          ANKUB1_transcriptomics_ENSG00000206199.8  8.386719e-12
4        SLC47A2_transcriptomics_ENSG00000180638.16  1.493243e-11
..                                              ...           ...
148      S100PBP_transcriptomics_ENSG00000116497.16  7.183099e-07
149  RP11-746P2.5_transcriptomics_ENSG00000229064.1  7.433115e-07
150      FBXO36P1_transcriptomics_ENSG00000266117.1  7.580728e-07
151   SLC16A1-AS1_transcriptomics_ENSG00000226419.5  7.809521e-07
152        PARD6B_transcriptomics_ENSG00000124171.7  8.258120e-07

[153 rows x 2 columns]





# Print all significant comparisons

In [9]:
if len(all_significant_comparisons) > 0:
    display(all_significant_comparisons)
    
else:
    print('No Significant Comparisons!')

Unnamed: 0,Cancer_Type,Gene,Comparison,Interacting_Protein,P_Value
0,Gbm,RB1,CCNE1_transcriptomics_ENSG00000105173.12,True,1.579781e-07
1,Gbm,RB1,TFDP1_transcriptomics_ENSG00000198176.11,True,6.867372e-07
2,Gbm,RB1,CDK2_transcriptomics_ENSG00000123374.9,True,1.224552e-06
3,Gbm,RB1,CDKN2A_transcriptomics_ENSG00000147889.15,True,1.255831e-06
4,Gbm,RB1,RB1_transcriptomics_ENSG00000139687.12,True,1.583221e-06
...,...,...,...,...,...
148,Gbm,RB1,S100PBP_transcriptomics_ENSG00000116497.16,False,7.183099e-07
149,Gbm,RB1,RP11-746P2.5_transcriptomics_ENSG00000229064.1,False,7.433115e-07
150,Gbm,RB1,FBXO36P1_transcriptomics_ENSG00000266117.1,False,7.580728e-07
151,Gbm,RB1,SLC16A1-AS1_transcriptomics_ENSG00000226419.5,False,7.809521e-07


### Write Significant Comparisons (if any) to Shared CSV file

In [10]:
'''
existing_results = pd.read_csv(gene+'_Trans_Results.csv')

updated_results = pd.concat([existing_results, all_significant_comparisons], sort=False)

updated_results.to_csv(path_or_buf = gene + '_Trans_Results.csv', index=False)'''

"\nexisting_results = pd.read_csv(gene+'_Trans_Results.csv')\n\nupdated_results = pd.concat([existing_results, all_significant_comparisons], sort=False)\n\nupdated_results.to_csv(path_or_buf = gene + '_Trans_Results.csv', index=False)"