In [1]:
import cptac
import scipy
from scipy import stats
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import math
import pandas as pd
import statistics
import parse_correlations_dataframe as get_corr
import copy
import get_correlations
import cptac.utils as ut

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
brca = cptac.Brca()
ccrcc = cptac.Ccrcc()
colon = cptac.Colon()
en = cptac.Endometrial()
gbm = cptac.Gbm()
luad = cptac.Luad()
ovarian = cptac.Ovarian()
hnscc  = cptac.Hnscc()
lscc = cptac.Lscc()

                                                

In [4]:
cancers = [brca, ccrcc, colon, en, gbm, luad, ovarian, hnscc, lscc]

In [5]:
cancers2 = [ccrcc, en, gbm, luad, hnscc, lscc]

In [6]:
hi = set()
hi.add("yo")

In [7]:
hi

{'yo'}

In [8]:
def get_all_gene_list(tissues):
    #Returns a list of the genes that are in both proteomics and transcriptomics
    whole_gene_list = set()
    for tissue in tissues:
        tissue_list = get_gene_list(tissue)
        for gene in tissue_list:
            whole_gene_list.add(gene)
    return sorted(list(whole_gene_list))

In [9]:
def get_all_gene_list_old(tissues):
    #Returns a list of the genes that are in both proteomics and transcriptomics
    whole_gene_list = set()
    for tissue in tissues:
        gene_list = []
        prot = tissue.get_proteomics()
        if isinstance(prot.columns, pd.MultiIndex):
            prot = ut.reduce_multiindex(df = prot, levels_to_drop="Database_ID",quiet=True)
        trans = tissue.get_transcriptomics()
        if isinstance(trans.columns, pd.MultiIndex):
            trans = ut.reduce_multiindex(df = trans, levels_to_drop="Database_ID",quiet=True)
        for gene in prot:
            whole_gene_list.add(gene)
        for gene in trans:
            whole_gene_list.add(gene)
    return sorted(list(whole_gene_list))

In [10]:
def get_gene_list(tissue):
    #Returns a list of the genes that are in both proteomics and transcriptomics
    gene_list = []
    prot = tissue.get_proteomics()
    if isinstance(prot.columns, pd.MultiIndex):
        prot = prot.columns.get_level_values(0)
    trans = tissue.get_transcriptomics()
    if isinstance(trans.columns, pd.MultiIndex):
        trans = trans.columns.get_level_values(0)
    for i in prot:
        if i in trans and i not in gene_list:
            gene_list.append(i)
    return gene_list

In [11]:
gene_list = get_all_gene_list(cancers)

In [12]:
type_dict = {brca:"brca",ccrcc:"ccrcc",colon:"colon",en:"endometrial",gbm:"gbm",luad:"luad",
                  ovarian:"ovarian",hnscc:"hnscc",lscc:"lscc"}

In [13]:
# https://link.springer.com/article/10.3758/s13428-012-0289-7
def compare_correlations(r1, r2, n1, n2):
    rp1 = np.arctanh(r1)
    rp2 = np.arctanh(r2)
    
    if n1 < 4 or n2 < 4:
        return(np.nan)
    Sr12 = math.sqrt((1/(n1-3))+(1/(n2-3)))
    z = (rp1-rp2) / Sr12
    p = scipy.stats.norm.sf(abs(z))*2
    return (p)

In [14]:
def find_mut_tumor(cancer_type, gene):
    try:
        gene_multi = cancer_type.multi_join({'proteomics': gene, 'transcriptomics': gene, 'somatic_mutation': gene}, tissue_type = 'tumor', flatten = True)
        normal = cancer_type.multi_join({'proteomics': gene, 'transcriptomics': gene}, tissue_type= 'normal', flatten = True)
        normal.columns = ['proteomics', 'transcriptomics']
        gene_multi.columns = ['proteomics', 'transcriptomics', 'mutation', 'location', 'mutation_status']
        trans = list(gene_multi["transcriptomics"])
        prot = list(gene_multi['proteomics'])
        group = []
        for i in gene_multi['mutation_status']:
            if type(i) == str:
                group.append("mutation")
            else:
                group.append("wt") 
        if group.count('wt') < 2 or len(normal) < 2:
            return float("NaN")
        group.extend(['normal']*len(normal))
        prot.extend(list(normal['proteomics']))
        trans.extend(list(normal['transcriptomics']))
        gene_df = pd.DataFrame({'Type': group, 'Proteomics': prot, 'Transcriptomics': trans})
        gene_df = gene_df.dropna()
        return gene_df
    except:
        return float("NaN")

In [15]:
def get_df_with_type(df,input_type):
    return(df.loc[df['Type']==input_type])

In [16]:
def permute(df,original_correlation, label_1, label_2, column_one, column_two, permutation_times):
    permutation_list = []
    permu_df = copy.deepcopy(df)

    for i in range(permutation_times):
        permu_df["Type"] = np.random.permutation(permu_df["Type"])
        permu_is_label_1 = permu_df["Type"] == label_1
        permu_is_label_2 = permu_df["Type"] == label_2
        label_1_correlation,label_1_pval = scipy.stats.pearsonr(permu_df[permu_is_label_1][column_one], permu_df[permu_is_label_1][column_two])
        label_2_correlation,label_2_pval = scipy.stats.pearsonr(permu_df[permu_is_label_2][column_one], permu_df[permu_is_label_2][column_two])
        delta = label_1_correlation - label_2_correlation
        permutation_list.append(delta)
        
    z_score = (original_correlation - np.mean(permutation_list)) / np.std(permutation_list)
    p_val = scipy.stats.norm.sf(abs(z_score))*2
    return p_val

In [48]:
from mlxtend.evaluate import permutation_test

In [227]:
# first_pass = dict()
tot_diff_list = []
tot_pval_list = []
tot_perm_list = []
for cancer in cancers2:
    cancer_diff_list = [type_dict[cancer]]
    cancer_pval_list = [type_dict[cancer]]
    cancer_perm_list = [type_dict[cancer]]
    tumor_cancer_df = cancer.join_omics_to_omics("transcriptomics","proteomics",tissue_type="tumor",quiet=True)
    if isinstance(tumor_cancer_df.columns, pd.MultiIndex):
        tumor_cancer_df = ut.reduce_multiindex(df = tumor_cancer_df, levels_to_drop="Database_ID",quiet=True)
            
    normal_cancer_df = cancer.join_omics_to_omics("transcriptomics","proteomics",tissue_type="normal",quiet=True)
    if isinstance(normal_cancer_df.columns, pd.MultiIndex):
        normal_cancer_df = ut.reduce_multiindex(df = normal_cancer_df, levels_to_drop="Database_ID",quiet=True)
    
    for gene in gene_list:
        gene_trans = gene + "_transcriptomics"
        gene_prot = gene + "_proteomics"
        gene_in_tumor = gene_trans in tumor_cancer_df.columns and gene_prot in tumor_cancer_df.columns
        gene_in_normal = gene_trans in normal_cancer_df.columns and gene_prot in normal_cancer_df.columns
        
        if not(gene_in_tumor and gene_in_normal):
            cancer_diff_list.append(np.nan)
            cancer_pval_list.append(np.nan)
            cancer_perm_list.append(np.nan)
            continue

        tumor_df = tumor_cancer_df[[gene_trans,gene_prot]]
#         print(tumor_df[gene_trans])
#         print(tumor_df[gene_prot])
        if isinstance(tumor_df[gene_trans], pd.core.frame.DataFrame) or isinstance(tumor_df[gene_prot], pd.core.frame.DataFrame): #This is to take first column of multi-index
            trans_col = tumor_df[gene_trans]
            if isinstance(tumor_df[gene_trans], pd.core.frame.DataFrame):
                trans_col = trans_col.iloc[:,0]
            prot_col = tumor_df[gene_prot]
            if isinstance(tumor_df[gene_prot], pd.core.frame.DataFrame):
                prot_col = prot_col.iloc[:,0]
            frame = {gene_trans : trans_col, gene_prot : prot_col}
            tumor_df = pd.DataFrame(frame)
        num_tumor = len(tumor_df)
        tumor_df = tumor_df.dropna()
        tumor_corr = tumor_df.corr().iloc[0][1]
        
#         print(normal_df[gene_trans])
#         print(normal_df[gene_prot])
        normal_df = normal_cancer_df[[gene_trans,gene_prot]]
        if isinstance(normal_df[gene_trans], pd.core.frame.DataFrame) or isinstance(normal_df[gene_prot], pd.core.frame.DataFrame): #This is to take first column of multi-index
            trans_col = normal_df[gene_trans]
            if isinstance(normal_df[gene_trans], pd.core.frame.DataFrame):
                trans_col = trans_col.iloc[:,0]
            prot_col = normal_df[gene_prot]
            if isinstance(normal_df[gene_prot], pd.core.frame.DataFrame):
                prot_col = prot_col.iloc[:,0]
            frame = {gene_trans : trans_col, gene_prot : prot_col}
            normal_df = pd.DataFrame(frame)
        num_normal = len(normal_df)
        normal_df = normal_df.dropna()
        normal_corr = normal_df.corr().iloc[0][1]
        
        corr_diff = tumor_corr - normal_corr
        cancer_diff_list.append(corr_diff)
        
        gene_pval = compare_correlations(tumor_corr, normal_corr, num_tumor, num_normal)
        cancer_pval_list.append(gene_pval)
        
        #Here we do permutations. Cut down number?
        if num_tumor < 4 or num_normal < 4 or gene_pval > .01:
            cancer_perm_list.append(np.nan)
            continue
        tumor_label_list = ['tumor'] * len(tumor_df)
        tumor_df["Type"] = tumor_label_list
        
        normal_label_list = ['normal'] * len(normal_df)
        normal_df["Type"] = normal_label_list
        
        perm_list = [tumor_df,normal_df]
        perm_df = pd.concat(perm_list)
        
        column_one = perm_df.columns[0]
        column_two = perm_df.columns[1]
        perm_val = permute(perm_df,corr_diff,"tumor","normal",column_one,column_two,1000)
        cancer_perm_list.append(perm_val)
#         cancer_perm_list.append(np.nan)
#17 minutes and 10 seconds for 100 genes permutation, p val < .05 and 1000 permutations per
        
    tot_diff_list.append(cancer_diff_list)
    tot_pval_list.append(cancer_pval_list)
    tot_perm_list.append(cancer_perm_list)

ValueError: x and y must have length at least 2.

In [215]:
tumor_df = tumor_cancer_df[[gene_trans,gene_prot]]

In [216]:
tumor_df

Name,ABI1_transcriptomics,ABI1_proteomics,ABI1_proteomics
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
C3L-00004,26.692315,-0.130282,
C3L-00010,24.618292,,
C3L-00011,47.983112,0.200461,
C3L-00026,21.324011,0.582155,
C3L-00079,25.178665,,
...,...,...,...
C3N-01646,23.429352,0.046889,
C3N-01648,39.167637,0.094193,
C3N-01649,23.722191,,
C3N-01651,23.938233,0.118175,


In [217]:
if isinstance(tumor_df[gene_trans], pd.core.frame.DataFrame) or isinstance(tumor_df[gene_prot], pd.core.frame.DataFrame): #This is to take first column of multi-index
    print("hi")
    trans_col = tumor_df[gene_trans]
    if isinstance(tumor_df[gene_trans], pd.core.frame.DataFrame):
        trans_col = trans_col.iloc[:,0]
    prot_col = tumor_df[gene_prot]
    if isinstance(tumor_df[gene_prot], pd.core.frame.DataFrame):
        prot_col = prot_col.iloc[:,0]
    frame = {gene_trans : trans_col, gene_prot : prot_col}
    tumor_df = pd.DataFrame(frame)

hi


In [218]:
tumor_df

Unnamed: 0_level_0,ABI1_transcriptomics,ABI1_proteomics
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
C3L-00004,26.692315,-0.130282
C3L-00010,24.618292,
C3L-00011,47.983112,0.200461
C3L-00026,21.324011,0.582155
C3L-00079,25.178665,
...,...,...
C3N-01646,23.429352,0.046889
C3N-01648,39.167637,0.094193
C3N-01649,23.722191,
C3N-01651,23.938233,0.118175


In [174]:
if isinstance(tumor_df[gene_trans], pd.core.frame.DataFrame) or isinstance(tumor_df[gene_prot], pd.core.frame.DataFrame):
    print("hi")

In [184]:
normal_df

Name,ABHD6_transcriptomics,ABHD6_proteomics
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
C3L-00004.N,20.690153,0.041242
C3L-00010.N,20.026105,0.177484
C3L-00011.N,28.403306,0.442436
C3L-00026.N,25.670387,0.043691
C3L-00079.N,17.425588,-0.395671
...,...,...
C3N-01646.N,30.047970,0.522017
C3N-01648.N,19.367721,0.402142
C3N-01649.N,23.215246,0.602206
C3N-01651.N,22.201853,0.501867


In [183]:
normal_df[gene_prot]

KeyError: 'A1BG_proteomics'

In [159]:
tumor_df

Patient_ID
ABI1_proteomics    Empty DataFrame
Columns: [ABI1_proteomics]
Ind...
Name: ABI1_transcriptomics, dtype: object

In [144]:
perm_df

Name,AKAP17A_transcriptomics,AKAP17A_transcriptomics,AKAP17A_proteomics,Type
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
C3L-00104,224925.013065,0.0,0.544187,tumor
C3L-00365,251674.374330,0.0,0.203106,tumor
C3L-00674,205830.598754,0.0,0.020474,tumor
C3L-00677,268763.653430,0.0,0.139892,tumor
C3L-01040,180929.603827,0.0,0.107471,tumor
...,...,...,...,...
PT-RN5K.N,121224.465429,0.0,-0.727790,normal
PT-RU72.N,115447.303360,0.0,-0.708098,normal
PT-UTHO.N,162974.757645,0.0,-0.349061,normal
PT-WVLH.N,117977.283349,0.0,-0.602661,normal


In [113]:
permute2(perm_df,corr_diff,"tumor","normal",column_one,column_two,1000)

Name         ACLY_transcriptomics  ACLY_proteomics  ACLY_proteomics    Type
Patient_ID                                                                 
C3L-00004              127.431921         0.640347         1.058305   tumor
C3L-00010              157.060848         0.887528         1.331261   tumor
C3L-00011               80.023745         0.349048        -0.115492   tumor
C3L-00026              158.636481         0.907927         0.459999   tumor
C3L-00079               90.420426         0.064131         0.073501   tumor
...                           ...              ...              ...     ...
C3N-01646.N             43.665836        -1.010941        -0.607228  normal
C3N-01648.N             48.895973        -1.718317        -0.776481  normal
C3N-01649.N             47.823339        -1.359644        -0.846564  normal
C3N-01651.N             65.531812        -1.144428        -0.500871  normal
C3N-01808.N             64.300822        -0.980743        -0.606716  normal

[185 rows x

TypeError: No loop matching the specified signature and casting was found for ufunc add

In [124]:
hi = ut.reduce_multiindex(df = ccrcc.get_proteomics(), levels_to_drop="Database_ID",quiet=True)

In [129]:
"ACLY_proteomics" in hi.columns

False

In [138]:
print(tumor_df["ACLY_proteomics"])

Name        ACLY_proteomics  ACLY_proteomics
Patient_ID                                  
C3L-00004          0.640347         1.058305
C3L-00010          0.887528         1.331261
C3L-00011          0.349048        -0.115492
C3L-00026          0.907927         0.459999
C3L-00079          0.064131         0.073501
...                     ...              ...
C3N-01646          0.525380         0.629745
C3N-01648         -0.089212        -0.351126
C3N-01649          0.034884        -0.285154
C3N-01651          0.442074         0.025897
C3N-01808          0.625828         0.697103

[110 rows x 2 columns]


In [140]:
isinstance(tumor_df["ACLY_transcriptomics"], pd.core.frame.DataFrame)

False

In [136]:
#Here we are only currently taking the first column of multi_indices
dftry = tumor_df.iloc[:, 0:2]

In [137]:
dftry

Name,ACLY_transcriptomics,ACLY_proteomics
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
C3L-00004,127.431921,0.640347
C3L-00010,157.060848,0.887528
C3L-00011,80.023745,0.349048
C3L-00026,158.636481,0.907927
C3L-00079,90.420426,0.064131
...,...,...
C3N-01646,159.803693,0.525380
C3N-01648,25.479118,-0.089212
C3N-01649,116.451276,0.034884
C3N-01651,87.199126,0.442074


In [112]:
def permute2(df,original_correlation, label_1, label_2, column_one, column_two, permutation_times):
    permutation_list = []
    permu_df = copy.deepcopy(df)
    print(permu_df)
    print(label_1)
    print(label_2)
    print(column_one)
    print(column_two)
    for i in range(permutation_times):
        permu_df["Type"] = np.random.permutation(permu_df["Type"])
        permu_is_label_1 = permu_df["Type"] == label_1
        permu_is_label_2 = permu_df["Type"] == label_2
        print("Here:")
        print(permu_df[permu_is_label_1][column_two])
        label_1_correlation,label_1_pval = scipy.stats.pearsonr(permu_df[permu_is_label_1][column_one], permu_df[permu_is_label_1][column_two])
        label_2_correlation,label_2_pval = scipy.stats.pearsonr(permu_df[permu_is_label_2][column_one], permu_df[permu_is_label_2][column_two])
        delta = label_1_correlation - label_2_correlation
        permutation_list.append(delta)
        print("done")
        
    z_score = (original_correlation - np.mean(permutation_list)) / np.std(permutation_list)
    p_val = scipy.stats.norm.sf(abs(z_score))*2
    return p_val

In [220]:
labels = ["Cancer"] 
labels.extend(gene_list)
df = pd.DataFrame.from_records(tot_diff_list,columns=labels)
df2 = pd.DataFrame.from_records(tot_pval_list,columns=labels)
df3 = pd.DataFrame.from_records(tot_perm_list,columns=labels)

In [225]:
df3

Unnamed: 0,Cancer,A1BG,A1CF,A2M,A2ML1,A4GALT,AAAS,AACS,AADAC,AADAT,...,ZSWIM9,ZW10,ZWILCH,ZWINT,ZXDA,ZXDC,ZYG11B,ZYX,ZZEF1,ZZZ3
0,ccrcc,,,,,,,,,,...,,,,,,,,,,
1,endometrial,,,,,,,,,,...,,,,,,,,,,
2,gbm,,,,,,,,,,...,,,,,,,,,,
3,luad,,,,,,,,,,...,,,,,,,,,,
4,hnscc,,,,,,,,,,...,,,,,,,,,,
5,lscc,,,,,,,,,,...,,,,,,,,,,


In [223]:
# df.to_csv("corr_diff.csv",index=False)

In [224]:
# df2.to_csv("p_val.csv",index=False)

In [87]:
df2

Unnamed: 0,Cancer,A1BG,A1CF,A2M,A2ML1,A4GALT,AAAS,AACS,AADAC,AADAT,...,ZSWIM9,ZW10,ZWILCH,ZWINT,ZXDA,ZXDC,ZYG11B,ZYX,ZZEF1,ZZZ3
0,ccrcc,0.008435,0.001958,0.061198,,0.347296,0.767248,0.1477092,0.081185,0.370717,...,,0.069946,0.1166079,0.2703798,,0.065181,0.4565249,0.106062,0.327473,0.191946
1,endometrial,0.824775,,0.936116,0.03499895,0.264571,0.030133,0.003175433,,0.287843,...,0.654079,0.78193,0.007278204,0.002185463,,0.359389,0.2029671,0.55678,0.236825,0.261292
2,gbm,0.475921,,0.953751,,,0.944833,0.2692001,,0.995065,...,,0.428331,0.0004868691,,,0.752768,0.4370716,0.036739,0.512177,0.185044
3,luad,0.778949,,0.433724,,,0.131262,7.607273e-12,0.065583,0.003088,...,,1.2e-05,5.299375e-16,2.143507e-05,,0.574093,2.38615e-08,0.00036,6e-05,2e-06
4,hnscc,0.98856,0.446635,0.252807,0.0675544,0.118225,0.011762,0.7665152,9.8e-05,,...,,0.005881,0.3638297,8.984708e-06,,0.001435,0.0001751749,0.934953,0.090658,0.795908
5,lscc,0.169387,,0.610199,2.37765e-12,0.156933,0.030961,0.1035001,0.300931,,...,0.000682,0.000139,6.967096e-08,6.784408e-12,,0.005178,0.08362345,0.008578,0.011934,0.000571


In [22]:
normal_df = cancer.join_omics_to_omics("transcriptomics","proteomics",tissue_type="normal")

In [34]:
# normal_df[["A1BG_transcriptomics","A1CF_transcriptomics"]]
# normal_df
"A1BG_transcriptomics" in normal_df.columns and "A1CF_transcriptomics" in normal_df.columns

True

In [31]:
hi = "yo"
hi2 = "yip"
hi + hi2

'yoyip'

In [103]:
df3

Unnamed: 0,A1BG,A1CF,A2M,A2ML1,A4GALT
0,brca,,,,
1,ccrcc,0.049344,0.000831,0.019891,
2,colon,,,,
3,endometrial,0.775861,,0.912147,0.01393949
4,gbm,0.380497,,0.72014,
5,luad,0.77103,,0.009256,
6,ovarian,,,,
7,hnscc,0.991894,0.788576,0.211574,0.0004487003
8,lscc,0.140028,,0.098622,1.278508e-22


In [None]:
        
        #join omics to omics, proteomics to transcriptomics. One call for tumor, one to normal
        #join one column from proteomics, one from transcriptomics. Make one normal, one tumor. each will have 2 columns
        #here we call Humberto's function as well, giving us p-value - make dataframe with multijoin that has label for 
        #whether tumor or normal (3 columns in total, transcrip, prot, and tumor/normal. delta correlation as corr1 - corr2)
        #just a stats call to get corr numbers
        #If Nathaniel's p-value looks bad, then don't give it to Humberto. Otherwise, pass it in
#         corr_1,num_samples_1 = get_single_gene_correlations(gene, [cancer],input_tissue_type = "tumor")
#         corr_2,num_samples_2 = get_single_gene_correlations(gene,[cancer],input_tissue_type = "normal")
#             check mut vs wt tumor:

In [None]:
#create 3 giant dataframes - difference in correlation, Nathaniel's p-value from z score, and Humberto's permutation based p-value

In [59]:
["hi"].extend(['yo','wuz'])

In [60]:
one = ["hi"]
one.extend(['yo','wuz'])
one

['hi', 'yo', 'wuz']

In [147]:
second_df = copy.deepcopy(tumor_df)

In [149]:
second_df
second_df = second_df.iloc[:, 0:3]

In [150]:
second_df

Name,AKAP17A_transcriptomics,AKAP17A_transcriptomics,AKAP17A_proteomics
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
C3L-00104,224925.013065,0.0,0.544187
C3L-00365,251674.374330,0.0,0.203106
C3L-00674,205830.598754,0.0,0.020474
C3L-00677,268763.653430,0.0,0.139892
C3L-01040,180929.603827,0.0,0.107471
...,...,...,...
C3N-03183,258341.911092,0.0,0.290588
C3N-03184,166729.683250,0.0,0.660597
C3N-03186,181008.038925,0.0,-0.255908
C3N-03188,222308.197768,0.0,0.028869


In [151]:
third_df = second_df.iloc[:,0:1]

In [153]:
add_col = second_df['AKAP17A_proteomics']

In [154]:
third_df['AKAP17A_proteomics'] = add_col

In [155]:
third_df

Name,AKAP17A_transcriptomics,AKAP17A_proteomics
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
C3L-00104,224925.013065,0.544187
C3L-00365,251674.374330,0.203106
C3L-00674,205830.598754,0.020474
C3L-00677,268763.653430,0.139892
C3L-01040,180929.603827,0.107471
...,...,...
C3N-03183,258341.911092,0.290588
C3N-03184,166729.683250,0.660597
C3N-03186,181008.038925,-0.255908
C3N-03188,222308.197768,0.028869


In [156]:
if isinstance(tumor_df[gene_trans], pd.core.frame.DataFrame) or isinstance(tumor_df[gene_prot], pd.core.frame.DataFrame): #This is to take first column of multi-index
    trans_col = tumor_df[gene_trans]
    if isinstance(tumor_df[gene_trans], pd.core.frame.DataFrame):
        trans_col = trans_col.iloc[:,0:1]
    prot_col = tumor_df[gene_prot]
    if isinstance(tumor_df[gene_prot], pd.core.frame.DataFrame):
        prot_col = prot_col.iloc[:,0:1]
    trans_col[gene_prot] = prot_col
    tumor_df = trans_col

In [157]:
tumor_df

Name,AKAP17A_transcriptomics,AKAP17A_proteomics
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
C3L-00104,224925.013065,0.544187
C3L-00365,251674.374330,0.203106
C3L-00674,205830.598754,0.020474
C3L-00677,268763.653430,0.139892
C3L-01040,180929.603827,0.107471
...,...,...
C3N-03183,258341.911092,0.290588
C3N-03184,166729.683250,0.660597
C3N-03186,181008.038925,-0.255908
C3N-03188,222308.197768,0.028869
