In [36]:
import pandas as pd
import seaborn as sns 
import numpy as np
import glob
import matplotlib.pyplot as plt
%matplotlib inline

In [37]:
#Import FPKM, MAF and protein coding list files for required cancer type

#Add path to MAF and FPKM data colleted from TCGA
path = r'/Users/jake/OneDrive - University of Glasgow/Project/BRCA/R_data'

#all csv files
all_files = glob.glob(path + "/*.csv")

#for loop through to read in tumour FPKM, normal FPKM and MAF files
for filename in all_files:
    
    if 'tumour' in filename:
        tumour_FPKM = pd.read_csv(filename)
    elif 'normal' in filename:
        normal_FPKM = pd.read_csv(filename)
    elif 'MAF' in filename:
        MAF = pd.read_csv(filename,low_memory=False,index_col=0)

        
#Load the protein coding gene list 
protein_coding_genes_file = '/Users/jake/OneDrive - University of Glasgow/Project/ensembl_GRCH38_protein_coding_genes.tsv'
protein_coding_genes = pd.read_csv(protein_coding_genes_file,sep='\t',index_col=0)

In [38]:
MAF.head()

Unnamed: 0,Hugo_Symbol,Entrez_Gene_Id,Center,NCBI_Build,Chromosome,Start_Position,End_Position,Strand,Variant_Classification,Variant_Type,...,FILTER,CONTEXT,src_vcf_id,tumor_bam_uuid,normal_bam_uuid,case_id,GDC_FILTER,COSMIC,MC3_Overlap,GDC_Validation_Status
1,FOXD3,27022,WUGSC,GRCh38,chr1,63323535,63323535,+,Silent,SNP,...,PASS,CCGCAGAAGAA,6b6664ad-7c70-459d-a42f-2af30ffae87e,69b6d195-4ba2-48d8-bd2c-892f4b45867f,fde72cef-7c2a-4c7b-8187-a18460d92c90,8332806e-f547-4aae-89af-6d5bec831fd2,wga_pair,COSM388847,True,Unknown
2,KPRP,448834,WUGSC,GRCh38,chr1,152760163,152760163,+,Missense_Mutation,SNP,...,PASS,TTGTGGCCCCC,6b6664ad-7c70-459d-a42f-2af30ffae87e,69b6d195-4ba2-48d8-bd2c-892f4b45867f,fde72cef-7c2a-4c7b-8187-a18460d92c90,8332806e-f547-4aae-89af-6d5bec831fd2,wga_pair,COSM423920,True,Unknown
3,SFT2D2,375035,WUGSC,GRCh38,chr1,168246650,168246650,+,3'UTR,SNP,...,PASS,TTCTGTTGAGT,6b6664ad-7c70-459d-a42f-2af30ffae87e,69b6d195-4ba2-48d8-bd2c-892f4b45867f,fde72cef-7c2a-4c7b-8187-a18460d92c90,8332806e-f547-4aae-89af-6d5bec831fd2,wga_pair,,True,Unknown
4,KIAA1407,57577,WUGSC,GRCh38,chr3,113965097,113965097,+,3'UTR,SNP,...,PASS,GGTGGAGATAA,6b6664ad-7c70-459d-a42f-2af30ffae87e,69b6d195-4ba2-48d8-bd2c-892f4b45867f,fde72cef-7c2a-4c7b-8187-a18460d92c90,8332806e-f547-4aae-89af-6d5bec831fd2,wga_pair,,True,Unknown
5,FNDC3B,64778,WUGSC,GRCh38,chr3,172133474,172133474,+,Missense_Mutation,SNP,...,PASS,AGGTTATTCTC,6b6664ad-7c70-459d-a42f-2af30ffae87e,69b6d195-4ba2-48d8-bd2c-892f4b45867f,fde72cef-7c2a-4c7b-8187-a18460d92c90,8332806e-f547-4aae-89af-6d5bec831fd2,wga_pair,COSM445966,True,Unknown


In [39]:
#IMPACT col is the VEP status, with HIGH and MODERATE displaying variants that affect proteins
#Create new MAF DF with these values extracted

MAF_VEP_protein = MAF[(MAF['IMPACT'] == 'HIGH') | (MAF['IMPACT'] == 'MODERATE')]

In [40]:
#Write protein coding MAF to csv ... path.split('/')[5] = Cancer type

MAF_VEP_protein.to_csv(path.split('/')[5]+'_MAF_protein_coding_full.csv')

In [41]:
### FPKM DFs ####

In [42]:
#First rename gene ensembl col
tumour_FPKM.rename(columns={'Unnamed: 0':'Gene_ensembl_id'},inplace=True)
normal_FPKM.rename(columns={'Unnamed: 0':'Gene_ensembl_id'},inplace=True)

In [43]:
#Check DFs 
tumour_FPKM.head()

Unnamed: 0,Gene_ensembl_id,Gene_symbol,TCGA-E2-A15G-01A-11R-A12D-07,TCGA-E2-A1B5-01A-21R-A12P-07,TCGA-EW-A2FS-01A-11R-A17B-07,TCGA-EW-A1P7-01A-21R-A144-07,TCGA-LL-A5YO-01A-21R-A28M-07,TCGA-BH-A1FN-01A-11R-A13Q-07,TCGA-A7-A3RF-01A-11R-A22K-07,TCGA-E9-A1NG-01A-21R-A14M-07,...,TCGA-AO-A03T-01A-21R-A034-07,TCGA-E9-A6HE-01A-11R-A31O-07,TCGA-A1-A0SG-01A-11R-A144-07,TCGA-A2-A259-01A-11R-A16F-07,TCGA-A7-A0DB-01A-11R-A00Z-07,TCGA-AN-A04A-01A-21R-A034-07,TCGA-C8-A134-01A-11R-A115-07,TCGA-BH-A1ES-01A-11R-A137-07,TCGA-A8-A09W-01A-11R-A00Z-07,TCGA-BH-A0B0-01A-21R-A115-07
0,ENSG00000000003,TSPAN6,7.866571,4.418394,7.154286,7.662819,5.059936,11.193642,0.89918,5.735883,...,3.539063,8.577926,11.319418,11.563954,17.923848,4.442201,8.910983,9.66036,6.759432,21.865617
1,ENSG00000000005,TNMD,0.175111,2.943996,0.383271,1.12914,0.065756,0.0,0.0,0.024517,...,0.259209,0.560924,0.042512,0.137827,0.688694,0.079306,0.158778,0.026014,0.169028,0.812712
2,ENSG00000000419,DPM1,27.364624,27.119087,22.629077,25.556876,38.373725,37.11268,16.96513,27.339538,...,30.397583,24.112057,24.823272,24.561668,25.140996,23.343134,75.062615,37.163885,30.30876,23.497849
3,ENSG00000000457,SCYL3,8.670967,4.985135,3.829031,3.213883,3.006991,6.354279,1.947814,3.377767,...,2.263571,2.463196,3.035415,3.70533,4.745113,3.149436,2.276358,6.291898,7.003735,6.49427
4,ENSG00000000460,C1orf112,2.558665,0.999935,1.227351,1.116023,1.428246,4.215916,0.4179,1.483986,...,0.638193,2.264492,1.221613,1.73296,2.295444,0.784594,3.798579,2.435636,4.498767,3.114424


In [44]:
#First filter for protein coding genes along gene_ensembl_id

tumour_FPKM_protein = tumour_FPKM[tumour_FPKM['Gene_ensembl_id'].isin(protein_coding_genes['ensembl_gene_id'])]
normal_FPKM_protein = normal_FPKM[normal_FPKM['Gene_ensembl_id'].isin(protein_coding_genes['ensembl_gene_id'])]

In [45]:
#check for duplicated genes - Each has two different Ensembl IDs 

tumour_FPKM_protein[tumour_FPKM_protein['Gene_symbol'].duplicated()]

Unnamed: 0,Gene_ensembl_id,Gene_symbol,TCGA-E2-A15G-01A-11R-A12D-07,TCGA-E2-A1B5-01A-21R-A12P-07,TCGA-EW-A2FS-01A-11R-A17B-07,TCGA-EW-A1P7-01A-21R-A144-07,TCGA-LL-A5YO-01A-21R-A28M-07,TCGA-BH-A1FN-01A-11R-A13Q-07,TCGA-A7-A3RF-01A-11R-A22K-07,TCGA-E9-A1NG-01A-21R-A14M-07,...,TCGA-AO-A03T-01A-21R-A034-07,TCGA-E9-A6HE-01A-11R-A31O-07,TCGA-A1-A0SG-01A-11R-A144-07,TCGA-A2-A259-01A-11R-A16F-07,TCGA-A7-A0DB-01A-11R-A00Z-07,TCGA-AN-A04A-01A-21R-A034-07,TCGA-C8-A134-01A-11R-A115-07,TCGA-BH-A1ES-01A-11R-A137-07,TCGA-A8-A09W-01A-11R-A00Z-07,TCGA-BH-A0B0-01A-21R-A115-07
44529,ENSG00000258724,PINX1,0.012371,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.008439,0.0,0.0,0.006076,0.0,0.0,0.0
49222,ENSG00000267281,ATF7,0.999692,0.241846,0.730434,0.554871,0.442755,0.385124,0.387155,0.249453,...,0.452491,0.361057,0.159026,0.27497,0.424316,0.126574,0.439518,0.217979,0.072261,0.486419
50079,ENSG00000269226,TMSB15B,0.021255,0.012145,0.07427,0.025006,0.043232,0.073575,0.004792,0.017194,...,0.051126,0.011133,0.02795,0.024164,0.010654,0.069521,0.111351,0.013683,0.0,0.01733
56468,ENSG00000280987,MATR3,0.179635,0.187556,0.222547,0.175797,0.147481,0.47824,0.136963,0.101059,...,0.130938,0.130875,0.07731,0.202741,0.127696,0.09401,0.162016,0.157692,0.36593,0.095866


In [46]:
#For ATF7, ENSG00000170653 = AFT7, whereas ENSG00000267281 = ATF7-NPFF

display(tumour_FPKM_protein[tumour_FPKM_protein['Gene_symbol'] == 'ATF7'])


Unnamed: 0,Gene_ensembl_id,Gene_symbol,TCGA-E2-A15G-01A-11R-A12D-07,TCGA-E2-A1B5-01A-21R-A12P-07,TCGA-EW-A2FS-01A-11R-A17B-07,TCGA-EW-A1P7-01A-21R-A144-07,TCGA-LL-A5YO-01A-21R-A28M-07,TCGA-BH-A1FN-01A-11R-A13Q-07,TCGA-A7-A3RF-01A-11R-A22K-07,TCGA-E9-A1NG-01A-21R-A14M-07,...,TCGA-AO-A03T-01A-21R-A034-07,TCGA-E9-A6HE-01A-11R-A31O-07,TCGA-A1-A0SG-01A-11R-A144-07,TCGA-A2-A259-01A-11R-A16F-07,TCGA-A7-A0DB-01A-11R-A00Z-07,TCGA-AN-A04A-01A-21R-A034-07,TCGA-C8-A134-01A-11R-A115-07,TCGA-BH-A1ES-01A-11R-A137-07,TCGA-A8-A09W-01A-11R-A00Z-07,TCGA-BH-A0B0-01A-21R-A115-07
12871,ENSG00000170653,ATF7,10.693964,6.363569,6.420141,7.273099,3.263588,7.055567,6.124648,7.770795,...,4.414692,3.830657,6.368449,5.183244,6.273052,7.790345,5.372403,9.494554,5.227363,8.302556
49222,ENSG00000267281,ATF7,0.999692,0.241846,0.730434,0.554871,0.442755,0.385124,0.387155,0.249453,...,0.452491,0.361057,0.159026,0.27497,0.424316,0.126574,0.439518,0.217979,0.072261,0.486419


In [47]:
# => rename Gene_symbol for ENSG00000267281 as ATF7-NPFF, based off of index location

index_val = tumour_FPKM_protein[tumour_FPKM_protein['Gene_ensembl_id'] == 'ENSG00000267281'].index

tumour_FPKM_protein.loc[index_val,'Gene_symbol'] = 'ATF7-NPFF'

#Display value to check its been renamed successfully
display(tumour_FPKM_protein[tumour_FPKM_protein['Gene_symbol'] == 'ATF7-NPFF'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


Unnamed: 0,Gene_ensembl_id,Gene_symbol,TCGA-E2-A15G-01A-11R-A12D-07,TCGA-E2-A1B5-01A-21R-A12P-07,TCGA-EW-A2FS-01A-11R-A17B-07,TCGA-EW-A1P7-01A-21R-A144-07,TCGA-LL-A5YO-01A-21R-A28M-07,TCGA-BH-A1FN-01A-11R-A13Q-07,TCGA-A7-A3RF-01A-11R-A22K-07,TCGA-E9-A1NG-01A-21R-A14M-07,...,TCGA-AO-A03T-01A-21R-A034-07,TCGA-E9-A6HE-01A-11R-A31O-07,TCGA-A1-A0SG-01A-11R-A144-07,TCGA-A2-A259-01A-11R-A16F-07,TCGA-A7-A0DB-01A-11R-A00Z-07,TCGA-AN-A04A-01A-21R-A034-07,TCGA-C8-A134-01A-11R-A115-07,TCGA-BH-A1ES-01A-11R-A137-07,TCGA-A8-A09W-01A-11R-A00Z-07,TCGA-BH-A0B0-01A-21R-A115-07
49222,ENSG00000267281,ATF7-NPFF,0.999692,0.241846,0.730434,0.554871,0.442755,0.385124,0.387155,0.249453,...,0.452491,0.361057,0.159026,0.27497,0.424316,0.126574,0.439518,0.217979,0.072261,0.486419


In [48]:
#Now set index to both Gene_symbol and Gene_ensembl_id, followed by transposing table

tumour_FPKM_protein_T = tumour_FPKM_protein.set_index(['Gene_symbol','Gene_ensembl_id']).transpose()
normal_FPKM_protein_T = normal_FPKM_protein.set_index(['Gene_symbol','Gene_ensembl_id']).transpose()

In [49]:
tumour_FPKM_protein_T

Gene_symbol,TSPAN6,TNMD,DPM1,SCYL3,C1orf112,FGR,CFH,FUCA2,GCLC,NFYA,...,RPS4Y2,MATR3,AC005154.5,TMEM272,AC120114.4,BLACAT1,AC006978.2,AL365214.3,AL512506.3,GIMAP1-GIMAP5
Gene_ensembl_id,ENSG00000000003,ENSG00000000005,ENSG00000000419,ENSG00000000457,ENSG00000000460,ENSG00000000938,ENSG00000000971,ENSG00000001036,ENSG00000001084,ENSG00000001167,...,ENSG00000280969,ENSG00000280987,ENSG00000281039,ENSG00000281106,ENSG00000281348,ENSG00000281406,ENSG00000281593,ENSG00000281613,ENSG00000281883,ENSG00000281887
TCGA-E2-A15G-01A-11R-A12D-07,7.866571,0.175111,27.364624,8.670967,2.558665,1.928970,2.215282,14.838384,10.524397,19.052122,...,0.0,0.179635,0.031567,0.083592,0.390052,0.000000,0.325629,0.0,0.000000,0.000000
TCGA-E2-A1B5-01A-21R-A12P-07,4.418394,2.943996,27.119087,4.985135,0.999935,4.456954,8.149327,28.237424,10.469930,9.486448,...,0.0,0.187556,0.054112,0.107469,0.267447,0.074499,0.139546,0.0,0.000000,0.000000
TCGA-EW-A2FS-01A-11R-A17B-07,7.154286,0.383271,22.629077,3.829031,1.227351,2.823288,7.049678,28.651534,5.426292,19.119003,...,0.0,0.222547,0.023637,0.027384,0.262855,0.092203,0.658321,0.0,0.054370,0.000000
TCGA-EW-A1P7-01A-21R-A144-07,7.662819,1.129140,25.556876,3.213883,1.116023,7.513366,13.877968,19.519627,3.414307,13.768242,...,0.0,0.175797,0.083563,0.055320,0.068834,0.811712,0.086198,0.0,0.042714,0.000000
TCGA-LL-A5YO-01A-21R-A28M-07,5.059936,0.065756,38.373725,3.006991,1.428246,10.452676,6.648409,17.796223,4.306384,19.987341,...,0.0,0.147481,0.038525,0.038256,0.000000,0.070720,0.000000,0.0,0.073847,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TCGA-AN-A04A-01A-21R-A034-07,4.442201,0.079306,23.343134,3.149436,0.784594,0.882086,7.064705,21.917805,1.852359,6.440126,...,0.0,0.094010,0.000000,0.003418,0.051032,0.099507,0.042603,0.0,0.000000,0.000000
TCGA-C8-A134-01A-11R-A115-07,8.910983,0.158778,75.062615,2.276358,3.798579,3.820268,5.251800,23.068440,5.255835,25.897565,...,0.0,0.162016,0.015504,0.051320,0.134100,1.878398,0.095958,0.0,0.029719,0.000000
TCGA-BH-A1ES-01A-11R-A137-07,9.660360,0.026014,37.163885,6.291898,2.435636,0.723370,5.496997,12.611419,5.651720,18.595177,...,0.0,0.157692,0.000000,0.020180,0.150659,0.111912,0.062888,0.0,0.023372,0.000000
TCGA-A8-A09W-01A-11R-A00Z-07,6.759432,0.169028,30.308760,7.003735,4.498767,1.712172,8.021087,39.941200,5.069134,15.821901,...,0.0,0.365930,0.000000,0.014048,0.069922,0.000000,0.029186,0.0,0.021694,0.000000


In [50]:
#Drop the other three duplicated genes (only for time being, may add back in later)

duplicated_genes_drop = ['PINX1','TMSB15B','MATR3']

tumour_FPKM_protein_T.drop(duplicated_genes_drop,axis=1,inplace=True,level=0)
normal_FPKM_protein_T.drop(duplicated_genes_drop,axis=1,inplace=True,level=0)

In [51]:
#Add 'Cancer' column to DF, with value = 1 for tumour, value = 0 for normal

tumour_FPKM_protein_T['Cancer','Cancer'] = 1
normal_FPKM_protein_T['Cancer','Cancer'] = 0

In [52]:
tumour_FPKM_protein_T

Gene_symbol,TSPAN6,TNMD,DPM1,SCYL3,C1orf112,FGR,CFH,FUCA2,GCLC,NFYA,...,RPS4Y2,AC005154.5,TMEM272,AC120114.4,BLACAT1,AC006978.2,AL365214.3,AL512506.3,GIMAP1-GIMAP5,Cancer
Gene_ensembl_id,ENSG00000000003,ENSG00000000005,ENSG00000000419,ENSG00000000457,ENSG00000000460,ENSG00000000938,ENSG00000000971,ENSG00000001036,ENSG00000001084,ENSG00000001167,...,ENSG00000280969,ENSG00000281039,ENSG00000281106,ENSG00000281348,ENSG00000281406,ENSG00000281593,ENSG00000281613,ENSG00000281883,ENSG00000281887,Cancer
TCGA-E2-A15G-01A-11R-A12D-07,7.866571,0.175111,27.364624,8.670967,2.558665,1.928970,2.215282,14.838384,10.524397,19.052122,...,0.0,0.031567,0.083592,0.390052,0.000000,0.325629,0.0,0.000000,0.000000,1
TCGA-E2-A1B5-01A-21R-A12P-07,4.418394,2.943996,27.119087,4.985135,0.999935,4.456954,8.149327,28.237424,10.469930,9.486448,...,0.0,0.054112,0.107469,0.267447,0.074499,0.139546,0.0,0.000000,0.000000,1
TCGA-EW-A2FS-01A-11R-A17B-07,7.154286,0.383271,22.629077,3.829031,1.227351,2.823288,7.049678,28.651534,5.426292,19.119003,...,0.0,0.023637,0.027384,0.262855,0.092203,0.658321,0.0,0.054370,0.000000,1
TCGA-EW-A1P7-01A-21R-A144-07,7.662819,1.129140,25.556876,3.213883,1.116023,7.513366,13.877968,19.519627,3.414307,13.768242,...,0.0,0.083563,0.055320,0.068834,0.811712,0.086198,0.0,0.042714,0.000000,1
TCGA-LL-A5YO-01A-21R-A28M-07,5.059936,0.065756,38.373725,3.006991,1.428246,10.452676,6.648409,17.796223,4.306384,19.987341,...,0.0,0.038525,0.038256,0.000000,0.070720,0.000000,0.0,0.073847,0.000000,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TCGA-AN-A04A-01A-21R-A034-07,4.442201,0.079306,23.343134,3.149436,0.784594,0.882086,7.064705,21.917805,1.852359,6.440126,...,0.0,0.000000,0.003418,0.051032,0.099507,0.042603,0.0,0.000000,0.000000,1
TCGA-C8-A134-01A-11R-A115-07,8.910983,0.158778,75.062615,2.276358,3.798579,3.820268,5.251800,23.068440,5.255835,25.897565,...,0.0,0.015504,0.051320,0.134100,1.878398,0.095958,0.0,0.029719,0.000000,1
TCGA-BH-A1ES-01A-11R-A137-07,9.660360,0.026014,37.163885,6.291898,2.435636,0.723370,5.496997,12.611419,5.651720,18.595177,...,0.0,0.000000,0.020180,0.150659,0.111912,0.062888,0.0,0.023372,0.000000,1
TCGA-A8-A09W-01A-11R-A00Z-07,6.759432,0.169028,30.308760,7.003735,4.498767,1.712172,8.021087,39.941200,5.069134,15.821901,...,0.0,0.000000,0.014048,0.069922,0.000000,0.029186,0.0,0.021694,0.000000,1


In [53]:
#Concat tumour FPKM and normmal FPKM

FPKM_protein = pd.concat([tumour_FPKM_protein_T,normal_FPKM_protein_T])

In [54]:
#Rename index to 'Patient_ID'
FPKM_protein.index.rename('Sample_ID',inplace=True)

In [55]:
FPKM_protein

Gene_symbol,A1BG,A1CF,A2M,A2ML1,A3GALT2,A4GALT,A4GNT,AAAS,AACS,AADAC,...,ZW10,ZWILCH,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11A,ZYG11B,ZYX,ZZEF1
Gene_ensembl_id,ENSG00000121410,ENSG00000148584,ENSG00000175899,ENSG00000166535,ENSG00000184389,ENSG00000128274,ENSG00000118017,ENSG00000094914,ENSG00000081760,ENSG00000114771,...,ENSG00000086827,ENSG00000174442,ENSG00000122952,ENSG00000198205,ENSG00000198455,ENSG00000070476,ENSG00000203995,ENSG00000162378,ENSG00000159840,ENSG00000074755
Sample_ID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
TCGA-E2-A15G-01A-11R-A12D-07,0.200303,0.004517,62.037218,0.026726,0.084797,8.820653,0.012246,17.064390,4.003460,0.000000,...,9.270557,3.850772,12.056367,2.546744,5.460352,6.906665,1.025668,6.338343,35.457595,6.086200
TCGA-E2-A1B5-01A-21R-A12P-07,0.078879,0.001936,122.033436,0.010181,0.000000,7.768512,0.083964,8.840108,2.254441,0.089149,...,4.957518,3.323456,7.535085,1.080096,2.954966,2.809487,2.071007,4.775001,40.303351,2.810731
TCGA-EW-A2FS-01A-11R-A17B-07,0.113500,0.005073,95.119330,0.042247,0.095241,3.354760,0.009169,13.570087,1.781865,0.068148,...,3.028452,3.054153,13.206652,0.718556,2.435512,5.147258,0.871777,4.124022,49.438603,2.197803
TCGA-EW-A1P7-01A-21R-A144-07,0.143305,0.000000,201.961591,14.542587,0.000000,16.977221,0.054026,7.362115,2.425583,0.022945,...,11.589017,2.738781,5.150129,1.809843,3.896019,6.793951,1.043631,5.543776,81.649278,4.979830
TCGA-LL-A5YO-01A-21R-A28M-07,0.297307,0.016537,98.326382,1.909908,0.025872,4.685443,0.000000,9.971461,4.634258,0.047602,...,6.837874,5.136185,31.486492,1.042460,2.905342,3.914095,2.300490,4.636067,99.992003,3.883858
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TCGA-E9-A1N5-11A-41R-A14D-07,0.123577,0.000000,218.502445,0.112979,0.096784,3.891323,0.149083,9.849961,6.297582,0.019786,...,10.611371,4.560166,14.725212,1.765314,3.393286,5.751026,0.344517,8.554784,68.433139,5.730586
TCGA-BH-A1FG-11B-12R-A13Q-07,0.041596,0.000000,159.379363,0.098113,0.016289,1.619335,0.000000,4.526233,1.064364,0.000000,...,9.439939,2.975958,2.165248,2.337643,7.132933,2.126796,0.429540,9.423160,48.506383,1.833404
TCGA-BH-A1EO-11A-31R-A137-07,0.007304,0.000000,318.847446,0.024039,0.009534,14.061376,0.110143,6.003210,1.774402,17.886628,...,6.842202,1.699611,2.079415,1.152451,2.119747,2.327924,0.016622,6.492188,53.802001,2.881474
TCGA-BH-A0AY-11A-23R-A089-07,0.075899,0.008443,295.401390,0.424663,0.039629,14.567147,0.011446,7.793528,3.039119,1.020793,...,8.217763,2.675360,2.736198,1.803752,3.421900,5.143964,0.643424,7.798448,61.729866,4.497501


In [56]:
#Save the full FPKM to csv
FPKM_protein.to_csv(path.split('/')[5]+'_FPKM_protein_all.csv',sep='\t')

In [57]:
#Write normal and tumour FPKM files to csv

tumour_FPKM_protein_T.to_csv(path.split('/')[5]+'_FPKM_tumour.csv',sep='\t')
normal_FPKM_protein_T.to_csv(path.split('/')[5]+'_FPKM_normal.csv',sep='\t')