In [1]:
import pandas as pd
import numpy as np

In [2]:
# read in your combined sheet
data = pd.read_excel('GSE85914_combined.xlsx', sheet_name = 'COMBINED', index_col = 2)

# no gene info
counts = data.drop(['Strand', 'Name', 'Product'], axis = 1).astype(float)

# the other pages have important gene length info
gene_locs = pd.read_excel('GSE85914_combined.xlsx', sheet_name = 'dapF', index_col = 6)
gene_locs = gene_locs[['Translation Start', 'Translation Stop']].astype(float)

In [3]:
length = abs(gene_locs['Translation Stop'] - gene_locs['Translation Start'])

In [4]:
length

Synonym
b0001              65.0
b0002            2462.0
b0003             932.0
b0004            1286.0
b0005             296.0
                  ...  
predicted RNA       NaN
predicted RNA       NaN
predicted RNA       NaN
predicted RNA       NaN
predicted RNA       NaN
Length: 4941, dtype: float64

In [5]:
# looks like there might be some rows in length that we don't want
# pull out only the rows that have counts
length = length.loc[counts.index]

In [6]:
# normalize to length (fpk = fragments per kilobase)
fpk = counts.div(length * 1e-3, axis = 0)

# Genes with 0 length will be infinity now, just remove them
fpk = fpk.dropna()

In [7]:
# get the tpm (tpm = transcripts per million)
tpm = fpk.div(fpk.sum(), axis = 1)*1e6

In [11]:
# take the log2
# add one so that the tpm = 0 values don't go to infinity
logtpm = np.log(tpm+1)
logtpm.values.max()

12.001743759951792

In [53]:
logtpm.to_excel('Project_8_logtpm_data.xlsx')

In [14]:
logtpm_centered = logtpm.sub(logtpm[['Expression WT','Expression WT 2','Expression WT 3','Expression WT 4','Expression WT 5']].mean(axis=1),axis=0)

In [12]:
logtpm

Unnamed: 0_level_0,Expression WT,Expression dapF_parent,Expression dapF_sup_1-1,Expression dapF_sup_3-1,Expression WT 2,Expression dgk_parent,Expression dgk_sup_1-1,Expression dgk_sup_2-1,Expression dgk_sup_3-1,Expression WT 3,...,Expression entC_sup_2-1,Expression entC_sup_3-1,Expression WT 4,Expression ppk,Expression ppk_sup_1-1,Expression ppk_sup_1-2,Expression WT 5,Expression zwf_parent,Expression zwf_sup_1-1,Expression zwf_sup_1-2
Synonym,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
b0001,7.664101,7.993314,8.171657,8.015867,7.664101,7.588291,7.344696,7.748507,7.547027,7.304203,...,7.589930,7.369447,7.940001,7.912327,7.751465,7.771181,7.664101,7.424499,7.646803,7.816572
b0002,5.649127,5.773986,5.324568,5.696436,5.649127,5.312779,4.874485,5.376997,5.054683,5.887201,...,5.465958,5.267674,5.031299,5.189210,5.087964,5.323012,5.649127,5.718759,5.516028,5.260602
b0003,6.134134,6.036031,5.234791,6.025107,6.134134,6.179012,5.494812,6.112140,5.837034,6.438606,...,6.182408,4.985530,5.541303,5.821785,5.693044,5.871812,6.134134,6.276768,5.899492,5.801494
b0004,4.995925,4.945054,4.104494,4.959639,4.995925,5.110412,4.451840,5.090181,4.734996,5.265926,...,5.006393,4.448783,4.537841,4.800322,4.656330,4.883172,4.995925,5.125652,4.754041,4.661021
b0005,3.261289,3.032238,2.991474,3.025626,3.261289,3.563147,4.065596,4.969315,3.388427,3.234353,...,3.900505,3.142412,3.679767,3.632981,3.951883,3.789138,3.261289,3.277185,3.797993,3.915951
b0006,3.414008,3.232035,3.298981,3.495155,3.414008,3.408614,3.520013,3.061228,3.640650,3.164503,...,2.901332,2.421607,3.486960,3.708664,3.746343,3.113631,3.414008,3.383944,3.280641,3.685674
b0007,2.495335,2.107844,2.107568,1.931994,2.495335,2.234997,2.648400,2.228275,2.583464,2.678825,...,2.460909,2.469114,2.433342,2.412647,2.447856,2.539122,2.495335,2.269224,2.270070,2.197362
b0008,6.679171,6.274270,5.991579,6.357442,6.679171,6.710157,6.726580,6.449787,6.772299,6.669373,...,6.867498,6.611172,6.894519,6.838608,6.958238,6.666194,6.679171,6.769794,6.605621,6.193238
b0009,3.889434,4.454440,4.489127,4.603865,3.889434,4.076670,4.461873,4.194884,4.005108,3.693931,...,4.035593,3.608675,3.845570,3.901713,4.227588,4.007501,3.889434,3.976969,4.125513,3.773401
b0010,2.244193,2.778247,3.202279,3.151534,2.244193,2.125915,2.547280,2.481808,2.395893,2.194106,...,2.440583,2.464792,2.206338,2.257519,2.494884,2.286880,2.244193,2.520199,2.517997,2.487108


In [15]:
logtpm_centered

Unnamed: 0_level_0,Expression WT,Expression dapF_parent,Expression dapF_sup_1-1,Expression dapF_sup_3-1,Expression WT 2,Expression dgk_parent,Expression dgk_sup_1-1,Expression dgk_sup_2-1,Expression dgk_sup_3-1,Expression WT 3,...,Expression entC_sup_2-1,Expression entC_sup_3-1,Expression WT 4,Expression ppk,Expression ppk_sup_1-1,Expression ppk_sup_1-2,Expression WT 5,Expression zwf_parent,Expression zwf_sup_1-1,Expression zwf_sup_1-2
Synonym,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
b0001,0.016799,0.346013,0.524356,0.368566,0.016799,-0.059010,-0.302605,0.101205,-0.100275,-0.343098,...,-0.057371,-0.277855,0.292700,0.265026,0.104164,0.123880,0.016799,-0.222802,-0.000498,0.169270
b0002,0.075951,0.200810,-0.248608,0.123260,0.075951,-0.260397,-0.698691,-0.196179,-0.518493,0.314025,...,-0.107218,-0.305502,-0.541877,-0.383966,-0.485212,-0.250164,0.075951,0.145583,-0.057148,-0.312574
b0003,0.057672,-0.040432,-0.841672,-0.051355,0.057672,0.102550,-0.581651,0.035678,-0.239428,0.362144,...,0.105945,-1.090933,-0.535159,-0.254678,-0.383418,-0.204650,0.057672,0.200306,-0.176971,-0.274968
b0004,0.037617,-0.013255,-0.853815,0.001330,0.037617,0.152103,-0.506468,0.131872,-0.223312,0.307617,...,0.048084,-0.509525,-0.420467,-0.157986,-0.301978,-0.075137,0.037617,0.167343,-0.204268,-0.297287
b0005,-0.078308,-0.307359,-0.348123,-0.313971,-0.078308,0.223549,0.725999,1.629717,0.048829,-0.105245,...,0.560908,-0.197185,0.340169,0.293383,0.612286,0.449541,-0.078308,-0.062413,0.458395,0.576354
b0006,0.035311,-0.146663,-0.079717,0.116457,0.035311,0.029917,0.141315,-0.317470,0.261952,-0.214194,...,-0.477366,-0.957091,0.108262,0.329967,0.367646,-0.265067,0.035311,0.005246,-0.098057,0.306976
b0007,-0.024300,-0.411790,-0.412067,-0.587640,-0.024300,-0.284637,0.128766,-0.291359,0.063830,0.159191,...,-0.058725,-0.050521,-0.086292,-0.106987,-0.071778,0.019488,-0.024300,-0.250410,-0.249564,-0.322272
b0008,-0.041110,-0.446011,-0.728702,-0.362839,-0.041110,-0.010124,0.006299,-0.270494,0.052018,-0.050908,...,0.147216,-0.109109,0.174238,0.118326,0.237957,-0.054088,-0.041110,0.049513,-0.114660,-0.527044
b0009,0.047874,0.612880,0.647566,0.762304,0.047874,0.235109,0.620313,0.353323,0.163547,-0.147630,...,0.194032,-0.232886,0.004009,0.060152,0.386027,0.165941,0.047874,0.135408,0.283953,-0.068159
b0010,0.017588,0.551643,0.975674,0.924930,0.017588,-0.100689,0.320676,0.255204,0.169288,-0.032499,...,0.213978,0.238187,-0.020266,0.030914,0.268279,0.060276,0.017588,0.293594,0.291392,0.260503


In [16]:
logtpm_centered.to_csv('logtpm_centered.csv')