<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Proteomics-data-to-SmartTable" data-toc-modified-id="Proteomics-data-to-SmartTable-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Proteomics data to SmartTable</a></span><ul class="toc-item"><li><span><a href="#Global-Protein-Stats" data-toc-modified-id="Global-Protein-Stats-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Global Protein Stats</a></span></li><li><span><a href="#Comparison-to-JM113" data-toc-modified-id="Comparison-to-JM113-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Comparison to JM113</a></span></li><li><span><a href="#DATA" data-toc-modified-id="DATA-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>DATA</a></span></li></ul></li></ul></div>

# Proteomics data to SmartTable

In [10]:
import os, sys, pandas as pd, numpy as np
def flatten_headers(level_sep, *headers):
    """Collapse parallel header levels into one flat label per column.

    Parameters
    ----------
    level_sep : str
        Text placed between the non-blank levels of a column label.
    *headers : iterable of str
        One sequence of labels per header level; position ``i`` of every
        sequence describes column ``i``.

    Returns
    -------
    list of str
        One flattened label per column (``zip`` semantics: the shortest
        level sequence determines the length; no levels gives ``[]``).
    """
    flat = []
    for levels in zip(*headers):
        # Drop blank levels before joining: the trailing .strip() only removes
        # whitespace, so with a separator such as '_' a blank level would
        # otherwise leave a dangling separator ('a_' or '_b').
        kept = [level for level in levels if level.strip()]
        flat.append(level_sep.join(kept).strip())
    return flat

def excel_sheet_to_smart_table(infile, sheet_name, headers, level_sep, index_col):
    """Read one Excel sheet, flatten its header rows and save it as a .tab file.

    Parameters
    ----------
    infile : str or path-like
        Workbook handed to ``pandas.read_excel``.
    sheet_name : str or int
        Sheet to read. It is also the stem of the output file, with spaces
        replaced by underscores.
    headers : int or list of int
        Sheet row number(s) to use as header rows (``read_excel``'s ``header``).
    level_sep : str
        Separator placed between header levels when they are flattened.
    index_col : str
        Flattened name of the column that becomes the index. Its values are
        split on '|' and only the last piece is kept.

    Returns
    -------
    pandas.DataFrame
        The sheet with flat column names, indexed by ``index_col``.

    Side effects
    ------------
    Writes ``<sheet_name>.tab`` (tab-separated) into the current working
    directory.
    """
    df = pd.read_excel(infile, sheet_name=sheet_name, header=headers)

    # Column levels are numbered 0..nlevels-1 regardless of which sheet rows
    # were used as headers, so walk the levels themselves instead of reusing
    # the row numbers in `headers` as level indices.
    levels = []
    for level in range(df.columns.nlevels):
        labels = df.columns.get_level_values(level)
        # Labels containing 'Unnamed' (the placeholder visible in this
        # notebook's outputs for blank header cells) are blanked out.
        # str() keeps non-string labels (e.g. numeric headers) from raising
        # TypeError on the `in` test.
        levels.append(['' if 'Unnamed' in str(label) else str(label)
                       for label in labels])
    df.columns = flatten_headers(level_sep, *levels)

    # Keep only the last '|'-separated piece of each identifier
    # (e.g. 'ABF_000342|ter' -> 'ter').
    df[index_col] = df[index_col].str.split('|').str.get(-1)
    df = df.set_index(index_col)

    out_name = '{}.tab'.format(str(sheet_name).replace(' ', '_'))
    df.to_csv(out_name, sep='\t')
    return df
    
# Input workbook location, relative to the notebook's working directory.
proteomics_dir = 'TestTeamData/Proteomics/'
protein_stats_f = os.path.join(
    proteomics_dir,
    'P_putida_Batch6408_ProteinStats.xlsx',
)


## Global Protein Stats

In [43]:
# 'Global_stats' has two header rows; they are flattened with a single space.
global_stats = excel_sheet_to_smart_table(
    protein_stats_f,
    sheet_name='Global_stats',
    headers=[0, 1],
    level_sep=' ',
    index_col='Protein_short',
)
global_stats

Unnamed: 0_level_0,Protein,#Peptides,#Redundancy,P-value STRAIN,G-value STRAIN,Log2(Mean) - Strain JM113,Log2(Mean) - Strain JM164,Log2(Mean) - Strain JM165,Log2(Mean) - Strain JM166,Counts - Strain JM113,Counts - Strain JM164,Counts - Strain JM165,Counts - Strain JM166
Protein_short,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
ter,ABF_000342|ter,60,60,0.000209,1.000000,37.675352,39.499589,37.760468,35.865446,3,3,3,3
paaF,ABF_000344|paaF,14,0,0.111115,0.000840,34.401265,,34.956284,,3,0,3,0
paaH,ABF_000345|paaH,42,42,0.025342,1.000000,37.197808,37.683606,35.842100,37.694280,3,3,3,3
acot8,ABF_000347|acot8,7,6,0.008718,0.118733,31.256606,33.237223,28.799118,29.709569,3,3,1,2
ABF_006577,ABF_006577,14,0,0.111115,0.000840,34.401265,,34.956284,,3,0,3,0
ABF_006578,ABF_006578,29,29,0.006702,0.118733,28.864795,29.667421,37.673757,35.182028,1,2,3,3
aroG-D146N,aroG-D146N,63,61,0.233555,1.000000,36.534121,36.487846,36.399651,36.565304,3,3,3,3
asbF,asbF,21,21,0.008140,1.000000,37.513894,37.573793,37.029588,37.972384,3,3,3,3
G18UU-17326,gnl|A0A140FVX0|A0A140FVX0_PSEPK|G18UU-17326,6,6,0.058740,1.000000,32.117833,32.861165,32.406601,32.825128,3,3,3,3
G18UU-17384,gnl|A0A140FVX4|A0A140FVX4_PSEPK|G18UU-17384,11,11,0.534954,1.000000,30.141992,29.981609,30.206626,30.781432,3,3,3,3


## Comparison to JM113

In [44]:

# 'Comparison_to_JM113' has two header rows; they are flattened with a single space.
# The keyword must be `level_sep`: excel_sheet_to_smart_table has no `sep`
# parameter, so `sep=' '` raises TypeError when the notebook is re-run.
comparison_to_JM113 = excel_sheet_to_smart_table(
    protein_stats_f,
    sheet_name='Comparison_to_JM113',
    headers=[0, 1],
    level_sep=' ',
    index_col='Protein_short',
)
comparison_to_JM113

Unnamed: 0_level_0,Protein,#Peptides,#Redundancy,P-value(JM113) JM164,P-value(JM113) JM165,P-value(JM113) JM166,G-value(JM113) JM164,G-value(JM113) JM165,G-value(JM113) JM166,Trend(JM113) JM164,Trend(JM113) JM165,Trend(JM113) JM166,Log2FC JM164/JM113,Log2FC JM165/JM113,Log2FC JM166/JM113,Counts JM113,Counts JM164,Counts JM165,Counts JM166
Protein_short,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
ter,ABF_000342|ter,60,60,0.006412,0.993794,0.006708,1.000000,1.000000,1.000000,1,0,-1,1.824237,0.085117,-1.809905,3,3,3,3
paaF,ABF_000344|paaF,14,0,,0.111119,,0.011778,1.000000,0.011778,-1,0,-1,,0.555019,,3,0,3,0
paaH,ABF_000345|paaH,42,42,0.698128,0.081679,0.685541,1.000000,1.000000,1.000000,0,0,0,0.485798,-1.355708,0.496472,3,3,3,3
acot8,ABF_000347|acot8,7,6,0.057137,0.095927,0.177192,1.000000,0.152016,0.622987,0,0,0,1.980617,-2.457489,-1.547037,3,3,1,2
ABF_006577,ABF_006577,14,0,,0.111119,,0.011778,1.000000,0.011778,-1,0,-1,,0.555019,,3,0,3,0
ABF_006578,ABF_006578,29,29,,,,1.000000,0.152016,0.152016,0,0,0,,,,1,2,3,3
aroG-D146N,aroG-D146N,63,61,0.878211,0.261499,0.955719,1.000000,1.000000,1.000000,0,0,0,-0.046275,-0.134470,0.031183,3,3,3,3
asbF,asbF,21,21,0.978237,0.084056,0.102663,1.000000,1.000000,1.000000,0,0,0,0.059899,-0.484306,0.458490,3,3,3,3
G18UU-17326,gnl|A0A140FVX0|A0A140FVX0_PSEPK|G18UU-17326,6,6,0.050472,0.571740,0.062057,1.000000,1.000000,1.000000,0,0,0,0.743333,0.288768,0.707295,3,3,3,3
G18UU-17384,gnl|A0A140FVX4|A0A140FVX4_PSEPK|G18UU-17384,11,11,0.982754,0.998790,0.551987,1.000000,1.000000,1.000000,0,0,0,-0.160383,0.064635,0.639440,3,3,3,3


## DATA

In [45]:
# 'DATA' has a single header row.
# The keyword must be `level_sep`: excel_sheet_to_smart_table has no `sep`
# parameter, so `sep=' '` raises TypeError when the notebook is re-run.
data = excel_sheet_to_smart_table(
    protein_stats_f,
    'DATA',
    headers=[0],
    level_sep=' ',
    index_col='Protein_short',
)
data

Unnamed: 0_level_0,Protein,#Peptides,#Redundancy,JM113_DP_R1,JM113_DP_R2,JM113_DP_R3,JM164_DP_R1,JM164_DP_R2,JM164_DP_R3,JM165_DP_R1,JM165_DP_R2,JM165_DP_R3,JM166_DP_R1,JM166_DP_R2,JM166_DP_R3
Protein_short,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
ter,ABF_000342|ter,60,60,37.837181,37.405531,37.783343,38.764589,39.255572,40.478606,37.534819,37.850700,37.895886,36.076590,36.156527,35.363221
paaF,ABF_000344|paaF,14,0,34.641466,34.303244,34.259085,,,,34.468436,35.210308,35.190107,,,
paaH,ABF_000345|paaH,42,42,38.325754,36.384222,36.883447,38.171300,37.893900,36.985617,35.608359,36.445505,35.472435,37.848358,37.597604,37.636878
acot8,ABF_000347|acot8,7,6,30.966924,31.758922,31.043974,32.196987,33.082004,34.432678,,,28.799118,29.786194,,29.632944
ABF_006577,ABF_006577,14,0,34.641466,34.303244,34.259085,,,,34.468436,35.210308,35.190107,,,
ABF_006578,ABF_006578,29,29,28.864795,,,32.120413,27.214428,,37.378831,38.151894,37.490546,35.309900,35.313618,34.922565
aroG-D146N,aroG-D146N,63,61,36.543769,36.563401,36.495194,36.563604,36.354461,36.545474,36.377772,36.381583,36.439597,36.424039,36.705038,36.566836
asbF,asbF,21,21,37.814327,37.078319,37.649035,37.762081,37.399350,37.559949,36.916134,36.938695,37.233934,37.887541,37.985111,38.044499
G18UU-17326,gnl|A0A140FVX0|A0A140FVX0_PSEPK|G18UU-17326,6,6,32.367688,31.987041,31.998769,32.940933,32.955381,32.687182,32.377193,32.117461,32.725149,32.300989,33.268652,32.905742
G18UU-17384,gnl|A0A140FVX4|A0A140FVX4_PSEPK|G18UU-17384,11,11,30.199058,29.873081,30.353836,30.677022,30.754517,28.513287,30.562697,30.099307,29.957874,30.750098,31.078473,30.515725
