In [2]:
base_table = "spectronaut_test_input.tsv"


names = ["reference", "channel4", "channel8"]

table_endings = ["channel0", "channel4", "channel8"]
tables = [base_table.replace(".tsv", f"{x}.tsv") for x in table_endings]

table2names = dict(zip(tables, names))


In [4]:
class TableInfo():
    def __init__(self, formatted_df, filename):
        self.formatted_df = formatted_df
        self.name = table2names.get(filename)


In [5]:
import pandas as pd
import directlfq.utils as lfq_utils

def merge_tables(tables, input_type):
    list_of_formatted_tables = get_list_of_aq_formatted_tables(tables, input_type)
    merged_df = iteratively_merge_tables(list_of_formatted_tables)    
    return merged_df

def get_list_of_aq_formatted_tables(tables, input_type):
    list_of_reformatted_tables = []
    for table_file in tables:
        reformatted_table = lfq_utils.import_data(table_file, input_type_to_use=input_type)
        reformatted_table = reformatted_table.set_index(["protein", "ion"])
        tableinfo = TableInfo(reformatted_table, table_file)
        list_of_reformatted_tables.append(tableinfo)
    return list_of_reformatted_tables

def iteratively_merge_tables(list_of_tableinfos):
    merged_tableinfo = list_of_tableinfos[0]
    merged_df = merged_tableinfo.formatted_df.merge(list_of_tableinfos[1].formatted_df, left_index=True, right_index=True, suffixes = ("_" + merged_tableinfo.name, "_" + list_of_tableinfos[1].name))
    for tableinfo in list_of_tableinfos[2:]:
        merged_df = merged_df.merge(tableinfo.formatted_df, left_index=True, right_index=True, suffixes = ("", "_" + tableinfo.name))
    merged_df = merged_df.reset_index()
    return merged_df


merged_df_precursor = merge_tables(tables, input_type = "spectronaut_precursor_v3")
merged_df_fragion = merge_tables(tables, input_type="spectronaut_fragion_isotopes")


using input type spectronaut_precursor_v3
using input type spectronaut_precursor_v3
using input type spectronaut_precursor_v3
using input type spectronaut_fragion_isotopes
using input type spectronaut_fragion_isotopes
using input type spectronaut_fragion_isotopes


In [6]:
display(merged_df_precursor)
display(merged_df_fragion)


Unnamed: 0,protein,ion,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_01_S5-A1_1_3970.htrms_reference,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_02_S5-A2_1_3972.htrms_reference,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_03_S5-A3_1_3974.htrms_reference,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_04_S5-A4_1_3976.htrms_reference,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_05_S5-A5_1_3978.htrms_reference,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c2_AID8_01_S5-A6_1_3971.htrms_reference,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c2_AID8_02_S5-A7_1_3973.htrms_reference,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c2_AID8_03_S5-A8_1_3975.htrms_reference,...,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_01_S5-A1_1_3970.htrms,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_02_S5-A2_1_3972.htrms,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_03_S5-A3_1_3974.htrms,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_04_S5-A4_1_3976.htrms,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_05_S5-A5_1_3978.htrms,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c2_AID8_01_S5-A6_1_3971.htrms,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c2_AID8_02_S5-A7_1_3973.htrms,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c2_AID8_03_S5-A8_1_3975.htrms,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c2_AID8_04_S5-A9_1_3977.htrms,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c2_AID8_05_S5-A10_1_3979.htrms
0,A0AVT1,SEQ_YYFSHDTD_MOD__[DimethNter0]YYFSHDTD__CHARG...,2286.740234,1405.371460,1085.474854,1017.423218,1030.347046,1470.744751,897.283508,893.246582,...,3684.078125,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,A0FGR8-2,SEQ_ALALLEDEER_MOD__[DimethNter0]ALALLEDEER__C...,426.730560,513.639648,567.912354,336.377502,465.916840,517.216919,342.002441,382.314178,...,9.454959,0.000000,8.381450,0.000000,19.710604,0.000000,0.000000,0.000000,0.000000,15.228233
2,A0FGR8-2,SEQ_AQPPEAGPQGLHDLGR_MOD__[DimethNter0]AQPPEAG...,333.435089,334.794525,329.714142,246.291016,257.380920,311.572784,191.556625,210.649536,...,708.308472,697.964111,436.841339,561.192688,537.010986,506.143005,401.856232,310.376953,460.975006,778.023315
3,A0MZ66;A0MZ66-3,SEQ_LTQQLEEER_MOD__[DimethNter0]LTQQLEEER__CHA...,183.873199,159.733276,133.874802,134.639267,150.518158,214.311432,129.136200,136.732132,...,8.063574,12.477638,3.168269,6.407328,2.213550,15.228143,8.852160,16.570559,1.000000,28.330698
4,A0MZ66;A0MZ66-3,SEQ_SLDPENSETELER_MOD__[DimethNter0]SLDPENSETE...,415.404449,532.641968,552.500183,370.406616,0.000000,374.900940,384.228577,383.424377,...,50.559669,119.071373,95.637672,0.000000,106.815147,77.611107,0.000000,70.594772,50.967033,81.565895
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,O00442;O00442-2,SEQ_QLNPINLTER_MOD__[DimethNter0]QLNPINLTER__C...,237.009888,216.567200,194.201553,174.680374,285.692657,180.972092,182.338211,191.153244,...,20.656593,10.876544,7.584823,0.000000,19.023218,12.513621,8.014167,1.000000,0.000000,20.006855
72,O00461,SEQ_EADPESEADR_MOD__[DimethNter0]EADPESEADR__C...,197.302536,314.107971,231.182526,151.646164,195.425293,223.070435,244.193710,229.652405,...,257.497253,0.000000,0.000000,184.611389,0.000000,309.165131,0.000000,0.000000,0.000000,0.000000
73,O00469;O00469-2,SEQ_IVGPEENLSQAEAR_MOD__[DimethNter0]IVGPEENLS...,203.338013,338.271210,0.000000,285.362335,0.000000,169.211899,0.000000,0.000000,...,56.087303,0.000000,0.000000,28.755648,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
74,O00469;O00469-2,SEQ_SEDYVDIVQGNR_MOD__[DimethNter0]SEDYVDIVQGN...,518.015503,653.323364,640.433350,468.780243,1122.676880,565.844727,454.170258,498.580505,...,75.779335,219.236588,248.220810,0.000000,0.000000,0.000000,0.000000,124.816360,221.186462,145.767319


Unnamed: 0,protein,ion,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_01_S5-A1_1_3970.htrms_reference,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_02_S5-A2_1_3972.htrms_reference,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_03_S5-A3_1_3974.htrms_reference,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_04_S5-A4_1_3976.htrms_reference,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_05_S5-A5_1_3978.htrms_reference,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c2_AID8_01_S5-A6_1_3971.htrms_reference,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c2_AID8_02_S5-A7_1_3973.htrms_reference,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c2_AID8_03_S5-A8_1_3975.htrms_reference,...,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_01_S5-A1_1_3970.htrms,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_02_S5-A2_1_3972.htrms,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_03_S5-A3_1_3974.htrms,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_04_S5-A4_1_3976.htrms,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_05_S5-A5_1_3978.htrms,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c2_AID8_01_S5-A6_1_3971.htrms,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c2_AID8_02_S5-A7_1_3973.htrms,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c2_AID8_03_S5-A8_1_3975.htrms,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c2_AID8_04_S5-A9_1_3977.htrms,20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c2_AID8_05_S5-A10_1_3979.htrms
0,A0AVT1,SEQ_YYFSHDTD_MOD__[DimethNter0]YYFSHDTD__CHARG...,1004.868469,603.280090,445.194763,415.271332,245.877060,606.743103,238.688858,303.433228,...,896.399963,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000
1,A0AVT1,SEQ_YYFSHDTD_MOD__[DimethNter0]YYFSHDTD__CHARG...,224.580536,174.074188,89.409363,49.097614,85.033630,164.441895,100.847687,87.084534,...,2733.390381,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000
2,A0AVT1,SEQ_YYFSHDTD_MOD__[DimethNter0]YYFSHDTD__CHARG...,1497.709351,756.364929,544.862671,575.631836,416.449890,1239.316895,478.280884,445.614746,...,776.068848,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000
3,A0AVT1,SEQ_YYFSHDTD_MOD__[DimethNter0]YYFSHDTD__CHARG...,13096.000000,11968.000000,12830.000000,13514.000000,10506.000000,14237.000000,15029.000000,12964.000000,...,7306.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000
4,A0AVT1,SEQ_YYFSHDTD_MOD__[DimethNter0]YYFSHDTD__CHARG...,8806.000000,7736.000000,8164.000000,8659.000000,6360.000000,9388.000000,7854.000000,7313.000000,...,6076.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
676,O00469;O00469-2,SEQ_VLGQGEEWR_MOD__[DimethNter0]VLGQGEEWR__CHA...,180.673096,125.963722,62.861027,67.071159,147.609833,173.087799,48.575409,22.744345,...,0.000000,47.490856,0.0,0.0,0.0,0.0,23.666149,0.0,49.427193,5.302744
677,O00469;O00469-2,SEQ_VLGQGEEWR_MOD__[DimethNter0]VLGQGEEWR__CHA...,25318.000000,19857.000000,20694.000000,19568.000000,13152.000000,24730.000000,19503.000000,17176.000000,...,0.000000,5876.000000,0.0,0.0,0.0,0.0,7311.000000,0.0,6092.000000,7789.000000
678,O00469;O00469-2,SEQ_VLGQGEEWR_MOD__[DimethNter0]VLGQGEEWR__CHA...,15576.000000,14185.000000,15777.000000,14488.000000,9102.000000,16937.000000,11251.000000,12294.000000,...,0.000000,3507.000000,0.0,0.0,0.0,0.0,3456.000000,0.0,3037.000000,4213.000000
679,O00469;O00469-2,SEQ_VLGQGEEWR_MOD__[DimethNter0]VLGQGEEWR__CHA...,17484.000000,13168.000000,12748.000000,10853.000000,10420.000000,14693.000000,8366.000000,13704.000000,...,0.000000,5530.000000,0.0,0.0,0.0,0.0,4406.000000,0.0,3805.000000,6357.000000


In [7]:

merged_df_precursor.to_csv("merged_sn_precursors.tsv", sep="\t", index = None)
merged_df_fragion.to_csv("merged_sn_fragions.tsv", sep="\t", index = None)

## concatenate the replicates

In [8]:
import pandas as pd
merged_df_precursor = pd.read_csv("merged_sn_precursors.tsv", sep="\t")
merged_df_fragion = pd.read_csv("merged_sn_fragions.tsv", sep="\t")

In [9]:
display(merged_df_fragion.columns)

Index(['protein', 'ion',
       '20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_01_S5-A1_1_3970.htrms_reference',
       '20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_02_S5-A2_1_3972.htrms_reference',
       '20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_03_S5-A3_1_3974.htrms_reference',
       '20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_04_S5-A4_1_3976.htrms_reference',
       '20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c1_AID8_05_S5-A5_1_3978.htrms_reference',
       '20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c2_AID8_01_S5-A6_1_3971.htrms_reference',
       '20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c2_AID8_02_S5-A7_1_3973.htrms_reference',
       '20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c2_AID8_03_S5-A8_1_3975.htrms_reference',
       '20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c2_AID8_04_S5-A9_1_3977.htrms_reference',
       '20220730_TIMS06_MCT_SA_HeLa_whi40_M07_Ref0s4s8_c2_AID8_05_S5-A10_1_3979.htrms_reference',
    

In [10]:
def re_arrange_and_filter_dataframe(merged_df_input):
    re_arranged_merged_df = re_arrange_dataframe(merged_df_input)
    re_arranged_merged_df = add_precursor_column(re_arranged_merged_df)
    re_arranged_merged_df_filtered =  filter_out_ambigous_fragions(re_arranged_merged_df)
    ordered_headers = ["replicate", "protein", "precursor","ion", "reference", "target4", "target8"]
    re_arranged_merged_df_filtered = re_arranged_merged_df_filtered[ordered_headers]
    re_arranged_merged_df_filtered = re_arranged_merged_df_filtered.drop_duplicates()
    return re_arranged_merged_df_filtered

def re_arrange_dataframe(merged_df_input):
    list_of_sub_dfs = []
    experiment_columns = get_experiment_columns(merged_df_input)
    for experiment_column in experiment_columns:    
        channel_cols_of_experiment  = [x for x in merged_df_input.columns if x.startswith(experiment_column)]
        relevant_cols = ["protein", "ion"] + channel_cols_of_experiment
        replicate_id = experiment_column.split("_")[-1]
        sub_df = merged_df_input[relevant_cols]
        sub_df.loc[:,"replicate"] = replicate_id
        list_of_sub_dfs.append(sub_df)
        #replace string in column names with other string
        sub_df.columns = [x.replace(experiment_column, "") for x in sub_df.columns]
        list_of_sub_dfs.append(sub_df)

    re_arranged_df = pd.concat(list_of_sub_dfs, ignore_index=True)
    re_arranged_df = re_arranged_df.rename(columns = {".htrms_reference" : "reference",".htrms" : "target8",".htrms_channel4" : "target4"})

    return re_arranged_df

def get_experiment_columns(merged_df_input):
    columns = [x for x in merged_df_input.columns if x.startswith("202")]
    experiment_columns = list({x.split(".htrms")[0] for x in columns})
    experiment_columns.sort()
    return experiment_columns

def add_precursor_column(re_arranged_df_input):
    re_arranged_df_input.loc[:, "precursor"] = [parse_precursor_from_ion(x) for x in re_arranged_df_input["ion"]]
    return re_arranged_df_input

def parse_precursor_from_ion(ion):
    if "FRGION" in ion:
        return ion.split("FRGION")[0]
    elif "MS1ISOTOPES" in ion:
        return ion.split("MS1ISOTOPES")[0]
    else:
        return ion


def filter_out_ambigous_fragions(re_arranged_df_input):
    is_meaningful_ion = lambda x : check_if_ion_is_meaningful(x)
    re_arranged_df_input = re_arranged_df_input.loc[re_arranged_df_input.ion.apply(is_meaningful_ion)]
    return re_arranged_df_input

def check_if_ion_is_meaningful(ion):
    if "FRGION" not in ion and "MS1ISOTOPES" not in ion:
        return True
    if "MS1ISOTOPES" in ion:
        return True
    elif "K[DimethLys0]_" in ion: #if the end is labelled, all fragions are meaningful
        return True
    elif "FRGION_b" in ion:
        return True
    else:
        return False
    

In [11]:

re_arranged_df_precursor = re_arrange_and_filter_dataframe(merged_df_precursor)
re_arranged_df_fragion = re_arrange_and_filter_dataframe(merged_df_fragion)

display(re_arranged_df_precursor)
display(re_arranged_df_fragion)

print(re_arranged_df_precursor.iloc[1, 2])
print(re_arranged_df_fragion.iloc[1, 2])
print(re_arranged_df_fragion.iloc[5, 2])
print(re_arranged_df_fragion.iloc[19, 2])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


Unnamed: 0,replicate,protein,precursor,ion,reference,target4,target8
0,3970,A0AVT1,SEQ_YYFSHDTD_MOD__[DimethNter0]YYFSHDTD__CHARG...,SEQ_YYFSHDTD_MOD__[DimethNter0]YYFSHDTD__CHARG...,2286.740234,0.000000,3684.078125
1,3970,A0FGR8-2,SEQ_ALALLEDEER_MOD__[DimethNter0]ALALLEDEER__C...,SEQ_ALALLEDEER_MOD__[DimethNter0]ALALLEDEER__C...,426.730560,305.247131,9.454959
2,3970,A0FGR8-2,SEQ_AQPPEAGPQGLHDLGR_MOD__[DimethNter0]AQPPEAG...,SEQ_AQPPEAGPQGLHDLGR_MOD__[DimethNter0]AQPPEAG...,333.435089,510.888214,708.308472
3,3970,A0MZ66;A0MZ66-3,SEQ_LTQQLEEER_MOD__[DimethNter0]LTQQLEEER__CHA...,SEQ_LTQQLEEER_MOD__[DimethNter0]LTQQLEEER__CHA...,183.873199,41.861946,8.063574
4,3970,A0MZ66;A0MZ66-3,SEQ_SLDPENSETELER_MOD__[DimethNter0]SLDPENSETE...,SEQ_SLDPENSETELER_MOD__[DimethNter0]SLDPENSETE...,415.404449,0.000000,50.559669
...,...,...,...,...,...,...,...
1439,3979,O00442;O00442-2,SEQ_QLNPINLTER_MOD__[DimethNter0]QLNPINLTER__C...,SEQ_QLNPINLTER_MOD__[DimethNter0]QLNPINLTER__C...,192.209396,29.914993,20.006855
1440,3979,O00461,SEQ_EADPESEADR_MOD__[DimethNter0]EADPESEADR__C...,SEQ_EADPESEADR_MOD__[DimethNter0]EADPESEADR__C...,0.000000,0.000000,0.000000
1441,3979,O00469;O00469-2,SEQ_IVGPEENLSQAEAR_MOD__[DimethNter0]IVGPEENLS...,SEQ_IVGPEENLSQAEAR_MOD__[DimethNter0]IVGPEENLS...,198.013763,142.607895,0.000000
1442,3979,O00469;O00469-2,SEQ_SEDYVDIVQGNR_MOD__[DimethNter0]SEDYVDIVQGN...,SEQ_SEDYVDIVQGNR_MOD__[DimethNter0]SEDYVDIVQGN...,584.118286,136.258560,145.767319


Unnamed: 0,replicate,protein,precursor,ion,reference,target4,target8
0,3970,A0AVT1,SEQ_YYFSHDTD_MOD__[DimethNter0]YYFSHDTD__CHARG...,SEQ_YYFSHDTD_MOD__[DimethNter0]YYFSHDTD__CHARG...,1004.868469,0.000000,896.399963
1,3970,A0AVT1,SEQ_YYFSHDTD_MOD__[DimethNter0]YYFSHDTD__CHARG...,SEQ_YYFSHDTD_MOD__[DimethNter0]YYFSHDTD__CHARG...,224.580536,0.000000,2733.390381
2,3970,A0AVT1,SEQ_YYFSHDTD_MOD__[DimethNter0]YYFSHDTD__CHARG...,SEQ_YYFSHDTD_MOD__[DimethNter0]YYFSHDTD__CHARG...,1497.709351,0.000000,776.068848
3,3970,A0AVT1,SEQ_YYFSHDTD_MOD__[DimethNter0]YYFSHDTD__CHARG...,SEQ_YYFSHDTD_MOD__[DimethNter0]YYFSHDTD__CHARG...,13096.000000,0.000000,7306.000000
4,3970,A0AVT1,SEQ_YYFSHDTD_MOD__[DimethNter0]YYFSHDTD__CHARG...,SEQ_YYFSHDTD_MOD__[DimethNter0]YYFSHDTD__CHARG...,8806.000000,0.000000,6076.000000
...,...,...,...,...,...,...,...
12934,3979,O00469;O00469-2,SEQ_VLGQGEEWR_MOD__[DimethNter0]VLGQGEEWR__CHA...,SEQ_VLGQGEEWR_MOD__[DimethNter0]VLGQGEEWR__CHA...,88.878075,142.715561,5.302744
12935,3979,O00469;O00469-2,SEQ_VLGQGEEWR_MOD__[DimethNter0]VLGQGEEWR__CHA...,SEQ_VLGQGEEWR_MOD__[DimethNter0]VLGQGEEWR__CHA...,22029.000000,10722.000000,7789.000000
12936,3979,O00469;O00469-2,SEQ_VLGQGEEWR_MOD__[DimethNter0]VLGQGEEWR__CHA...,SEQ_VLGQGEEWR_MOD__[DimethNter0]VLGQGEEWR__CHA...,13650.000000,12502.000000,4213.000000
12937,3979,O00469;O00469-2,SEQ_VLGQGEEWR_MOD__[DimethNter0]VLGQGEEWR__CHA...,SEQ_VLGQGEEWR_MOD__[DimethNter0]VLGQGEEWR__CHA...,12860.000000,6305.000000,6357.000000


SEQ_ALALLEDEER_MOD__[DimethNter0]ALALLEDEER__CHARGE_2_
SEQ_YYFSHDTD_MOD__[DimethNter0]YYFSHDTD__CHARGE_2_
SEQ_YYFSHDTD_MOD__[DimethNter0]YYFSHDTD__CHARGE_2_
SEQ_AQPPEAGPQGLHDLGR_MOD__[DimethNter0]AQPPEAGPQGLHDLGR__CHARGE_3_


In [12]:
re_arranged_df_precursor.to_csv("re_arranged_merged_sn_precursors.tsv", sep="\t", index = None)
re_arranged_df_fragion.to_csv("re_arranged_merged_sn_fragions.tsv", sep="\t", index = None)



In [13]:
concatted_precursor_fragion = pd.concat([re_arranged_df_precursor, re_arranged_df_fragion], ignore_index=True)
concatted_precursor_fragion.to_csv("re_arranged_merged_sn_concatted_precursor_fragion.tsv", sep="\t", index = None)

In [12]:
import pandas as pd
import numpy as np

input_file = "../Spectronaut/re_arranged_merged_sn_spikein_concatted_precursor_fragion.tsv"
input_df = pd.read_csv(input_file, sep="\t")
input_df = input_df.set_index("precursor")
precursor1 = input_df.index.unique()[3]
precursor1_df = input_df.loc[precursor1]
print(len(precursor1_df.index))
precursor1_df = precursor1_df[[ 3970 == x for x in precursor1_df["replicate"].values]]
precursor1_df = precursor1_df.replace(0, np.nan)
display(precursor1_df)
#get names of numeric columns present in the dataframe
numeric_cols = precursor1_df.select_dtypes(include=[np.number]).columns
#remove element from numeric_cols
numeric_cols = numeric_cols.drop("replicate")

#convert the numeric columns to log2
precursor1_df[numeric_cols] = np.log2(precursor1_df[numeric_cols])

display(precursor1_df)
precursor1_df.to_csv("../Spectronaut/test_df_w_one_precursor.tsv", sep="\t")


FileNotFoundError: [Errno 2] No such file or directory: '../Spectronaut/re_arranged_merged_sn_spikein_concatted_precursor_fragion.tsv'

In [3]:
input_file = "../Spectronaut/re_arranged_merged_sn_spikein_concatted_precursor_fragion.tsv"
input_df = pd.read_csv(input_file, sep="\t")
input_df = input_df.set_index("precursor")
precursor1 = input_df.index.unique()[:10]
precursor1_df = input_df.loc[precursor1]

display(precursor1_df)
precursor1_df.to_csv("../Spectronaut/test_df_w_many_precursors.tsv", sep="\t")


NameError: name 'pd' is not defined