In [1]:
import re
import time

# Filtering thresholds. NOTE(review): neither constant is referenced by any
# cell visible in this part of the notebook.
# skip if neutral peptide_mass > MZ_MAX (3000.0)
MZ_MAX = 3000
# skip if length > MAX_LEN
MAX_LEN = 30

def inspect_mgf_file(fpath):
    """Return the file offsets at which the spectra of an MGF file begin.

    The file is scanned line by line. For every line containing
    "BEGIN IONS" the value of ``f.tell()`` taken just before that line was
    read is recorded, and the scan then skips ahead to the line containing
    the matching "END IONS".

    Args:
        fpath: path of the MGF text file.

    Returns:
        List of offsets (as returned by ``tell()``), one per complete
        spectrum, in file order. A trailing spectrum that has no
        "END IONS" line (truncated file) is reported on stdout and is not
        included.
    """
    start_line = "BEGIN IONS"
    end_line = "END IONS"
    spectra_location = []
    with open(fpath, mode="r") as f:
        while True:
            # readline() (rather than iterating the file) keeps tell() usable.
            _location = f.tell()
            line = f.readline()
            if not line:  # end of file
                break
            if start_line not in line:
                continue
            # Skip to the end of this record.
            while line and end_line not in line:
                line = f.readline()
            if not line:
                # readline() returns '' forever at EOF, so without this check
                # a record lacking "END IONS" would loop endlessly.
                print('[ERR] spectrum at offset {0} in {1} has no "{2}" line'
                      .format(_location, fpath, end_line))
                break
            spectra_location.append(_location)
    return spectra_location

# Index one example MGF file and report the number of spectra found together
# with the wall-clock time the scan took.
start_time = time.time()
spectra_location = inspect_mgf_file('training_datasets_raw_data/Kaiko/Biodiversity_A_cryptum_FeTSB_anaerobic_1_01Jun16_Pippin_16-03-39.mgf')
end_time = time.time()
print('Num:{0}, time:{1}'.format(len(spectra_location), end_time-start_time))

Num:30296, time:1.9562349319458008


In [2]:
from pyteomics import mzml,auxiliary

import gzip

def inspect_mzML_file(fpath):
    """Read every spectrum of a gzip-compressed mzML file into memory.

    Args:
        fpath: path of the ``.mzML.gz`` file.

    Returns:
        List with one entry per object yielded by ``mzml.read``, in file order.
    """
    with gzip.open(fpath, 'rb') as handle:
        # Materialise inside the ``with`` so the reader is exhausted before
        # the underlying gzip stream is closed.
        return [spectrum for spectrum in mzml.read(handle)]

# Load one example mzML.gz file completely and report the number of spectra
# together with the wall-clock time the load took.
start_time = time.time()

spectra = inspect_mzML_file('training_datasets_raw_data/Kaiko/Biodiversity_A_cryptum_FeTSB_anaerobic_1_01Jun16_Pippin_16-03-39.mzML.gz')
end_time = time.time()
print('Num:{0}, time:{1}'.format(len(spectra), end_time-start_time))

Num:36331, time:29.759533166885376


In [3]:
# Spot check: show the 'ms level' entry of one arbitrary loaded spectrum.
spectra[1003]['ms level']

1

In [5]:
import pandas as pd
import sys
def get_annotated_pepseq(fpath, q_value_threshold=0.001):
    """Load a tab-separated annotation file and keep the usable identifications.

    Rows are filtered in this order, printing the remaining row count after
    each step:
      1. ``QValue <= q_value_threshold``
      2. ``Protein`` does not start with ``'XXX_'`` (decoys)
      3. the bare peptide sequence contains none of ``? X B Z J + -``

    The bare sequence is the text between the first and the last ``.`` of the
    ``Peptide`` column (``X.AAAAAAAA.X`` -> ``AAAAAAAA``) and is stored in a
    new ``pepseq`` column.

    Args:
        fpath: path of the tab-separated file; the columns ``QValue``,
            ``Protein`` and ``Peptide`` are read here.
        q_value_threshold: largest accepted ``QValue``.

    Returns:
        A new DataFrame holding the surviving rows plus the ``pepseq`` column.
    """
    # read sequences
    seqs = pd.read_csv(fpath, sep='\t')
    print('ori:', seqs.shape[0])
    seqs = seqs[seqs.QValue <= q_value_threshold]  # filter by q_values
    print('after filtering with q<={0}:{1}'.format(q_value_threshold, seqs.shape[0]))
    seqs = seqs[~seqs.Protein.str.startswith('XXX_')]  # filter out the decoys
    print('after filtering out decoys:{0}'.format(seqs.shape[0]))

    # extract AAAAAAAAAAAAAAA in X.AAAAAAAAAAAAAAA.X
    # A regex anchored on the first and last dot is used instead of
    # split('.') into exactly three columns: a sequence carrying a numeric
    # modification such as "M+15.995" contains an extra dot and used to raise
    # ValueError before the filter below had a chance to discard it.
    # assign() returns a new frame, so nothing is written into a filtered
    # slice (no SettingWithCopyWarning) and values stay aligned by index.
    seqs = seqs.assign(
        pepseq=seqs.Peptide.str.extract(r'^[^.]*\.(.*)\.[^.]*$', expand=False)
    )

    # filter out the sequences containing unregistered letters
    seqs = seqs[~seqs.pepseq.str.contains('[\\?XBZJ\\+-]')]
    print('after filtering unregistered modifications:{0}'.format(seqs.shape[0]))

    n_scans = seqs.shape[0]
    print('number of instances from an annotation file:', n_scans)
    sys.stdout.flush()
    return seqs
# Load the filtered annotations of one example annotation file (default q-value threshold).
annotated = get_annotated_pepseq('training_datasets_raw_data/Kaiko/Biodiversity_A_cryptum_FeTSB_anaerobic_1_01Jun16_Pippin_16-03-39_msgfdb_fht.txt')

ori: 29447
after filtering with q<=0.001:7324
after filtering out decoys:7317
after filtering unregistered modifications:7317
number of instances from an annotation file: 7317


In [6]:
# Preview the first rows of the filtered annotation table.
annotated.head()

Unnamed: 0,ResultID,Scan,FragMethod,SpecIndex,Charge,PrecursorMZ,DelM,DelM_PPM,MH,Peptide,...,NTT,DeNovoScore,MSGFScore,MSGFDB_SpecEValue,Rank_MSGFDB_SpecEValue,EValue,QValue,PepQValue,IsotopeError,pepseq
0,1,31013,HCD,1,4,1110.3257,-0.01506,-3.39471,4437.292079,R.EASLPSLSEITITKEYDVASGDLLKEALSGNVQEVDIVFTR.T,...,2,302,283,7.47621e-41,1,1.729275e-34,0.0,0.0,1,EASLPSLSEITITKEYDVASGDLLKEALSGNVQEVDIVFTR
1,2,28498,HCD,2,5,1049.7239,-0.017557,-3.349563,5242.600824,K.SGIVWNASTLDKFLANPQADVPGTKMPYM*GMANATDRADVVAY...,...,2,268,220,4.4346620000000004e-39,1,1.0276370000000001e-32,0.0,0.0,2,SGIVWNASTLDKFLANPQADVPGTKMPYM*GMANATDRADVVAYLQ...
2,3,29226,HCD,3,4,982.52234,0.037197,9.479435,3925.023426,M.PESQDNQQLLQLTAQIVSAHVSHNSVSAEMLPALIR.D,...,1,294,283,9.543878e-39,1,2.204682e-32,0.0,0.0,2,PESQDNQQLLQLTAQIVSAHVSHNSVSAEMLPALIR
3,4,34878,HCD,4,4,944.28076,-0.006215,-1.647994,3772.100545,R.NLIGIDVLPVVGANVYDILQHDTLAITAAGLEGLKR.R,...,2,290,279,3.7934669999999995e-38,1,8.763096e-32,0.0,0.0,2,NLIGIDVLPVVGANVYDILQHDTLAITAAGLEGLKR
4,5,33699,HCD,5,4,911.7534,-0.000362,-0.099315,3641.985448,R.KAMLEDIAILTGGQVISEDLGIKLETVTLNMLGR.A,...,2,273,268,1.834118e-37,1,4.234549e-31,0.0,0.0,2,KAMLEDIAILTGGQVISEDLGIKLETVTLNMLGR


In [7]:
# Print the fields that the MGF writer reads for the first loaded spectrum
# whose id contains 'scan=2031': peak arrays, scan start time, and the
# precursor's m/z, charge state and peak intensity.
# NOTE(review): this is a substring test, so an id such as 'scan=20310'
# would also match; the loop stops at the first match.
for spectrum in spectra:
    if 'scan=2031' in spectrum['id']:
        print(spectrum)
        print(spectrum['m/z array'])
        print(spectrum['intensity array'])
        print(spectrum['scanList']['scan'][0]['scan start time'])
        # first selected ion of the first precursor
        selectedIon = spectrum['precursorList']['precursor'][0]['selectedIonList']['selectedIon'][0]
        print(selectedIon['selected ion m/z'])
        print(selectedIon['charge state'])
        print(selectedIon['peak intensity'])
        break

{'index': 2030, 'id': 'controllerType=0 controllerNumber=1 scan=2031', 'defaultArrayLength': 60, 'MSn spectrum': '', 'ms level': 2, 'positive scan': '', 'centroid spectrum': '', 'base peak m/z': 129.10241306433727, 'base peak intensity': 12147.271, 'total ion current': 95457.688, 'lowest observed m/z': 119.02627673103923, 'highest observed m/z': 1198.9550439485904, 'scanList': {'count': 1, 'no combination': '', 'scan': [{'scan start time': 7.8527942, 'filter string': 'FTMS + c NSI d Full ms2 478.27@hcd30.00 [100.00-1975.00]', 'preset scan configuration': 2.0, 'ion injection time': 150.000005960464, '[Thermo Trailer Extra]Monoisotopic M/Z:': 0.0, 'scanWindowList': {'count': 1, 'scanWindow': [{'scan window lower limit': 100.0, 'scan window upper limit': 1975.0}]}}]}, 'precursorList': {'count': 1, 'precursor': [{'spectrumRef': 'controllerType=0 controllerNumber=1 scan=2028', 'isolationWindow': {'isolation window target m/z': 478.2704070130128, 'isolation window lower offset': 1.0, 'isolat

In [5]:
import glob
import os
import sys
import time
import os.path

def generate_mgf(mzml_spectra, scan_ids, pepseqs, charges, file_index=0, out_file='out.mgf'):
    """Write the annotated spectra of one run to an MGF file.

    Only spectra whose scan number (parsed from ``'scan=<n>'`` in the
    spectrum ``'id'``) occurs in ``scan_ids`` are written. Each record holds
    TITLE, PEPMASS, CHARGE, SCANS, RTINSECONDS, SEQ and the peak list.

    Args:
        mzml_spectra: iterable of spectrum dicts providing ``'id'``,
            ``'m/z array'``, ``'intensity array'``, ``'scanList'`` and
            ``'precursorList'`` in the nesting read below.
        scan_ids: scan numbers that have an annotation.
        pepseqs: peptide sequences, parallel to ``scan_ids``; ``'M*'`` is
            rewritten to ``'M(+15.99)'``.
        charges: precursor charges, parallel to ``scan_ids``; used only when
            the spectrum itself has no ``'charge state'``.
        file_index: number written in front of the scan in TITLE and SCANS.
        out_file: path of the MGF file to create (overwritten if present).

    Returns:
        Number of spectra written.

    A spectrum that cannot be converted is reported on stdout and skipped.
    Every record is assembled in memory first, so a failure never leaves a
    "BEGIN IONS" without its "END IONS" in the output, and the remaining
    spectra are still processed (same policy as
    ``generate_mgf_without_annotation``).
    """
    # Scan number -> first position in scan_ids (what list.index() returned),
    # built once so that each spectrum costs a single dict lookup.
    row_of_scan = {}
    for row, scan_id in enumerate(scan_ids):
        row_of_scan.setdefault(scan_id, row)

    num_spectra = 0
    with open(out_file, 'w') as f:
        for spectrum in mzml_spectra:
            scan = int(spectrum['id'].split('scan=')[1])
            row = row_of_scan.get(scan)
            if row is None:
                continue  # no annotation for this scan
            try:
                mz_arr = spectrum['m/z array']
                int_arr = spectrum['intensity array']
                # scan start time is multiplied by 60 to get RTINSECONDS
                rtsec = 60.0*(spectrum['scanList']['scan'][0]['scan start time'])
                selectedIon = spectrum['precursorList']['precursor'][0]['selectedIonList']['selectedIon'][0]

                pepseq = pepseqs[row].replace('M*', 'M(+15.99)')

                if len(mz_arr) != len(int_arr):
                    raise ValueError("[ERR] Wrong data format: len(mz_arr) != len(int_arr)")

                # sometimes they don't have a charge info. if so, we use the annotation file
                if 'charge state' in selectedIon:
                    charge = int(selectedIon['charge state'])
                else:
                    charge = charges[row]

                record = [
                    "BEGIN IONS",
                    "TITLE={0}.{1}".format(file_index, scan),
                    "PEPMASS={0}".format(selectedIon['selected ion m/z']),
                    "CHARGE={0:d}+".format(charge),
                    "SCANS={0}:{1}".format(file_index, scan),
                    "RTINSECONDS={0}".format(rtsec),
                    "SEQ={0}".format(pepseq),
                ]
                record.extend("{0} {1}".format(mz, intensity)
                              for mz, intensity in zip(mz_arr, int_arr))
                record.append("END IONS")
            except Exception:
                # Exception (not a bare except) so Ctrl-C still interrupts.
                print('[ERR]', scan, spectrum)
                continue
            # Written outside the try block: I/O errors must not be swallowed.
            f.write("\n".join(record) + "\n")
            num_spectra += 1

    return num_spectra

def generate_mgf_without_annotation(mzml_spectra, file_index=0, out_file='out.mgf'):
    """Write every MS2 spectrum of one run to an MGF file, without peptide labels.

    Spectra whose ``'ms level'`` is not 2 are ignored. Each record holds
    TITLE, PEPMASS, CHARGE, SCANS, RTINSECONDS, ``SEQ=UNKNOWN`` and the peak
    list.

    Args:
        mzml_spectra: iterable of spectrum dicts providing ``'ms level'``,
            ``'id'`` (containing ``'scan=<n>'``), ``'m/z array'``,
            ``'intensity array'``, ``'scanList'`` and ``'precursorList'`` in
            the nesting read below.
        file_index: number written in front of the scan in TITLE and SCANS.
        out_file: path of the MGF file to create (overwritten if present).

    Returns:
        Number of spectra written.

    A spectrum that cannot be converted is reported on stdout and skipped.
    Every record is assembled in memory first, so a failure never leaves a
    "BEGIN IONS" without its "END IONS" in the output.
    """
    num_spectra = 0
    with open(out_file, 'w') as f:
        for spectrum in mzml_spectra:
            if spectrum['ms level'] != 2:
                continue
            scan = int(spectrum['id'].split('scan=')[1])
            try:
                mz_arr = spectrum['m/z array']
                int_arr = spectrum['intensity array']
                # scan start time is multiplied by 60 to get RTINSECONDS
                rtsec = 60.0*(spectrum['scanList']['scan'][0]['scan start time'])
                selectedIon = spectrum['precursorList']['precursor'][0]['selectedIonList']['selectedIon'][0]

                if len(mz_arr) != len(int_arr):
                    raise ValueError("[ERR] Wrong data format: len(mz_arr) != len(int_arr)")

                # Some spectra carry no charge info; there is no annotation
                # to fall back on here, so 999 is written as a placeholder.
                if 'charge state' in selectedIon:
                    charge = int(selectedIon['charge state'])
                else:
                    charge = 999

                record = [
                    "BEGIN IONS",
                    "TITLE={0}.{1}".format(file_index, scan),
                    "PEPMASS={0}".format(selectedIon['selected ion m/z']),
                    "CHARGE={0:d}+".format(charge),
                    "SCANS={0}:{1}".format(file_index, scan),
                    "RTINSECONDS={0}".format(rtsec),
                    "SEQ=UNKNOWN",
                ]
                record.extend("{0} {1}".format(mz, intensity)
                              for mz, intensity in zip(mz_arr, int_arr))
                record.append("END IONS")
            except Exception:
                # Exception (not a bare except) so Ctrl-C still interrupts.
                print('[ERR]', scan, spectrum)
                continue
            # Written outside the try block: I/O errors must not be swallowed.
            f.write("\n".join(record) + "\n")
            num_spectra += 1

    return num_spectra

def generate_mgf_files(data_dir, dest_dir='./'):
    """Convert every ``*.mzML.gz`` file in ``data_dir`` to an MGF file in ``dest_dir``.

    For each input ``<name>.mzML.gz``:
      * if ``<dest_dir>/<name>.mgf`` already exists the file is skipped;
      * if exactly one ``<name>*.txt`` file exists in ``data_dir`` it is used
        as the annotation file and only annotated scans are written;
      * otherwise all MS2 spectra are written without annotation.

    One progress line per file is printed to stdout, and one row per
    converted file is written to ``<dest_dir>/mgf_list.log`` (tab separated:
    id, mgf_file, num_scans, total_scans). The log is recreated on each call.

    Args:
        data_dir: directory holding the ``.mzML.gz`` and annotation files.
        dest_dir: directory receiving the ``.mgf`` files and the log.

    Returns:
        None.
    """
    # collect mzML.gz files; sorted because glob order depends on the file
    # system, and the position in this list becomes the file index that is
    # embedded in every spectrum title.
    mzML_files = sorted(glob.glob(os.path.join(data_dir, "*.mzML.gz")))

    start_time = time.time()
    num_mzML_files = len(mzML_files)
    total_scans = 0

    # ``with`` guarantees the log is flushed and closed, also on errors.
    with open(os.path.join(dest_dir, 'mgf_list.log'), 'w') as mzML_log_handler:
        print("id\tmgf_file\tnum_scans\ttotal_scans", file=mzML_log_handler)

        for i, mzML_file in enumerate(mzML_files):
            common_name = os.path.basename(mzML_file).rsplit('.mzML.gz')[0]
            mgf_path = os.path.join(dest_dir, common_name + '.mgf')

            if os.path.exists(mgf_path):
                print('[{0:3d}/{1:3d}] {2}, Already exists' \
                      .format(i+1,
                              num_mzML_files,
                              common_name))
                continue

            # NOTE(review): the pattern matches any .txt file whose name
            # starts with common_name; when several match, the run is
            # converted without annotation.
            seq_file = glob.glob(os.path.join(data_dir, common_name + "*.txt"))
            mzml_spectra = inspect_mzML_file(mzML_file)

            if len(seq_file) == 1:
                annotated = get_annotated_pepseq(seq_file[0])
                scan_ids = list(annotated.Scan)
                pepseqs = list(annotated.pepseq)
                charges = list(annotated.Charge)
                num_scans = len(scan_ids)
                num_spectra = generate_mgf(mzml_spectra,
                                           scan_ids,
                                           pepseqs,
                                           charges,
                                           file_index=i,
                                           out_file=mgf_path)
            else:
                num_spectra = generate_mgf_without_annotation(mzml_spectra,
                                                              file_index=i,
                                                              out_file=mgf_path)
                num_scans = num_spectra
            total_scans += num_spectra

            # written/annotated/running-total counts and elapsed seconds
            print('[{0:3d}/{1:3d}] {2}, {3:d}/{4:d}/{5:d}, {6:.2f}sec' \
                      .format(i+1,
                              num_mzML_files,
                              common_name,
                              num_spectra,
                              num_scans,
                              total_scans,
                              time.time()-start_time))
            print("{0}\t{1}\t{2}\t{3}".format(i, common_name, num_spectra, total_scans), file=mzML_log_handler)
            sys.stdout.flush()

# Convert every mzML.gz file of the source folder into the destination folder.
# NOTE(review): both paths are machine-specific absolute paths.
generate_mgf_files('/Users/leej324/Downloads/mzml_bio/', '/Volumes/MSSHARE/Joonyong/KAIKO_Data/Unknown_Biodiversity/')

[  1/ 17] Biodiversity_244EA27_2767802438_01_31Aug18_Merry_18-07-12, 52132/52132/52132, 116.00sec
[  2/ 17] Biodiversity_244EA27_2767802438_02_31Aug18_Merry_18-07-12, 52104/52104/104236, 227.90sec
[  3/ 17] Biodiversity_244EA27_2767802438_03_31Aug18_Merry_18-07-12, 54804/54804/159040, 347.76sec
[  4/ 17] Biodiversity_EB88_2767802315_01_31Aug18_Merry_18-07-12, 50139/50139/209179, 454.51sec
[  5/ 17] Biodiversity_EB88_2767802315_02_31Aug18_Merry_18-07-12, 52136/52136/261315, 569.21sec
[  6/ 17] Biodiversity_GAS188_2693429787_01_31Aug18_Merry_18-07-12, 49195/49195/310510, 672.17sec
[  7/ 17] Biodiversity_GAS188_2693429787_02_31Aug18_Merry_18-07-12, 49736/49736/360246, 776.00sec
[  8/ 17] Biodiversity_GAS188_2693429787_03_31Aug18_Merry_18-07-12, 52919/52919/413165, 892.01sec
[  9/ 17] Biodiversity_GAS231_2690315678_01_31Aug18_Merry_18-07-12, 52698/52698/465863, 1008.66sec
[ 10/ 17] Biodiversity_GAS231_2690315678_02_31Aug18_Merry_18-07-12, 53358/53358/519221, 1126.90sec
[ 11/ 17] Biodiversi

['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A']

In [37]:
# Convert a single run without annotation: load its spectra, then write the
# MGF file next to the input (file_index keeps its default of 0).
mzml_spectra = inspect_mzML_file('/Volumes/MSSHARE/Joonyong/KAIKO_Data/NoGenomeOrgs/Biodiversity_M_invictum_R2A_aerobic_2_23Nov16_Pippin_16-09-11.mzML.gz')
num_spectra = generate_mgf_without_annotation(mzml_spectra, out_file='/Volumes/MSSHARE/Joonyong/KAIKO_Data/NoGenomeOrgs/Biodiversity_M_invictum_R2A_aerobic_2_23Nov16_Pippin_16-09-11.mgf')

In [38]:
# Number of spectra written by the conversion above.
num_spectra

43332

In [18]:
import pandas as pd
def read_mgf_list_file(f):
    """Return the ``mgf_file`` column of a tab-separated MGF list file as a list.

    Args:
        f: path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
        The values of the ``mgf_file`` column, in file order.
    """
    mgf_table = pd.read_csv(f, sep='\t')
    return mgf_table['mgf_file'].tolist()
# List file naming every MGF file, plus the index ranges that select the
# training (0-17), validation (18-22) and test (23-122) files from it.
input_mgf_files = "/Users/leej324/Documents/projects/denovo_deeplearning/training_datasets_raw_data/deepnovo_mgf/mgf_list.log"
input_files_train = list(range(18))
input_files_valid = list(range(18, 23))
input_files_test = list(range(23, 123))

mgf_files = read_mgf_list_file(input_mgf_files)
print(mgf_files)
print(input_files_train)
# Translate the training indices into the corresponding MGF file names.
input_files_train = [mgf_files[idx] for idx in input_files_train]
print(input_files_train)

['Biodiversity_A_cryptum_FeTSB_anaerobic_1_01Jun16_Pippin_16-03-39', 'Biodiversity_A_cryptum_FeTSB_anaerobic_2_01Jun16_Pippin_16-03-39', 'Biodiversity_A_cryptum_FeTSB_anaerobic_3_01Jun16_Pippin_16-03-39', 'Biodiversity_A_faecalis_LB_aerobic_01_26Feb16_Arwen_16-01-01', 'Biodiversity_A_faecalis_LB_aerobic_02_26Feb16_Arwen_16-01-01', 'Biodiversity_A_faecalis_LB_aerobic_03_26Feb16_Arwen_16-01-01', 'Biodiversity_A_tumefaciens_R2A_aerobic_1_23Nov16_Pippin_16-09-11', 'Biodiversity_A_tumefaciens_R2A_aerobic_2_23Nov16_Pippin_16-09-11', 'Biodiversity_A_tumefaciens_R2A_aerobic_3_23Nov16_Pippin_16-09-11', 'Biodiversity_B_bifidum_CMcarb_anaerobic_01_26Feb16_Arwen_16-01-01', 'Biodiversity_B_bifidum_CMcarb_anaerobic_02_26Feb16_Arwen_16-01-01', 'Biodiversity_B_bifidum_CMcarb_anaerobic_03_26Feb16_Arwen_16-01-01', 'Biodiversity_B_cereus_ATCC14579_LB_aerobic_1_17July16_Samwise_16-04-10', 'Biodiversity_B_cereus_ATCC14579_LB_aerobic_2_17July16_Samwise_16-04-10', 'Biodiversity_B_cereus_ATCC14579_LB_aerobic_

In [1]:
# checking the newly added mgf files
import pandas as pd
# Tab-separated list of MGF files; the cells below read its id, mgf_file and num_scans columns.
mgf_tab = pd.read_csv('/Volumes/MSSHARE/Joonyong/mgf_list_v3.log', sep='\t')

In [2]:
# Preview the first rows of the MGF list table.
mgf_tab.head()

Unnamed: 0,id,mgf_file,num_scans,total_scans,species,num_peptides,total_peptides
0,0,Biodiversity_A_cryptum_FeTSB_anaerobic_1_01Jun...,7317,7317,Acidiphilium_cryptum_JF-5,6659,6659
1,1,Biodiversity_A_cryptum_FeTSB_anaerobic_2_01Jun...,9503,16820,Acidiphilium_cryptum_JF-5,8532,15191
2,2,Biodiversity_A_cryptum_FeTSB_anaerobic_3_01Jun...,8050,24870,Acidiphilium_cryptum_JF-5,7379,22570
3,3,Biodiversity_A_faecalis_LB_aerobic_01_26Feb16_...,16382,41252,Alcaligenes_faecalis,15496,38066
4,4,Biodiversity_A_faecalis_LB_aerobic_02_26Feb16_...,16256,57508,Alcaligenes_faecalis,15367,53433


In [3]:
# Consistency check (read-only): for every MGF file listed in mgf_tab, parse
# the number between '=' and the first '.' on the file's second line and
# print the file when that number differs from the id recorded in mgf_tab.
for row in mgf_tab.itertuples():
    input_file = "/Volumes/MSSHARE/Joonyong/KAIKO_Data/Original_mgf/{0}.mgf".format(row.mgf_file)
    with open(input_file, mode="r") as file_handle:
        # skip the first line; the id is taken from the second one
        file_handle.readline()
        _id = file_handle.readline().split("=")[1].split(".")[0] 
        if row.id != int(_id):
            print(row.id, _id, input_file, row.num_scans)

223 246 /Volumes/MSSHARE/Joonyong/KAIKO_Data/Original_mgf/M_alcali_copp_CH4_B1_T1_07_QE_23Mar18_Oak_18-01-07.mgf 22915
224 247 /Volumes/MSSHARE/Joonyong/KAIKO_Data/Original_mgf/M_alcali_copp_CH4_B1_T2_08_QE_23Mar18_Oak_18-01-07.mgf 23035
225 248 /Volumes/MSSHARE/Joonyong/KAIKO_Data/Original_mgf/M_alcali_copp_CH4_B2_T1_09_QE_23Mar18_Oak_18-01-07.mgf 19918
226 249 /Volumes/MSSHARE/Joonyong/KAIKO_Data/Original_mgf/M_alcali_copp_CH4_B2_T2_10_QE_23Mar18_Oak_18-01-07.mgf 19703
227 250 /Volumes/MSSHARE/Joonyong/KAIKO_Data/Original_mgf/M_alcali_copp_CH4_B3_T1_11_QE_23Mar18_Oak_18-01-07.mgf 22030
228 251 /Volumes/MSSHARE/Joonyong/KAIKO_Data/Original_mgf/M_alcali_copp_CH4_B3_T2_12_QE_23Mar18_Oak_18-01-07.mgf 21451
229 252 /Volumes/MSSHARE/Joonyong/KAIKO_Data/Original_mgf/M_alcali_copp_MeOH_B1_T1_01_QE_23Mar18_Oak_18-01-07.mgf 19885
230 253 /Volumes/MSSHARE/Joonyong/KAIKO_Data/Original_mgf/M_alcali_copp_MeOH_B1_T2_02_QE_23Mar18_Oak_18-01-07.mgf 20039
231 254 /Volumes/MSSHARE/Joonyong/KAIKO_Data/O

In [4]:
# file replacement
import fileinput

# For every MGF file whose embedded file index (parsed from its second line)
# differs from the id recorded in mgf_tab, rewrite the "TITLE=<index>." and
# "SCANS=<index>:" prefixes in place. The original is kept as <file>.bak.
for row in mgf_tab.itertuples():
    input_file = "/Volumes/MSSHARE/Joonyong/KAIKO_Data/Original_mgf/{0}.mgf".format(row.mgf_file)
    with open(input_file, mode="r") as file_handle:
        # skip the first line; the index is taken from the second one
        file_handle.readline()
        _id = file_handle.readline().split("=")[1].split(".")[0]
    # The read handle is closed before the rewrite starts: in-place mode
    # renames the file to the backup name and recreates it, which must not
    # happen while this cell still holds it open.
    if row.id != int(_id):
        print(row.id, _id, input_file, row.num_scans)
        old_title, new_title = "TITLE={0}.".format(_id), "TITLE={0}.".format(row.id)
        old_scans, new_scans = "SCANS={0}:".format(_id), "SCANS={0}:".format(row.id)
        with fileinput.FileInput(input_file, inplace=True, backup='.bak') as file:
            for line in file:
                # inside the in-place context print() writes to the file
                _t = line.replace(old_title, new_title)
                _t = _t.replace(old_scans, new_scans)
                print(_t, end='')

223 246 /Volumes/MSSHARE/Joonyong/KAIKO_Data/Original_mgf/M_alcali_copp_CH4_B1_T1_07_QE_23Mar18_Oak_18-01-07.mgf 22915
224 247 /Volumes/MSSHARE/Joonyong/KAIKO_Data/Original_mgf/M_alcali_copp_CH4_B1_T2_08_QE_23Mar18_Oak_18-01-07.mgf 23035
225 248 /Volumes/MSSHARE/Joonyong/KAIKO_Data/Original_mgf/M_alcali_copp_CH4_B2_T1_09_QE_23Mar18_Oak_18-01-07.mgf 19918
226 249 /Volumes/MSSHARE/Joonyong/KAIKO_Data/Original_mgf/M_alcali_copp_CH4_B2_T2_10_QE_23Mar18_Oak_18-01-07.mgf 19703
227 250 /Volumes/MSSHARE/Joonyong/KAIKO_Data/Original_mgf/M_alcali_copp_CH4_B3_T1_11_QE_23Mar18_Oak_18-01-07.mgf 22030
228 251 /Volumes/MSSHARE/Joonyong/KAIKO_Data/Original_mgf/M_alcali_copp_CH4_B3_T2_12_QE_23Mar18_Oak_18-01-07.mgf 21451
229 252 /Volumes/MSSHARE/Joonyong/KAIKO_Data/Original_mgf/M_alcali_copp_MeOH_B1_T1_01_QE_23Mar18_Oak_18-01-07.mgf 19885
230 253 /Volumes/MSSHARE/Joonyong/KAIKO_Data/Original_mgf/M_alcali_copp_MeOH_B1_T2_02_QE_23Mar18_Oak_18-01-07.mgf 20039
231 254 /Volumes/MSSHARE/Joonyong/KAIKO_Data/O

In [27]:
# file replacement
import fileinput

folder = "DeepNovoRun"

# For every "<mgf_file>_out.txt" result file whose file index (text before
# the first ':' in the first tab-separated field of its second line) differs
# from the id recorded in mgf_tab, rewrite "<index>:" in place. The original
# is kept as <file>.bak.
for row in mgf_tab.itertuples():
    input_file = "/Volumes/MSSHARE/Joonyong/{0}/mgf_test/{1}_out.txt".format(folder, row.mgf_file)
    with open(input_file, mode="r") as file_handle:
        # skip the first line; the index is taken from the second one
        file_handle.readline()
        _id = file_handle.readline().split("\t")[0].split(":")[0]
    # The read handle is closed before the rewrite starts: in-place mode
    # renames the file to the backup name and recreates it, which must not
    # happen while this cell still holds it open.
    if row.id != int(_id):
        print(row.id, _id, input_file, row.num_scans)
        old_prefix, new_prefix = "{0}:".format(_id), "{0}:".format(row.id)
        with fileinput.FileInput(input_file, inplace=True, backup='.bak') as file:
            for line in file:
                # NOTE(review): this replaces every "<index>:" occurrence in
                # the line, not only the leading one - confirm that no other
                # field can contain that text.
                # inside the in-place context print() writes to the file
                _t = line.replace(old_prefix, new_prefix)
                print(_t, end='')

220 241 /Volumes/MSSHARE/Joonyong/DeepNovoRun/mgf_test/QC_Shew_13_05_500ng_2_100uL_5hr_30Mar14_Samwise_13-07-17_out.txt 50697
221 242 /Volumes/MSSHARE/Joonyong/DeepNovoRun/mgf_test/QC_Shew_13_05_500ng_2_5hr_19Mar14_Samwise_13-07-17_out.txt 54539
222 243 /Volumes/MSSHARE/Joonyong/DeepNovoRun/mgf_test/QC_Shew_13_05_500ng_2_5hr_24Mar14_Samwise_13-07-17_out.txt 54232
223 248 /Volumes/MSSHARE/Joonyong/DeepNovoRun/mgf_test/YJ_Cc_WT1_C_P_9Jan17_Pippin_16-09-11_out.txt 26569
224 249 /Volumes/MSSHARE/Joonyong/DeepNovoRun/mgf_test/YJ_Cc_WT1_IM_P_9Jan17_Pippin_16-09-11_out.txt 22557
225 250 /Volumes/MSSHARE/Joonyong/DeepNovoRun/mgf_test/YJ_Cc_WT1_OM_P_9Jan17_Pippin_16-09-11_out.txt 29444
226 251 /Volumes/MSSHARE/Joonyong/DeepNovoRun/mgf_test/YJ_Cc_WT1_P_Prot_9Jan17_Pippin_16-09-11_out.txt 25166
227 252 /Volumes/MSSHARE/Joonyong/DeepNovoRun/mgf_test/YJ_Cc_WT1_WC_P_9Jan17_Pippin_16-09-11_out.txt 21603
228 253 /Volumes/MSSHARE/Joonyong/DeepNovoRun/mgf_test/YJ_Cc_WT2_C_P_9Jan17_Pippin_16-09-11_out.tx