# Imports
Versions <br>
python - 3.6.7 <br>
Pandas - 0.23.4 <br>
Biopython - 1.73 <br>
Scipy - 1.2.0 <br>
seaborn - 0.9.0 (Needs to be verified on Jurriaan laptop) <br>
iupred2a - https://iupred2a.elte.hu/download <br>

pip install pandas==0.23.4

In [2]:
import ntpath
import datetime
import pandas as pd
from Bio import SeqIO
from scipy import signal
from iupred2a import iupred2a
from Bio.SeqUtils.ProtParam import ProteinAnalysis

In: path for a fasta file <br>
Out: pandas pickle file <br>
This code takes a fasta file and generates a pandas table, saved as a pickle file (.pkl), which contains features derived from each protein sequence.

# Statics

In [6]:
# One-letter codes of the 20 residues for which per-residue columns
# ('fraction_<X>' and '<X>_lcf') are generated.
RESIDUES = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
            'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']

# Kyte & Doolittle {kd} index of hydrophobicity, keyed by one-letter residue code.
# 'U' is not one of the 20 RESIDUES; it is given a score of 0.0 here.
HP = {'A': 1.8, 'R':-4.5, 'N':-3.5, 'D':-3.5, 'C': 2.5,
      'Q':-3.5, 'E':-3.5, 'G':-0.4, 'H':-3.2, 'I': 4.5,
      'L': 3.8, 'K':-3.9, 'M': 1.9, 'F': 2.8, 'P':-1.6,
      'S':-0.8, 'T':-0.7, 'W':-0.9, 'Y':-1.3, 'V': 4.2, 'U': 0.0}

# Classes

In [5]:
import time
class IuPred:
    """Bundle of the three IUPred2A results computed for one sequence.

    Attributes:
        glob: result of the 'glob' prediction mode.
        short: result of the 'short' prediction mode.
        long: result of the 'long' prediction mode.
    """

    def __init__(self, glob, short, long):
        # Stored as given; no copying or validation is performed.
        self.glob, self.short, self.long = glob, short, long


class HydroPhobicIndex:
    """Holder for the list of per-residue hydrophobicity scores of one sequence."""
    def __init__(self, hpilist):
        # hpilist: the score list, stored as given (no copy, no validation).
        self.hpilist = hpilist


class MakeMatrix:
    def __init__(self, dbfasta):  
        self.df = pd.DataFrame()
        executables = [
            'self.fasta2df(dbfasta)',
            'self.amino_acid_analysis()',
            'self.idr_iupred()',
            'self.hydrophobic()',
            'self.add_iupred_features()',
            'self.add_hydrophobic_features()',
            'self.add_biochemical_combinations()',
            'self.add_lowcomplexity_features()' ,
        ]        
        for e in executables:
            start = time.time()     
            exec(e)
            end = time.time()        
            print(str(round(end - start, 2))+'s '+e)           
#         self.fasta2df(dbfasta)
#         self.amino_acid_analysis()
#         self.idr_iupred()     
#         self.hydrophobic()     
#         self.add_iupred_features()      
#         self.add_hydrophobic_features()     
#         self.add_biochemical_combinations()       
#         self.add_lowcomplexityscore()
#         self.add_lowcomplexity_features()             
              

    def fasta2df(self, dbfasta):
        """Load the records of a FASTA file into ``self.df``.

        Each record description is split on ``|``. Field 1 becomes
        ``uniprot_id`` and the text of field 2 up to the first space becomes
        ``protein_name``; the sequence is stored as a plain string.
        (Presumably UniProt-style headers such as ``sp|P12345|NAME_HUMAN ...``
        -- confirm against the input files.)

        Args:
            dbfasta: path of the FASTA file to read.

        Raises:
            IndexError: if a record description has fewer than three
                ``|``-separated fields.
        """
        rows = []
        with open(dbfasta) as handle:
            # Parse from the handle opened above, so the file is opened once
            # and closed by the ``with`` block.
            for record in SeqIO.parse(handle, 'fasta'):
                header = record.description.split('|')
                uniprot_id = header[1]
                name = header[2].split(' ')[0]
                rows.append([name, uniprot_id, str(record.seq)])
        self.df = pd.DataFrame(rows, columns=['protein_name', 'uniprot_id', 'sequence'])

    def set_identifiers(self, fastas):
        """Flag the proteins of ``self.df`` that occur in the given FASTA files.

        For every file a 0/1 column is added, named after the file's base name
        up to its first dot. The ``llps`` column is 1 for every protein found
        in at least one of the files and 0 otherwise. Proteins are matched on
        the second ``|``-separated field of the record id against
        ``uniprot_id``.

        Args:
            fastas: iterable of FASTA file paths.
        """
        self.df['llps'] = 0
        for fasta_path in fastas:
            label = ntpath.basename(fasta_path).split('.')[0]
            self.df[label] = 0
            with open(fasta_path) as handle:
                for record in SeqIO.parse(handle, 'fasta'):
                    accession = record.id.split('|')[1]
                    hits = self.df['uniprot_id'] == accession
                    self.df.loc[hits, label] = 1
                    self.df.loc[hits, 'llps'] = 1

    def idr_iupred(self):
        """Run IUPred2A on every sequence and store the results per row.

        For each row the 'glob', 'short' and 'long' predictions are computed
        (in that order) and wrapped in an ``IuPred`` instance that is written
        to the ``iupred`` column.
        """
        # The column is first filled with the ``object`` builtin, which gives
        # it object dtype -- presumably so each cell can then take an
        # arbitrary Python object.
        self.df['iupred'] = object
        modes = ('glob', 'short', 'long')
        for index, row in self.df.iterrows():
            sequence = str(row['sequence'])
            predictions = [iupred2a.iupred(sequence, mode) for mode in modes]
            self.df.at[index, 'iupred'] = IuPred(*predictions)
            
    def hydrophobic(self):
        """Attach the per-residue hydrophobicity profile to every row.

        Each residue of the sequence is looked up in ``HP``; the resulting
        list is wrapped in a ``HydroPhobicIndex`` and stored in the
        ``HydroPhobicIndex`` column.
        """
        for index, row in self.df.iterrows():
            residues = pd.Series(list(row['sequence']))
            # Series.map with a dict yields NaN for letters missing from HP.
            scores = residues.map(HP).tolist()
            self.df.loc[index, 'HydroPhobicIndex'] = HydroPhobicIndex(scores)
            
    def amino_acid_analysis(self):
        """Add composition and bulk physico-chemical columns for every row.

        Columns written per protein, using Biopython's ``ProteinAnalysis``:
        ``fraction_<X>`` for each residue in ``RESIDUES``, ``length``,
        ``IEP`` (isoelectric point), ``molecular_weight``, ``gravy``,
        ``aromaticity`` and the three values returned by
        ``secondary_structure_fraction()`` as ``alpha_helix``, ``beta_turn``
        and ``beta_sheet``.

        ``molecular_weight`` is not written for sequences containing 'X' or
        'B'; ``gravy`` is additionally not written for sequences containing
        'U'.
        """
        for index, row in self.df.iterrows():
            seq = row['sequence']
            analysis = ProteinAnalysis(seq)
            composition = analysis.get_amino_acids_percent()
            for residue in RESIDUES:
                self.df.loc[index, 'fraction_' + residue] = composition[residue]
            self.df.loc[index, 'length'] = len(seq)
            self.df.loc[index, 'IEP'] = analysis.isoelectric_point()

            # Checked once and reused by both guarded features below.
            has_ambiguous = 'X' in seq or 'B' in seq
            if not has_ambiguous:
                self.df.loc[index, 'molecular_weight'] = analysis.molecular_weight()
            if not has_ambiguous and 'U' not in seq:
                self.df.loc[index, 'gravy'] = analysis.gravy()
            self.df.loc[index, 'aromaticity'] = analysis.aromaticity()

            # Compute the secondary-structure fractions once per sequence
            # instead of once per output column.
            helix, turn, sheet = analysis.secondary_structure_fraction()
            self.df.loc[index, 'alpha_helix'] = helix
            self.df.loc[index, 'beta_turn'] = turn
            self.df.loc[index, 'beta_sheet'] = sheet

    def add_iupred_features(self):
        for index, row in self.df.iterrows():
            idr = row['iupred'].glob[0]
            self.df.loc[index, 'idr_percetage'] = sum(i > .5 for i in list(idr))
            self.df.loc[index, 'idr_percetage1'] = sum(i > .5 for i in list(idr)) / len(str(row['sequence']))
            self.df.loc[index, 'idr_percetage2'] = sum(i > .6 for i in list(idr)) / len(str(row['sequence']))
            self.df.loc[index, 'idr_percetage3'] = sum(i > .7 for i in list(idr)) / len(str(row['sequence']))
            self.df.loc[index, 'idr_percetage4'] = sum(i > .8 for i in list(idr)) / len(str(row['sequence']))
            self.df.loc[index, 'idr_percetage4'] = sum(i > .9 for i in list(idr)) / len(str(row['sequence']))

    def add_hydrophobic_features(self):
        """Count strongly hydrophilic positions in the smoothed profile.

        The per-residue scores held in the ``HydroPhobicIndex`` column are
        smoothed with ``convolve_signal`` (window of 30). For the cut-offs
        -1.5, -2.0 and -2.5 the number of smoothed values below the cut-off
        is stored in ``hpi_<cutoff`` and that number divided by the profile
        length in ``hpi_<cutoff_frac``.
        """
        cutoffs = (-1.5, -2.0, -2.5)
        fractions = {cutoff: [] for cutoff in cutoffs}
        counts = {cutoff: [] for cutoff in cutoffs}

        for index, row in self.df.iterrows():
            smoothed = convolve_signal(row['HydroPhobicIndex'].hpilist, window=30)
            for cutoff in cutoffs:
                below = sum(value < cutoff for value in smoothed)
                fractions[cutoff].append(below / len(smoothed))
                counts[cutoff].append(below)

        # Whole-column assignment, fraction columns first.
        self.df['hpi_<-1.5_frac'] = fractions[-1.5]
        self.df['hpi_<-2.0_frac'] = fractions[-2.0]
        self.df['hpi_<-2.5_frac'] = fractions[-2.5]
        self.df['hpi_<-1.5'] = counts[-1.5]
        self.df['hpi_<-2.0'] = counts[-2.0]
        self.df['hpi_<-2.5'] = counts[-2.5]
            

    def add_biochemical_combinations(self):
        df = self.df
        df = df.assign(Asx=df['fraction_D'] + df['fraction_N'])
        df = df.assign(Glx=df['fraction_E'] + df['fraction_Q'])
        df = df.assign(Xle=df['fraction_I'] + df['fraction_L'])
        df = df.assign(Pos_charge=df['fraction_K'] + df['fraction_R'] + df['fraction_H'])
        df = df.assign(Neg_charge=df['fraction_D'] + df['fraction_E'])
        df = df.assign(Aromatic=df['fraction_F'] + df['fraction_W'] + df['fraction_Y'] + df['fraction_H'])
        df = df.assign(Alipatic=df['fraction_V'] + df['fraction_I'] + df['fraction_L'] + df['fraction_M'])
        df = df.assign(Small=df['fraction_P'] + df['fraction_G'] + df['fraction_A'] + df['fraction_S'])
        df = df.assign(Hydrophilic=(df['fraction_S'] + df['fraction_T'] + df['fraction_H'] + 
                                    df['fraction_N'] + df['fraction_Q'] + df['fraction_E'] +
                                    df['fraction_D'] + df['fraction_K'] + df['fraction_R']))
        df = df.assign(Hydrophobic= (df['fraction_V'] + df['fraction_I'] + df['fraction_L'] +
                                     df['fraction_F'] + df['fraction_W'] + df['fraction_Y'] +
                                     df['fraction_M']))
        self.df = df
        del df
        
    def add_lowcomplexityscore(self):
        lcs_window = 20
        lcs_cutoff = 7
        for index, row in self.df.iterrows():
            seq = str(row['sequence'])
            if len(seq) > lcs_window+1:
                sig = list()
                for i in range(len(seq)):
                    window = (seq[i: i+lcs_window])
                    if len(window) == lcs_window:
                        acid_comp = len(list(set(window)))
                        sig.append(acid_comp)
                score = sum([1 if i <= 7 else 0 for i in sig])
                self.df.loc[index, 'lcs_score'] = score
                self.df.loc[index, 'lcs_fraction'] = score / len(sig)
                
                
    def add_lowcomplexity_features(self):
        n_window = 20
        cutoff = 7       
        n_halfwindow = int(n_window / 2) 
        
        lcs_lowest_complexity = list()
        lcs_scores = list()
        lcs_fractions = list()       
        for index, row in self.df.iterrows():
            
            # Determine low complexity for each amino acids
            seq = str(row['sequence'])
            lcs_acids = list()
            sig = list()
            for i in range(len(seq)):
                if i < n_halfwindow:
                    peptide = seq[:n_window]        
                elif i+n_halfwindow > int(len(seq)):
                    peptide = seq[-n_window:]        
                else:
                    peptide = seq[i-n_halfwindow:i+n_halfwindow]       
                complexity = (len(set(peptide)))    
                if complexity <= 7:
                    # Add amino acids with a complexity from 7 or lower to list.
                    lcs_acids.append(seq[i])
                sig.append(complexity)
            
            # Adding low complexity scores to list
            lcs_lowest_complexity.append(min(sig))
            lcs_scores.append(len(lcs_acids))
            lcs_fractions.append(len(lcs_acids) / len(seq))


            # Add fraction scores to dataframe
            if len(lcs_acids) >= n_window:
                for i in RESIDUES:
                    self.df.loc[index ,i+'_lcf'] = lcs_acids.count(i) / len(lcs_acids)
            
            
        # Add low complexity scores lists to dataframe as a column
        self.df['lcs_fractions'] = lcs_fractions
        self.df['lcs_scores'] = lcs_scores
        self.df['lcs_lowest_complexity'] = lcs_lowest_complexity


def convolve_signal(sig, window=25):
    """Smooth a 1-D signal with a normalised Hann window.

    Args:
        sig: sequence of numbers to smooth.
        window: number of points in the Hann window (default 25).

    Returns:
        Array of the same length as ``sig`` (``mode='same'``), i.e. the
        convolution of ``sig`` with the window divided by the window's sum.
    """
    # Use the window from scipy.signal.windows; the top-level
    # ``scipy.signal.hann`` alias is deprecated and absent from newer SciPy.
    win = signal.windows.hann(window)
    sig = signal.convolve(sig, win, mode='same') / sum(win)
    return sig


def average(l):
    """Return the arithmetic mean of the numbers in ``l``.

    Raises:
        ZeroDivisionError: if ``l`` is empty.
    """
    total = sum(l)
    count = len(l)
    return total / count

In [7]:
def main(name, fasta_path):
    """Build the feature table for one FASTA file and pickle it.

    The table is written to
    ``featured_data/<name>_llps_f2f_<day>-<month>-<year>.pkl`` (relative to
    the working directory), and the generated file name is printed.

    Args:
        name: label used as the prefix of the output file name.
        fasta_path: path of the FASTA file to featurise.
    """
    import os  # only needed here, for building the output path

    data = MakeMatrix(fasta_path)

    # Write data to a pickle file (.pkl); day and month are not zero-padded.
    now = datetime.datetime.now()
    date = str(now.day) + '-' + str(now.month) + '-' + str(now.year)
    filename = name + '_llps_f2f_' + date + '.pkl'

    # Join with the platform's separator so the file ends up inside the
    # 'featured_data' directory on every OS, and make sure that directory
    # exists before writing.
    out_dir = 'featured_data'
    os.makedirs(out_dir, exist_ok=True)
    data.df.to_pickle(os.path.join(out_dir, filename))
    print('Generated file: ' + filename)

# Homo sapiens

In [9]:
# Human: build the feature table from the human FASTA below and pickle it via main().
name = 'human'
# Alternative location of the same FASTA file (kept for reference):
#dbfasta = "C:\\guido\\Vermeulen lab\\students\\Jurriaan\\software\\fasta\\uniprot_reviewed_9606_140119.fasta"
dbfasta = "C:\\Users\\gvanmierlo\\Desktop\\LLPS\\fasta\\uniprot_reviewed_9606_140119.fasta"
main(name, dbfasta)

0.51s self.fasta2df(dbfasta)
227.65s self.amino_acid_analysis()
1445.33s self.idr_iupred()
31.27s self.hydrophobic()
55.32s self.add_iupred_features()
273.66s self.add_hydrophobic_features()
0.23s self.add_biochemical_combinations()
57.33s self.add_lowcomplexity_features()
Generated file: human_llps_f2f_20-11-2019.pkl


# Mus musculus

In [7]:
# Mouse: build the feature table from the FASTA below and pickle it via main().
name = 'mus_musculus'
dbfasta = '/home/jurriaan/proteindb/mus_musculus_swiss/uniprot-mus-musculus_030519.fasta'
main(name, dbfasta)

0.66s self.fasta2df(dbfasta)
42.62s self.add_lowcomplexity_features()
Generated file: mus_musculus_llps_f2f_5-6-2019.pkl
      protein_name uniprot_id  \
0      AAKG1_MOUSE     O54950   
1      ABD12_MOUSE     Q99LR1   
2      ABHD1_MOUSE     Q9QZC8   
3      ABHEB_MOUSE     Q8VCR7   
4      ABCD3_MOUSE     P55096   
5      ACBD7_MOUSE     Q9D258   
6      ABLM1_MOUSE     Q8K4G5   
7      ABHD8_MOUSE     Q8R0P8   
8      ACBG2_MOUSE     Q2XU92   
9      ACAP1_MOUSE     Q8K2H4   
10     ACHB4_MOUSE     Q8R493   
11      ACM3_MOUSE     Q9ERZ3   
12     ACKR3_MOUSE     P56485   
13     ACSL1_MOUSE     P41216   
14     ADAM2_MOUSE     Q60718   
15     ADA18_MOUSE     Q9R157   
16     ABCBA_MOUSE     Q9JI39   
17     ACAD8_MOUSE     Q9D7B6   
18     ACHA6_MOUSE     Q9R0W9   
19     ACHA7_MOUSE     P49582   
20     ACOX1_MOUSE     Q9R0H0   
21      ACLY_MOUSE     Q91V92   
22      ACPM_MOUSE     Q9CR21   
23      ACTH_MOUSE     P63268   
24     ADA29_MOUSE     Q811Q4   
25     ACSF4_MOUSE   

# Caenorhabditis elegans

In [8]:
# C. elegans: build the feature table from the FASTA below and pickle it via main().
name = 'celegans'
fasta_path = '/home/jurriaan/proteindb/caenorhabditis_elegans_swiss/c_elegans_070519.fasta'
main(name, fasta_path)

0.1s self.fasta2df(dbfasta)
7.77s self.add_lowcomplexity_features()
Generated file: celegans_llps_f2f_5-6-2019.pkl
      protein_name  uniprot_id  \
0       ADAS_CAEEL      O45218   
1       ACM2_CAEEL      Q09388   
2       ACM1_CAEEL      Q18007   
3       ANI1_CAEEL      Q9XTT4   
4      ANKL1_CAEEL      G5EGA3   
5      ANKL2_CAEEL      H2KZB2   
6       AP1M_CAEEL      P35602   
7       AMT1_CAEEL      P54145   
8       AMPL_CAEEL      P34629   
9       APR1_CAEEL      Q21227   
10     ALLO1_CAEEL      Q9U389   
11      ANI3_CAEEL      Q9XWN6   
12      AP4A_CAEEL      Q9U2M7   
13      ALF2_CAEEL      P46563   
14     ALH13_CAEEL      P54889   
15      AMX1_CAEEL      Q21988   
16      AGE1_CAEEL      Q94125   
17     AL7A1_CAEEL      P46562   
18      ALXA_CAEEL      P34552   
19      ADH2_CAEEL      O45687   
20      AHA1_CAEEL      O02219   
21      ALF1_CAEEL      P54216   
22      AEX2_CAEEL      G5ECD9   
23      AEX5_CAEEL      P91863   
24      ADT1_CAEEL      G5ECS8   
2

# Caenorhabditis elegans + 2

In [9]:
# C. elegans, "2extra" FASTA variant: build the feature table and pickle it via main().
name = 'celegans_2e'
fasta_path = '/home/jurriaan/proteindb/caenorhabditis_elegans_swiss/c_elegans_2extra_070519.fasta'
main(name, fasta_path)

0.1s self.fasta2df(dbfasta)
7.67s self.add_lowcomplexity_features()
Generated file: celegans_2e_llps_f2f_5-6-2019.pkl
      protein_name  uniprot_id  \
0       ADAS_CAEEL      O45218   
1       ACM2_CAEEL      Q09388   
2       ACM1_CAEEL      Q18007   
3       ANI1_CAEEL      Q9XTT4   
4      ANKL1_CAEEL      G5EGA3   
5      ANKL2_CAEEL      H2KZB2   
6       AP1M_CAEEL      P35602   
7       AMT1_CAEEL      P54145   
8       AMPL_CAEEL      P34629   
9       APR1_CAEEL      Q21227   
10     ALLO1_CAEEL      Q9U389   
11      ANI3_CAEEL      Q9XWN6   
12      AP4A_CAEEL      Q9U2M7   
13      ALF2_CAEEL      P46563   
14     ALH13_CAEEL      P54889   
15      AMX1_CAEEL      Q21988   
16      AGE1_CAEEL      Q94125   
17     AL7A1_CAEEL      P46562   
18      ALXA_CAEEL      P34552   
19      ADH2_CAEEL      O45687   
20      AHA1_CAEEL      O02219   
21      ALF1_CAEEL      P54216   
22      AEX2_CAEEL      G5ECD9   
23      AEX5_CAEEL      P91863   
24      ADT1_CAEEL      G5ECS8  

# Saccharomyces cerevisiae

In [10]:
# Saccharomyces cerevisiae: build the feature table from the FASTA below
# and pickle it via main().
name = 'yeast'
proteome = '/home/jurriaan/proteindb/yeast_swiss/saccharomyces_cerevisiae_030419.fasta'
main(name, proteome)

0.36s self.fasta2df(dbfasta)
10.83s self.add_lowcomplexity_features()
Generated file: yeast_llps_f2f_5-6-2019.pkl
     protein_name uniprot_id  \
0     AB140_YEAST     Q08641   
1      ACEA_YEAST     P28240   
2      ACK1_YEAST     Q07622   
3      ACL4_YEAST     Q03771   
4      ADA2_YEAST     Q02336   
5      AMPD_YEAST     P15274   
6      AQR1_YEAST     P53943   
7      ANT1_YEAST     Q06497   
8     AP1S1_YEAST     P35181   
9      ANS1_YEAST     P38832   
10     APC2_YEAST     Q12440   
11     APM2_YEAST     P38700   
12     AMYG_YEAST     P08019   
13    AP1B1_YEAST     P36000   
14     APJ1_YEAST     P53940   
15     AP3S_YEAST     P47064   
16     AMN1_YEAST     P38285   
17     AMPL_YEAST     P14904   
18     AP2B_YEAST     P27351   
19    AP1G1_YEAST     Q12028   
20     AP2A_YEAST     P38065   
21    APQ12_YEAST     P40532   
22     AQY2_YEAST     P0CD90   
23    AP18B_YEAST     P53309   
24     APC4_YEAST     Q04601   
25     ALLA_YEAST     P32459   
26    AP18A_YEAST     

# Bos Taurus

In [11]:
# Bos taurus: build the feature table from the FASTA below and pickle it via main().
name = 'bos_taurus'
proteome = '/home/jurriaan/proteindb/bos_taurus_swiss/uniprot-bos_taurus_030519.fasta'
main(name, proteome)

0.44s self.fasta2df(dbfasta)
10.12s self.add_lowcomplexity_features()
Generated file: bos_taurus_llps_f2f_5-6-2019.pkl
     protein_name uniprot_id  \
0     ADA10_BOVIN     Q10741   
1       AMD_BOVIN     P10731   
2     APEX1_BOVIN     P23196   
3     ANXA1_BOVIN     P46193   
4     ANXA2_BOVIN     P04272   
5     ANXA6_BOVIN     P79134   
6     APOA1_BOVIN     P15497   
7      AQP1_BOVIN     P47865   
8      AOC3_BOVIN     Q9TTK6   
9     ANXA4_BOVIN     P13214   
10    ANXA5_BOVIN     P81287   
11     AOCX_BOVIN     Q29437   
12    ADIPO_BOVIN     Q3Y5Z3   
13     ADSV_BOVIN     Q28046   
14     ALDR_BOVIN     P16116   
15     ADRO_BOVIN     P08165   
16    AK1A1_BOVIN     Q3ZCJ2   
17    ADCY1_BOVIN     P19754   
18    AGRL1_BOVIN     O97831   
19     ACTB_BOVIN     P60712   
20      ADX_BOVIN     P00257   
21    AL1A1_BOVIN     P48644   
22    ADRB2_BOVIN     Q28044   
23    ACSM1_BOVIN     Q9BEA2   
24     ACON_BOVIN     P20004   
25     ACTA_BOVIN     P62739   
26    ACVR1_BOVIN

# Arabidopsis Thaliana

In [12]:
# Arabidopsis thaliana: build the feature table from the FASTA below and pickle it via main().
name = 'arabidopsis_thaliana'
proteome = '/home/jurriaan/proteindb/arabidopsis_thaliana_swiss/arabidopsis_thaliana_030519.fasta'
main(name, proteome)

0.36s self.fasta2df(dbfasta)
23.2s self.add_lowcomplexity_features()
Generated file: arabidopsis_thaliana_llps_f2f_5-6-2019.pkl
      protein_name  uniprot_id  \
0       ABC1_ARATH      Q9SBB2   
1      AAE12_ARATH      Q9SS00   
2       AB1I_ARATH      Q9C8T1   
3      AB1K8_ARATH      Q93Y08   
4      AB16G_ARATH      Q9M2V7   
5      AB20I_ARATH      Q9LZ98   
6      AB27G_ARATH      Q9FT51   
7      ACA13_ARATH      Q9LIK7   
8      ACOX2_ARATH      O65201   
9      ACH10_ARATH      Q9LSW6   
10      ACR5_ARATH      Q9ZPQ8   
11     ACT11_ARATH      P53496   
12      ADF5_ARATH      Q9ZNT3   
13      ADF6_ARATH      Q9ZSK2   
14     ACBP3_ARATH      Q9STX1   
15      ACCD_ARATH      P56765   
16      ACEA_ARATH      P28297   
17     ACPM1_ARATH      P53665   
18     ACPM2_ARATH      O80800   
19     ACPM3_ARATH      Q9FGJ4   
20     ACO2M_ARATH      Q94A28   
21      ACR8_ARATH      Q9LNA5   
22      ACR9_ARATH      O80644   
23      ACT2_ARATH      Q96292   
24      ACR3_ARATH    

# Drosophila melanogaster

In [13]:
# Drosophila: build the feature table from the FASTA below and pickle it via main().
name = 'drosophila'
proteome = '/home/jurriaan/proteindb/drosophila_swiss/uniprot-drosohpila_030519.fasta'
main(name, proteome)

0.09s self.fasta2df(dbfasta)
11.65s self.add_lowcomplexity_features()
Generated file: drosophila_llps_f2f_5-6-2019.pkl
     protein_name uniprot_id  \
0     AC78C_DROME     Q9VP76   
1      AMYB_DROME     P81641   
2      ANTP_DROME     P02833   
3      ANM5_DROME     Q9U6Y9   
4      ARC1_DROME     Q7K1U0   
5      AP3D_DROME     P54362   
6      APTE_DROME     P29673   
7      APLP_DROME     Q9V496   
8       ALF_DROME     P07764   
9      ANLN_DROME     Q9V4P1   
10     AGO3_DROME     Q7PLK0   
11     AGO2_DROME     Q9VUQ5   
12    AK200_DROME     Q9VLL3   
13    AJUBA_DROME     Q9VY77   
14     AFF4_DROME     Q9VQI9   
15     ADAR_DROME     Q9NII1   
16     AKT1_DROME     Q8INB9   
17    AKIRN_DROME     Q9VS59   
18     ACER_DROME     Q9VLJ6   
19     ACES_DROME     P07140   
20     ACTN_DROME     P18091   
21     ADHX_DROME     P46415   
22      ADT_DROME     Q26365   
23      ADH_DROME     P00334   
24     ADRO_DROME     Q9V3T9   
25      ABL_DROME     P00522   
26     7UP1_DROME

# Schizosaccharomyces Pombe

In [14]:
# S. pombe: build the feature table from the FASTA below and pickle it via main().
name = 'spombe'
proteome = '/home/jurriaan/proteindb/spombe_swiss/uniprot_spombe_080519.fasta'
main(name, proteome)

0.23s self.fasta2df(dbfasta)
6.47s self.add_lowcomplexity_features()
Generated file: spombe_llps_f2f_5-6-2019.pkl
     protein_name uniprot_id  \
0      ABC1_SCHPO     Q92337   
1      AATM_SCHPO     O94320   
2      ACSA_SCHPO     P78773   
3      ADG3_SCHPO     O74851   
4      ANM1_SCHPO     Q9URX7   
5     AP1B1_SCHPO     O43079   
6       AP1_SCHPO     Q01663   
7      AMT3_SCHPO     Q9P7F3   
8      ANP1_SCHPO     O74745   
9      AP2S_SCHPO     Q9Y7L6   
10      ALO_SCHPO     Q9HDX8   
11     ALR1_SCHPO     O59828   
12    ALP16_SCHPO     P87244   
13     AMT2_SCHPO     Q9US00   
14    AP1M1_SCHPO     Q9HFE5   
15     AMY1_SCHPO     O74922   
16     AP2M_SCHPO     Q09718   
17     APC5_SCHPO     Q9P4W7   
18    APC13_SCHPO     O74358   
19     APC3_SCHPO     P10505   
20     AP3S_SCHPO     Q09905   
21     APP1_SCHPO     Q9P7E8   
22    APQ12_SCHPO     O94353   
23     AMY4_SCHPO     O42918   
24     APH1_SCHPO     P49776   
25     APTX_SCHPO     O74859   
26     ALP4_SCHPO     