## **Aula 10 - ML e Bioinformática**

In [49]:
!pip install biopython bioservices pandas numpy scikit-learn



In [50]:
from bioservices.uniprot import UniProt
from Bio import SeqIO
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
import pandas as pd
import io

In [51]:
uniprot = UniProt()

membrane_proteins = uniprot.search('locations:(location:"Membrane [SL-0162]") taxonomy:"Bacteria [2]"', frmt='txt', limit=1000)
cytoplasm_proteins = uniprot.search('locations:(location:"Cytoplasm [SL-0086]") taxonomy:"Bacteria [2]"', frmt='txt', limit=1000)

membrane_proteins_buffer = io.StringIO()
cytoplasm_proteins_buffer = io.StringIO()

membrane_proteins_buffer.write(membrane_proteins)
cytoplasm_proteins_buffer.write(cytoplasm_proteins)

membrane_proteins_buffer.seek(0)
cytoplasm_proteins_buffer.seek(0)

0

In [52]:
membrane_proteins_parser = SeqIO.parse(membrane_proteins_buffer,'swiss')
cytoplasm_proteins_parser = SeqIO.parse(cytoplasm_proteins_buffer,'swiss')

df_membrane_proteins = pd.DataFrame()
df_cytoplasm_proteins = pd.DataFrame()

for record in membrane_proteins_parser:
  protein_analysis = ProteinAnalysis(str(record.seq))
  aminoacid_composition = protein_analysis.get_amino_acids_percent()
  df_membrane_proteins = df_membrane_proteins.append([aminoacid_composition], ignore_index=True)

for record in cytoplasm_proteins_parser:
  protein_analysis = ProteinAnalysis(str(record.seq))
  aminoacid_composition = protein_analysis.get_amino_acids_percent()
  df_cytoplasm_proteins = df_cytoplasm_proteins.append([aminoacid_composition], ignore_index=True)

In [53]:
df_cytoplasm_proteins[['A', 'C']]

Unnamed: 0,A,C
0,0.072727,0.000000
1,0.034783,0.008696
2,0.049242,0.018939
3,0.098039,0.013072
4,0.094862,0.019763
...,...,...
995,0.113074,0.007067
996,0.113074,0.007067
997,0.113074,0.007067
998,0.184466,0.008091


In [54]:
df_cytoplasm_proteins.drop(['A'], axis=1)

Unnamed: 0,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y
0,0.000000,0.069697,0.124242,0.027273,0.057576,0.012121,0.057576,0.036364,0.133333,0.015152,0.039394,0.024242,0.045455,0.103030,0.039394,0.051515,0.060606,0.009091,0.021212
1,0.008696,0.052174,0.086957,0.043478,0.000000,0.008696,0.095652,0.156522,0.113043,0.008696,0.078261,0.008696,0.043478,0.034783,0.078261,0.052174,0.043478,0.000000,0.052174
2,0.018939,0.079545,0.049242,0.049242,0.068182,0.041667,0.060606,0.045455,0.106061,0.026515,0.041667,0.053030,0.060606,0.049242,0.037879,0.053030,0.049242,0.026515,0.034091
3,0.013072,0.078431,0.065359,0.032680,0.091503,0.019608,0.039216,0.019608,0.124183,0.026144,0.026144,0.052288,0.045752,0.052288,0.032680,0.052288,0.071895,0.006536,0.052288
4,0.019763,0.083004,0.023715,0.019763,0.098814,0.027668,0.055336,0.019763,0.090909,0.015810,0.043478,0.067194,0.035573,0.059289,0.055336,0.067194,0.090909,0.011858,0.019763
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0.007067,0.067138,0.053004,0.053004,0.074205,0.038869,0.049470,0.063604,0.095406,0.028269,0.042403,0.067138,0.028269,0.024735,0.053004,0.042403,0.045936,0.010601,0.042403
996,0.007067,0.067138,0.053004,0.053004,0.074205,0.038869,0.049470,0.063604,0.095406,0.028269,0.042403,0.067138,0.028269,0.024735,0.053004,0.042403,0.045936,0.010601,0.042403
997,0.007067,0.067138,0.053004,0.053004,0.074205,0.038869,0.049470,0.063604,0.095406,0.028269,0.042403,0.067138,0.028269,0.024735,0.053004,0.042403,0.045936,0.010601,0.042403
998,0.008091,0.071197,0.035599,0.019417,0.127832,0.012945,0.045307,0.012945,0.085761,0.016181,0.016181,0.056634,0.008091,0.084142,0.045307,0.042071,0.103560,0.011327,0.012945


In [55]:
df_cytoplasm_proteins.drop([1], axis=0)

Unnamed: 0,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y
0,0.072727,0.000000,0.069697,0.124242,0.027273,0.057576,0.012121,0.057576,0.036364,0.133333,0.015152,0.039394,0.024242,0.045455,0.103030,0.039394,0.051515,0.060606,0.009091,0.021212
2,0.049242,0.018939,0.079545,0.049242,0.049242,0.068182,0.041667,0.060606,0.045455,0.106061,0.026515,0.041667,0.053030,0.060606,0.049242,0.037879,0.053030,0.049242,0.026515,0.034091
3,0.098039,0.013072,0.078431,0.065359,0.032680,0.091503,0.019608,0.039216,0.019608,0.124183,0.026144,0.026144,0.052288,0.045752,0.052288,0.032680,0.052288,0.071895,0.006536,0.052288
4,0.094862,0.019763,0.083004,0.023715,0.019763,0.098814,0.027668,0.055336,0.019763,0.090909,0.015810,0.043478,0.067194,0.035573,0.059289,0.055336,0.067194,0.090909,0.011858,0.019763
5,0.072340,0.014894,0.057447,0.075532,0.032979,0.086170,0.026596,0.059574,0.051064,0.102128,0.019149,0.035106,0.050000,0.032979,0.070213,0.056383,0.056383,0.071277,0.003191,0.026596
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0.113074,0.007067,0.067138,0.053004,0.053004,0.074205,0.038869,0.049470,0.063604,0.095406,0.028269,0.042403,0.067138,0.028269,0.024735,0.053004,0.042403,0.045936,0.010601,0.042403
996,0.113074,0.007067,0.067138,0.053004,0.053004,0.074205,0.038869,0.049470,0.063604,0.095406,0.028269,0.042403,0.067138,0.028269,0.024735,0.053004,0.042403,0.045936,0.010601,0.042403
997,0.113074,0.007067,0.067138,0.053004,0.053004,0.074205,0.038869,0.049470,0.063604,0.095406,0.028269,0.042403,0.067138,0.028269,0.024735,0.053004,0.042403,0.045936,0.010601,0.042403
998,0.184466,0.008091,0.071197,0.035599,0.019417,0.127832,0.012945,0.045307,0.012945,0.085761,0.016181,0.016181,0.056634,0.008091,0.084142,0.045307,0.042071,0.103560,0.011327,0.012945


In [56]:
df_cytoplasm_proteins

Unnamed: 0,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y
0,0.072727,0.000000,0.069697,0.124242,0.027273,0.057576,0.012121,0.057576,0.036364,0.133333,0.015152,0.039394,0.024242,0.045455,0.103030,0.039394,0.051515,0.060606,0.009091,0.021212
1,0.034783,0.008696,0.052174,0.086957,0.043478,0.000000,0.008696,0.095652,0.156522,0.113043,0.008696,0.078261,0.008696,0.043478,0.034783,0.078261,0.052174,0.043478,0.000000,0.052174
2,0.049242,0.018939,0.079545,0.049242,0.049242,0.068182,0.041667,0.060606,0.045455,0.106061,0.026515,0.041667,0.053030,0.060606,0.049242,0.037879,0.053030,0.049242,0.026515,0.034091
3,0.098039,0.013072,0.078431,0.065359,0.032680,0.091503,0.019608,0.039216,0.019608,0.124183,0.026144,0.026144,0.052288,0.045752,0.052288,0.032680,0.052288,0.071895,0.006536,0.052288
4,0.094862,0.019763,0.083004,0.023715,0.019763,0.098814,0.027668,0.055336,0.019763,0.090909,0.015810,0.043478,0.067194,0.035573,0.059289,0.055336,0.067194,0.090909,0.011858,0.019763
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0.113074,0.007067,0.067138,0.053004,0.053004,0.074205,0.038869,0.049470,0.063604,0.095406,0.028269,0.042403,0.067138,0.028269,0.024735,0.053004,0.042403,0.045936,0.010601,0.042403
996,0.113074,0.007067,0.067138,0.053004,0.053004,0.074205,0.038869,0.049470,0.063604,0.095406,0.028269,0.042403,0.067138,0.028269,0.024735,0.053004,0.042403,0.045936,0.010601,0.042403
997,0.113074,0.007067,0.067138,0.053004,0.053004,0.074205,0.038869,0.049470,0.063604,0.095406,0.028269,0.042403,0.067138,0.028269,0.024735,0.053004,0.042403,0.045936,0.010601,0.042403
998,0.184466,0.008091,0.071197,0.035599,0.019417,0.127832,0.012945,0.045307,0.012945,0.085761,0.016181,0.016181,0.056634,0.008091,0.084142,0.045307,0.042071,0.103560,0.011327,0.012945


In [57]:
df_membrane_proteins

Unnamed: 0,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y
0,0.118357,0.019324,0.026570,0.016908,0.043478,0.079710,0.012077,0.067633,0.021739,0.159420,0.031401,0.048309,0.041063,0.019324,0.033816,0.077295,0.057971,0.101449,0.012077,0.012077
1,0.094982,0.000000,0.055556,0.057348,0.016129,0.103943,0.017921,0.035842,0.034050,0.071685,0.014337,0.066308,0.060932,0.051971,0.044803,0.094982,0.103943,0.062724,0.001792,0.010753
2,0.120419,0.020942,0.073298,0.047120,0.036649,0.099476,0.026178,0.036649,0.041885,0.062827,0.026178,0.041885,0.047120,0.010471,0.062827,0.057592,0.068063,0.089005,0.020942,0.010471
3,0.104895,0.009324,0.023310,0.027972,0.051282,0.102564,0.013986,0.109557,0.032634,0.144522,0.025641,0.023310,0.051282,0.011655,0.023310,0.053613,0.060606,0.100233,0.009324,0.020979
4,0.101124,0.000000,0.112360,0.044944,0.033708,0.078652,0.011236,0.089888,0.134831,0.067416,0.022472,0.011236,0.011236,0.078652,0.011236,0.067416,0.067416,0.044944,0.011236,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0.078801,0.006659,0.058824,0.110988,0.029967,0.064373,0.027747,0.062153,0.059933,0.089900,0.028857,0.043285,0.025527,0.049945,0.069922,0.051054,0.048835,0.056604,0.007769,0.028857
996,0.058608,0.000000,0.062271,0.051282,0.034799,0.078755,0.010989,0.054945,0.065934,0.109890,0.031136,0.042125,0.056777,0.054945,0.049451,0.067766,0.060440,0.073260,0.009158,0.027473
997,0.094077,0.006969,0.059233,0.052265,0.013937,0.069686,0.013937,0.045296,0.080139,0.101045,0.045296,0.041812,0.031359,0.048780,0.045296,0.073171,0.045296,0.087108,0.006969,0.038328
998,0.070707,0.010101,0.020202,0.035354,0.090909,0.095960,0.005051,0.080808,0.030303,0.136364,0.045455,0.045455,0.030303,0.015152,0.025253,0.060606,0.035354,0.131313,0.005051,0.030303


In [58]:
df_membrane_proteins['membrane'] = True
df_cytoplasm_proteins['membrane'] = False

In [59]:
df = pd.concat([df_membrane_proteins, df_cytoplasm_proteins])

In [60]:
df

Unnamed: 0,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y,membrane
0,0.118357,0.019324,0.026570,0.016908,0.043478,0.079710,0.012077,0.067633,0.021739,0.159420,0.031401,0.048309,0.041063,0.019324,0.033816,0.077295,0.057971,0.101449,0.012077,0.012077,True
1,0.094982,0.000000,0.055556,0.057348,0.016129,0.103943,0.017921,0.035842,0.034050,0.071685,0.014337,0.066308,0.060932,0.051971,0.044803,0.094982,0.103943,0.062724,0.001792,0.010753,True
2,0.120419,0.020942,0.073298,0.047120,0.036649,0.099476,0.026178,0.036649,0.041885,0.062827,0.026178,0.041885,0.047120,0.010471,0.062827,0.057592,0.068063,0.089005,0.020942,0.010471,True
3,0.104895,0.009324,0.023310,0.027972,0.051282,0.102564,0.013986,0.109557,0.032634,0.144522,0.025641,0.023310,0.051282,0.011655,0.023310,0.053613,0.060606,0.100233,0.009324,0.020979,True
4,0.101124,0.000000,0.112360,0.044944,0.033708,0.078652,0.011236,0.089888,0.134831,0.067416,0.022472,0.011236,0.011236,0.078652,0.011236,0.067416,0.067416,0.044944,0.011236,0.000000,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0.113074,0.007067,0.067138,0.053004,0.053004,0.074205,0.038869,0.049470,0.063604,0.095406,0.028269,0.042403,0.067138,0.028269,0.024735,0.053004,0.042403,0.045936,0.010601,0.042403,False
996,0.113074,0.007067,0.067138,0.053004,0.053004,0.074205,0.038869,0.049470,0.063604,0.095406,0.028269,0.042403,0.067138,0.028269,0.024735,0.053004,0.042403,0.045936,0.010601,0.042403,False
997,0.113074,0.007067,0.067138,0.053004,0.053004,0.074205,0.038869,0.049470,0.063604,0.095406,0.028269,0.042403,0.067138,0.028269,0.024735,0.053004,0.042403,0.045936,0.010601,0.042403,False
998,0.184466,0.008091,0.071197,0.035599,0.019417,0.127832,0.012945,0.045307,0.012945,0.085761,0.016181,0.016181,0.056634,0.008091,0.084142,0.045307,0.042071,0.103560,0.011327,0.012945,False


In [61]:
X = df.drop(['membrane'], axis=1)
y = df['membrane']

In [62]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [63]:
X_train

Unnamed: 0,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y
92,0.146853,0.006993,0.055944,0.097902,0.055944,0.062937,0.013986,0.062937,0.062937,0.055944,0.027972,0.020979,0.027972,0.020979,0.069930,0.062937,0.041958,0.090909,0.000000,0.013986
897,0.194030,0.000000,0.029851,0.014925,0.029851,0.044776,0.014925,0.029851,0.059701,0.149254,0.059701,0.029851,0.059701,0.029851,0.044776,0.014925,0.044776,0.089552,0.044776,0.014925
893,0.088692,0.004435,0.031042,0.048780,0.068736,0.130820,0.028825,0.064302,0.026608,0.099778,0.024390,0.035477,0.053215,0.017738,0.039911,0.057650,0.048780,0.066519,0.037694,0.026608
730,0.087209,0.000000,0.058140,0.093023,0.046512,0.052326,0.029070,0.052326,0.058140,0.093023,0.046512,0.029070,0.034884,0.052326,0.034884,0.081395,0.023256,0.069767,0.017442,0.040698
402,0.072202,0.025271,0.046931,0.075812,0.032491,0.086643,0.025271,0.036101,0.036101,0.104693,0.028881,0.032491,0.043321,0.050542,0.072202,0.061372,0.046931,0.097473,0.010830,0.014440
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
708,0.093750,0.031250,0.000000,0.062500,0.218750,0.000000,0.000000,0.187500,0.062500,0.031250,0.031250,0.000000,0.062500,0.000000,0.062500,0.000000,0.093750,0.031250,0.000000,0.031250
398,0.107200,0.000000,0.080000,0.081600,0.019200,0.092800,0.012800,0.059200,0.062400,0.072000,0.014400,0.032000,0.036800,0.044800,0.052800,0.054400,0.068800,0.091200,0.004800,0.012800
325,0.081818,0.027273,0.009091,0.018182,0.045455,0.109091,0.009091,0.136364,0.009091,0.145455,0.036364,0.018182,0.045455,0.018182,0.027273,0.072727,0.063636,0.045455,0.036364,0.045455
511,0.113074,0.007067,0.070671,0.049470,0.053004,0.067138,0.038869,0.049470,0.063604,0.091873,0.028269,0.042403,0.067138,0.028269,0.028269,0.060071,0.042403,0.045936,0.010601,0.042403


In [64]:
X_test

Unnamed: 0,A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y
744,0.113074,0.007067,0.070671,0.049470,0.053004,0.067138,0.038869,0.049470,0.063604,0.091873,0.028269,0.042403,0.067138,0.028269,0.028269,0.060071,0.042403,0.045936,0.010601,0.042403
343,0.082251,0.010101,0.057720,0.080808,0.046176,0.053391,0.008658,0.054834,0.064935,0.113997,0.008658,0.062049,0.025974,0.054834,0.062049,0.057720,0.054834,0.063492,0.007215,0.030303
26,0.119850,0.003745,0.067416,0.052434,0.044944,0.089888,0.014981,0.063670,0.044944,0.074906,0.022472,0.052434,0.033708,0.026217,0.071161,0.033708,0.071161,0.074906,0.007491,0.029963
709,0.093640,0.026502,0.054770,0.060071,0.037102,0.065371,0.026502,0.049470,0.047703,0.114841,0.024735,0.024735,0.053004,0.040636,0.054770,0.056537,0.058304,0.068905,0.014134,0.028269
357,0.103529,0.002353,0.032941,0.047059,0.028235,0.063529,0.032941,0.051765,0.037647,0.141176,0.016471,0.028235,0.063529,0.040000,0.075294,0.047059,0.061176,0.077647,0.014118,0.035294
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
584,0.102837,0.010638,0.024823,0.024823,0.085106,0.113475,0.024823,0.074468,0.021277,0.117021,0.021277,0.031915,0.049645,0.017730,0.028369,0.039007,0.053191,0.060284,0.056738,0.042553
817,0.075075,0.003003,0.066066,0.057057,0.033033,0.051051,0.006006,0.051051,0.063063,0.102102,0.015015,0.063063,0.060060,0.018018,0.045045,0.072072,0.075075,0.084084,0.018018,0.042042
811,0.113074,0.007067,0.067138,0.053004,0.053004,0.074205,0.038869,0.049470,0.063604,0.091873,0.028269,0.042403,0.067138,0.028269,0.024735,0.053004,0.042403,0.049470,0.010601,0.042403
317,0.111455,0.004644,0.074303,0.034056,0.035604,0.113003,0.009288,0.027864,0.021672,0.111455,0.013932,0.047988,0.047988,0.063467,0.051084,0.078947,0.052632,0.035604,0.020124,0.044892


In [65]:
model = SVC()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(accuracy)

0.83


In [66]:
def predict_membrane_proteins(protein_sequence):
  protein_analysis = ProteinAnalysis(protein_sequence)
  aminoacid_composition = protein_analysis.get_amino_acids_percent()
  df = pd.DataFrame([aminoacid_composition])
  result = model.predict(df)
  return result

In [67]:
sequence = '''
MSQNTLKVHDLNEDAEFDENGVEVFDEKALVEQEPSDNDLAEEELLSQGATQRVLDATQL
YLGEIGYSPLLTAEEEVYFARRALRGDVASRRRMIESNLRLVVKIARRYGNRGLALLDLI
EEGNLGLIRAVEKFDPERGFRFSTYATWWIRQTIERAIMNQTRTIRLPIHIVKELNVYLR
TARELSHKLDHEPSAEEIAEQLDKPVDDVSRMLRLNERITSVDTPLGGDSEKALLDILAD
EKENGPEDTTQDDDMKQSIVKWLFELNAKQREVLARRFGLLGYEAATLEDVGREIGLTRE
RVRQIQVEGLRRLREILQTQGLNIEALFRE
'''

predict_membrane_proteins(sequence)

array([False])

In [68]:
sequence = '''
MKKLLPILIGLSLSGFSSLSQAENLMQVYQQARLSNPELRKSAADRDAAFEKINEARSPL
LPQLGLGADYTYSNGYRDANGINSNATSASLQLTQSIFDMSKWRALTLQEKAAGIQDVTY
QTDQQTLILNTATAYFNVLNAIDVLSYTQAQKEAIYRQLDQTTQRFNVGLVAITDVQNAR
AQYDTVLANEVTARNNLDNAVEQLRQITGNYYPELAALNVENFKTDKPQPVNALLKEAEK
RNLSLLQARLSQDLAREQIRQAQDGHLPTLDLTASTGISDTSYSGSKTRGAAGTQYDDSN
MGQNKVGLSFSLPIYQGGMVNSQVKQAQYNFVGASEQLESAHRSVVQTVRSSFNNINASI
SSINAYKQAVVSAQSSLDAMEAGYSVGTRTIVDVLDATTTLYNAKQELANARYNYLINQL
NIKSALGTLNEQDLLALNNALSKPVSTNPENVAPQTPEQNAIADGYAPDSPAPVVQQTSA
RTTTSNGHNPFRN
'''

predict_membrane_proteins(sequence)

array([ True])

## **Atividade Aula 10**

In [69]:
sequence = '''
MEKTYNPQDIEQPLYEHWEKQGYFKPNGDESQESFCIMIPPPNVTGSLHMGHAFQQTIMD
TMIRYQRMQGKNTLWQVGTDHAGIATQMVVERKIAAEEGKTRHDYGREAFIDKIWEWKAE
SGGTITRQMRRLGNSVDWERERFTMDEGLSNAVKEVFVRLYKEDLIYRGKRLVNWDPKLR
TAISDLEVENRESKGSMWHIRYPLADGAKTADGKDYLVVATTRPETLLGDTGVAVNPEDP
RYKDLIGKYVILPLVNRRIPIVGDEHADMEKGTGCVKITPAHDFNDYEVGKRHALPMINI
LTFDGDIRESAQVFDTKGNESDVYSSEIPAEFQKLERFAARKAVVAAVDALGLLEEIKPH
DLTVPYGDRGGVVIEPMLTDQWYVRADVLAKPAVEAVENGDIQFVPKQYENMYFSWMRDI
QDWCISRQLWWGHRIPAWYDEAGNVYVGRNEDEVRKENNLGADVVLRQDEDVLDTWFSSA
LWTFSTLGWPENTDALRQFHPTSVMVSGFDIIFFWIARMIMMTMHFIKDENGKPQVPFHT
VYMTGLIRDDEGQKMSKSKGNVIDPLDMVDGISLPELLEKRTGNMMQPQLADKIRKRTEK
QFPNGIEPHGTDALRFTLAALASTGRDINWDMKRLEGYRNFCNKLWNASRFVLMNTEGQD
CGFNGGEMTLSLADRWILAEFNQTIKAYREALDSFRFDIAAGILYEFTWNQFCDWYLELT
KPVMNGGTEAELRGTRHTLVTVLEGLLRLAHPIIPFITETIWQRVKVLCGITADTIMLQP
FPQYDASQVDEAALADTEWLKQAIVAVRNIRAEMNIAPGKPLELLLRGCSADAERRVNEN
RGFLQTLARLESITVLPADDKGPVSVTKIIDGAELLIPMAGLINKEDELARLAKEVAKIE
GEISRIENKLANEGFVARAPEAVIAKEREKLEGYAEAKAKLIEQQAVIAAL
'''
predict_membrane_proteins(sequence)

array([False])

In [70]:
sequence = '''
MPKPADHRNHAAVSTSVLSALFLGAGAALLSACSSPQHASTVPGTTPSIWTGSPAPSGLS
GHDEESPGAQSLTSTLTAPDGTKVATAKFEFANGYATVTIATTGVGKLTPGFHGLHIHQV
GKCEPNSVAPTGGAPGNFLSAGGHYHVPGHTGTPASGDLASLQVRGDGSAMLVTTTDAFT
MDDLLSGAKTAIIIHAGADNFANIPPERYVQVNGTPGPDETTLTTGDAGKRVACGVIGSG
'''
predict_membrane_proteins(sequence)

array([ True])

In [71]:
sequence = '''
MDKIEVRGARTHNLKNINLVIPRDKLIVVTGLSGSGKSSLAFDTLYAEGQRRYVESLSAY
ARQFLSLMEKPDVDHIEGLSPAISIEQKSTSHNPRSTVGTITEIHDYLRLLFARVGEPRC
PDHDVPLAAQTVSQMVDNVLSQPEGKRLMLLAPIIKERKGEHTKTLENLASQGYIRARID
GEVCDLSDPPKLELQKKHTIEVVVDRFKVRDDLTQRLAESFETALELSGGTAVVADMDDP
KAEELLFSANFACPICGYSMRELEPRLFSFNNPAGACPTCDGLGVQQYFDPDRVIQNPEL
SLAGGAIRGWDRRNFYYFQMLKSLADHYKFDVEAPWGSLSANVHKVVLYGSGKENIEFKY
MNDRGDTSIRRHPFEGVLHNMERRYKETESSAVREELAKFISNRPCASCEGTRLRREARH
VYVENTPLPAISDMSIGHAMEFFNNLKLAGQRAKIAEKILKEIGDRLKFLVNVGLNYLTL
SRSAETLSGGEAQRIRLASQIGAGLVGVMYVLDEPSIGLHQRDNERLLGTLIHLRDLGNT
VIVVEHDEDAIRAADHVIDIGPGAGVHGGEVVAEGPLEAIMAVPESLTGQYMSGKRKIEV
PKKRVPANPEKVLKLTGARGNNLKDVTLTLPVGLFTCITGVSGSGKSTLINDTLFPIAQR
QLNGATIAEPAPYRDIQGLEHFDKVIDIDQSPIGRTPRSNPATYTGVFTPVRELFAGVPE
SRARGYTPGRFSFNVRGGRCEACQGDGVIKVEMHFLPDIYVPCDQCKGKRYNRETLEIKY
KGKTIHEVLDMTIEEAREFFDAVPALARKLQTLMDVGLTYIRLGQSATTLSGGEAQRVKL
ARELSKRGTGQTLYILDEPTTGLHFADIQQLLDVLHKLRDQGNTIVVIEHNLDVIKTADW
IVDLGPEGGSGGGEILVSGTPETVAECEASHTARFLKPML
'''

predict_membrane_proteins(sequence)

array([False])