### BIO727P - Bioinformatics Software Development Group Project (2019/20)

#### AIM: To populate all the databases using SQLAlchemy

In [1]:
# Python version: Python 3.7.4

# import the required packages

# !pip install sqlalchemy
# !pip install pandas

from sqlalchemy import Column, Integer, String, ForeignKey
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
from sqlalchemy.orm import sessionmaker
import pandas as pd

In [2]:
# create engine that will allow us to communicate with the database
engine = create_engine('sqlite:///c:\\sqlite\\kinases.db', echo=True)

# create a session which is the middle ground to talk to our engine
Session = sessionmaker(bind=engine)
session = Session()

In [3]:
Base = declarative_base()

In [4]:
# define the structure of our database 
class HumanKinases(Base):
    __tablename__ = 'human_kinases'
    Entry_name = Column(String(15))
    UniProt_ID = Column(String(15), primary_key=True)
    Primary_Protein_Name = Column (String(100))
    Alternative_Protein_Name = Column(String(350)) 
    Gene_Symbol = Column(String(15))
    Alternative_Gene_Name = Column(String(60)) 
    Families = Column(String(175))
    AA_Seq = Column(String(34400))
    Molecular_Mass = Column(String(10))
    Subcellular_Location = Column(String(350))

In [5]:
class Phosphosites(Base):
    __tablename__ = 'phosphosites'
    GENE = Column(String(20))
    PROTEIN = Column(String(20))
    ACC_ID = Column(String(19))
    HU_CHR_LOC = Column(String(26))
    MOD_RSD = Column(String(6))
    SITE_GRP_ID = Column(String(10))
    MW_kD = Column(Integer)
    DOMAIN = Column(String(30)) 
    SITE_7_AA = Column(String(15))
    LT_LIT = Column(Integer)
    MS_LIT = Column(Integer)
    MS_CST = Column(Integer)
    CST_CAT = Column(String(141))
    PHOS_ID = Column(String(31))
    PHOS_ID2 = Column(String(26))
    PHOS_ID3 = Column(String(32))
    PHOS_ID4 = Column(String(25))
    PHOS_ID5 = Column(String(24), primary_key=True)
    SOURCE = Column (String(66))
    SEQUENCE = Column(String(3500)) # NOT INCLUDED THE LENGTH IN THE FILE
    PMID = Column(String(8))
    ISOFORM = Column(Integer)
    ID_PH = Column(String(9))

In [6]:
class Inhibitors(Base):
    __tablename__ = 'inhibitors'
    Inhibitor = Column (String(150), primary_key=True)
    Ki_nM = Column (Integer) # does entry n/a affect?
    IC50_nM = Column (Integer) # does entry n/a affect?
    Kd_nM = Column (Integer) # does entry n/a affect?
    EC50_nM = Column (Integer) # does entry n/a affect?
    POC = Column (Integer) # does entry n/a affect?
    Source = Column (String(15))
    IMG_URL = Column (String(100))
    ID_IN = Column (String(10))
    
#     inhibitors = relationship("Inhibitors", backref="human_kinases")

In [7]:
class KinasesPhosphosites(Base):
    __tablename__ = 'kinases_phosphosites'
    GENE = Column(String(13))
    KIN_ACC_ID = Column (String(9), ForeignKey('human_kinases.UniProt_ID') )
    SUB_ACC_ID = Column (String(17))
    IN_VIVO_RXN = Column(String(1))
    IN_VITRO_RXN = Column(String(1))
    CST_CAT = Column(String(141))
    PHOS_ID = Column(String(31)) # ForeignKey('phosphosites.PHOS_ID5'))
    PHOS_ID2 = Column(String(23))
    PHOS_ID3 = Column(String(29))
    PHOS_ID4 = Column(String(25))
    PHOS_ID5 = Column(String(23), ForeignKey('phosphosites.PHOS_ID5'))
    SOURCE = Column (String(64))
    SEQUENCE = Column (String(8797))
    PMID = Column (String(8))
    ID_KS = Column(String(9), primary_key=True)
    
#     kinases_phosphosites = relationship("KinasesPhosphosites", backref="human_kinases")
#     kinases_phosphosites = relationship("KinasesPhosphosites", backref="phosphosites")

In [8]:
class PhosphositesDiseases(Base):
    __tablename__ = 'phosphosites_diseases'
    DISEASE = Column(String(92))
    ALTERATION = Column(String(32)) # sometimes multiples separated by ";" ERROR in sql
    ACC_ID = Column(String(16))
    PMIDs = Column(String(8))
    LT_LIT = Column(Integer)
    MS_LIT = Column(Integer)
    MS_CST = Column(Integer)
    CST_CAT = Column(String(141)) # sometimes multiples separated by ";" ERROR in sql
    NOTES = Column(String(314))
    PHOS_ID = Column(String(22), ForeignKey('phosphosites.PHOS_ID5'))  # duplicates
    ID_PD = Column(String(9), primary_key=True)
    
#     kinases_phosphosites = relationship("PhosphositesDiseases", backref="phosphosites")

In [9]:
class InhibKin(Base):
    __tablename__ = 'inhib_kin'
    Kinase = Column(String(30), ForeignKey('human_kinases.UniProt_ID')) # this record MUST to have _HUMAN to match
    Inhibitor = Column (String(150), ForeignKey('inhibitors.Inhibitor'))
    ID_KI = Column (String(10), primary_key=True)
    
#     inhib_kin = relationship("InhibKin", backref="inhibitors")

In [10]:
# define all the relationships between tables

kinases_phosphosites = relationship("KinasesPhosphosites", backref="human_kinases")
inhibitors = relationship("Inhibitors", backref="human_kinases")

kinases_phosphosites = relationship("KinasesPhosphosites", backref="phosphosites")
kinases_phosphosites = relationship("PhosphositesDiseases", backref="phosphosites")

inhib_kin = relationship("InhibKin", backref="inhibitors")

In [11]:
Base.metadata.create_all(engine)

2020-01-23 13:38:57,755 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2020-01-23 13:38:57,757 INFO sqlalchemy.engine.base.Engine ()
2020-01-23 13:38:57,759 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2020-01-23 13:38:57,762 INFO sqlalchemy.engine.base.Engine ()
2020-01-23 13:38:57,767 INFO sqlalchemy.engine.base.Engine PRAGMA main.table_info("human_kinases")
2020-01-23 13:38:57,769 INFO sqlalchemy.engine.base.Engine ()
2020-01-23 13:38:57,772 INFO sqlalchemy.engine.base.Engine PRAGMA temp.table_info("human_kinases")
2020-01-23 13:38:57,773 INFO sqlalchemy.engine.base.Engine ()
2020-01-23 13:38:57,776 INFO sqlalchemy.engine.base.Engine PRAGMA main.table_info("phosphosites")
2020-01-23 13:38:57,777 INFO sqlalchemy.engine.base.Engine ()
2020-01-23 13:38:57,779 INFO sqlalchemy.engine.base.Engine PRAGMA temp.table_info("phosphosites")
2020-01-23 13:38:57,781 INFO sqlalchemy.engine.base.Engine

In [12]:
kinases=pd.read_csv("human_kinase_dataframe.csv")

phosphosites=pd.read_csv("phosphosites.csv")

inhibs=pd.read_csv("inhibitors.csv")

phospho_kinases=pd.read_csv("kinases_phosphosites.csv")

phospho_diseases=pd.read_csv("phosphosites_diseases.csv")

inhib_kinases=pd.read_csv("inhib_kin.csv")

  interactivity=interactivity, compiler=compiler, result=result)


In [13]:
print (phospho_kinases.iloc[0])

GENE                                                      EIF2AK1
KIN_ACC_ID                                                 Q9BQI3
SUB_ACC_ID                                                 P05198
IN_VIVO_RXN                                                      
IN_VITRO_RXN                                                    X
CST_CAT#                                   3597; 9721; 3398; 5199
PHOS_ID                                         EIF2S1_HUMAN(S52)
PHOS_ID2                                          EIF2-ALPHA(S52)
PHOS_ID3                                    EIF2-ALPHA_HUMAN(S52)
PHOS_ID4                                              EIF2S1(S52)
PHOS_ID5                                              P05198(S52)
SOURCE          http://www.phosphosite.org/uniprotAccAction?id...
SEQUENCE                                                      NaN
PMID                                                          NaN
KIN_ACC_ID_2                                               Q9BQI3
ID_KS     

In [14]:
for k in range(len(kinases)):
    record = HumanKinases(**{
        "Entry_name" : kinases.iloc[k, 0],
        "UniProt_ID" : kinases.iloc[k, 1],
        "Primary_Protein_Name" : kinases.iloc[k, 2],
        "Alternative_Protein_Name" : kinases.iloc[k, 3],
        "Gene_Symbol" : kinases.iloc[k, 4],
        "Alternative_Gene_Name" : kinases.iloc[k, 5],
        "Families" : kinases.iloc[k, 6],
        "AA_Seq" : kinases.iloc[k, 7],
        "Molecular_Mass" : kinases.iloc[k, 8],
        "Subcellular_Location" : kinases.iloc[k, 9]
    })
    session.add(record) # add all the records

session.commit() # commit all the records

session.close() # close the connection

print ("Finished")

2020-01-23 13:39:02,063 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2020-01-23 13:39:02,100 INFO sqlalchemy.engine.base.Engine INSERT INTO human_kinases ("Entry_name", "UniProt_ID", "Primary_Protein_Name", "Alternative_Protein_Name", "Gene_Symbol", "Alternative_Gene_Name", "Families", "AA_Seq", "Molecular_Mass", "Subcellular_Location") VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
2020-01-23 13:39:02,102 INFO sqlalchemy.engine.base.Engine (('AKT1_HUMAN', 'P31749', 'RAC-alpha serine/threonine-protein kinase       ', 'Protein kinase B, PKB, Protein kinase B alpha, PKB alpha, Proto-oncogene c-Akt, RAC-PK-alpha', 'AKT1', 'PKB, RAC', 'Protein kinase superfamily, AGC Ser/Thr protein kinase family, RAC subfamily', 'MSDVAIVKEGWLHKRGEYIKTWRPRYFLLKNDGTFIGYKERPQDVDQREAPLNNFSVAQCQLMKTERPRPNTFIIRCLQWTTVIERTFHVETPEEREEWTTAIQTVADGLKKQEEEEMDFRSGSPSDNSGAEEMEVSLAKPKHRVTMNE ... (182 characters truncated) ... WWGLGVVMYEMMCGRLPFYNQDHEKLFELILMEEIRFPRTLGPEAKSLLSGLLKKDPKQRLGGGSEDAKEIMQHRFFAGIVWQHVYEKKLSPPFKPQ

In [15]:
for p in range(len(phosphosites)):
    record = Phosphosites(**{
        "GENE" : phosphosites.iloc[p, 0],
        "PROTEIN" : phosphosites.iloc[p, 1],
        "ACC_ID" : phosphosites.iloc[p, 2],
        "HU_CHR_LOC" : phosphosites.iloc[p, 3],
        "MOD_RSD" : phosphosites.iloc[p, 4],
        "SITE_GRP_ID" : phosphosites.iloc[p, 5],
        "MW_kD" : phosphosites.iloc[p, 6],
        "DOMAIN" : phosphosites.iloc[p, 7],
        "SITE_7_AA" : phosphosites.iloc[p, 8],
        "LT_LIT" : phosphosites.iloc[p, 9],
        "MS_LIT" : phosphosites.iloc[p, 10],
        "MS_CST" : phosphosites.iloc[p, 11],
        "CST_CAT" : phosphosites.iloc[p, 12],
        "PHOS_ID" : phosphosites.iloc[p, 13],
        "PHOS_ID2" : phosphosites.iloc[p, 14],
        "PHOS_ID3" : phosphosites.iloc[p, 15],
        "PHOS_ID4" : phosphosites.iloc[p, 16],
        "PHOS_ID5" : phosphosites.iloc[p, 17],
        "SOURCE" : phosphosites.iloc[p, 18],
        "SEQUENCE" : phosphosites.iloc[p, 19],
        "PMID" : phosphosites.iloc[p, 20],
        "ISOFORM" : phosphosites.iloc[p, 21],
        "ID_PH" : phosphosites.iloc[p, 22]
    })
    session.add(record) # add all the records

session.commit() # commit all the records

session.close() # close the connection

print ("Finished")

2020-01-23 13:41:17,758 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2020-01-23 13:41:41,606 INFO sqlalchemy.engine.base.Engine INSERT INTO phosphosites ("GENE", "PROTEIN", "ACC_ID", "HU_CHR_LOC", "MOD_RSD", "SITE_GRP_ID", "MW_kD", "DOMAIN", "SITE_7_AA", "LT_LIT", "MS_LIT", "MS_CST", "CST_CAT", "PHOS_ID", "PHOS_ID2", "PHOS_ID3", "PHOS_ID4", "PHOS_ID5", "SOURCE", "SEQUENCE", "PMID", "ISOFORM", "ID_PH") VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
2020-01-23 13:41:41,608 INFO sqlalchemy.engine.base.Engine (('YWHAB', '14-3-3 beta', 'P31946', '20q13.12', 'T2', 15718712.0, 28.08, nan, '______MtMDksELV', nan, 3.0, 1.0, nan, 'YWHAB_HUMAN(T2)', '14-3-3 BETA(T2)', '14-3-3 BETA_HUMAN(T2)', 'YWHAB(T2)', 'P31946(T2)', 'http://www.phosphosite.org/uniprotAccAction?id=P31946', nan, nan, 0, 'PH0000001'), ('YWHAB', '14-3-3 beta', 'P31946', '20q13.12', 'S6', 15718709.0, 28.08, nan, '__MtMDksELVQkAk', nan, 8.0, nan, nan, 'YWHAB_HUMAN(S6)', '14-3-3 BETA(S6)', '14-3-3

In [16]:
for i in range(len(inhibs)):
    record = Inhibitors(**{
        "Inhibitor" : inhibs.iloc[i, 0],
        "Ki_nM" : inhibs.iloc[i, 1],
        "IC50_nM" : inhibs.iloc[i, 2],
        "Kd_nM" : inhibs.iloc[i, 3],
        "EC50_nM" : inhibs.iloc[i, 4],
        "POC" : inhibs.iloc[i, 5],
        "Source" : inhibs.iloc[i, 6],
        "IMG_URL" : inhibs.iloc[i, 7],
        "ID_IN" : inhibs.iloc[i, 8]
    })
    session.add(record) # add all the records
    
session.commit() # commit all the records

session.close() # close the connection

print ("Finished")

2020-01-23 13:42:06,395 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2020-01-23 13:42:08,424 INFO sqlalchemy.engine.base.Engine INSERT INTO inhibitors ("Inhibitor", "Ki_nM", "IC50_nM", "Kd_nM", "EC50_nM", "POC", "Source", "IMG_URL", "ID_IN") VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
2020-01-23 13:42:08,425 INFO sqlalchemy.engine.base.Engine (('CHEMBL253116', nan, '1000', nan, nan, nan, 'BindingDB', 'http://gemdock.life.nctu.edu.tw/kidfammap/data/png/BindingDB/BD50196392.png', 'IN0000001'), ('CHEMBL366185', nan, '10000', nan, nan, nan, 'BindingDB', 'http://gemdock.life.nctu.edu.tw/kidfammap/data/png/BindingDB/BD50168249.png', 'IN0000002'), ('CHEMBL193172', nan, '1140', nan, nan, nan, 'BindingDB', 'http://gemdock.life.nctu.edu.tw/kidfammap/data/png/BindingDB/BD50168239.png', 'IN0000003'), ('CHEMBL193162', nan, '1280', nan, nan, nan, 'BindingDB', 'http://gemdock.life.nctu.edu.tw/kidfammap/data/png/BindingDB/BD50168236.png', 'IN0000004'), ('CHEMBL364499', nan, '1360', nan, nan, nan, 'Bindi

In [17]:
for kp in range(len(phospho_kinases)):
    record = KinasesPhosphosites(**{
        "GENE" : phospho_kinases.iloc[kp, 0],
        "KIN_ACC_ID" : phospho_kinases.iloc[kp, 1],
        "SUB_ACC_ID" : phospho_kinases.iloc[kp, 2],
        "IN_VIVO_RXN" : phospho_kinases.iloc[kp, 3],
        "IN_VITRO_RXN" : phospho_kinases.iloc[kp, 4],
        "CST_CAT" : phospho_kinases.iloc[kp, 5],
        "PHOS_ID" : phospho_kinases.iloc[kp, 6],
        "PHOS_ID2" : phospho_kinases.iloc[kp, 7],
        "PHOS_ID3" : phospho_kinases.iloc[kp, 8],
        "PHOS_ID4" : phospho_kinases.iloc[kp, 9],
        "PHOS_ID5" : phospho_kinases.iloc[kp, 10],
        "SOURCE" : phospho_kinases.iloc[kp, 11],
        "SEQUENCE" : phospho_kinases.iloc[kp, 12],
        "PMID" : phospho_kinases.iloc[kp, 13],
        "ID_KS" : phospho_kinases.iloc[kp, 15]
    })
    session.add(record) # add all the records
    
session.commit() # commit all the records

session.close() # close the connection

print ("Finished")

2020-01-23 13:42:14,920 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2020-01-23 13:42:15,698 INFO sqlalchemy.engine.base.Engine INSERT INTO kinases_phosphosites ("GENE", "KIN_ACC_ID", "SUB_ACC_ID", "IN_VIVO_RXN", "IN_VITRO_RXN", "CST_CAT", "PHOS_ID", "PHOS_ID2", "PHOS_ID3", "PHOS_ID4", "PHOS_ID5", "SOURCE", "SEQUENCE", "PMID", "ID_KS") VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
2020-01-23 13:42:15,701 INFO sqlalchemy.engine.base.Engine (('EIF2AK1', 'Q9BQI3', 'P05198', ' ', 'X', '3597; 9721; 3398; 5199', 'EIF2S1_HUMAN(S52)', 'EIF2-ALPHA(S52)', 'EIF2-ALPHA_HUMAN(S52)', 'EIF2S1(S52)', 'P05198(S52)', 'http://www.phosphosite.org/uniprotAccAction?id=P05198', nan, nan, 'KS0000001'), ('EIF2AK1', 'Q9BQI3', 'P05198', ' ', 'X', nan, 'EIF2S1_HUMAN(S49)', 'EIF2-ALPHA(S49)', 'EIF2-ALPHA_HUMAN(S49)', 'EIF2S1(S49)', 'P05198(S49)', 'http://www.phosphosite.org/uniprotAccAction?id=P05198', nan, nan, 'KS0000002'), ('PRKCD', 'Q05655', 'Q9UQL6', ' ', 'X', '3443', 'HDAC5_HUMAN(S259)', 'HDAC5

In [18]:
for dp in range(len(phospho_diseases)):
    record = PhosphositesDiseases(**{
        "DISEASE" : phospho_diseases.iloc[dp, 0],
        "ALTERATION" : phospho_diseases.iloc[dp, 1],
        "ACC_ID" : phospho_diseases.iloc[dp, 2],
        "PMIDs" : phospho_diseases.iloc[dp, 3],
        "LT_LIT" : phospho_diseases.iloc[dp, 4],
        "MS_LIT" : phospho_diseases.iloc[dp, 5],
        "MS_CST" : phospho_diseases.iloc[dp, 6],
        "CST_CAT" : phospho_diseases.iloc[dp, 7],
        "NOTES" : phospho_diseases.iloc[dp, 8],
        "PHOS_ID" : phospho_diseases.iloc[dp, 9],
        "ID_PD" : phospho_diseases.iloc[dp, 10]
    })
    session.add(record) # add all the records
    
session.commit() # commit all the records

session.close() # close the connection

print ("Finished")

2020-01-23 13:42:16,573 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2020-01-23 13:42:16,631 INFO sqlalchemy.engine.base.Engine INSERT INTO phosphosites_diseases ("DISEASE", "ALTERATION", "ACC_ID", "PMIDs", "LT_LIT", "MS_LIT", "MS_CST", "CST_CAT", "NOTES", "PHOS_ID", "ID_PD") VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
2020-01-23 13:42:16,632 INFO sqlalchemy.engine.base.Engine (("Alzheimer's disease", 'mutation of site', 'P05067-4', 16554819, 36, 5, 3, '6986; 3823', 'Pin1 binds to the pT668-Pro motif but not when mutated to Ala. ', 'P05067-4(T668)', 'DP0000001'), ('breast cancer', 'increased', 'P31749', 12244301, 1404, 19, 44, '88106; 9271; 3787; 4060; 5315; 9563; 4070; 9018; 15116; 4051; 5102; 4075; 5012; 4071; 4054; 4058; 11962; 2336; 2337; 12694', 'Expression of phosphorylated S473 in invasive breast carcinomas correlates with cytosolic p27.  ', 'P31749(S473)', 'DP0000002'), ("Alzheimer's disease", 'increased', 'P05067-4', 14970211, 36, 5, 3, '6986; 3823', nan, 'P05067-4(T668)',

In [19]:
for ik in range(len(inhib_kinases)):
    record = InhibKin(**{
        "Kinase" : inhib_kinases.iloc[ik, 0],
        "Inhibitor" : inhib_kinases.iloc[ik, 1],
        "ID_KI" : inhib_kinases.iloc[ik, 3]
    })
    session.add(record) # add all the records
    
session.commit() # commit all the records

session.close() # close the connection

print ("Finished")

2020-01-23 13:42:24,169 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2020-01-23 13:42:26,050 INFO sqlalchemy.engine.base.Engine INSERT INTO inhib_kin ("Kinase", "Inhibitor", "ID_KI") VALUES (?, ?, ?)
2020-01-23 13:42:26,052 INFO sqlalchemy.engine.base.Engine (('RPS6KB2', 'CHEMBL253116', 'KI0000001'), ('RPS6KB2', 'CHEMBL366185', 'KI0000002'), ('RPS6KB2', 'CHEMBL193172', 'KI0000003'), ('RPS6KB2', 'CHEMBL193162', 'KI0000004'), ('RPS6KB2', 'CHEMBL364499', 'KI0000005'), ('RPS6KB2', 'CHEMBL437331', 'KI0000006'), ('RPS6KB2', 'CHEMBL426445', 'KI0000007'), ('RPS6KB2', 'CHEMBL190143', 'KI0000008')  ... displaying 10 of 58978 total bound parameter sets ...  ('EGFR', 'POX', 'KI0058977'), ('EGFR', 'STU', 'KI0058978'))
2020-01-23 13:42:28,137 INFO sqlalchemy.engine.base.Engine COMMIT
Finished


In [40]:
# engine.execute("SELECT * FROM phosphosites").fetchall()
engine.execute("SELECT * FROM human_kinases WHERE Subcellular_Location LIKE '%Nucleus%'").fetchall()
# engine.execute("SELECT * FROM human_kinases WHERE Subcellular_Location LIKE 'Nucleus'").fetchall()

2020-01-23 15:29:29,980 INFO sqlalchemy.engine.base.Engine SELECT * FROM human_kinases WHERE Subcellular_Location LIKE '%Nucleus%'
2020-01-23 15:29:29,982 INFO sqlalchemy.engine.base.Engine ()


[('AKT1_HUMAN', 'P31749', 'RAC-alpha serine/threonine-protein kinase       ', 'Protein kinase B, PKB, Protein kinase B alpha, PKB alpha, Proto-oncogene c-Akt, RAC-PK-alpha', 'AKT1', 'PKB, RAC', 'Protein kinase superfamily, AGC Ser/Thr protein kinase family, RAC subfamily', 'MSDVAIVKEGWLHKRGEYIKTWRPRYFLLKNDGTFIGYKERPQDVDQREAPLNNFSVAQCQLMKTERPRPNTFIIRCLQWTTVIERTFHVETPEEREEWTTAIQTVADGLKKQEEEEMDFRSGSPSDNSGAEEMEVSLAKPKHRVTMNE ... (182 characters truncated) ... WWGLGVVMYEMMCGRLPFYNQDHEKLFELILMEEIRFPRTLGPEAKSLLSGLLKKDPKQRLGGGSEDAKEIMQHRFFAGIVWQHVYEKKLSPPFKPQVTSETDTRYFDEEFTAQMITITPPDQDDSMECVDSERRPHFPQFSYSASGTA', '55,686', 'Cell membrane, Cytoplasm, Nucleus'),
 ('AKT2_HUMAN', 'P31751', 'RAC-beta serine/threonine-protein kinase     ', 'Protein kinase Akt-2, Protein kinase B beta, PKB beta, RAC-PK-beta', 'AKT2', None, 'Protein kinase superfamily, AGC Ser/Thr protein kinase family, RAC subfamily', 'MNEVSVIKEGWLHKRGEYIKTWRPRYFLLKSDGSFIGYKERPEAPDQTLPPLNNFSVAECQLMKTERPRPNTFVIRCLQWTTVIERTFHVDSPDEREEWM