Skip to content

Commit

Permalink
addition of intact complex database
Browse files Browse the repository at this point in the history
  • Loading branch information
saapooch committed Nov 21, 2017
1 parent 5abcb08 commit 164541d
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 19 deletions.
100 changes: 84 additions & 16 deletions kinetic_datanator/data_source/intact.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
from six.moves.urllib.request import urlretrieve
import zipfile
from six import BytesIO
from ftplib import FTP
import os


Base = sqlalchemy.ext.declarative.declarative_base()

Expand Down Expand Up @@ -39,16 +42,81 @@ class ProteinInteractions(Base):
stoich_b = Column(String(255))
interaction_type = Column(String(255))

class ProteinComplex(Base):
    """ Represents protein complexes from the IntAct Database

    Each attribute is filled from one column of the IntAct complextab files
    (the source column name is given in quotes).

    Attributes:
        identifier (:obj:`str`): value of the '#Complex ac' column; primary key
        name (:obj:`str`): value of the 'Recommended name' column
        ncbi (:obj:`str`): value of the 'Taxonomy identifier' column
        subunits (:obj:`str`): value of the 'Identifiers (and stoichiometry) of
            molecules in complex' column
        evidence (:obj:`str`): value of the 'Experimental evidence' column
        go_annot (:obj:`str`): value of the 'Go Annotations' column
        desc (:obj:`str`): value of the 'Description' column
        source (:obj:`str`): value of the 'Source' column
    """
    __tablename__ = 'Protein_Complex'

    # Key column
    identifier = Column(String(255), primary_key=True)

    # Descriptive columns, declared in the order they are stored
    name = Column(String(255))
    ncbi = Column(String(255))
    subunits = Column(String(255))
    evidence = Column(String(255))
    go_annot = Column(String(255))
    desc = Column(String(255))
    source = Column(String(255))

class IntAct(data_source.HttpDataSource):
""" A local SQLite copy of the IntAct Database"""
base_model = Base

ENDPOINT_DOMAINS = {'intact' : 'ftp://ftp.ebi.ac.uk/pub/databases/intact/current/psimitab/intact.zip'}
ENDPOINT_DOMAINS = {'intact' : 'ftp://ftp.ebi.ac.uk/pub/databases/intact/current/psimitab/intact.zip',
'complex' : 'ftp://ftp.ebi.ac.uk/pub/databases/intact/complex/current/complextab/'}


def load_content(self):
    """ Download the content from the FTP server and load it into the local database

    The protein complexes are added first, followed by the protein interactions.
    """
    self.add_complex()
    self.add_interactions()

def add_complex(self):
    """ Download the IntAct complex tables and append them to the ``Protein_Complex`` table

    The files of the complextab directory of the EBI FTP server are saved to
    ``<cache_dirname>/intact_complex``. The download is skipped when the first
    file of the server listing is already present in that directory. Every
    cached file whose name does not contain ``README`` is then read as
    tab-separated text, reduced to the columns of interest, renamed to the
    attribute names of the ``Protein_Complex`` table and appended to it.

    Raises:
        :obj:`Exception`: any error raised while listing or downloading the
            files is re-raised after the partially written file has been
            removed and the FTP connection has been closed
    """
    complex_dirname = os.path.join(self.cache_dirname, 'intact_complex')
    if not os.path.exists(complex_dirname):
        os.makedirs(complex_dirname)

    ftp = FTP('ftp.ebi.ac.uk')
    try:
        ftp.login()
        ftp.cwd('/pub/databases/intact/complex/current/complextab/')
        filenames = ftp.nlst()

        # The first listed file acts as the "already downloaded" marker; an
        # empty listing means there is nothing to fetch and nothing to index
        if filenames and not os.path.exists(os.path.join(complex_dirname, filenames[0])):
            for filename in filenames:
                local_filename = os.path.join(complex_dirname, filename)
                try:
                    with open(local_filename, 'wb') as local_file:
                        ftp.retrbinary('RETR ' + filename, local_file.write)
                except Exception:
                    # Do not leave a truncated file in the cache: it would be
                    # parsed below and, if it is the marker file, it would
                    # suppress the download on the next run
                    if os.path.exists(local_filename):
                        os.remove(local_filename)
                    raise
    except Exception:
        # The session state is unknown, so drop the connection without QUIT
        ftp.close()
        raise
    else:
        ftp.quit()

    # Columns of the complextab files that are kept ...
    columns = ['#Complex ac', 'Recommended name', 'Taxonomy identifier',
               'Identifiers (and stoichiometry) of molecules in complex', 'Experimental evidence',
               'Go Annotations', 'Description', 'Source']

    # ... and the names they are stored under, in the same order
    new_columns = ['identifier', 'name', 'ncbi', 'subunits', 'evidence', 'go_annot', 'desc', 'source']

    for tsv in os.listdir(complex_dirname):
        if 'README' in tsv:
            continue
        dt = pd.read_csv(os.path.join(complex_dirname, tsv), delimiter='\t', encoding='utf-8')
        pand = dt.loc[:, columns]
        pand.columns = new_columns
        pand = pand.set_index('identifier')
        pand.to_sql(name='Protein_Complex', con=self.engine, if_exists='append')
    self.session.commit()


def add_interactions(self):

if not self.cache_dirname + '/intact.txt':
path = urlretrieve(self.ENDPOINT_DOMAINS['intact'])
zipped = zipfile.ZipFile(BytesIO(path[0]))
Expand All @@ -70,18 +138,18 @@ def load_content(self):
pand.to_sql(name = 'Protein_Interactions', con=self.engine, if_exists = 'replace', chunksize = 1000)
self.session.commit()

# column_list = ["#ID(s) interactor A") , "ID(s) interactor B"), "Alt. ID(s) interactor A"),\
# "Alt. ID(s) interactor B"), "Alias(es) interactor A"), "Alias(es) interactor B"),\
# "Interaction detection method(s)"), "Publication 1st author(s)"), "Publication Identifier(s)"),\
# "Taxid interactor A"), "Taxid interactor B"), "Interaction type(s)"), "Source database(s)"),\
# "Interaction identifier(s)"), "Confidence value(s)"), "Expansion method(s)"), \
# "Biological role(s) interactor A"), "Biological role(s) interactor B"), \
# "Experimental role(s) interactor A"), "Experimental role(s) interactor B"), \
# "Type(s) interactor A"), "Type(s) interactor B"), "Xref(s) interactor A"),\
# "Xref(s) interactor B"), "Interaction Xref(s)"), "Annotation(s) interactor A"),\
# "Annotation(s) interactor B"), "Interaction annotation(s)"), "Host organism(s)"), \
# "Interaction parameter(s)"), "Creation date"), "Update date"), \
# "Checksum(s) interactor A"), "Checksum(s) interactor B"), "Interaction Checksum(s)"),\
# "Negative"), "Feature(s) interactor A"), "Feature(s) interactor B"),\
# "Stoichiometry(s) interactor A"), "Stoichiometry(s) interactor B"), "Identification method participant A"), \
# "Identification method participant B")]
# column_list = ["#ID(s) interactor A") , "ID(s) interactor B"), "Alt. ID(s) interactor A"),\
# "Alt. ID(s) interactor B"), "Alias(es) interactor A"), "Alias(es) interactor B"),\
# "Interaction detection method(s)"), "Publication 1st author(s)"), "Publication Identifier(s)"),\
# "Taxid interactor A"), "Taxid interactor B"), "Interaction type(s)"), "Source database(s)"),\
# "Interaction identifier(s)"), "Confidence value(s)"), "Expansion method(s)"), \
# "Biological role(s) interactor A"), "Biological role(s) interactor B"), \
# "Experimental role(s) interactor A"), "Experimental role(s) interactor B"), \
# "Type(s) interactor A"), "Type(s) interactor B"), "Xref(s) interactor A"),\
# "Xref(s) interactor B"), "Interaction Xref(s)"), "Annotation(s) interactor A"),\
# "Annotation(s) interactor B"), "Interaction annotation(s)"), "Host organism(s)"), \
# "Interaction parameter(s)"), "Creation date"), "Update date"), \
# "Checksum(s) interactor A"), "Checksum(s) interactor B"), "Interaction Checksum(s)"),\
# "Negative"), "Feature(s) interactor A"), "Feature(s) interactor B"),\
# "Stoichiometry(s) interactor A"), "Stoichiometry(s) interactor B"), "Identification method participant A"), \
# "Identification method participant B")]
12 changes: 9 additions & 3 deletions tests/data_source/test_intact.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,19 @@ def setUpClass(self):
def tearDownClass(self):
shutil.rmtree(self.cache_dirname)

def test_loading(self):

def test_add_complex(self):
    """ Look up the complex 'EBI-1256672' by primary key and check its stored fields """
    complex_query = self.intact.session.query(intact.ProteinComplex)
    ino80 = complex_query.get('EBI-1256672')

    self.assertEqual(ino80.name, 'INO80 chromatin remodeling complex')
    self.assertEqual(ino80.ncbi, '559292')
    self.assertEqual(ino80.evidence, 'intact:EBI-1212520')


def test_add_interactions(self):
    """ Check the number of interactions of one interactor and the fields of another """
    # Number of rows whose first interactor is P27986
    p27986_rows = self.intact.session.query(intact.ProteinInteractions).filter_by(
        interactor_a='uniprotkb:P27986')
    self.assertEqual(p27986_rows.count(), 274)

    # First row whose first interactor is Q61824
    q61824_rows = self.intact.session.query(intact.ProteinInteractions).filter_by(
        interactor_a='uniprotkb:Q61824')
    first_row = q61824_rows.first()
    self.assertEqual(first_row.interactor_b, 'uniprotkb:Q60631')
    self.assertEqual(first_row.publications, 'pubmed:11127814|mint:MINT-5213342')

#TODO: Add more Tests

0 comments on commit 164541d

Please sign in to comment.