Skip to content

Commit

Permalink
added new column in uniprot_nosql
Browse files Browse the repository at this point in the history
  • Loading branch information
lzy7071 committed Feb 3, 2020
1 parent e50aefb commit 5e22f6b
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 3 deletions.
2 changes: 1 addition & 1 deletion datanator/data_source/gene_ortholog.py
Original file line number Diff line number Diff line change
@@ -187,7 +187,7 @@ def main():
)['datanator']['mongodb']['server']
manager = KeggGeneOrtholog(server, collection_str=collection_str, username=username,
password=password, des_db=des_db)
- manager.load_data(skip=157723)
+ manager.load_data(skip=158673)

if __name__ == '__main__':
main()
5 changes: 3 additions & 2 deletions datanator/data_source/uniprot_nosql.py
Original file line number Diff line number Diff line change
@@ -49,7 +49,7 @@ def load_uniprot(self, query=False, msg='', species=None):
msg (:obj:`str`, optional): Query message. Defaults to ''.
species (:obj:`list`, optional): species information to extract from df and loaded into uniprot. Defaults to None.
"""
- fields = '&columns=id,entry name,genes(PREFERRED),protein names,sequence,length,mass,ec,database(GeneID),reviewed,organism-id,database(KO),genes(ALTERNATIVE),genes(ORF),genes(OLN),database(EMBL),database(RefSeq)'
+ fields = '&columns=id,entry name,genes(PREFERRED),protein names,sequence,length,mass,ec,database(GeneID),reviewed,organism-id,database(KO),genes(ALTERNATIVE),genes(ORF),genes(OLN),database(EMBL),database(RefSeq),Cross-reference (KEGG)'
if not query:
url = self.url + fields
else:
@@ -76,9 +76,10 @@ def load_uniprot(self, query=False, msg='', species=None):
data.columns = [
'uniprot_id', 'entry_name', 'gene_name', 'protein_name', 'canonical_sequence', 'length', 'mass',
'ec_number', 'entrez_id', 'status', 'ncbi_taxonomy_id', 'ko_number', 'gene_name_alt',
- 'gene_name_orf', 'gene_name_oln', 'sequence_embl', 'sequence_refseq'
+ 'gene_name_orf', 'gene_name_oln', 'sequence_embl', 'sequence_refseq', 'kegg_org_gene'
]
data['entrez_id'] = data['entrez_id'].astype(str).str.replace(';', '')
+ data['kegg_org_gene'] = data['kegg_org_gene'].astype(str).str.replace(';', '')

try:
data['mass'] = data['mass'].str.replace(',', '')
Expand Down

0 comments on commit 5e22f6b

Please sign in to comment.