# ENVs

In [None]:
# Base directory; every path below is derived from it.
dir_pjs = "/home/martingb/Projects/"

# caid2-reference project tree. Each value keeps its trailing slash so that a
# file or folder name can be appended directly inside an f-string.
dir_main = f"{dir_pjs}2022/caid2-reference/"

# Data folders.
dir_data = f"{dir_main}data/"
dir_data_lists = f"{dir_data}lists/"
dir_data_tables = f"{dir_data}tables/"

# Source folders.
dir_src = f"{dir_main}src/"
dir_src_modules = f"{dir_src}modules/"

# Scratch folder (the "Save file" cell writes the downloaded release here).
dir_tmp = f"{dir_main}tmp/"

# Imports

In [131]:
import os
import json
import requests
import pandas as pd
from pprint import pprint

# Functions

In [106]:
from typing import Literal, get_args
from requests import RequestException
from enum import Enum

class Disprot_versions(Enum):
    """Release identifiers, listed from the highest DATE index down to DATE1.

    The values are the same strings that ``get_disprot_release`` accepts for
    its ``version`` argument (they look like ``<year>_<month>`` tags).
    """

    # 2022
    DATE10 = '2022_06'
    DATE9 = '2022_03'

    # 2021
    DATE8 = '2021_12'
    DATE7 = '2021_08'
    DATE6 = '2021_06'

    # 2020
    DATE5 = '2020_12'
    DATE4 = '2020_06'

    # 2019 and earlier
    DATE3 = '2019_09'
    DATE2 = '2018_11'
    DATE1 = '2016_10'

class Disprot_namespaces(Enum):
    """Namespace names, listed from NS5 down to NS1.

    The values are the same strings that ``get_disprot_release`` accepts for
    its ``namespace`` argument.
    """

    # Individual namespaces.
    NS5 = "structural_state"
    NS4 = "transition_state"
    NS3 = "interaction_partner"
    NS2 = "disorder_function"

    # Catch-all value.
    NS1 = "all"


def get_disprot_release( version  : Literal[ '2022_06'
                                           , '2022_03'
                                           , '2021_12'
                                           , '2021_08'
                                           , '2021_06'
                                           , '2020_12'
                                           , '2020_06'
                                           , '2019_09'
                                           , '2018_11'
                                           , '2016_10' ]
                       , namespace: Literal[ "structural_state"
                                           , "transition_state"
                                           , "interaction_partner"
                                           , "disorder_function"
                                           , "all" ]
                       , ambiguos : str='false'
                       , obsolete : str='false'
                       , format   : Literal[ 'json'
                                           , 'fasta'
                                           , 'tsv' ] = "json"
                       , consensus: str='true' ):

    """
    Download a DisProt release through the DisProt search API.

    Example of a request built by this function:
    https://disprot.org/api/search?release=curator&show_ambiguous=false&show_obsolete=false&format=fasta&namespace=structural_state&get_consensus=true

    Related dataset noted alongside the original implementation:
    https://mobidb.bio.unipd.it/mobidb3_datasets/latest/derived_disorder.mjson.gz

    Parameters
    ----------
    version : str
        Release identifier, sent as the ``release`` query parameter.
    namespace : str
        Sent as the ``namespace`` query parameter.
    ambiguos : str
        Sent verbatim as ``show_ambiguous`` (``'true'`` / ``'false'``).
    obsolete : str
        Sent verbatim as ``show_obsolete`` (``'true'`` / ``'false'``).
    format : str
        ``'json'`` (default), ``'fasta'`` or ``'tsv'``.
    consensus : str
        Sent verbatim as ``get_consensus`` (``'true'`` / ``'false'``).

    Returns
    -------
    The decoded JSON body (``r.json()``) when ``format == 'json'``; otherwise
    the response body as text (``r.text``), i.e. for ``'fasta'`` and ``'tsv'``.

    Raises
    ------
    AssertionError
        If ``version``, ``namespace`` or ``format`` is not one of the accepted
        values. The check runs before any network access.
    RequestException
        If the server answers with a status code other than 200.
    """
    versions   = Literal[ '2022_06', '2022_03', '2021_12', '2021_08', '2021_06'
                        , '2020_12', '2020_06', '2019_09', '2018_11', '2016_10' ]
    namespaces = Literal[ "structural_state", "transition_state"
                        , "interaction_partner", "disorder_function", "all" ]
    formats    = Literal[ 'json', 'fasta', 'tsv' ]

    # Literal annotations are not enforced at runtime, so validate explicitly.
    assert version   in get_args(versions), f"'{version}' not a valid value for version."
    assert namespace in get_args(namespaces), f"'{namespace}' not a valid value for namespace."
    assert format    in get_args(formats), f"'{format}' not a valid value for format."

    url = "https://disprot.org/api/search?"
    r = requests.get(url, params = { 'release'        : version
                                   , 'show_ambiguous' : ambiguos
                                   , 'show_obsolete'  : obsolete
                                   , 'format'         : format
                                   , 'namespace'      : namespace
                                   , 'get_consensus'  : consensus } )

    # Echo the final URL so the notebook output records exactly what was fetched.
    print(r.url)

    # requests does not raise on HTTP error statuses by itself; the status has
    # to be inspected explicitly, otherwise an error page would be parsed below
    # as if it were release data.
    if r.status_code != 200:
        raise RequestException(f'Request error: {r.status_code}', response=r)

    if format == "json":
        return r.json()

    # 'fasta' and 'tsv' are both returned as plain text.
    return r.text


# Load release file

In [133]:
# Parameters of the release to download. The trailing comments keep the
# alternative values that can be swapped in.
aversion = '2018_11'             # alternative: '2022_06'
anamespace = 'structural_state'  # alternatives: 'interaction_partner', 'all'
aformat = 'json'                 # alternative: 'fasta'

# Fetch the release; the function prints the URL it requested.
arelease = get_disprot_release(
    version=aversion,
    namespace=anamespace,
    format=aformat,
)
#print(type(arelease))

https://disprot.org/api/search?release=2018_11&show_ambiguous=false&show_obsolete=false&format=json&namespace=structural_state&get_consensus=true


# Save file

In [128]:
# Build the text to store before touching the file system: opening the file in
# 'w' mode truncates it, so serialising first means a failure here cannot leave
# an empty file behind.
if isinstance(arelease, dict):
    release_text = json.dumps(arelease)
else:
    release_text = arelease.strip()

# write() is the right call for a single string (writelines() would iterate it
# character by character). The explicit encoding keeps the output independent
# of the platform's default locale.
with open(f'{dir_tmp}disprot-{aversion}-{anamespace}.{aformat}', 'w', encoding='utf-8') as f:
    f.write(release_text)

In [139]:
# Print the release's 'size' field (presumably the number of entries - confirm
# against the API documentation).
print(f"data length: {arelease['size']}")
# Pretty-print the record at index 1 of 'data' to inspect the per-entry layout;
# the commented line below shows the record at index 0 instead.
#print(arelease['data'][0])
pprint(arelease['data'][1])

data length: 1391
{'acc': 'P03045',
 'creator': 'aschramm',
 'date': '2016-09-07T19:11:25.000Z',
 'disorder_content': 1,
 'disprot_consensus': {'Interaction partner': [{'end': 107, 'start': 1}],
                       'Structural state': [],
                       'Structural transition': [{'end': 107,
                                                  'start': 1,
                                                  'type': 'T'}],
                       'full': [{'end': 107, 'start': 1, 'type': 'T'}]},
 'disprot_id': 'DP00005',
 'features': {'pfam': [{'end': 36,
                        'id': 'PF11438',
                        'name': 'Antitermination protein N, arginine-rich '
                                'motif',
                        'start': 2}]},
 'length': 107,
 'name': 'Antitermination protein N',
 'ncbi_taxon_id': 10710,
 'organism': 'Escherichia phage lambda',
 'regions': [{'curator_id': 'aschramm',
              'curator_name': 'Antoine Schramm',
              'date': '2016-0