# Paths: project directories

In [24]:
# Root folder holding every project on this machine. Each value keeps its
# trailing slash because the paths below are built by plain concatenation.
dir_pjs = "/home/martingb/Projects/"

# caid-reference project root
dir_main = dir_pjs + "2022/caid2-reference/"

# Data folders
dir_data = dir_main + "data/"
dir_data_lists = dir_data + "lists/"
dir_data_tables = dir_data + "tables/"

# Source folders
dir_src = dir_main + "src/"
dir_src_modules = dir_src + "modules/"

# Scratch folder
dir_tmp = dir_main + "tmp/"

# Imports

In [25]:
import os
import json
import requests
import pandas as pd
from pprint import pprint

# Functions

In [None]:
from typing import Any, Literal, get_args
from requests import RequestException
from enum import Enum

class Disprot_versions(Enum):
    """Release identifiers, listed from DATE10 down to DATE1.

    The values are the same strings that `get_disprot_release` accepts for
    its `version` argument (they look like ``YYYY_MM``).
    """

    DATE10 = "2022_06"
    DATE9  = "2022_03"
    DATE8  = "2021_12"
    DATE7  = "2021_08"
    DATE6  = "2021_06"
    DATE5  = "2020_12"
    DATE4  = "2020_06"
    DATE3  = "2019_09"
    DATE2  = "2018_11"
    DATE1  = "2016_10"

class Disprot_namespaces(Enum):
    """Namespace identifiers, listed from NS5 down to NS1.

    The values are the same strings that `get_disprot_release` accepts for
    its `namespace` argument.
    """

    NS5 = "structural_state"
    NS4 = "transition_state"
    NS3 = "interaction_partner"
    NS2 = "disorder_function"
    NS1 = "all"


# Accepted values for the `version`, `namespace` and `format` arguments.
# Declared once so that the type hints in the signature and the runtime
# validation inside the function cannot drift apart.
_DisprotVersion = Literal[
    "2022_06", "2022_03", "2021_12", "2021_08", "2021_06",
    "2020_12", "2020_06", "2019_09", "2018_11", "2016_10",
]
_DisprotNamespace = Literal[
    "structural_state", "transition_state",
    "interaction_partner", "disorder_function", "all",
]
_DisprotFormat = Literal["json", "fasta", "tsv"]


def get_disprot_release(
    version: _DisprotVersion,
    namespace: _DisprotNamespace,
    ambiguos: str = "false",
    obsolete: str = "false",
    format: _DisprotFormat = "json",
    consensus: str = "true",
) -> Any:
    """Get a DisProt release through the DisProt API.

    Sends a GET request to ``https://disprot.org/api/search`` and prints the
    final request URL. Example of a request URL for this endpoint::

        https://disprot.org/api/search?release=curator&show_ambiguous=false&show_obsolete=false&format=fasta&namespace=structural_state&get_consensus=true

    Parameters
    ----------
    version : str
        Release identifier, sent as the ``release`` query parameter.
    namespace : str
        Sent as the ``namespace`` query parameter.
    ambiguos : str
        Sent verbatim as ``show_ambiguous`` (the parameter name keeps its
        original spelling so existing keyword calls keep working).
    obsolete : str
        Sent verbatim as ``show_obsolete``.
    format : str
        One of "json", "fasta" or "tsv"; sent as ``format``. Defaults to "json".
    consensus : str
        Sent verbatim as ``get_consensus``.

    Returns
    -------
    The decoded JSON body when ``format == "json"``, otherwise the response
    body as text.

    Raises
    ------
    AssertionError
        If `version`, `namespace` or `format` is not one of the accepted values.
    RequestException
        If the server answers with a status code other than 200.

    Notes
    -----
    Related URL kept from the original notes:
    https://mobidb.bio.unipd.it/mobidb3_datasets/latest/derived_disorder.mjson.gz
    """
    # Validation keeps raising AssertionError, as callers may already rely on it.
    assert version   in get_args(_DisprotVersion), f"'{version}' not a valid value for version."
    assert namespace in get_args(_DisprotNamespace), f"'{namespace}' not a valid value for namespace."
    assert format    in get_args(_DisprotFormat), f"'{format}' not a valid value for format."

    server = "https://disprot.org/api"
    # No trailing "?": the query string is appended from `params`.
    endpoint = "search"
    url = f"{server}/{endpoint}"

    query = {
        "release": version,
        "show_ambiguous": ambiguos,
        "show_obsolete": obsolete,
        "format": format,
        "namespace": namespace,
        "get_consensus": consensus,
    }
    r = requests.get(url, params=query)

    print(r.url)

    # Fail loudly on a bad answer instead of parsing an error page as data.
    if r.status_code != 200:
        raise RequestException(f"Request error: {r.status_code}", response=r)

    return r.json() if format == "json" else r.text


def save_disprot_release_file(response_content: Any,
                              filename: str,
                              dir_output: str) -> None:
    """Save a downloaded DisProt release to ``dir_output/filename``.

    Parameters
    ----------
    response_content : Any
        A string is written with leading/trailing whitespace stripped.
        Anything else (dict, list, ...) is written as JSON text.
    filename : str
        Name of the file to create inside `dir_output`; an existing file is
        overwritten.
    dir_output : str
        Target directory, with or without a trailing slash. It is created
        (including parents) when missing.

    Raises
    ------
    TypeError
        If a non-string `response_content` cannot be encoded as JSON.
    """
    # Serialise first so a failure here cannot leave a truncated file behind.
    if isinstance(response_content, str):
        text = response_content.strip()
    else:
        text = json.dumps(response_content)

    # An empty dir_output means "current directory": nothing to create.
    if dir_output:
        os.makedirs(dir_output, exist_ok=True)

    path_output = os.path.join(dir_output, filename)
    with open(path_output, "w", encoding="utf-8") as f:
        f.write(text)


# Retrieve release file

In [27]:
# Release to download. Another value tried here: "2022_06".
aversion = "2018_11"

# Namespace to query. Other values tried here: "interaction_partner", "all".
anamespace = "structural_state"

# Output format. Other values tried here: "json", "fasta".
aformat = "tsv"

# Download the selected release; the function prints the request URL.
arelease = get_disprot_release(
    version=aversion,
    namespace=anamespace,
    format=aformat,
)

https://disprot.org/api/search?release=2018_11&show_ambiguous=false&show_obsolete=false&format=tsv&namespace=structural_state&get_consensus=true


## Explore the release

In [28]:
# Report the Python type of the downloaded release.
print(type(arelease))
# The "size" and "data" lookups below are only attempted when the release is a dict.
if isinstance(arelease, dict):
    print(f"data length: {arelease['size']}")
    #print(arelease["data"][0])
    # Pretty-print the entry at index 1 of "data" (the second one).
    pprint(arelease["data"][1])

<class 'str'>


# Save file

In [29]:
# Write the downloaded release into dir_tmp as
# "disprot-<version>-<namespace>.<format>".
save_disprot_release_file(
    response_content=arelease,
    filename=f"disprot-{aversion}-{anamespace}.{aformat}",
    dir_output=dir_tmp,
)