In [37]:
import sys
import os
import json

# Make the Composer package importable. Per the layout this notebook assumes,
# Composer sits inside <project_root>/src, and <project_root> is two directory
# levels above the current working directory.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(os.getcwd())))
src_path = os.path.join(project_root, "src")
# Guard the append so that re-running this cell does not pile up duplicate
# entries on sys.path.
if src_path not in sys.path:
    sys.path.append(src_path)

from Composer import uniprot_utils
from Composer import protein_utils

Demo: get_protein_info

In [38]:
# Demo of get_protein_info: look up one accession and print whatever the
# helper returns as indented JSON.
example_id = "P01308"  # Human insulin
protein_data = uniprot_utils.get_protein_info(example_id)

# Header lines first (the second one ends with an extra blank line) ...
print(
    f"Protein ID: {example_id}",
    "Full UniProtKB entry (formatted JSON):\n",
    sep="\n",
)
# ... then the entry itself; ensure_ascii=False keeps non-ASCII text readable.
print(json.dumps(protein_data, indent=4, ensure_ascii=False))

Protein ID: P01308
Full UniProtKB entry (formatted JSON):

{
    "entryType": "UniProtKB reviewed (Swiss-Prot)",
    "primaryAccession": "P01308",
    "secondaryAccessions": [
        "Q5EEX2"
    ],
    "uniProtkbId": "INS_HUMAN",
    "entryAudit": {
        "firstPublicDate": "1986-07-21",
        "lastAnnotationUpdateDate": "2025-06-18",
        "lastSequenceUpdateDate": "1986-07-21",
        "entryVersion": 281,
        "sequenceVersion": 1
    },
    "annotationScore": 5.0,
    "organism": {
        "scientificName": "Homo sapiens",
        "commonName": "Human",
        "taxonId": 9606,
        "lineage": [
            "Eukaryota",
            "Metazoa",
            "Chordata",
            "Craniata",
            "Vertebrata",
            "Euteleostomi",
            "Mammalia",
            "Eutheria",
            "Euarchontoglires",
            "Primates",
            "Haplorrhini",
            "Catarrhini",
            "Hominidae",
            "Homo"
        ]
    },
    "protei

Demo: get_protein_fields

In [46]:
# Demo of get_protein_fields: request a single field selector for one
# accession and print the returned entry as indented JSON.
example_id = "P69905"  # Human Hemoglobin subunit alpha
fields = "go_f"

protein_data = uniprot_utils.get_protein_fields(example_id, fields)

# Header lines first (the second one ends with an extra blank line) ...
print(
    f"Protein ID: {example_id}",
    "Selected fields from UniProtKB entry (formatted JSON):\n",
    sep="\n",
)
# ... then the filtered entry; ensure_ascii=False keeps non-ASCII text readable.
print(json.dumps(protein_data, indent=4, ensure_ascii=False))

Protein ID: P69905
Selected fields from UniProtKB entry (formatted JSON):

{
    "entryType": "UniProtKB reviewed (Swiss-Prot)",
    "primaryAccession": "P69905",
    "uniProtKBCrossReferences": [
        {
            "database": "GO",
            "id": "GO:0072562",
            "properties": [
                {
                    "key": "GoTerm",
                    "value": "C:blood microparticle"
                },
                {
                    "key": "GoEvidenceType",
                    "value": "HDA:UniProtKB"
                }
            ],
            "evidences": [
                {
                    "evidenceCode": "ECO:0007005",
                    "source": "PubMed",
                    "id": "22516433"
                }
            ]
        },
        {
            "database": "GO",
            "id": "GO:0005829",
            "properties": [
                {
                    "key": "GoTerm",
                    "value": "C:cytosol"
                },
    

Demo: list_protein_keys

Demo: get_protein_key_value

In [41]:
# Demo of get_protein_key_value: the three lookups below all query the same
# accession and differ only in the key path they ask for.
accession = "A0A0C5B5G6"

# Top-level key -> the value printed is a whole dict.
audit_info = uniprot_utils.get_protein_key_value(accession, "entryAudit")
print("Entry audit info:", audit_info, sep="\n")

# Dotted key path -> the value printed is a list.
evidences_list = uniprot_utils.get_protein_key_value(accession, "organism.evidences")
print("\nEvidences list:", evidences_list, sep="\n")

# Dotted key path -> the value printed is a plain string.
sci_name = uniprot_utils.get_protein_key_value(accession, "organism.scientificName")
print("\nScientific name:", sci_name, sep="\n")

Entry audit info:
{'firstPublicDate': '2016-04-13', 'lastAnnotationUpdateDate': '2025-06-18', 'lastSequenceUpdateDate': '2015-04-29', 'entryVersion': 29, 'sequenceVersion': 1}

Evidences list:
[{'evidenceCode': 'ECO:0000312', 'source': 'EMBL', 'id': 'AJM13597.1'}]

Scientific name:
Homo sapiens
