In [8]:
from unipressed import *
import pandas as pd
import json
#https://rest.uniprot.org/uniprotkb/P02766

In [None]:
import pandas as pd
from IPython.display import display

def procesar_uniprot(uniProtID):
    data = UniprotkbClient.fetch_one(uniProtID, parse=True)

    # 1. Función
    df_function = pd.DataFrame([{
        "Function": txt.get("value", ""),
        "EvidenceCode": ev.get("evidenceCode", "⚠️ No se han encontrado datos."),
        "QuickGO": f"https://www.ebi.ac.uk/QuickGO/term/{ev['evidenceCode']}" if ev.get("evidenceCode") else "⚠️ No se han encontrado datos.",
        "Source": ev.get("source","⚠️ No se han encontrado datos."),
        "PublicationID": ev.get("id","⚠️ No se han encontrado datos."),
        "PubMed": f"https://pubmed.ncbi.nlm.nih.gov/{ev['id']}" if ev.get("id") else "⚠️ No se han encontrado datos.",

    } for comment in data.get("comments", []) if comment.get("commentType") == "FUNCTION"
      for txt in comment.get("texts", [])
      for ev in txt.get("evidences", [{}])
      ])

    # 2. Subcellular Location (desnormalizado)

    df_subcellular = pd.DataFrame([{
        "Value": loc.get("location", {}).get("value", ""),
        "ID": loc.get("location", {}).get("id", "")
    }
    for comment in data.get("comments", []) if comment.get("commentType") == "SUBCELLULAR LOCATION"
    for loc in comment.get("subcellularLocations", [])
    ])

    df_subcellular_godata = pd.DataFrame([
        {
            "GO_ID": reference.get("id", ""),
            "GO_TERM and Evidence": f"{go_term} ({evidence})" if go_term else "",
            "Link source": f"https://www.ebi.ac.uk/QuickGO/term/{reference['id']}"
        }
        for reference in data.get("uniProtKBCrossReferences", [])
        if reference.get("database") == "GO"
        for go_term, evidence in [(
            next((p["value"] for p in reference.get("properties", []) if p.get("key") == "GoTerm"), ""),
            next((p["value"] for p in reference.get("properties", []) if p.get("key") == "GoEvidenceType"), "")
        )]
    ])


    # 3. Enfermedades (desnormalizado)
    df_disease = pd.DataFrame([{

    "Nombre": comment.get("disease", {}).get("diseaseId"),
    "Acronym": comment.get("disease", {}).get("acronym"),
    "Description": comment.get("disease", {}).get("description"),
    "OMIM": f"https://www.omim.org/entry/"+comment.get("disease", {}).get("diseaseCrossReference", {}).get("id"),
    "Publications": ", ".join(ev.get("id") for ev in comment.get("disease", {}).get("evidences", []) if ev.get("id"))
    } for comment in data.get("comments", []) if comment.get("commentType") == "DISEASE"
    ])

    df_disease_publications = pd.DataFrame([{
    "DiseaseID": comment.get("disease", {}).get("diseaseId"),  # Clave foránea
    "PubMed": f"https://pubmed.ncbi.nlm.nih.gov/"+ev.get("id")
    } for comment in data.get("comments", []) if comment.get("commentType") == "DISEASE"
    for ev in comment.get("disease", {}).get("evidences", [])])

    # 4. Variantes (desnormalizado)
    df_variants = pd.DataFrame({
        "Description": [feature.get("description")
                        for feature in data.get("features", [])
                        if feature.get("type") == "Natural variant"],
        "Publications": [", ".join(ev.get("id") for ev in feature.get("evidences", [])
                         if ev.get("id"))
                         for feature in data.get("features", [])
                         if feature.get("type") == "Natural variant"]
    })

    # 5. Interacciones (desnormalizado)
    df_interactions = pd.DataFrame([{
        "Interactor": inter.get("interactantTwo", {}).get("uniProtKBAccession"),
        "GeneName": inter.get("interactantTwo", {}).get("geneName"),
        "NumExperiments": inter.get("numberOfExperiments")
    } for comment in data.get("comments", []) if comment.get("commentType") == "INTERACTION"
      for inter in comment.get("interactions", [])

    ]).sort_values(by="NumExperiments", ascending=False)

    # Mostrar los DataFrames
    mostrar_dataframe("1. Función", df_function)
    mostrar_dataframe("2.1. Subcellular Location", df_subcellular)
    mostrar_dataframe("2.2. Subcellular Location", df_subcellular_godata)
    mostrar_dataframe("3. Enfermedades", df_disease)
    for name, df in df_disease_publications.groupby("DiseaseID"):
      mostrar_dataframe(f"## Publicaciones enfermedad: {name}", df)
      display(Markdown("---"))
    mostrar_dataframe("4. Variantes", df_variants)
    mostrar_dataframe("5. Interacciones con: ", df_interactions)

    # Esto sería para devolver los dataframes para cuando desacomplemos la interfaz
    # return {
      #  "Function": df_function,
      #  "Subcellular Location": df_subcellular,
      #  "Disease": df_disease,
      #  "Variants": df_variants,
      #  "Interactions": df_interactions
    #}


In [3]:
from IPython.display import display, Markdown
def mostrar_dataframe(titulo, df):
      """ Muestra un DataFrame con título o un mensaje si está vacío """
      display(Markdown(f"## {titulo}"))
      if df.empty:
          display(Markdown("> ⚠️ No se han encontrado datos."))
      else:
          display(Markdown(df.to_markdown(index=False)))

In [4]:
#display(Markdown("# Transthyretin: P02766"))
#procesar_uniprot("P02766")

display(Markdown("# FANCA_HUMAN: O15360"))
procesar_uniprot("O15360")

# FANCA_HUMAN: O15360

## 1. Función

| Function                                                                                                                                                                                                        | EvidenceCode                   | QuickGO                        | Source                         | PublicationID                  | PubMed                         |
|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------|:-------------------------------|:-------------------------------|:-------------------------------|:-------------------------------|
| DNA repair protein that may operate in a postreplication repair or a cell cycle checkpoint function. May be involved in interstrand DNA cross-link repair and in the maintenance of normal chromosome stability | ⚠️ No se han encontrado datos. | ⚠️ No se han encontrado datos. | ⚠️ No se han encontrado datos. | ⚠️ No se han encontrado datos. | ⚠️ No se han encontrado datos. |

## 2.1. Subcellular Location

| Value     | ID      |
|:----------|:--------|
| Nucleus   | SL-0191 |
| Cytoplasm | SL-0086 |

## 2.2. Subcellular Location

| GO_ID      | GO_TERM and Evidence                                               | Link source                                   |
|:-----------|:-------------------------------------------------------------------|:----------------------------------------------|
| GO:0000785 | C:chromatin (IDA:ComplexPortal)                                    | https://www.ebi.ac.uk/QuickGO/term/GO:0000785 |
| GO:0005737 | C:cytoplasm (TAS:ProtInc)                                          | https://www.ebi.ac.uk/QuickGO/term/GO:0005737 |
| GO:0005829 | C:cytosol (TAS:Reactome)                                           | https://www.ebi.ac.uk/QuickGO/term/GO:0005829 |
| GO:0043240 | C:Fanconi anaemia nuclear complex (IDA:UniProtKB)                  | https://www.ebi.ac.uk/QuickGO/term/GO:0043240 |
| GO:0005654 | C:nucleoplasm (IDA:HPA)                                            | https://www.ebi.ac.uk/QuickGO/term/GO:0005654 |
| GO:0005634 | C:nucleus (IDA:BHF-UCL)                                            | https://www.ebi.ac.uk/QuickGO/term/GO:0005634 |
| GO:0006281 | P:DNA repair (TAS:ProtInc)                                         | https://www.ebi.ac.uk/QuickGO/term/GO:0006281 |
| GO:0008585 | P:female gonad development (IEA:Ensembl)                           | https://www.ebi.ac.uk/QuickGO/term/GO:0008585 |
| GO:0036297 | P:interstrand cross-link repair (NAS:ComplexPortal)                | https://www.ebi.ac.uk/QuickGO/term/GO:0036297 |
| GO:0008584 | P:male gonad development (IEA:Ensembl)                             | https://www.ebi.ac.uk/QuickGO/term/GO:0008584 |
| GO:0007140 | P:male meiotic nuclear division (IEA:Ensembl)                      | https://www.ebi.ac.uk/QuickGO/term/GO:0007140 |
| GO:0065003 | P:protein-containing complex assembly (TAS:ProtInc)                | https://www.ebi.ac.uk/QuickGO/term/GO:0065003 |
| GO:2000348 | P:regulation of CD40 signaling pathway (IEA:Ensembl)               | https://www.ebi.ac.uk/QuickGO/term/GO:2000348 |
| GO:1905936 | P:regulation of germ cell proliferation (IEA:Ensembl)              | https://www.ebi.ac.uk/QuickGO/term/GO:1905936 |
| GO:0050727 | P:regulation of inflammatory response (IEA:Ensembl)                | https://www.ebi.ac.uk/QuickGO/term/GO:0050727 |
| GO:0045589 | P:regulation of regulatory T cell differentiation (IBA:GO_Central) | https://www.ebi.ac.uk/QuickGO/term/GO:0045589 |

## 3. Enfermedades

| Nombre                                  | Acronym   | Description                                                                                                                                                                                                                                                                                                                                                                                                                 | OMIM                              | Publications                                                                          |
|:----------------------------------------|:----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------|:--------------------------------------------------------------------------------------|
| Fanconi anemia, complementation group A | FANCA     | A disorder affecting all bone marrow elements and resulting in anemia, leukopenia and thrombopenia. It is associated with cardiac, renal and limb malformations, dermal pigmentary changes, and a predisposition to the development of malignancies. At the cellular level it is associated with hypersensitivity to DNA-damaging agents, chromosomal instability (increased chromosome breakage) and defective DNA repair. | https://www.omim.org/entry/227650 | 10094191, 10210316, 10521298, 10807541, 11091222, 17924555, 9371798, 9399890, 9929978 |

## ## Publicaciones enfermedad: Fanconi anemia, complementation group A

| DiseaseID                               | PubMed                                   |
|:----------------------------------------|:-----------------------------------------|
| Fanconi anemia, complementation group A | https://pubmed.ncbi.nlm.nih.gov/10094191 |
| Fanconi anemia, complementation group A | https://pubmed.ncbi.nlm.nih.gov/10210316 |
| Fanconi anemia, complementation group A | https://pubmed.ncbi.nlm.nih.gov/10521298 |
| Fanconi anemia, complementation group A | https://pubmed.ncbi.nlm.nih.gov/10807541 |
| Fanconi anemia, complementation group A | https://pubmed.ncbi.nlm.nih.gov/11091222 |
| Fanconi anemia, complementation group A | https://pubmed.ncbi.nlm.nih.gov/17924555 |
| Fanconi anemia, complementation group A | https://pubmed.ncbi.nlm.nih.gov/9371798  |
| Fanconi anemia, complementation group A | https://pubmed.ncbi.nlm.nih.gov/9399890  |
| Fanconi anemia, complementation group A | https://pubmed.ncbi.nlm.nih.gov/9929978  |

---

## 4. Variantes

| Description                                   | Publications                                                 |
|:----------------------------------------------|:-------------------------------------------------------------|
| in dbSNP:rs1800282                            |                                                              |
| in FANCA; benign; dbSNP:rs76275444            | 9371798                                                      |
| in dbSNP:rs34491278                           |                                                              |
| in dbSNP:rs35566151                           |                                                              |
| in FANCA; benign; dbSNP:rs17232246            | 9371798                                                      |
| in FANCA; dbSNP:rs2040601073                  | 17924555                                                     |
| in FANCA; dbSNP:rs2040522671                  | 9371798                                                      |
| in FANCA; benign; dbSNP:rs17225943            | 9371798                                                      |
| in dbSNP:rs7190823                            | 15489334, 18987736, 9399890, Ref.4                           |
| in dbSNP:rs35880318                           |                                                              |
| in dbSNP:rs13336566                           |                                                              |
| in dbSNP:rs11646374                           | 18987736                                                     |
| in FANCA; dbSNP:rs148473140                   | 9371798                                                      |
| in FANCA; dbSNP:rs925457555                   | 9371798                                                      |
| in dbSNP:rs2239359                            | 18987736, 8896563, 9169126, 9371798, 9399890, 9721219, Ref.4 |
| in FANCA; dbSNP:rs2039605345                  | 10094191, 10521298                                           |
| in dbSNP:rs17232910                           |                                                              |
| in FANCA; dbSNP:rs1567621042                  | 17924555                                                     |
| in dbSNP:rs1131660                            | 14702039, 8896563                                            |
| in dbSNP:rs45441106                           | 9371798                                                      |
| in dbSNP:rs2039276663                         | 17924555                                                     |
| in dbSNP:rs7195066                            | 18987736, 9371798, 9399890, Ref.4                            |
| in FANCA; dbSNP:rs1307805145                  | 9371798                                                      |
| in FANCA; dbSNP:rs374030577                   | 17924555                                                     |
| in FANCA; dbSNP:rs1173704265                  | 9371798                                                      |
| in FANCA; dbSNP:rs17233141                    | 10094191, 11091222                                           |
| in FANCA; dbSNP:rs780825099                   | 17924555                                                     |
| in dbSNP:rs755922289                          | 17924555                                                     |
| in dbSNP:rs755546887                          | 17924555                                                     |
| in FANCA; dbSNP:rs1429943036                  | 9371798                                                      |
| in FANCA; dbSNP:rs753063086                   | 9929978                                                      |
| in FANCA                                      | 10807541                                                     |
| in FANCA; benign; dbSNP:rs17233497            | 10094191, 18987736                                           |
| in FANCA; loss of function; dbSNP:rs752837228 | 10210316, 10521298                                           |
| in FANCA; dbSNP:rs149277003                   | 10210316, 9371798                                            |
| in FANCA; dbSNP:rs1439817346                  | 9371798                                                      |
| in FANCA; dbSNP:rs574034197                   | 17924555, 9371798                                            |
| in FANCA; dbSNP:rs753316789                   | 17924555                                                     |
| in FANCA; dbSNP:rs1555534579                  | 10521298                                                     |
| in FANCA                                      | 10521298, 9371798                                            |
| in dbSNP:rs17227354                           |                                                              |
| in FANCA; dbSNP:rs878853665                   | 9371798                                                      |
| in FANCA; dbSNP:rs182657062                   | 10521298, 17924555                                           |
| in dbSNP:rs9282681                            | 9371798                                                      |
| in FANCA; likely benign; dbSNP:rs17227396     | 17924555                                                     |
| in FANCA; dbSNP:rs1555533313                  | 9399890                                                      |
| in FANCA; dbSNP:rs1555533300                  | 10521298                                                     |
| in FANCA; dbSNP:rs149851163                   | 17924555                                                     |
| in FANCA; dbSNP:rs17227403                    | 9371798                                                      |

## 5. Interacciones con: 

| Interactor   | GeneName   |   NumExperiments |
|:-------------|:-----------|-----------------:|
| O15287       | FANCG      |               23 |
| Q6NZ36-1     | FAAP20     |                5 |
| O15287       | FANCG      |                5 |
| Q9NPI8       | FANCF      |                5 |
| Q0VG06       | FAAP100    |                4 |
| P62993       | GRB2       |                2 |