In [1]:
from utils.relation_utils import RelationshipSpec, FixedChoiceField, FreeTextField, DynamicEntityField, CandidateSpec, build_relationship_models
# your enums/constants live here
from constants.my_enums import *

# ----------------------------
# Example specs (yours)
# ----------------------------

# Relationship spec: effect of a small molecule on a gene, protein or RNA transcript.
# NOTE(review): the description text contains a typo ("an impact of on its",
# "activity,localization"). It is kept verbatim here because it is a runtime
# string; fix it deliberately if downstream consumers allow.
CHEMICAL_AFFECTS_GENE = RelationshipSpec(
    name="ChemicalAffectsGene",
    description=(
        "Describes an effect that a chemical has on a gene or gene product "
        "(e.g. an impact of on its abundance, activity,localization, processing, expression, etc.)"
    ),
    subject_classes=["SmallMolecule"],
    object_classes=["Gene", "Protein", "RnaTranscript"],
    predicate_choices=["affects", "causes"],
    # Optional qualifiers whose values are restricted to an enum constant.
    fixed_fields=[
        # --- subject-side qualifiers ---
        FixedChoiceField(
            "subject_form_or_variant",
            CHEMICAL_OR_GENE_OR_GENE_PRODUCT_FORM_OR_VARIANT_ENUM,
            optional=True,
            schema_name="ChemicalOrGeneOrGeneProductFormOrVariant",
        ),
        FixedChoiceField(
            "subject_part",
            GENE_OR_GENE_PRODUCT_OR_CHEMICAL_PART_QUALIFIER_ENUM,
            optional=True,
            schema_name="GeneGeneProductOrChemicalPartQualifier",
        ),
        FixedChoiceField(
            "subject_derivative",
            CHEMICAL_ENTITY_DERIVATIVE_ENUM,
            optional=True,
            schema_name="ChemicalEntityDerivative",
        ),
        FixedChoiceField(
            "subject_aspect",
            GENE_OR_GENE_PRODUCT_OR_CHEMICAL_ENTITY_ASPECT_ENUM,
            optional=True,
            schema_name="GeneGeneProductOrChemicalEntityAspect",
        ),
        FixedChoiceField(
            "subject_direction",
            DIRECTION_QUALIFIER_ENUM,
            optional=True,
            schema_name="DirectionQualifier",
        ),
        # --- object-side qualifiers ---
        FixedChoiceField(
            "object_form_or_variant",
            CHEMICAL_OR_GENE_OR_GENE_PRODUCT_FORM_OR_VARIANT_ENUM,
            optional=True,
            schema_name="ChemicalOrGeneOrGeneProductFormOrVariant",
        ),
        FixedChoiceField(
            "object_part",
            GENE_OR_GENE_PRODUCT_OR_CHEMICAL_PART_QUALIFIER_ENUM,
            optional=True,
            schema_name="GeneGeneProductOrChemicalPartQualifier",
        ),
        FixedChoiceField(
            "object_aspect",
            GENE_OR_GENE_PRODUCT_OR_CHEMICAL_ENTITY_ASPECT_ENUM,
            optional=True,
            schema_name="GeneGeneProductOrChemicalEntityAspect",
        ),
        FixedChoiceField(
            "object_direction",
            DIRECTION_QUALIFIER_ENUM,
            optional=True,
            schema_name="DirectionQualifier",
        ),
        # --- relation-level qualifier ---
        FixedChoiceField(
            "causal_mechanism",
            CAUSAL_MECHANISM_QUALIFIER_ENUM,
            optional=True,
            schema_name="CausalMechanismQualifier",
        ),
    ],
    # IMPORTANT: the names in `classes` must be identical to the entity class names
    # (presumably the keys of the NER output given to build_relationship_models — confirm).
    dynamic_fields=[
        DynamicEntityField(
            "subject_context",
            classes=["CellType", "CellularComponent", "TissueOrOrgan"],
            optional=True,
            schema_name="AnatomicalContext",
        ),
        DynamicEntityField(
            "object_context",
            classes=["CellType", "CellularComponent", "TissueOrOrgan"],
            optional=True,
            schema_name="AnatomicalContext",
        ),
        DynamicEntityField(
            "anatomical_context",
            classes=["CellType", "CellularComponent", "TissueOrOrgan"],
            optional=True,
            schema_name="AnatomicalContext",
        ),
        DynamicEntityField(
            "species_context",
            classes=["OrganismTaxon"],
            optional=True,
            schema_name="SpeciesContext",
        ),
    ],
)

# Relationship spec: small molecule -> pathway.
# Declares no fixed or dynamic qualifier fields, only classes and predicates.
CHEMICAL_TO_PATHWAY = RelationshipSpec(
    name="ChemicalToPathway",
    description="An interaction between a chemical entity and a biological process or pathway.",
    subject_classes=["SmallMolecule"],
    object_classes=["Pathway"],
    predicate_choices=[
        "participates_in",
        "actively_involved_in",
        "consumed_by",
        "is_output_of",
        "enables",
        "catalyzes",
    ],
)

# Relationship spec: disease or phenotypic feature -> mode of genetic inheritance.
# Single predicate, no qualifier fields.
DISEASE_OR_PHENOTYPIC_FEATURE_TO_GENETIC_INHERITANCE = RelationshipSpec(
    name="DiseaseOrPhenotypicFeatureToGeneticInheritance",
    description=(
        "A relationship between either a disease or a phenotypic feature "
        "and its mode of (genetic) inheritance."
    ),
    subject_classes=[
        "Disease",
        "PhenotypicFeature",
    ],
    object_classes=["GeneticInheritance"],
    predicate_choices=["has_mode_of_inheritance"],
)

# Relationship spec: disease -> phenotypic feature it presents with.
DISEASE_TO_PHENOTYPIC_FEATURE = RelationshipSpec(
    name="DiseaseToPhenotypicFeature",
    description=(
        "A relationship between a disease and a phenotypic feature in which "
        "the phenotypic feature is associated with the disease in some way."
    ),
    subject_classes=["Disease"],
    object_classes=["PhenotypicFeature"],
    predicate_choices=["has_phenotype"],
    fixed_fields=[
        # Free-text aspect of the subject, e.g. stability, abundance, expression, exposure.
        FreeTextField("subject_aspect", optional=True),
        FixedChoiceField(
            "subject_direction",
            DIRECTION_QUALIFIER_ENUM,
            optional=True,
            schema_name="DirectionQualifier",
        ),
        # Free-text aspect of the object, e.g. stability, abundance, expression, exposure.
        FreeTextField("object_aspect", optional=True),
        FixedChoiceField(
            "object_direction",
            DIRECTION_QUALIFIER_ENUM,
            optional=True,
            schema_name="DirectionQualifier",
        ),
        # How often the phenotype occurs, e.g. "80% of patients", "very common", "rarely".
        FreeTextField("frequency", optional=True),
    ],
    dynamic_fields=[
        DynamicEntityField(
            "disease_context",
            classes=["Disease"],
            optional=True,
            schema_name="DiseaseContext",
        ),
    ],
)

# Relationship spec: co-expression between two genes / gene products.
# TODO(review): the author flagged this spec as needing a check ("CHECK !!!!!").
GENE_TO_GENE_COEXPRESSION = RelationshipSpec(
    name="GeneToGeneCoexpression",
    description=(
        "Indicates that two genes or gene products are co-expressed, "
        "generally under the same conditions."
    ),
    subject_classes=["Gene", "Protein", "RnaTranscript"],
    object_classes=["Gene", "Protein", "RnaTranscript"],
    predicate_choices=["coexpressed_with"],
    fixed_fields=[
        # Optional quantitative value indicating the degree of expression.
        FreeTextField("quantifier", optional=True),
    ],
    dynamic_fields=[
        DynamicEntityField(
            "expression_site",
            classes=["CellType", "CellularComponent", "TissueOrOrgan"],
            optional=True,
            schema_name="ExpressionSite",
        ),
        DynamicEntityField(
            "stage_qualifier",
            classes=["MouseDevelopmentalTimepoint", "HumanDevelopmentalTimepoint"],
            optional=True,
            schema_name="StageQualifier",
        ),
    ],
)


# Registry of the relationship specs defined in this cell, in declaration order.
# This is the list handed to build_relationship_models via its `specs` argument.
DEFAULT_SPECS: list[RelationshipSpec] = [
    CHEMICAL_AFFECTS_GENE,
    CHEMICAL_TO_PATHWAY,
    DISEASE_OR_PHENOTYPIC_FEATURE_TO_GENETIC_INHERITANCE,
    DISEASE_TO_PHENOTYPIC_FEATURE,
    GENE_TO_GENE_COEXPRESSION,
]

In [2]:

# ============================================================
# Example usage
# ============================================================
if __name__ == "__main__":
    # Mock NER result: entity class name -> list of mention dicts carrying a "label".
    ner = {
        "Gene": [
            {"label": "CDKN2A"},
            {"label": "GN123"},
        ],
        "Protein": [{"label": "TP53 protein"}],
        "RnaTranscript": [{"label": "BRCA1 mRNA"}],
        "SmallMolecule": [{"label": "Hexachlorobenzene"}],
        "TissueOrOrgan": [
            {"label": "liver"},
            {"label": "brain"},
        ],
        "CellType": [{"label": "neuron"}],
        # "OrganismTaxon": [{"label": "homo sapiens"}, {"label": "bacteria"}],
        # NOTE(review): with OrganismTaxon left out, the field that depends on that class
        # is presumably dropped from the generated models. In CHEMICAL_AFFECTS_GENE that
        # field is species_context (anatomical_context only lists CellType,
        # CellularComponent and TissueOrOrgan) — confirm against the builder.
    }

    # Optional toy scorer, shown for illustration only: the call below passes scorer=None.
    def toy_scorer(c: CandidateSpec) -> float:
        """Score a candidate by its number of subject choices (more choices rank higher)."""
        return len(c.subject_choices)

    models_by_name, RelationshipUnion, Relationships, schema = build_relationship_models(
        ner_output=ner,
        specs=DEFAULT_SPECS,
        # Enum reference policy: "auto" | "never" | "always".
        enum_ref_policy="auto",
        # Only share long enums if repeated.
        enum_share_threshold=4,
        # None, or an int.
        top_k=None,
        # Plug in your dual-encoder here.
        scorer=None,
        # JSON Schema $ref template.
        ref_template="#/$defs/{model}",
    )


In [3]:

# Inspect the generated JSON Schema to verify how $defs/$refs are used.
import json
import pprint

# pprint.pprint(schema)
# Pass `Relationships` (the container model) to with_structured_output,
# or pass `schema` instead if your client accepts raw JSON Schema.
Relationships.model_json_schema()

{'$defs': {'ChemicalAffectsGene': {'properties': {'rel_type': {'const': 'ChemicalAffectsGene',
     'default': 'ChemicalAffectsGene',
     'title': 'Rel Type',
     'type': 'string'},
    'subject_label': {'const': 'Hexachlorobenzene',
     'title': 'Subject Label',
     'type': 'string'},
    'predicate': {'enum': ['affects', 'causes'],
     'title': 'Predicate',
     'type': 'string'},
    'object_label': {'enum': ['CDKN2A', 'GN123', 'TP53 protein', 'BRCA1 mRNA'],
     'title': 'Object Label',
     'type': 'string'},
    'subject_form_or_variant': {'anyOf': [{'$ref': '#/$defs/ChemicalOrGeneOrGeneProductFormOrVariant'},
      {'type': 'null'}],
     'default': None},
    'subject_part': {'anyOf': [{'$ref': '#/$defs/GeneGeneProductOrChemicalPartQualifier'},
      {'type': 'null'}],
     'default': None},
    'subject_derivative': {'anyOf': [{'const': 'metabolite', 'type': 'string'},
      {'type': 'null'}],
     'default': None,
     'title': 'Subject Derivative'},
    'subject_aspect'