# Preparing the RareLink-CDM for the Phenopacket Mapper


RD CDM v2.0: 'res/RD CDM v2.0.xlsx'


RareLink Data Dictionary: '../res/RareLink_v2.0_DataDictionary.csv'


### 1 - Defining the RD CDM

#### 1.1 Resources

In [27]:
from phenopacket_mapper.data_standards.code_system import ORDO, HPO, MONDO, OMIM, LOINC, HGNC

In [28]:
from phenopacket_mapper.data_standards.code_system import CodeSystem

In [46]:
NCIT = CodeSystem(
    name='National Cancer Institute Thesaurus (NCIT)', 
    namespace_prefix='ncit', 
    url='https://ncithesaurus.org/'
)

UO = CodeSystem(
    name='Units of Measurement Ontology (UO)', 
    namespace_prefix='uo', 
    url='http://purl.obolibrary.org/obo/uo.owl'
)

NCBITaxon = CodeSystem(
    name='NCBI organismal classification', 
    namespace_prefix='ncbitaxon', 
    url='https://www.ncbi.nlm.nih.gov/taxonomy'
)

GENO = CodeSystem(
    name='GENO: The Genotype Ontology', 
    namespace_prefix='geno', 
    url='http://www.genoontology.org/'
)

SO = CodeSystem(
    name='Sequence Ontology (SO)', 
    namespace_prefix='so', 
    url='http://www.sequenceontology.org/'
)

ICD10CM = CodeSystem(
    name='ICD-10-CM', 
    namespace_prefix='icd10cm', 
    url='https://www.cdc.gov/nchs/icd/icd10cm.htm'
)

SNOMED = CodeSystem(
    name='SNOMED CT', 
    namespace_prefix='snomed', 
    url='https://www.snomed.org/snomed-ct'
)

ICD11 = CodeSystem(
    name='ICD-11', 
    namespace_prefix='icd11', 
    url='https://icd.who.int/en'
)

HL7FHIR = CodeSystem(
    name='HL7 FHIR', 
    namespace_prefix='hl7fhir', 
    url='https://www.hl7.org/fhir/'
)


In [30]:
resources = [
    HPO.set_version("2024-08-13"),
    MONDO.set_version("2024-09-03"),
    OMIM.set_version("2024-09-12"),
    ORDO.set_version("2024-09-12"),
    NCBITaxon.set_version("2024-07-03"),
    LOINC.set_version("2.78"),
    HGNC.set_version("2024-08-23"),
    GENO.set_version("2023-10-08"),
    NCIT.set_version("24.04e"),
    SO.set_version("2.6"),
    ICD10CM.set_version("2024-09-01"),
    SNOMED.set_version("2024-09-01"),
    ICD11.set_version("2024-09-01"),
    HL7FHIR.set_version("v4.0.1"),
]



#### 1.2 Fields of the data model and their value sets

In [31]:
from phenopacket_mapper.data_standards.value_set import ValueSet
from phenopacket_mapper.data_standards import Date

##### Formal Criteria

In [32]:
vs_1_1 = ValueSet(
    elements=[str],
    name="Value set for 1.1 Pseudonym",
    description="Value set for field 1.1 Pseudonym of the RareLink cdm in section rarelink_1_formal_criteria. This field represents the (local) patient-related Identification code."
)

vs_1_2 = ValueSet(
    elements=[Date],
    name="Value set for 1.2 Date of Admission",
    description="Value set for field 1.2 Date of Admission of the RareLink data model in section rarelink_1_formal_criteria. This field represents the date of admission or data capture of the individual."
)

##### Personal Information

In [42]:
from phenopacket_mapper.data_standards.code import Coding


vs_2_1 = ValueSet(
    elements=[Date],  # Keep non-string elements as is
    name="Value set for 2.1 Date of Birth",
    description=("Value set for field 2.1 Date of Birth of the RareLink "
                 "data model in section rarelink_2_personal_information. "
                 "The individual's date of birth. If the exact month or day "
                 "is allowed to be captured or not known, select the 1st day "
                 "of the month or the 1st month of the year, respectively.")
)

vs_2_2 = ValueSet(
    elements=[
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="248152002"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="248153007"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="184115007"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="32570691000036108"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="1220561009")
    ],
    name="Value set for 2.2 Sex at Birth",
    description=("Value set for field 2.2 Sex at Birth of the RareLink "
                 "data model in section rarelink_2_personal_information. "
                 "The individual's sex that was assigned at birth.")
)

vs_2_3 = ValueSet(
    elements=[
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="261665006"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="734875008"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="734876009"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="80427008"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="65162001"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="35111009"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="403760006"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="78317008"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="10567003"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="48930007"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="74964007")
    ],
    name="Value set for 2.3 Karyotypic Sex",
    description=("Value set for field 2.3 Karyotypic Sex of the RareLink "
                 "data model in section rarelink_2_personal_information. "
                 "The chromosomal sex of an individual.")
)

vs_2_4 = ValueSet(
    elements=[
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="446141000124107"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="446151000124109"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="394743007"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="33791000087105"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="1220561009")
    ],
    name="Value set for 2.4 Gender Identity",
    description=("Value set for field 2.4 Gender Identity of the RareLink "
                 "data model in section rarelink_2_personal_information. "
                 "The self-assigned gender of the individual.")
)


# 2.5 Country of Birth
# GA4GH Phenopacket Schema v2.0: n/a


##### Formal Criteria

In [45]:
# 3.1 Vital Status
vs_3_1 = ValueSet(
    elements=[
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="438949009"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="419099009"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="399307001"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="185924006"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="261665006")
    ],
    name="Value set for 3.1 Vital Status",
    description=("Value set for field 3.1 Vital Status of the RareLink "
                 "data model in section rarelink_3_patient_status. The "
                 "individual’s general clinical status or vital status.")
)

# 3.2 Time of Death
vs_3_2 = ValueSet(
    elements=[Date],  # Based on the "date_ymd" format
    name="Value set for 3.2 Time of Death",
    description=("Value set for field 3.2 Time of Death of the RareLink "
                 "data model in section rarelink_3_patient_status. If "
                 "deceased, the individual’s date of death. If the specific "
                 "month or day is not known, select the 1st day of the "
                 "month or the 1st month of the year, respectively.")
)

# 3.3 Cause of Death [ICD10CM]
vs_3_3 = ValueSet(
    elements=[ICD10CM],  # Based on ICD10CM coding
    name="Value set for 3.3 Cause of Death [ICD10CM]",
    description=("Value set for field 3.3 Cause of Death [ICD10CM] of the "
                 "RareLink data model in section rarelink_3_patient_status. "
                 "If deceased, the individual’s primary cause of death "
                 "(i.e. according to the death certificate).")
)

# 3.4 Age Category
vs_3_4 = ValueSet(
    elements=[
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="3658006"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="713153009"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="255398004"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="263659003"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="41847000"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="303112003"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="419099009"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="261665006")
    ],
    name="Value set for 3.4 Age Category",
    description=("Value set for field 3.4 Age Category of the RareLink "
                 "data model in section rarelink_3_patient_status. The "
                 "individual's age category at the time of data capture (1.2).")
)

# 3.5 Length of Gestation at Birth [weeks+days]
# GA4GH Phenopacket Schema v2.0: n/a

# 3.6 Undiagnosed RD Case
vs_3_6 = ValueSet(
    elements=[
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="373066001"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="373067005")
    ],
    name="Value set for 3.6 Undiagnosed RD Case",
    description=("Value set for field 3.6 Undiagnosed RD Case of the "
                 "RareLink data model in section rarelink_3_patient_status. "
                 "Identifies cases where an RD diagnosis has not been established.")
)


##### Care Pathway

In [35]:
# 4.1 Encounter Start
# GA4GH Phenopacket Schema v2.0: n/a

# 4.2 Encounter End
# GA4GH Phenopacket Schema v2.0: n/a

# 4.3 Encounter Status
# GA4GH Phenopacket Schema v2.0: n/a

# 4.4 Encounter Class
# GA4GH Phenopacket Schema v2.0: n/a

##### Disease


In [36]:
vs_5_1 = ValueSet(
    elements=[MONDO, ORDO, ICD10CM, ICD11, OMIM],
    name="Value set for 5.1 Disease [MONDO, ORDO, ICD10CM, ICD11, OMIM]",
    description=("Value set for field 5.1 Disease [MONDO, ORDO, ICD10CM, "
                 "ICD11, OMIM] of the RareLink data model in section "
                 "rarelink_5_disease. Please look up the disease code for "
                 "the disease that the individual was affected by.")
)

# 5.2 Verification Status
vs_5_2 = ValueSet(
    elements=[
        Coding(system=CodeSystem(name="HL7 FHIR", namespace_prefix="hl7fhir"),
               code="unconfirmed"),
        Coding(system=CodeSystem(name="HL7 FHIR", namespace_prefix="hl7fhir"),
               code="provisional"),
        Coding(system=CodeSystem(name="HL7 FHIR", namespace_prefix="hl7fhir"),
               code="differential"),
        Coding(system=CodeSystem(name="HL7 FHIR", namespace_prefix="hl7fhir"),
               code="confirmed"),
        Coding(system=CodeSystem(name="HL7 FHIR", namespace_prefix="hl7fhir"),
               code="refuted"),
        Coding(system=CodeSystem(name="HL7 FHIR", namespace_prefix="hl7fhir"),
               code="entered-in-error")
    ],
    name="Value set for 5.2 Verification Status",
    description=("Value set for field 5.2 Verification Status of the RareLink "
                 "data model in section rarelink_5_disease. The verification "
                 "status of the disease.")
)

# 5.3 Age at Onset
vs_5_3 = ValueSet(
    elements=[
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="118189007"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="3950001"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="410672004"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="261665006")
    ],
    name="Value set for 5.3 Age at Onset",
    description=("Value set for field 5.3 Age at Onset of the RareLink data "
                 "model in section rarelink_5_disease. The age at the onset of "
                 "the first symptoms or signs of the disease.")
)

# 5.4 Date of Onset
vs_5_4 = ValueSet(
    elements=[Date],
    name="Value set for 5.4 Date of Onset",
    description=("Value set for field 5.4 Date of Onset of the RareLink data "
                 "model in section rarelink_5_disease. The date at onset of "
                 "first symptoms or signs of the disease. If the specific month "
                 "or day is not known, select the 1st day of the month or the "
                 "1st month of the year, respectively.")
)

# 5.5 Age at Diagnosis
vs_5_5 = ValueSet(
    elements=[
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="118189007"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="3950001"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="410672004"),
        Coding(system=CodeSystem(name="SNOMED CT", namespace_prefix="SNOMED"),
               code="261665006")
    ],
    name="Value set for 5.5 Age at Diagnosis",
    description=("Value set for field 5.5 Age at Diagnosis of the RareLink "
                 "data model in section rarelink_5_disease. The individual’s "
                 "age when the diagnosis was made.")
)

# 5.6 Date of Diagnosis
vs_5_6 = ValueSet(
    elements=[Date],
    name="Value set for 5.6 Date of Diagnosis",
    description=("Value set for field 5.6 Date of Diagnosis of the RareLink "
                 "data model in section rarelink_5_disease. The date on which "
                 "the disease was determined. If the specific month or day is "
                 "not known, select the 1st day of the month or the 1st month "
                 "of the year, respectively.")
)

# 5.7 Body Site [SNOMED CT]
vs_5_7 = ValueSet(
    elements=[SNOMED],
    name="Value set for 5.7 Body Site [SNOMED CT]",
    description=("Value set for field 5.7 Body Site [SNOMED CT] of the "
                 "RareLink data model in section rarelink_5_disease. The "
                 "specific body site affected by disease is encoded using all "
                 "descendants of SCT Body Structure (123037004).")
)

# 5.8 Clinical Status
vs_5_8 = ValueSet(
    elements=[
        Coding(system=CodeSystem(name="HL7 FHIR", namespace_prefix="hl7fhir"),
               code="active"),
        Coding(system=CodeSystem(name="HL7 FHIR", namespace_prefix="hl7fhir"),
               code="recurrence"),
        Coding(system=CodeSystem(name="HL7 FHIR", namespace_prefix="hl7fhir"),
               code="relapse"),
        Coding(system=CodeSystem(name="HL7 FHIR", namespace_prefix="hl7fhir"),
               code="inactive"),
        Coding(system=CodeSystem(name="HL7 FHIR", namespace_prefix="hl7fhir"),
               code="remission"),
        Coding(system=CodeSystem(name="HL7 FHIR", namespace_prefix="hl7fhir"),
               code="resolved")
    ],
    name="Value set for 5.8 Clinical Status",
    description=("Value set for field 5.8 Clinical Status of the RareLink "
                 "data model in section rarelink_5_disease. The clinical "
                 "status of the disease indicates whether it is active, "
                 "inactive, or resolved.")
)

# 5.9 Severity
# GA4GH Phenopacket Schema v2.0: n/a


##### Genetic Finddings

In [37]:
# 6.1.1 Genomic Diagnosis [MONDO, OMIM]
vs_6_1_1 = ValueSet(
    elements=[MONDO, OMIM],
    name="Value set for 6.1.1 Genomic Diagnosis [MONDO]",
    description="Value set for field 6.1.1 Genomic Diagnosis [MONDO] of the RareLink data model in section rarelink_6_1_genetic_findings. The genetic finding of a variant can be linked to a disease in (5.1) if the same MONDO code is used."
)

# 6.1.2 Progress Status of Interpretation
vs_6_1_2 = ValueSet(
    elements=["ga4gh_unknown_progress", "ga4gh_in_progress", "ga4gh_completed", "ga4gh_solved", "ga4gh_unsolved"],
    name="Value set for 6.1.2 Progress Status of Interpretation",
    description="Value set for field 6.1.2 Progress Status of Interpretation of the RareLink data model in section rarelink_6_1_genetic_findings. The interpretation has a ProgressStatus that refers to the status of the attempted diagnosis."
)

# 6.1.3 Interpretation Status
vs_6_1_3 = ValueSet(
    elements=["ga4gh_unknown_status", "ga4gh_rejected", "ga4gh_candidate", "ga4gh_contributory", "ga4gh_causative"],
    name="Value set for 6.1.3 Interpretation Status",
    description="Value set for field 6.1.3 Interpretation Status of the RareLink data model in section rarelink_6_1_genetic_findings. An enumeration that describes the conclusion made about the genomic interpretation."
)

# 6.1.4 Structural Variant Analysis Method
# GA4GH Phenopacket Schema v2.0: n/a

# 6.1.5 Reference Genome
vs_6_1_5 = ValueSet(
    elements=["loinc_la14032-9", "loinc_la14029-5", "loinc_la14030-3", "loinc_la14031-1", "loinc_la26806-2"],
    name="Value set for 6.1.5 Reference Genome",
    description="Value set for field 6.1.5 Reference Genome of the RareLink data model in section rarelink_6_1_genetic_findings. The reference genome used for analyzing the genetic variant."
)

# 6.1.6 Genetic Mutation String
vs_6_1_6 = ValueSet(
    elements=[str],  # HGVS strings for genetic mutations
    name="Value set for 6.1.6 Genetic Mutation String",
    description="Value set for field 6.1.6 Genetic Mutation String of the RareLink data model in section rarelink_6_1_genetic_findings. An unvalidated (HGVS) string that describes the variant change."
)

# 6.1.7 Genomic DNA Change [g.HGVS]
vs_6_1_7 = ValueSet(
    elements=[str],  # HGVS strings for genomic DNA change
    name="Value set for 6.1.7 Genomic DNA Change [g.HGVS]",
    description="Value set for field 6.1.7 Genomic DNA Change [g.HGVS] of the RareLink data model in section rarelink_6_1_genetic_findings. The specific change in the genomic DNA sequence encoded with a validated g.HGVS expression."
)

# 6.1.8 Sequence DNA Change [c.HGVS]
vs_6_1_8 = ValueSet(
    elements=[str],  # HGVS strings for DNA sequence changes
    name="Value set for 6.1.8 Sequence DNA Change [c.HGVS]",
    description="Value set for field 6.1.8 Sequence DNA Change [c.HGVS] of the RareLink data model in section rarelink_6_1_genetic_findings. The specific change in the DNA sequence at the nucleotide level with a validated c.HGVS expression."
)

# 6.1.9 Amino Acid Change [p.HGVS]
vs_6_1_9 = ValueSet(
    elements=[str],  # HGVS strings for amino acid changes
    name="Value set for 6.1.9 Amino Acid Change [p.HGVS]",
    description="Value set for field 6.1.9 Amino Acid Change [p.HGVS] of the RareLink data model in section rarelink_6_1_genetic_findings. The specific change in the amino acid sequence resulting from a genetic variant as a validated p.HGVS expression."
)

# 6.1.10 Gene [HGNC-NR]
vs_6_1_10 = ValueSet(
    elements=[HGNC],  # Gene codes
    name="Value set for 6.1.10 Gene [HGNC-NR]",
    description="Value set for field 6.1.10 Gene [HGNC-NR] of the RareLink data model in section rarelink_6_1_genetic_findings. The specific gene or genes that were analyzed or identified in the study."
)

# 6.1.11 Zygosity
vs_6_1_11 = ValueSet(
    elements=["loinc_la6705-3", "loinc_la6706-1", "loinc_la26217-2", "loinc_la26220-6", "loinc_la6707-9", "loinc_la6703-8", "loinc_la6704-6", "loinc_53034_5_other"],
    name="Value set for 6.1.11 Zygosity",
    description="Value set for field 6.1.11 Zygosity of the RareLink data model in section rarelink_6_1_genetic_findings. The zygosity of the genetic variant."
)

# 6.1.12 Genomic Source Class
# GA4GH Phenopacket Schema v2.0: n/a

# 6.1.13 DNA Change Type
vs_6_1_13 = ValueSet(
    elements=["loinc_la9658-1", "loinc_la6692-3", "loinc_la6686-5", "loinc_la6687-3", "loinc_la6688-1", "loinc_la6689-9", "loinc_la6690-7", "loinc_48019_4_other"],
    name="Value set for 6.1.13 DNA Change Type",
    description="Value set for field 6.1.13 DNA Change Type of the RareLink data model in section rarelink_6_1_genetic_findings. The variant’s type of DNA change, such as point mutation, deletion, insertion, or other types."
)

vs_6_1_13a = ValueSet(
    elements=[LOINC],
    name="Other 6.1.13 DNA Change Type in LOINC",
    description="Value set for other DNA change types in LOINC."
)

# 6.1.14 Clinical Significance [ACMG]
vs_6_1_14 = ValueSet(
    elements=["loinc_la6668-3", "loinc_la26332-9", "loinc_la26333-7", "loinc_la26334-5", "loinc_la6675-8", "loinc_la4489-6"],
    name="Value set for 6.1.14 Clinical Significance [ACMG]",
    description="Value set for field 6.1.14 Clinical Significance [ACMG] of the RareLink data model in section rarelink_6_1_genetic_findings. The clinical significance of the genetic variant, indicating its impact on health and disease."
)


# 6.1.15 Therapeutic Actionability
vs_6_1_15 = ValueSet(
    elements=["ga4gh_unknown_actionability", "ga4gh_not_actionable", "ga4gh_actionable"],
    name="Value set for 6.1.15 Therapeutic Actionability",
    description="Value set for field 6.1.15 Therapeutic Actionability of the RareLink data model in section rarelink_6_1_genetic_findings. This field flags the variant as being a candidate for treatment/clinical intervention, which could improve the clinical outcome."
)

# 6.1.16 Clinical Annotation Level Of Evidence
# GA4GH Phenopacket Schema v2.0: n/a

##### Phenotypic Feature

In [38]:
# 6.2.1 Phenotypic Feature
vs_6_2_1 = ValueSet(
    elements=[HPO],  # HPO codes for phenotypic features
    name="Value set for 6.2.1 Phenotypic Feature",
    description="Value set for field 6.2.1 Phenotypic Feature of the RareLink data model in section rarelink_6_2_phenotypic_feature. An observed physical and clinical characteristic encoded with HPO."
)

# 6.2.2 Determination Date
vs_6_2_2 = ValueSet(
    elements=[Date],  # Based on the "date_ymd" format
    name="Value set for 6.2.2 Determination Date",
    description="Value set for field 6.2.2 Determination Date of the RareLink data model in section rarelink_6_2_phenotypic_feature. The date on which the phenotypic feature was observed or recorded."
)

# 6.2.3 Status
vs_6_2_3 = ValueSet(
    elements=["snomed_410605003", "snomed_723511001"],
    name="Value set for 6.2.3 Status",
    description="Value set for field 6.2.3 Status of the RareLink data model in section rarelink_6_2_phenotypic_feature. The current status of the phenotypic feature, indicating whether it is confirmed or refuted."
)

# 6.2.4 Modifier
vs_6_2_4 = ValueSet(
    elements=[HPO, NCBITaxon, SNOMED],  # Grouped modifiers from HPO, NCBITaxon, SNOMED
    name="Value set for 6.2.4 Modifier",
    description="Value set for field 6.2.4 Modifier of the RareLink data model in section rarelink_6_2_phenotypic_feature. Further clinical modifiers to describe a specific phenotypic feature, such as severity or linked causative agents."
)


##### Family History

In [39]:
# 6.3.0 Pseudonym
vs_6_3_0 = ValueSet(
    elements=[str],  # A string for pseudonym
    name="Value set for 6.3.0 Pseudonym",
    description="Value set for field 6.3.0 Pseudonym of the RareLink data model in section rarelink_6_3_family_history. If a pseudonym was already assigned to the specific family member, please enter it here to identify across records."
)

# 6.3.1 Propositus/-a
vs_6_3_1 = ValueSet(
    elements=["snomed_373066001", "snomed_373067005", "snomed_261665006", "snomed_1220561009"],
    name="Value set for 6.3.1 Propositus/-a",
    description="Value set for field 6.3.1 Propositus/-a of the RareLink data model in section rarelink_6_3_family_history. Is the individual the first affected family member who seeks medical attention for a genetic disorder, leading to the diagnosis of other family members."
)

# 6.3.2 Relationship of the individual to the index case / propositus/a
vs_6_3_2 = ValueSet(
    elements=["snomed_65656005", "snomed_9947008", "snomed_83420006", "snomed_113160008", "snomed_60614009", "snomed_73678001", "snomed_11286003", "snomed_45929001", "snomed_2272004", "snomed_62296006", "snomed_17945006", "snomed_1220561009"],
    name="Value set for 6.3.2 Relationship of the individual to the index case / propositus/a",
    description="Value set for field 6.3.2 Relationship of the individual to the index case / propositus/a of the RareLink data model in section rarelink_6_3_family_history. Specifies the familial relationship of the individual being evaluated to the index case or propositus/proposita."
)

# 6.3.3 Consanguinity
vs_6_3_3 = ValueSet(
    elements=["snomed_373066001", "snomed_373067005", "snomed_261665006", "snomed_1220561009"],
    name="Value set for 6.3.3 Consanguinity",
    description="Value set for field 6.3.3 Consanguinity of the RareLink data model in section rarelink_6_3_family_history. The presence of a biological relationship between parents who are related by blood, typically as first or second cousins."
)

# 6.3.4 Family Member Relationship
vs_6_3_4 = ValueSet(
    elements=["snomed_65656005", "snomed_9947008", "snomed_83420006", "snomed_113160008", "snomed_60614009", "snomed_73678001", "snomed_11286003", "snomed_45929001", "snomed_2272004", "snomed_62296006", "snomed_17945006", "snomed_1220561009"],
    name="Value set for 6.3.4 Family Member Relationship",
    description="Value set for field 6.3.4 Family Member Relationship of the RareLink data model in section rarelink_6_3_family_history. Specifies the relationship of the selected family member to the patient."
)

# 6.3.5 Family Member Record Status
vs_6_3_5 = ValueSet(
    elements=["hl7fhir_partial", "hl7fhir_completed", "hl7fhir_entered-in-error", "hl7fhir_health-unknown"],
    name="Value set for 6.3.5 Family Member Record Status",
    description="Value set for field 6.3.5 Family Member Record Status of the RareLink data model in section rarelink_6_3_family_history. Specifies the record’s status of the family history of a specific family member."
)

# 6.3.6 Family Member Sex
vs_6_3_6 = ValueSet(
    elements=["snomed_248152002", "snomed_248153007", "snomed_184115007", "snomed_32570691000036108", "snomed_1220561009"],
    name="Value set for 6.3.6 Family Member Sex",
    description="Value set for field 6.3.6 Family Member Sex of the RareLink data model in section rarelink_6_3_family_history. Specifies the sex (or gender) of the specific family member. If possible, the sex assigned at birth should be selected."
)

# 6.3.7 Family Member Age
vs_6_3_7 = ValueSet(
    elements=[int],  # Integer for age
    name="Value set for 6.3.7 Family Member Age",
    description="Value set for field 6.3.7 Family Member Age of the RareLink data model in section rarelink_6_3_family_history. Records the current age in full years of the selected family member."
)

# 6.3.8 Family Member Date of Birth
vs_6_3_8 = ValueSet(
    elements=[Date],  # Based on the "date_ymd" format
    name="Value set for 6.3.8 Family Member Date of Birth",
    description="Value set for field 6.3.8 Family Member Date of Birth of the RareLink data model in section rarelink_6_3_family_history. Records the date of birth of the selected family member."
)

# 6.3.9 Family Member Deceased
vs_6_3_9 = ValueSet(
    elements=["snomed_373066001", "snomed_373067005", "snomed_261665006"],
    name="Value set for 6.3.9 Family Member Deceased",
    description="Value set for field 6.3.9 Family Member Deceased of the RareLink data model in section rarelink_6_3_family_history. Indicates whether the selected family member is deceased."
)

# 6.3.10 Family Member Cause of Death [ICD10CM]
vs_6_3_10 = ValueSet(
    elements=[ICD10CM],  # ICD10CM for cause of death
    name="Value set for 6.3.10 Family Member Cause of Death [ICD10CM]",
    description="Value set for field 6.3.10 Family Member Cause of Death [ICD10CM] of the RareLink data model in section rarelink_6_3_family_history. Records the cause of death of the selected deceased family member."
)

# 6.3.11 Family Member Deceased Age
vs_6_3_11 = ValueSet(
    elements=[int],  # Integer for age
    name="Value set for 6.3.11 Family Member Deceased Age",
    description="Value set for field 6.3.11 Family Member Deceased Age of the RareLink data model in section rarelink_6_3_family_history. Records the age in full years of the selected family member at death."
)

# 6.3.12 Family Member Disease [MONDO]
vs_6_3_12 = ValueSet(
    elements=[MONDO],  # MONDO for family member disease
    name="Value set for 6.3.12 Family Member Disease [MONDO]",
    description="Value set for field 6.3.12 Family Member Disease [MONDO] of the RareLink data model in section rarelink_6_3_family_history. Indicates whether the selected family member is affected by the same RD as the individual or a different rare disease."
)


##### Consent

n/a for Phenopackets

##### Disability

n/a for Phenopackets

## Define the Data Model object

In [40]:
from phenopacket_mapper.data_standards import DataModel, DataField

In [41]:
rarelink_cdm = DataModel(
    data_model_name="RareLink CDM",
    resources=resources,  # Assuming resources are already defined
    fields=[
        # 1. Formal Criteria
        # 1.1 Pseudonym
        DataField(section="1. Formal Criteria", ordinal="1.1", name="Pseudonym", value_set=vs_1_1),

        # 1.2 Date of Admission
        DataField(section="1. Formal Criteria", ordinal="1.2", name="Date of Admission", value_set=vs_1_2),

        # 2. Personal Information
        # 2.1 Date of Birth
        DataField(section="2. Personal Information", ordinal="2.1", name="Date of Birth", value_set=vs_2_1),

        # 2.2 Sex at Birth
        DataField(section="2. Personal Information", ordinal="2.2", name="Sex at Birth", value_set=vs_2_2),

        # 2.3 Karyotypic Sex
        DataField(section="2. Personal Information", ordinal="2.3", name="Karyotypic Sex", value_set=vs_2_3),

        # 2.4 Gender Identity
        DataField(section="2. Personal Information", ordinal="2.4", name="Gender Identity", value_set=vs_2_4),

        # 2.5 Country of Birth
        # GA4GH Phenopacket Schema v2.0: n/a

        # 3. Patient Status
        # 3.1 Vital Status
        DataField(section="3. Patient Status", ordinal="3.1", name="Vital Status", value_set=vs_3_1),

        # 3.2 Time of Death
        DataField(section="3. Patient Status", ordinal="3.2", name="Time of Death", value_set=vs_3_2),

        # 3.3 Cause of Death
        DataField(section="3. Patient Status", ordinal="3.3", name="Cause of Death", value_set=vs_3_3),

        # 3.4 Age Category
        DataField(section="3. Patient Status", ordinal="3.4", name="Age Category", value_set=vs_3_4),

        # 3.5 Length of Gestation at Birth
        DataField(section="3. Patient Status", ordinal="3.5", name="Length of Gestation at Birth", value_set=vs_3_5),

        # 3.6 Undiagnosed RD Case
        DataField(section="3. Patient Status", ordinal="3.6", name="Undiagnosed RD Case", value_set=vs_3_6),

        # 4. Care Pathway
        # 4.1 Encounter Start
        DataField(section="4. Care Pathway", ordinal="4.1", name="Encounter Start", value_set=vs_4_1),

        # 4.2 Encounter End
        DataField(section="4. Care Pathway", ordinal="4.2", name="Encounter End", value_set=vs_4_2),

        # 4.3 Encounter Status
        DataField(section="4. Care Pathway", ordinal="4.3", name="Encounter Status", value_set=vs_4_3),

        # 4.4 Encounter Class
        DataField(section="4. Care Pathway", ordinal="4.4", name="Encounter Class", value_set=vs_4_4),

        # 5. Disease
        # 5.1 Disease
        DataField(section="5. Disease", ordinal="5.1", name="Disease", value_set=vs_5_1),

        # 5.2 Verification Status
        DataField(section="5. Disease", ordinal="5.2", name="Verification Status", value_set=vs_5_2),

        # 5.3 Age at Onset
        DataField(section="5. Disease", ordinal="5.3", name="Age at Onset", value_set=vs_5_3),

        # 5.4 Date of Onset
        DataField(section="5. Disease", ordinal="5.4", name="Date of Onset", value_set=vs_5_4),

        # 5.5 Age at Diagnosis
        DataField(section="5. Disease", ordinal="5.5", name="Age at Diagnosis", value_set=vs_5_5),

        # 5.6 Date of Diagnosis
        DataField(section="5. Disease", ordinal="5.6", name="Date of Diagnosis", value_set=vs_5_6),

        # 5.7 Body Site [SNOMED CT]
        DataField(section="5. Disease", ordinal="5.7", name="Body Site [SNOMED CT]", value_set=vs_5_7),

        # 5.8 Clinical Status
        DataField(section="5. Disease", ordinal="5.8", name="Clinical Status", value_set=vs_5_8),

        # 5.9 Severity
        DataField(section="5. Disease", ordinal="5.9", name="Severity", value_set=vs_5_9),

        # 6.1 Genetic Findings
        # 6.1.1 Genomic Diagnosis
        DataField(section="6.1 Genetic Findings", ordinal="6.1.1", name="Genomic Diagnosis [MONDO]", value_set=vs_6_1_1),

        # 6.1.2 Progress Status of Interpretation
        DataField(section="6.1 Genetic Findings", ordinal="6.1.2", name="Progress Status of Interpretation", value_set=vs_6_1_2),

        # 6.1.3 Interpretation Status
        DataField(section="6.1 Genetic Findings", ordinal="6.1.3", name="Interpretation Status", value_set=vs_6_1_3),

        # 6.1.4 Structural Variant Analysis Method
        DataField(section="6.1 Genetic Findings", ordinal="6.1.4", name="Structural Variant Analysis Method", value_set=vs_6_1_4),

        # 6.1.5 Reference Genome
        DataField(section="6.1 Genetic Findings", ordinal="6.1.5", name="Reference Genome", value_set=vs_6_1_5),

        # 6.1.6 Genetic Mutation String
        DataField(section="6.1 Genetic Findings", ordinal="6.1.6", name="Genetic Mutation String", value_set=vs_6_1_6),

        # 6.1.7 Genomic DNA Change [g.HGVS]
        DataField(section="6.1 Genetic Findings", ordinal="6.1.7", name="Genomic DNA Change [g.HGVS]", value_set=vs_6_1_7),

        # 6.1.8 Sequence DNA Change [c.HGVS]
        DataField(section="6.1 Genetic Findings", ordinal="6.1.8", name="Sequence DNA Change [c.HGVS]", value_set=vs_6_1_8),

        # 6.1.9 Amino Acid Change [p.HGVS]
        DataField(section="6.1 Genetic Findings", ordinal="6.1.9", name="Amino Acid Change [p.HGVS]", value_set=vs_6_1_9),

        # 6.1.10 Gene [HGNC-NR]
        DataField(section="6.1 Genetic Findings", ordinal="6.1.10", name="Gene [HGNC-NR]", value_set=vs_6_1_10),

        # 6.1.11 Zygosity
        DataField(section="6.1 Genetic Findings", ordinal="6.1.11", name="Zygosity", value_set=vs_6_1_11),

        # 6.1.12 Genomic Source Class
        DataField(section="6.1 Genetic Findings", ordinal="6.1.12", name="Genomic Source Class", value_set=vs_6_1_12),

        # 6.1.13 DNA Change Type
        DataField(section="6.1 Genetic Findings", ordinal="6.1.13", name="DNA Change Type", value_set=vs_6_1_13),

        # 6.1.14 Clinical Significance [ACMG]
        DataField(section="6.1 Genetic Findings", ordinal="6.1.14", name="Clinical Significance [ACMG]", value_set=vs_6_1_14),

        # 6.1.15 Therapeutic Actionability
        DataField(section="6.1 Genetic Findings", ordinal="6.1.15", name="Therapeutic Actionability", value_set=vs_6_1_15),

        # 6.1.16 Clinical Annotation Level Of Evidence
        DataField(section="6.1 Genetic Findings", ordinal="6.1.16", name="Clinical Annotation Level Of Evidence", value_set=vs_6_1_16),

        # 6.2 Phenotypic Feature
        # 6.2.1 Phenotypic Feature
        DataField(section="6.2 Phenotypic Feature", ordinal="6.2.1", name="Phenotypic Feature", value_set=vs_6_2_1),

        # 6.2.2 Determination Date
        DataField(section="6.2 Phenotypic Feature", ordinal="6.2.2", name="Determination Date", value_set=vs_6_2_2),

        # 6.2.3 Status
        DataField(section="6.2 Phenotypic Feature", ordinal="6.2.3", name="Status", value_set=vs_6_2_3),

        # 6.2.4 Modifier
        DataField(section="6.2 Phenotypic Feature", ordinal="6.2.4", name="Modifier", value_set=vs_6_2_4),

        # 6.3 Family History
        # 6.3.0 Pseudonym
        DataField(section="6.3 Family History", ordinal="6.3.0", name="Pseudonym", value_set=vs_6_3_0),

        # 6.3.1 Propositus/-a
        DataField(section="6.3 Family History", ordinal="6.3.1", name="Propositus/-a", value_set=vs_6_3_1),

        # 6.3.2 Relationship of the individual to the index case / propositus/a
        DataField(section="6.3 Family History", ordinal="6.3.2", name="Relationship of the individual to the index case / propositus/a", value_set=vs_6_3_2),

        # 6.3.3 Consanguinity
        DataField(section="6.3 Family History", ordinal="6.3.3", name="Consanguinity", value_set=vs_6_3_3),

        # 6.3.4 Family Member Relationship
        DataField(section="6.3 Family History", ordinal="6.3.4", name="Family Member Relationship", value_set=vs_6_3_4),

        # 6.3.5 Family Member Record Status
        DataField(section="6.3 Family History", ordinal="6.3.5", name="Family Member Record Status", value_set=vs_6_3_5),

        # 6.3.6 Family Member Sex
        DataField(section="6.3 Family History", ordinal="6.3.6", name="Family Member Sex", value_set=vs_6_3_6),

        # 6.3.7 Family Member Age
        DataField(section="6.3 Family History", ordinal="6.3.7", name="Family Member Age", value_set=vs_6_3_7),

        # 6.3.8 Family Member Date of Birth
        DataField(section="6.3 Family History", ordinal="6.3.8", name="Family Member Date of Birth", value_set=vs_6_3_8),

        # 6.3.9 Family Member Deceased
        DataField(section="6.3 Family History", ordinal="6.3.9", name="Family Member Deceased", value_set=vs_6_3_9),

        # 6.3.10 Family Member Cause of Death [ICD10CM]
        DataField(section="6.3 Family History", ordinal="6.3.10", name="Family Member Cause of Death [ICD10CM]", value_set=vs_6_3_10),

        # 6.3.11 Family Member Deceased Age
        DataField(section="6.3 Family History", ordinal="6.3.11", name="Family Member Deceased Age", value_set=vs_6_3_11),

        # 6.3.12 Family Member Disease [MONDO]
        DataField(section="6.3 Family History", ordinal="6.3.12", name="Family Member Disease [MONDO]", value_set=vs_6_3_12),
    ]
)


ValueError: All fields in a DataModel must have unique identifiers