# Resolve Schema

**input**
```
+schema_path: str
```

**methods**
```
+read_json(schema_path: str): dict
+split_json(schema: dict): list
+resolver(entity1: dict, entity2: dict): dict
+resolve_defs(terms: dict, defs: dict): dict
+node_order(schema: dict): list
+resolve_nodes(nodeList: list, splitJsonList: list): list
+recombine_nodes(resolvedList: list): dict
```

In [51]:
import json
class ResolveSchema:
    """Load a schema JSON file and derive node relationships from its links.

    The loaded schema is a dict keyed by node file name (for example
    ``"demographic.yaml"``). Each regular node entry is expected to provide
    an ``id`` and a ``links`` list; link entries either describe a target
    directly (``target_type``) or wrap several links in a ``subgroup`` list.
    """

    # Schema entries skipped by default when collecting node pairs. Kept as an
    # immutable tuple so the default argument can never be mutated by accident.
    DEFAULT_EXCLUDED_NODES = (
        "_definitions.yaml",
        "_terms.yaml",
        "_settings.yaml",
        "program.yaml",
    )

    def __init__(self, schema_path: str):
        """Read the schema at *schema_path* and cache its node names."""
        self.schema_path = schema_path
        self.schema = self.read_json(self.schema_path)
        self.nodes = self.get_nodes()

    def read_json(self, path: str) -> dict:
        """Return the parsed JSON content of the file at *path*."""
        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(path, encoding="utf-8") as f:
            return json.load(f)

    def get_nodes(self) -> list:
        """Return the top-level keys of the schema as a list."""
        return list(self.schema)

    @staticmethod
    def _flatten_links(links) -> list:
        """Return *links* as a flat list with every ``subgroup`` expanded."""
        if isinstance(links, dict):
            links = [links]
        flat = []
        for link in links or []:
            if 'subgroup' in link:
                flat.extend(ResolveSchema._flatten_links(link['subgroup']))
            else:
                flat.append(link)
        return flat

    def get_node_link(self, node_name: str) -> tuple:
        """Return ``(node_id, links)`` for the schema entry *node_name*.

        ``links`` is a flat list of link dicts. Subgroups are expanded
        wherever they appear in the list, and a node with no links yields an
        empty list instead of raising ``IndexError``.

        Raises:
            KeyError: if *node_name* is not in the schema or has no ``id``.
        """
        node = self.schema[node_name]
        node_id = node['id']
        return node_id, self._flatten_links(node.get('links'))

    def find_upstream_downstream(self, node_name: str) -> list:
        """Takes a node name and returns the upstream and downstream nodes.

        Returns one ``(upstream, downstream)`` tuple per link, where upstream
        is the link's ``target_type`` and downstream is the node's own ``id``.
        A link without a ``target_type`` (or a node with an empty id) yields
        ``(None, None)`` so the result stays aligned with the links.
        """
        node_id, links = self.get_node_link(node_name)
        print(node_id, links)

        results = []
        for link in links:
            target_type = link.get("target_type")

            if not node_id or not target_type:
                print("Missing essential keys in link:", link)
                results.append((None, None))
                continue

            print(f"Upstream: {target_type}, Downstream: {node_id}")
            results.append((target_type, node_id))

        return results

    def get_all_node_pairs(self, excluded_nodes=DEFAULT_EXCLUDED_NODES) -> list:
        """Return the (upstream, downstream) pairs of every non-excluded node.

        Args:
            excluded_nodes: container of schema keys to skip; defaults to
                ``DEFAULT_EXCLUDED_NODES``.
        """
        node_pairs = []
        for node in self.nodes:
            if node in excluded_nodes:
                continue
            print(node)
            node_pairs.extend(self.find_upstream_downstream(node))
        return node_pairs

    # TODO: implement split_json (previously only stubbed out as a comment).

In [52]:
# Load the test schema and collect the (upstream, downstream) pair of every
# node that is not in the default exclusion list.
resolved_schema = ResolveSchema("../schema/gen3_test_schema.json")
resolved_schema.get_all_node_pairs()
# resolved_schema.get_node_link("serum_marker_assay.yaml")

demographic.yaml
demographic [{'backref': 'demographics', 'label': 'describes', 'multiplicity': 'one_to_one', 'name': 'subjects', 'required': True, 'target_type': 'subject'}]
Upstream: subject, Downstream: demographic
project.yaml
project [{'backref': 'projects', 'label': 'member_of', 'multiplicity': 'many_to_one', 'name': 'programs', 'required': True, 'target_type': 'program'}]
Upstream: program, Downstream: project
serum_marker_assay.yaml
serum_marker_assay [{'backref': 'serum_marker_assays', 'label': 'produces', 'multiplicity': 'many_to_one', 'name': 'samples', 'required': False, 'target_type': 'sample'}]
Upstream: sample, Downstream: serum_marker_assay
alignment_workflow.yaml
alignment_workflow [{'backref': 'alignment_workflows', 'label': 'part_of', 'multiplicity': 'many_to_one', 'name': 'unaligned_reads_files', 'required': False, 'target_type': 'unaligned_reads_file'}]
Upstream: unaligned_reads_file, Downstream: alignment_workflow
imaging_file.yaml
imaging_file [{'backref': 'imagi

[('subject', 'demographic'),
 ('program', 'project'),
 ('sample', 'serum_marker_assay'),
 ('unaligned_reads_file', 'alignment_workflow'),
 ('subject', 'imaging_file'),
 ('core_metadata_collection', 'imaging_file'),
 ('sample', 'lipidomics_assay'),
 ('metabolomics_assay', 'metabolomics_file'),
 ('core_metadata_collection', 'metabolomics_file'),
 ('project', 'acknowledgement'),
 ('subject', 'medical_history'),
 ('subject', 'blood_pressure_test'),
 ('sample', 'genomics_assay'),
 ('variant_workflow', 'variant_file'),
 ('aligned_reads_file', 'variant_file'),
 ('core_metadata_collection', 'variant_file'),
 ('serum_marker_assay', 'serum_marker_file'),
 ('core_metadata_collection', 'serum_marker_file'),
 ('sample', 'proteomics_assay'),
 ('subject', 'sample'),
 ('genomics_assay', 'unaligned_reads_file'),
 ('core_metadata_collection', 'unaligned_reads_file'),
 ('aligned_reads_file', 'aligned_reads_index_file'),
 ('alignment_workflow', 'aligned_reads_index_file'),
 ('core_metadata_collection', 'a

In [5]:
# NOTE: this cell fails with "TypeError: unhashable type: 'list'" because the
# 'links' value used as the dict key is a list, and lists cannot be dict keys.
ref_dict = {
    resolved_schema.schema['demographic.yaml']['links']: resolved_schema.schema['demographic.yaml']['links']
    }


TypeError: unhashable type: 'list'

In [22]:
# Inspect the links of a single node.
# NOTE(review): the captured output is a bare list, while get_node_link in the
# ResolveSchema class returns (node_id, links) — presumably this output comes
# from an earlier version of the method; re-run to confirm.
resolved_schema.get_node_link('lipidomics_file.yaml')

[{'backref': 'lipidomics_files',
  'label': 'data_from',
  'multiplicity': 'many_to_one',
  'name': 'lipidomics_assays',
  'required': False,
  'target_type': 'lipidomics_assay'},
 {'backref': 'lipidomics_files',
  'label': 'data_from',
  'multiplicity': 'one_to_one',
  'name': 'core_metadata_collections',
  'required': False,
  'target_type': 'core_metadata_collection'}]

In [23]:
# Derive (upstream, downstream) node pairs directly from a node's links.
def find_upstream_downstream(links):
    """Map link dictionaries to (upstream, downstream) node name pairs.

    *links* may be a single link dict or a list of them. For each link the
    upstream node is its ``target_type`` and the downstream node is its
    ``backref`` with one trailing ``s`` removed. A link missing either key
    contributes ``(None, None)``.
    """
    link_items = [links] if isinstance(links, dict) else links

    pairs = []
    for entry in link_items:
        downstream = entry.get("backref")
        upstream = entry.get("target_type")

        if downstream and upstream:
            # The backref is plural; drop the final 's' to get the node name.
            downstream = downstream[:-1] if downstream.endswith('s') else downstream
            print(f"Upstream: {upstream}, Downstream: {downstream}")
            pairs.append((upstream, downstream))
        else:
            print("Missing essential keys in link:", entry)
            pairs.append((None, None))

    return pairs

In [25]:
# Feed one node's links to the standalone find_upstream_downstream(links).
# NOTE(review): if get_node_link returns (node_id, links) as in the
# ResolveSchema class, this would need `_, link = ...` — verify before re-running.
link = resolved_schema.get_node_link('lipidomics_file.yaml')
find_upstream_downstream(link)

Upstream: lipidomics_assay, Downstream: lipidomics_file
Upstream: core_metadata_collection, Downstream: lipidomics_file


[('lipidomics_assay', 'lipidomics_file'),
 ('core_metadata_collection', 'lipidomics_file')]