In [2]:
import json
from rdflib import Graph, Literal, BNode, RDF, URIRef

with open('../examples_conductivity/single_result.json') as f:
    data = json.load(f)


with open("./mapping.json") as f:
    mapping = json.load(f)

## Initial strategy: recursive navigation of original json

 * The code in reads the `./mapping.json` and the `../examples_conductivity/single_result.json` file.
* From `single_result.json` the code find recursively each key and value, navigating through the nesting of the `single_result.json` contents.  
* As the code finds keys and values it creates "triples", which are (subject, predicate, object) statements based on the mapping.json file
    * Each triple is a NODE -LINK-> NODE statement. Many statements build a graph. 
    * The graph is stored in a Graph() object from the rdflib library
* After the graph has been populated with triples, then it is serialized to JSON-LD

**Problem**. While the code works, the serialization is a list of JSON-LD objects. There is no hierarchy wihin the JSON-LD file, as objects with the same id (i.e. the same object) are not nested into each other.

In [14]:
def get_json_object_uuid(json_object:dict):
    #if key is uuid, use value as uuid of the subject node. Ex "uuid": "ce83md"
    for key, value in json_object.items():
        if key in mapping["uuids"]:
            return value
        else:
            return None

        
def replace_blank_node(g, blank_node, uriref):
    # Add new triples with the new IRI as the subject, while keeping the original triples
    for s, p, o in g.triples((blank_node, None, None)):
        g.add((uriref, p, o))

    # Remove the original triples with the blank node as the subject
    g.remove((blank_node, None, None))

    return g


def process_object(json_object, g, json_object_type=None, temp_subject = None):


    ##############      ASSIGN THE SUBJECT     ################
    uuid = get_json_object_uuid(json_object)
    if uuid:
        subject = URIRef(uuid)
        g = replace_blank_node(g, temp_subject, subject)
    else:
        subject = temp_subject


    ##############      ASSIGN THE TYPE     ################
    if json_object_type:
        g.add((subject, RDF.type, json_object_type))


    ##############      ADD ATTRIBUTES     ################

    for key, value in json_object.items():



        #### If key is mapped to an entity...
        if key in mapping["entities"].keys():

            ####... and the entity does not have a type, add it as a literal
            if isinstance(mapping["entities"][key], str):  

                predicate = URIRef(mapping["entities"][key])
                g.add((subject, predicate, Literal(value)))


            ####... otherwise if the entity has a type....
            elif isinstance(mapping["entities"][key], dict): 

                predicate = URIRef(mapping["entities"][key]["iri"])
                object_type = URIRef(mapping["entities"][key]["type"])

                ###... and the value is an object, send back recurvisely
                if isinstance(value, dict):
                    temp_object = BNode()
                    g.add((subject, predicate, temp_object))
                    process_object(value, g, object_type, temp_object)

                ####... otherwise if the value is a list...
                elif isinstance(value,list):
                    for element in value:

                        ### ...and the elements of the list are objects, then send back recursively.
                        if isinstance(element,dict):
                            temp_object = BNode()
                            g.add((subject, predicate, temp_object))
                            process_object(element, g, object_type, temp_object)

                        ### Otherwise if the value is a primitive, then add it as a literal.
                        elif isinstance(element,(float,int)):
                            g.add((subject, predicate, Literal(element)))



        ####...otherwise if key is mapped to an quantity...
        elif key in mapping["quantities"].keys():

            temp_object = BNode()
            predicate = URIRef(mapping["quantities"][key]["iri"]) #hasQuantitativeProperty
            object_type = URIRef(mapping["quantities"][key]["type"]) #Type, example "Temperature"
            object_value = URIRef(mapping["quantities"][key]["value"])#hasValue
                
            g.add((subject, predicate, temp_object))
            g.add((temp_object, RDF.type, object_type))

            for predicate_, object_ in mapping["quantities"][key]["pred_obj"]:
                g.add((temp_object, URIRef(predicate_), URIRef(object_))) #hasReferenceUnit: SomeUnit


            ####...and the value is a dict with a "values" key, and its value is itself a list, add each value as literal 
            if isinstance(value, dict):
                if ("values" in value.keys()) & isinstance(value["values"], list): 
                    print("and the value is a dict with a values key, and its value is itself a list, add each value as literal ")
                    print(f"Here goes a value of {key}: {value}")
                    for num_value in value["values"]:
                        g.add((temp_object, object_value, Literal(num_value)))

            ####...otherwise if the value is a primitive type, add it as literal, in addition to other quantity statements
            elif isinstance(value, (float,int)):
                g.add((temp_object, object_value, Literal(value)))


        ####...otherwise if key is not mapped to anything, but the value is an object, send it back recursively
        elif isinstance(value, dict):
            process_object(value, g, json_object_type=None, temp_subject=subject)




def get_label(uri):
    for key, value in mapping["@context"].items():
        if value == uri:
            return key
    return uri


def replace_uris_with_labels(obj:dict):

    if isinstance(obj, list):

        iris_list = []

        for item in obj:

            if isinstance(item, dict):                
                iris_list.append(replace_uris_with_labels(item))

            elif isinstance(item, str):
                label = get_label(item) if item.startswith("http") and item in  mapping["@context"].values() else item
                iris_list.append(label)

        return iris_list
    
    elif isinstance(obj, dict):

        new_obj = {}

        for key, value in obj.items():

            # Replace URIs in keys (if they exist in the mapping)
            new_key = get_label(key) if key.startswith("http") and key in  mapping["@context"].values() else key

            # Replace URIs in values (if they exist in the mapping)
            new_value = get_label(value) if isinstance(value, str) and value.startswith("http") and value in  mapping["@context"].values() else value

            new_obj[new_key] = replace_uris_with_labels(new_value)

        return new_obj
    
    else:
        return obj
    


def organize_objects_by_id(graph):
    objects_by_id = {}
    for subj, _, _ in graph:
        if isinstance(subj, BNode):
            obj_id = subj.toPython()
            if obj_id not in objects_by_id:
                objects_by_id[obj_id] = {
                    "@id": obj_id,
                    "properties": {},
                }

    for subj, pred, obj in graph:
        if isinstance(subj, BNode):
            obj_id = subj.toPython()
            if obj_id in objects_by_id:
                predicate_uri = pred.toPython()
                if predicate_uri not in objects_by_id[obj_id]["properties"]:
                    objects_by_id[obj_id]["properties"][predicate_uri] = []

                if isinstance(obj, BNode):
                    obj_val = obj.toPython()
                else:
                    obj_val = (
                        obj.toPython()["@value"]
                        if isinstance(obj, Literal) and isinstance(obj.toPython(), dict) and "@value" in obj.toPython()
                        else obj.toPython()
                    )
                objects_by_id[obj_id]["properties"][predicate_uri].append(obj_val)

    return list(objects_by_id.values())



In [15]:
rdf_graph = Graph()
object_type = URIRef(mapping["type"])
process_object(data, g=rdf_graph, json_object_type=object_type, temp_subject = None)

# FUNCTION
json_list = organize_objects_by_id(rdf_graph)
print(json_list)
#json_ld:str = rdf_graph.serialize(format='json-ld', indent=4)
#jsonld_dict = json.loads(json_ld)

json_list = [replace_uris_with_labels(element) for element in json_list]
#inteligible_jsonld_dict = replace_uris_with_labels(jsonld_dict)

with open("./jsonld_test.json", "w") as f:
    json.dump(json_list, f, indent=4)

and the value is a dict with a values key, and its value is itself a list, add each value as literal 
Here goes a value of conductivity: {'values': [0.008328539145648056], 'temperature': 298, 'meta': {'success': True, 'rating': 2}}
[{'@id': 'Nd6c4779e4cd54899b3e1fdbcdba8b8d8', 'properties': {'http://emmo.info/emmo#EMMO_67fc0a36_8dcb_4ffa_9a43_31074efa3296': ['http://emmo.info/emmo#EMMO_2e5e45fc_f52c_4294_bdc2_5ed7a06dfce7'], 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type': ['http://emmo.info/electrochemistry#electrochemistry_39a44af0_0e1a_4859_b550_bdabad64386e'], 'http://emmo.info/emmo#EMMO_8ef3cd6d_ae58_4a8d_9fc0_ad8f49015cd0': [298]}}, {'@id': 'Ne599575143b247de92616115ad376528', 'properties': {'http://www.w3.org/1999/02/22-rdf-syntax-ns#type': ['http://emmo.info/emmo#EMMO_04b3300c_98bd_42dc_a3b5_e6c29d69f1ac'], 'http://emmo.info/emmo#EMMO_67fc0a36_8dcb_4ffa_9a43_31074efa3296': ['http://emmo.info/emmo#EMMO_f76f5a24_d703_4e8c_b368_f9a7777cb73a'], 'http://emmo.info/emmo#EMMO_8ef3cd6

**Conclusion**

The code above creates the json-ld file, but as a list of objects, without explicit hierarchy between objects, even if their uuids are used within other objects. Tried several ways to make it work, but the end result is always a list of objects .

## Alternative strategy: Flattening the json
* The main idea is to flatten the `single_result.json`, and develop a new mapping file, and see if its easier to develop a script to recast `single_result.json` into a hierarchy of JSON_LD objects, similar to the hierarchy in the `conductivity_two_electrode.json` example.

In [3]:
def flatten_json(json_obj, parent_key='', sep='/'):
    flattened = {}
    for k, v in json_obj.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            flattened.update(flatten_json(v, new_key, sep))
        elif isinstance(v, list):
            for i, item in enumerate(v):
                if isinstance(item, dict):
                    flattened.update(flatten_json(item, f"{new_key}{sep}{i}", sep))
                else:
                    flattened[f"{new_key}{sep}{i}"] = item
        else:
            flattened[new_key] = v
    return flattened

In [4]:
flattened_json = flatten_json(data)
flattened_json

{'uuid': '5362797a-e5d9-45ec-90a2-4f79be84d683',
 'ctime': '2023-07-26T11:22:20',
 'status': 'original',
 'result/data/run_info/formulation/0/chemical/SMILES': '[Li+].F[P-](F)(F)(F)(F)F',
 'result/data/run_info/formulation/0/chemical/InChIKey': 'AXPLOJNSKRXQPA-UHFFFAOYSA-N',
 'result/data/run_info/formulation/0/fraction': 0.05,
 'result/data/run_info/formulation/0/fraction_type': 'molar fraction',
 'result/data/run_info/formulation/1/chemical/SMILES': 'C1COC(=O)O1',
 'result/data/run_info/formulation/1/chemical/InChIKey': 'KMTRUDSVKNLOMY-UHFFFAOYSA-N',
 'result/data/run_info/formulation/1/fraction': 0.45,
 'result/data/run_info/formulation/1/fraction_type': 'molar fraction',
 'result/data/run_info/formulation/2/chemical/SMILES': 'CC1COC(=O)O1',
 'result/data/run_info/formulation/2/chemical/InChIKey': 'RUOJZAUFBMNUDX-UHFFFAOYSA-N',
 'result/data/run_info/formulation/2/fraction': 0.5,
 'result/data/run_info/formulation/2/fraction_type': 'molar fraction',
 'result/data/run_info/internal_r

**Missing**
* Function to identify the flattened keys and map to the right concepts in a mapping file.
* Develop a new mapping json file for the flattened version
* Function to build the hierarchy as in the hand-made example `./conductivity_two_electrode.json`