In [4]:
import json
from pyld import jsonld
from rdflib import Graph, Literal, BNode, RDF, URIRef
import utils

with open('../examples_conductivity/single_result.json') as f:
    data = json.load(f)


with open("./mapping.json") as f:
    mapping = json.load(f)

In [5]:
rdf_graph = Graph()
object_type = URIRef(mapping["type"])
filled_rdf_graph = utils.process_object(data, 
                                        g=rdf_graph, 
                                        mapping=mapping,
                                        json_object_type=object_type, 
                                        temp_subject = None)



serialized_jsonld:str = filled_rdf_graph.serialize(format='json-ld', indent=4)
jsonld_list:list = json.loads(serialized_jsonld)

and the value is a dict with a values key, and its value is itself a list, add each value as literal 
Here goes a value of conductivity: {'values': [0.008328539145648056], 'temperature': 298, 'meta': {'success': True, 'rating': 2}}


In [5]:
compacted = jsonld.compact(jsonld_list, mapping["@context"])
with open("./jsonld_test1.json", 'w') as jsonld_file:
    json.dump(compacted, jsonld_file, indent=4, ensure_ascii=False)

## Initial strategy: recursive navigation of original json

 * The code in reads the `./mapping.json` and the `../examples_conductivity/single_result.json` file.
* From `single_result.json` the code find recursively each key and value, navigating through the nesting of the `single_result.json` contents.  
* As the code finds keys and values it creates "triples", which are (subject, predicate, object) statements based on the mapping.json file
    * Each triple is a NODE -LINK-> NODE statement. Many statements build a graph. 
    * The graph is stored in a Graph() object from the rdflib library
* After the graph has been populated with triples, then it is serialized to JSON-LD

**Problem**. While the code works, the serialization is a list of JSON-LD objects. There is no hierarchy wihin the JSON-LD file, as objects with the same id (i.e. the same object) are not nested into each other.

In [4]:
def create_object_store(object_list:list):
    id_dict = {}
    for obj in object_list:
        if "@id" in obj:
            id_value = obj["@id"]
            id_dict[id_value] = obj
    return id_dict

def replace_references(obj):
    if isinstance(obj, list):
        for i, item in enumerate(obj):
            if isinstance(item, dict) and "@id" in item:
                ref_id = item["@id"]
                if ref_id in id_dict:
                    obj[i] = id_dict[ref_id].copy()
                    replace_references(obj[i])  # Recurse for nested objects in the list
    elif isinstance(obj, dict):
        for key, value in obj.items():
            if isinstance(value, list):
                new_list = []
                for item in value:
                    if isinstance(item, dict) and "@id" in item:
                        ref_id = item["@id"]
                        if ref_id in id_dict:
                            new_list.append(id_dict[ref_id].copy())
                            replace_references(new_list[-1])  # Recurse for nested objects in the list
                    else:
                        new_list.append(item)
                obj[key] = new_list
            elif isinstance(value, dict):
                replace_references(value)  # Recurse for nested objects


id_dict = create_object_store(jsonld_list)

replace_references(jsonld_list)



jsonld_list

[{'@id': '5362797a-e5d9-45ec-90a2-4f79be84d683',
  '@type': ['http://emmo.info/emmo#EMMO_0f6f0120_c079_4d95_bb11_4ddee05e530e'],
  'http://emmo.info/emmo#EMMO_5d73661e_e710_4844_ab9b_a85b7e68576a': [{'@id': '_:N85dcc1313cc74d73a970389dde54b4be',
    '@type': ['http://emmo.info/emmo#EMMO_cde4368c_1d4d_4c94_8548_604749523c6d'],
    'http://emmo.info/emmo#EMMO_67fc0a36_8dcb_4ffa_9a43_31074efa3296': [],
    'http://emmo.info/emmo#EMMO_8ef3cd6d_ae58_4a8d_9fc0_ad8f49015cd0': [{'@value': 0.008328539145648056}]}],
  'http://emmo.info/emmo/domain/chameo/chameo#hasSampledSample': [{'@id': '_:N2cf94be1ae30408492d27ebb4569c8e5',
    '@type': ['http://emmo.info/emmo#EMMO_4b3e2374_52a1_4420_8e3f_3ae6b9bf7dff'],
    'http://emmo.info/emmo#EMMO_0aa934ee_1ad4_4345_8a7f_bc73ec67c7e5': [{'@id': '_:Na5c5c36cb04a43bdaa0f8f36488cdbce',
      '@type': ['http://emmo.info/emmo#EMMO_04b3300c_98bd_42dc_a3b5_e6c29d69f1ac'],
      'http://emmo.info/emmo#EMMO_67fc0a36_8dcb_4ffa_9a43_31074efa3296': [],
      'http:/

In [5]:

object_store = create_object_store(jsonld_list)




In [6]:
# jsonld= replace_uris_with_labels(appended_objects)

# with open("./jsonld_test1.json", "w") as f:
#      json.dump(jsonld, f, indent=4)

NameError: name 'appended_objects' is not defined

**Conclusion**

The code above creates the json-ld file, but as a list of objects, without explicit hierarchy between objects, even if their uuids are used within other objects. Tried several ways to make it work, but the end result is always a list of objects .

## Alternative strategy: Flattening the json
* The main idea is to flatten the `single_result.json`, and develop a new mapping file, and see if its easier to develop a script to recast `single_result.json` into a hierarchy of JSON_LD objects, similar to the hierarchy in the `conductivity_two_electrode.json` example.

In [None]:
def flatten_json(json_obj, parent_key='', sep='/'):
    flattened = {}
    for k, v in json_obj.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            flattened.update(flatten_json(v, new_key, sep))
        elif isinstance(v, list):
            for i, item in enumerate(v):
                if isinstance(item, dict):
                    flattened.update(flatten_json(item, f"{new_key}{sep}{i}", sep))
                else:
                    flattened[f"{new_key}{sep}{i}"] = item
        else:
            flattened[new_key] = v
    return flattened

In [None]:
flattened_json = flatten_json(data)
flattened_json

{'uuid': '5362797a-e5d9-45ec-90a2-4f79be84d683',
 'ctime': '2023-07-26T11:22:20',
 'status': 'original',
 'result/data/run_info/formulation/0/chemical/SMILES': '[Li+].F[P-](F)(F)(F)(F)F',
 'result/data/run_info/formulation/0/chemical/InChIKey': 'AXPLOJNSKRXQPA-UHFFFAOYSA-N',
 'result/data/run_info/formulation/0/fraction': 0.05,
 'result/data/run_info/formulation/0/fraction_type': 'molar fraction',
 'result/data/run_info/formulation/1/chemical/SMILES': 'C1COC(=O)O1',
 'result/data/run_info/formulation/1/chemical/InChIKey': 'KMTRUDSVKNLOMY-UHFFFAOYSA-N',
 'result/data/run_info/formulation/1/fraction': 0.45,
 'result/data/run_info/formulation/1/fraction_type': 'molar fraction',
 'result/data/run_info/formulation/2/chemical/SMILES': 'CC1COC(=O)O1',
 'result/data/run_info/formulation/2/chemical/InChIKey': 'RUOJZAUFBMNUDX-UHFFFAOYSA-N',
 'result/data/run_info/formulation/2/fraction': 0.5,
 'result/data/run_info/formulation/2/fraction_type': 'molar fraction',
 'result/data/run_info/internal_r

**Missing**
* Function to identify the flattened keys and map to the right concepts in a mapping file.
* Develop a new mapping json file for the flattened version
* Function to build the hierarchy as in the hand-made example `./conductivity_two_electrode.json`

# Third strategy: JSONLD package

In [9]:
from pyld import jsonld

In [15]:
compacted = jsonld.compact(jsonld_list, mapping["@context"])
with open("./jsonld_test1.json", 'w') as jsonld_file:
    json.dump(compacted, jsonld_file, indent=4, ensure_ascii=False)



In [None]:
with open("./mapping2.json") as f:
    mapping2 = json.load(f)

In [None]:
# Function to recursively apply mappings to data
def apply_mapping(data, mapping):
    if isinstance(data, dict):
        mapped_data = {}
        for key, value in data.items():
            if key in mapping:
                key = mapping[key]  # Map keys to IRIs
            if isinstance(value, str) and value in mapping:
                value = mapping[value]  # Map string values to IRIs
            mapped_data[key] = apply_mapping(value, mapping)
        return mapped_data
    elif isinstance(data, list):
        return [apply_mapping(item, mapping) for item in data]
    else:
        return data

# Apply the mapping to the data
mapped_data = apply_mapping(data, mapping2)


In [None]:
jsonld_data = jsonld.compact(mapped_data, mapping2)
jsonld_data

{'@context': {'fraction': 'http://emmo.info/emmo#EMMO_0aa934ee_1ad4_4345_8a7f_bc73ec67c7e5',
  'molar fraction': 'http://emmo.info/emmo#EMMO_f76f5a24_d703_4e8c_b368_f9a7777cb73a',
  'fraction_type': 'http://emmo.info/emmo#EMMO_67fc0a36_8dcb_4ffa_9a43_31074efa3296',
  'conductivity': 'http://emmo.info/emmo#EMMO_5d73661e_e710_4844_ab9b_a85b7e68576a',
  'temperature': 'http://emmo.info/emmo/domain/chameo/chameo#SamplePreparationParameter',
  'quantity': 'http://emmo.info/emmo#EMMO_5d73661e_e710_4844_ab9b_a85b7e68576a',
  'molecular_dynamics': 'http://emmo.info/emmo#EMMO_d0200cf1_e4f4_45ae_873f_b9359daea3cd',
  'ctime': 'https://schema.org/productionDate',
  'SMILES': 'http://emmo.info/emmo#EMMO_cf604b30_42be_44c7_a379_795236967ec2',
  'InChIKey': 'http://emmo.info/emmo#EMMO_b292d30e_4d60_43f5_9f3e_9c4e00e17a10',
  'chemical': 'http://emmo.info/emmo#EMMO_e2b11f6a_4191_427e_9844_2e0ac88dfc8b',
  'formulation': 'http://emmo.info/emmo#EMMO_4b3e2374_52a1_4420_8e3f_3ae6b9bf7dff'},
 'ctime': '20

In [None]:

# Serialize the data to JSON-LD format
jsonld_data = {
    "@context": mapping2,  # Use the mapping as the JSON-LD context
    **mapped_data  # Include the mapped data
}

# Write the JSON-LD data to data_ld.json
with open('data_ld.json', 'w') as jsonld_file:
    json.dump(jsonld_data, jsonld_file, indent=4)

## Using the FRAME method from JSONLD

In [6]:
with open('./frame.json') as f:
    frame = json.load(f)
formatted_data = jsonld.frame(jsonld_list, frame)
# Write the JSON-LD data to data_ld.json
with open('jsonld_test3.json', 'w') as jsonld_file:
    json.dump(formatted_data, jsonld_file, indent=4)

JsonLdError: ('Invalid JSON-LD syntax; a JSON-LD frame must be a single object.',)
Type: jsonld.SyntaxError
Code: invalid frame
Details: {'frame': [{'@type': ['http://emmo.info/emmo#EMMO_e2b11f6a_4191_427e_9844_2e0ac88dfc8b'], 'http://emmo.info/emmo#EMMO_0aa934ee_1ad4_4345_8a7f_bc73ec67c7e5': [{'@type': ['http://emmo.info/emmo#EMMO_04b3300c_98bd_42dc_a3b5_e6c29d69f1ac']}]}, {'@type': ['http://emmo.info/emmo#EMMO_e2b11f6a_4191_427e_9844_2e0ac88dfc8b'], 'http://emmo.info/emmo#EMMO_0aa934ee_1ad4_4345_8a7f_bc73ec67c7e5': [{'@type': ['http://emmo.info/emmo#EMMO_04b3300c_98bd_42dc_a3b5_e6c29d69f1ac']}]}, {'@type': ['http://emmo.info/emmo#EMMO_e2b11f6a_4191_427e_9844_2e0ac88dfc8b'], 'http://emmo.info/emmo#EMMO_0aa934ee_1ad4_4345_8a7f_bc73ec67c7e5': [{'@type': ['http://emmo.info/emmo#EMMO_04b3300c_98bd_42dc_a3b5_e6c29d69f1ac']}]}]}