# Serialization example

This example illustrates how to load a dataset from JSON, run the model on that dataset and write the output back to JSON. At the end the example is also shown for `msgpack`.

## Load dependencies

In [1]:
import json
import pprint
from pathlib import Path

from pandas import DataFrame

from power_grid_model import ComponentAttributeFilterOptions, ComponentType, PowerGridModel
from power_grid_model.utils import json_deserialize, json_serialize

## Load a dataset from a JSON file

The data is in the `data/serialized-input.json` file.

### Load the JSON file

This is just for illustration purposes.

In [2]:
with Path("data/serialized_input.json").open() as fp:
    data = fp.read()

pprint.pprint(json.loads(data))

{'attributes': {'node': ['id', 'u_rated'],
                'source': ['id', 'node', 'status', 'u_ref', 'sk'],
                'sym_load': ['id',
                             'node',
                             'status',
                             'type',
                             'p_specified',
                             'q_specified']},
 'data': {'line': [{'c1': 4e-05,
                    'from_node': 1,
                    'from_status': 1,
                    'i_n': 500.0,
                    'id': 4,
                    'r1': 0.11,
                    'tan1': 0.1,
                    'to_node': 2,
                    'to_status': 1,
                    'x1': 0.12},
                   {'c1': 5e-05,
                    'from_node': 2,
                    'from_status': 1,
                    'i_n': 550.0,
                    'id': 5,
                    'r1': 0.15,
                    'tan1': 0.12,
                    'to_node': 3,
                    'to_status': 1,
        

### Deserialize the JSON data

In [3]:
dataset = json_deserialize(data)

print("components:", list(dataset.keys()))
display(dataset[ComponentType.node])
display(DataFrame(dataset[ComponentType.node]))

components: [<ComponentType.node: 'node'>, <ComponentType.line: 'line'>, <ComponentType.source: 'source'>, <ComponentType.sym_load: 'sym_load'>]


array([(1, 10500.), (2, 10500.), (3, 10500.)],
      dtype={'names': ['id', 'u_rated'], 'formats': ['<i4', '<f8'], 'offsets': [0, 8], 'itemsize': 16, 'aligned': True})

Unnamed: 0,id,u_rated
0,1,10500.0
1,2,10500.0
2,3,10500.0


## Run power flow calculation on the loaded input data

In [4]:
model = PowerGridModel(dataset)
output = model.calculate_power_flow()

display(DataFrame(output[ComponentType.node]))

Unnamed: 0,id,energized,u_pu,u,u_angle,p,q
0,1,1,1.03,10815.0,-2.530317e-14,2408998.0,-2863495.0
1,2,1,1.029997,10814.968183,-0.004398,-1010000.0,-210000.0
2,3,1,1.029484,10809.581008,-0.006839956,-1020000.0,-220000.0


## Serialize the output dataset

### Default format

By default, the data is formatted nicely

In [5]:
serialized_output = json_serialize(output)

print(serialized_output)

{
  "version": "1.0",
  "type": "sym_output",
  "is_batch": false,
  "attributes": {},
  "data": {
    "node": [
      {"id": 1, "energized": 1, "u_pu": 1.0300000000010254, "u": 10815.000000010767, "u_angle": -2.5303169101427072e-14, "p": 2408997.8394388668, "q": -2863495.3646741668},
      {"id": 2, "energized": 1, "u_pu": 1.0299969698156055, "u": 10814.968183063858, "u_angle": -0.0043979998047547451, "p": -1009999.9999999704, "q": -210000.00000006545},
      {"id": 3, "energized": 1, "u_pu": 1.0294839055693445, "u": 10809.581008478117, "u_angle": -0.0068399561753802384, "p": -1019999.999999999, "q": -219999.99999996895}
    ],
    "line": [
      {"id": 4, "energized": 1, "loading": 0.39953190919371073, "p_from": 2408997.8394388668, "q_from": -2863495.3646741668, "i_from": 199.76595459685538, "s_from": 3742041.7279784, "p_to": -2252625.7643675441, "q_to": 1403928.5369478234, "i_to": 141.69843328389507, "s_to": 2654305.5911384653},
      {"id": 5, "energized": 1, "loading": 0.19770474

### Compact serialization

In the full result, all attributes are explicitly listed for each component.
In addition, all attributes are listed on a separate whiteline.
This is fairly expensive memory-wise.

If you need a more memory-efficient output, you can tell the serializer to use compact lists and to avoid using redundant newlines and whitespaces.

In [6]:
serialized_output = json_serialize(output, use_compact_list=True, indent=-1)

print(serialized_output)

{"version":"1.0","type":"sym_output","is_batch":false,"attributes":{"node":["id","energized","u_pu","u","u_angle","p","q"],"line":["id","energized","loading","p_from","q_from","i_from","s_from","p_to","q_to","i_to","s_to"],"source":["id","energized","p","q","i","s","pf"],"sym_load":["id","energized","p","q","i","s","pf"]},"data":{"node":[[1,1,1.0300000000010254,10815.000000010767,-2.5303169101427072e-14,2408997.8394388668,-2863495.3646741668],[2,1,1.0299969698156055,10814.968183063858,-0.0043979998047547451,-1009999.9999999704,-210000.00000006545],[3,1,1.0294839055693445,10809.581008478117,-0.0068399561753802384,-1019999.999999999,-219999.99999996895]],"line":[[4,1,0.39953190919371073,2408997.8394388668,-2863495.3646741668,199.76595459685538,3742041.7279784,-2252625.7643675441,1403928.5369478234,141.69843328389507,2654305.5911384653],[5,1,0.19770474338129659,1242625.7643675739,-1613928.5369477719,108.73760885971312,2036880.9765532378,-1019999.999999999,-219999.99999996895,55.7319922698

The compact result is still valid JSON

In [7]:
pprint.pprint(json.loads(serialized_output))

{'attributes': {'line': ['id',
                         'energized',
                         'loading',
                         'p_from',
                         'q_from',
                         'i_from',
                         's_from',
                         'p_to',
                         'q_to',
                         'i_to',
                         's_to'],
                'node': ['id', 'energized', 'u_pu', 'u', 'u_angle', 'p', 'q'],
                'source': ['id', 'energized', 'p', 'q', 'i', 's', 'pf'],
                'sym_load': ['id', 'energized', 'p', 'q', 'i', 's', 'pf']},
 'data': {'line': [[4,
                    1,
                    0.39953190919371073,
                    2408997.839438867,
                    -2863495.364674167,
                    199.76595459685538,
                    3742041.7279784,
                    -2252625.764367544,
                    1403928.5369478234,
                    141.69843328389507,
                    2654305.591

## Msgpack serialization

To have even higher performance and smaller space, you can use the binary format [`msgpack`](https://msgpack.org/). The example below shows a round trip to dump and load `msgpack` data, and intantiate model.

### Serialize to msgpack

We can serialize the output data into `msgpack` binary with and without compact list. The result is a `bytes` object. See the resulted differences in length of the data. The differences will be significant when you have a large dataset.

In [8]:
from power_grid_model.utils import msgpack_serialize

msgpack_data_not_compact = msgpack_serialize(output, use_compact_list=False)
msgpack_data_compact = msgpack_serialize(output, use_compact_list=True)

print(f"Type of the returned objects: {type(msgpack_data_not_compact)}, {type(msgpack_data_compact)}")
print(f"Length of not-compact data: {len(msgpack_data_not_compact)}")
print(f"Length of compact data: {len(msgpack_data_compact)}")

Type of the returned objects: <class 'bytes'>, <class 'bytes'>
Length of not-compact data: 993
Length of compact data: 818


### Deserialize from msgpack

We can deserialize the data we just created. We then print the node result. Note that they are exactly the same.

In [9]:
from power_grid_model.utils import msgpack_deserialize

output_data_not_compact = msgpack_deserialize(msgpack_data_not_compact)
output_data_compact = msgpack_deserialize(msgpack_data_compact)

print("----Node result from not compact data----")
print(DataFrame(output_data_not_compact[ComponentType.node]))
print("----Node result from compact data----")
print(DataFrame(output_data_compact[ComponentType.node]))

----Node result from not compact data----
   id  energized      u_pu             u       u_angle             p  \
0   1          1  1.030000  10815.000000 -2.530317e-14  2.408998e+06   
1   2          1  1.029997  10814.968183 -4.398000e-03 -1.010000e+06   
2   3          1  1.029484  10809.581008 -6.839956e-03 -1.020000e+06   

              q  
0 -2.863495e+06  
1 -2.100000e+05  
2 -2.200000e+05  
----Node result from compact data----
   id  energized      u_pu             u       u_angle             p  \
0   1          1  1.030000  10815.000000 -2.530317e-14  2.408998e+06   
1   2          1  1.029997  10814.968183 -4.398000e-03 -1.010000e+06   
2   3          1  1.029484  10809.581008 -6.839956e-03 -1.020000e+06   

              q  
0 -2.863495e+06  
1 -2.100000e+05  
2 -2.200000e+05  


## Selective deserialization and dataset format

To control the dataset returned by the deserialization functionality, you can use the `data_filter` argument.

### Deserialization to columnar dataset


In [10]:
dataset = json_deserialize(data, data_filter=ComponentAttributeFilterOptions.everything)

print("components:", list(dataset.keys()))
display(dataset[ComponentType.node])

components: [<ComponentType.node: 'node'>, <ComponentType.line: 'line'>, <ComponentType.source: 'source'>, <ComponentType.sym_load: 'sym_load'>]


{'id': array([1, 2, 3], dtype=int32),
 'u_rated': array([10500., 10500., 10500.])}

### Deserialized data format selection per component type

To select specific components and data formats for the deserialized data, provide a dictionary of components and their desired output types to the `data_filter`.

In [11]:
dataset = json_deserialize(
    data,
    data_filter={
        ComponentType.node: None,  # nodes in a row-based data format
        ComponentType.source: ["id", "node", "status", "u_ref", "sk"],  # only specific attributes
        ComponentType.sym_load: ComponentAttributeFilterOptions.everything,  # all attributes as columns
        ComponentType.line: ComponentAttributeFilterOptions.relevant,  # only attributes that are not null/nan
    },
)

print("components:", list(dataset.keys()))
print("node attributes:", list(dataset[ComponentType.node].dtype.names))
print("source attributes:", list(dataset[ComponentType.source].keys()))
print("sym_load attributes:", list(dataset[ComponentType.sym_load].keys()))
print("line attributes:", list(dataset[ComponentType.line].keys()))

components: [<ComponentType.node: 'node'>, <ComponentType.line: 'line'>, <ComponentType.source: 'source'>, <ComponentType.sym_load: 'sym_load'>]
node attributes: ['id', 'u_rated']
source attributes: ['id', 'node', 'status', 'u_ref', 'sk']
sym_load attributes: ['id', 'node', 'status', 'type', 'p_specified', 'q_specified']
line attributes: ['id', 'from_node', 'to_node', 'from_status', 'to_status', 'r1', 'x1', 'c1', 'tan1', 'i_n']


A columnar dataset can also be serialized again, as one would expect.

In [12]:
serialized_output = json_serialize(output)

print(serialized_output)

{
  "version": "1.0",
  "type": "sym_output",
  "is_batch": false,
  "attributes": {},
  "data": {
    "node": [
      {"id": 1, "energized": 1, "u_pu": 1.0300000000010254, "u": 10815.000000010767, "u_angle": -2.5303169101427072e-14, "p": 2408997.8394388668, "q": -2863495.3646741668},
      {"id": 2, "energized": 1, "u_pu": 1.0299969698156055, "u": 10814.968183063858, "u_angle": -0.0043979998047547451, "p": -1009999.9999999704, "q": -210000.00000006545},
      {"id": 3, "energized": 1, "u_pu": 1.0294839055693445, "u": 10809.581008478117, "u_angle": -0.0068399561753802384, "p": -1019999.999999999, "q": -219999.99999996895}
    ],
    "line": [
      {"id": 4, "energized": 1, "loading": 0.39953190919371073, "p_from": 2408997.8394388668, "q_from": -2863495.3646741668, "i_from": 199.76595459685538, "s_from": 3742041.7279784, "p_to": -2252625.7643675441, "q_to": 1403928.5369478234, "i_to": 141.69843328389507, "s_to": 2654305.5911384653},
      {"id": 5, "energized": 1, "loading": 0.19770474