# Serialization

## Using a class (complex object) for serialization

In [1]:
# A simple object made only of built-in types; it is used below to build
# the complex object.
simple_object = {
    'int_list': [1, 2, 3],
    'text': 'string',
    'number': 3.44,
    'boolean': True,
    'none': None,
}

In [2]:
from datetime import datetime
 
class A:
    """Minimal user-defined class that wraps a payload in ``simple_object``.

    Two objects compare equal when the other one also exposes a
    ``simple_object`` attribute and both payloads compare equal.
    """

    def __init__(self, simple_object):
        self.simple_object = simple_object

    def __eq__(self, other):
        # Anything without a ``simple_object`` attribute is never equal.
        return (hasattr(other, 'simple_object')
                and self.simple_object == other.simple_object)

    def __ne__(self, other):
        # Mirror of __eq__: objects lacking the attribute always differ.
        return (not hasattr(other, 'simple_object')
                or self.simple_object != other.simple_object)
 
# Combine a user-defined instance with a datetime value in one dict.
complex_object = {'a': A(simple_object), 'when': datetime(2020, 7, 21)}

In [3]:
# A defines no __repr__, so the instance is shown in the default
# "<... object at 0x...>" form.
print(complex_object)

{'a': <__main__.A object at 0x000001B70F52ECF8>, 'when': datetime.datetime(2020, 7, 21, 0, 0)}


### Dump to a byte string using the highest protocol

In [4]:
import pickle

# The bare last expression makes the notebook display the pickled bytes.
pickle.dumps(complex_object, protocol=pickle.HIGHEST_PROTOCOL)

b'\x80\x04\x95\xaf\x00\x00\x00\x00\x00\x00\x00}\x94(\x8c\x01a\x94\x8c\x08__main__\x94\x8c\x01A\x94\x93\x94)\x81\x94}\x94\x8c\rsimple_object\x94}\x94(\x8c\x08int_list\x94]\x94(K\x01K\x02K\x03e\x8c\x04text\x94\x8c\x06string\x94\x8c\x06number\x94G@\x0b\x85\x1e\xb8Q\xeb\x85\x8c\x07boolean\x94\x88\x8c\x04none\x94Nusb\x8c\x04when\x94\x8c\x08datetime\x94\x8c\x08datetime\x94\x93\x94C\n\x07\xe4\x07\x15\x00\x00\x00\x00\x00\x00\x94\x85\x94R\x94u.'

In [5]:
# Keep the pickled bytes in a variable so they can be loaded back later.
serialize_data_complex = pickle.dumps(complex_object, protocol=pickle.HIGHEST_PROTOCOL)

### Deserialize the data by loading the byte string (the pickle protocol is detected automatically)

In [6]:
import pickle

# NOTE: unpickling can execute arbitrary code, so only load bytes from a
# trusted source. Here they were produced by this notebook itself.
deserialize_data_complex = pickle.loads(serialize_data_complex)
print(deserialize_data_complex)

{'a': <__main__.A object at 0x000001B70F52E2B0>, 'when': datetime.datetime(2020, 7, 21, 0, 0)}


# Python provides a built-in `json` module to encode and decode JSON.

In [7]:
import json

# Encode a flat list of ints and strings as JSON text and show the result.
json_string = json.dumps([1, 2, 3, "a", "b", "c"])
print(json_string)

[1, 2, 3, "a", "b", "c"]


## The same round trip with pickle

In [8]:
import pickle

# Round-trip the same list through pickle: dump to bytes, then load it back.
pickled_string = pickle.dumps([1, 2, 3, "a", "b", "c"])
print(pickle.loads(pickled_string))

[1, 2, 3, 'a', 'b', 'c']


In [9]:
import json
# simple_object holds only built-in types, so no custom encoder is needed.
print(json.dumps(simple_object))

{"int_list": [1, 2, 3], "text": "string", "number": 3.44, "boolean": true, "none": null}


#### The output looks pretty readable, but there is no indentation. For a larger object graph, this can be a problem. Let's indent the output:

In [10]:
import json
# indent=4 pretty-prints the JSON with nested items on their own indented lines.
print(json.dumps(simple_object, indent=4))

{
    "int_list": [
        1,
        2,
        3
    ],
    "text": "string",
    "number": 3.44,
    "boolean": true,
    "none": null
}


#### That looks much better. Let's move on to the complex object graph.

In [23]:
# Deliberate failure: the default encoder cannot serialize the A instance.
# The error is caught and printed so the demonstration stays visible while
# "Restart Kernel -> Run All" can still continue past this cell.
try:
    json.dumps(complex_object)
except TypeError as exc:
    print('{}: {}'.format(type(exc).__name__, exc))

TypeError: Object of type A is not JSON serializable

#### The error message is that the A object is not JSON serializable. 
#### Remember that JSON has a very limited type system and it can't serialize user defined classes automatically. 
#### The way to address it is to subclass the JSONEncoder class used by the json module and implement the default() method, which is called whenever the JSON encoder runs into an object it can't serialize. 

#### The job of the custom encoder is to convert it to a Python object graph that the JSON encoder is able to encode. In this case we have two objects that require special encoding: the datetime object and the A class. The following encoder does the job. Each special object is converted to a dict where the key is the name of the type surrounded by dunders (double underscores). This will be important for decoding. 

In [12]:
from datetime import datetime
import json 
 
class CustomEncoder(json.JSONEncoder):
    """JSON encoder that tags non-JSON objects with their type name.

    * ``datetime`` values become ``{'__datetime__': <ISO 8601 string>}``.
      Microseconds are dropped, so the encoded value has second precision.
    * Any other object that has an instance ``__dict__`` becomes
      ``{'__<ClassName>__': <its __dict__>}``.
    * Objects without a ``__dict__`` (e.g. ``set``, ``bytes``) are passed to
      the base class, which raises ``TypeError`` as the ``json`` module
      documents.
    """

    def default(self, o):
        """Return a JSON-encodable stand-in for ``o``.

        Called by the encoder only for objects it cannot serialize itself.

        Raises:
            TypeError: if ``o`` is not a ``datetime`` and has no ``__dict__``.
        """
        if isinstance(o, datetime):
            return {'__datetime__': o.replace(microsecond=0).isoformat()}
        if not hasattr(o, '__dict__'):
            # No attribute dict to export: let the base class raise the
            # standard "not JSON serializable" TypeError instead of
            # failing with an AttributeError.
            return super().default(o)
        return {'__{}__'.format(o.__class__.__name__): o.__dict__}

In [14]:
# cls=CustomEncoder makes json.dumps use CustomEncoder.default() for the
# objects the standard encoder cannot handle.
serialized = json.dumps(complex_object, indent=4, cls=CustomEncoder)
 
print(serialized)

{
    "a": {
        "__A__": {
            "simple_object": {
                "int_list": [
                    1,
                    2,
                    3
                ],
                "text": "string",
                "number": 3.44,
                "boolean": true,
                "none": null
            }
        }
    },
    "when": {
        "__datetime__": "2020-07-21T00:00:00"
    }
}


#### Check whether the deserialized data equals the original object (deserialized == complex_object?)

In [16]:
# Plain json.loads with no object_hook: the result contains only built-in types.
deserialized = json.loads(serialized)

In [17]:
# Compare the plain-decoded data with the original object graph.
deserialized == complex_object

False

In [18]:
# Show what json.loads produced: the tagged values are still nested dicts.
print(deserialized)

{'a': {'__A__': {'simple_object': {'int_list': [1, 2, 3], 'text': 'string', 'number': 3.44, 'boolean': True, 'none': None}}}, 'when': {'__datetime__': '2020-07-21T00:00:00'}}


In [19]:
from pprint import pprint
 
# pprint lays the nested dict out across several lines for easier reading.
pprint(deserialized)

{'a': {'__A__': {'simple_object': {'boolean': True,
                                   'int_list': [1, 2, 3],
                                   'none': None,
                                   'number': 3.44,
                                   'text': 'string'}}},
 'when': {'__datetime__': '2020-07-21T00:00:00'}}


#### The problem is that the json module doesn't know anything about the A class or even the standard datetime object. 
#### It just deserializes everything by default to the Python object that matches its type system. 
#### In order to get back to a rich Python object graph, you need custom decoding. 

There is no need for a custom decoder subclass. The load() and loads() functions provide the "object_hook" parameter that lets you provide a custom function that converts dicts to objects.

## Custom Decoding

In [20]:
def decode_object(o):
    """``object_hook`` for ``json.load()``/``json.loads()`` that rebuilds tagged objects.

    Args:
        o: a dict decoded from one JSON object.

    Returns:
        * an ``A`` instance when ``o`` contains the ``'__A__'`` key, with the
          attributes stored under that key restored on it;
        * a ``datetime`` when ``o`` contains the ``'__datetime__'`` key,
          parsed from its ISO 8601 string;
        * ``o`` itself, unchanged, otherwise.

    Raises:
        ValueError: if the ``'__datetime__'`` value is not a valid ISO string.
    """
    if '__A__' in o:
        # Restore the saved attributes onto a bare instance. Calling A(o)
        # would first store the tagged wrapper dict itself as
        # ``simple_object``.
        a = A.__new__(A)
        a.__dict__.update(o['__A__'])
        return a
    if '__datetime__' in o:
        # fromisoformat() parses isoformat() output with or without
        # fractional seconds; a fixed strptime format rejects the latter.
        return datetime.fromisoformat(o['__datetime__'])
    return o

### Let's decode by passing the decode_object() function as the object_hook argument of loads().

In [21]:
# object_hook=decode_object lets each decoded JSON object be converted back
# into an A instance or a datetime where it carries the matching tag.
deserialized = json.loads(serialized, object_hook=decode_object)
 
print(deserialized)

{'a': <__main__.A object at 0x000001B70F5E5208>, 'when': datetime.datetime(2020, 7, 21, 0, 0)}


#### Now check whether the deserialized data and the original object are equal

In [22]:
# The 'a' values are compared through A.__eq__, i.e. by their simple_object payloads.
deserialized == complex_object

True