In [1]:
############################### Serializing Python Objects #######################################

# 'pickle' module in Python can store:
# All native datatypes (booleans, integers, floating point numbers, complex numbers, strings, bytes objects, byte arrays, and None.)
# Lists, tuples, dictionaries, and sets containing any combination of native datatypes.
# Functions, classes, and instances of classes

In [2]:
# Saving data to a pickle file

# pickle.dump() -> To save data in pickle file
# pickle.load() -> To load data from pickle file

shell = 1
entry = {}
entry['title'] = 'Dive into history, 2009 edition'
entry['article_link'] = 'http://diveintomark.org/archives/2009/03/27/dive-into-history-2009-edition'
entry['comments_link'] = None
entry['internal_id'] = b'\xDE\xD5\xB4\xF8'
entry['tags'] = ('diveintopython', 'docbook', 'html')
entry['published'] = True
import time
entry['published_date'] = time.strptime('Fri Mar 27 22:20:42 2009')

entry['published_date']

time.struct_time(tm_year=2009, tm_mon=3, tm_mday=27, tm_hour=22, tm_min=20, tm_sec=42, tm_wday=4, tm_yday=86, tm_isdst=-1)

In [8]:
shell = 1
import pickle
with open('entry.pickle', 'wb') as f:
    pickle.dump(entry, f)

In [9]:
# pickle module takes a Python data structure and saves it to a file.
# it serializes the data structure using a data format called “the pickle protocol.”
# pickle protocol is Python-specific. 
# Not every Python data structure can be serialized by the pickle module.
# No guarantee of compatibility between different versions of Python itself. 
# Newer versions of Python support the older serialization formats, but older versions of Python do not support newer formats 


In [10]:
with open('entry.pickle', 'rb') as f:
    entry2 = pickle.load(f)

entry2

{'title': 'Dive into history, 2009 edition',
 'article_link': 'http://diveintomark.org/archives/2009/03/27/dive-into-history-2009-edition',
 'comments_link': None,
 'internal_id': b'\xde\xd5\xb4\xf8',
 'tags': ('diveintopython', 'docbook', 'html'),
 'published': True,
 'published_date': time.struct_time(tm_year=2009, tm_mon=3, tm_mday=27, tm_hour=22, tm_min=20, tm_sec=42, tm_wday=4, tm_yday=86, tm_isdst=-1)}

In [11]:
entry2 == entry

True

In [None]:
# Equality is not the same as identity. entry2 is a copy of entry but both are different

entry2 is entry

In [12]:
# Pickling Without a File

# pickle.dumps() function (note the 's' at the end of the function name) performs the same serialization as the pickle.dump() function.
# Instead of taking a stream object and writing the serialized data to a file on disk, it simply returns the serialized data.

b = pickle.dumps(entry)

In [13]:
type(b)

bytes

In [14]:
entry3 = pickle.loads(b)
entry3 == entry

True

In [15]:
# json module has functions for serializing data structures, storing the serialized data on disk, 
# loading serialized data from disk, and unserializing the data back into a new Python object.

# Differences in json module compared to pickle:
# 1. JSON data format is text-based, not binary. All JSON values are case-sensitive.
# 2. JSON must be stored in a Unicode encoding (UTF-32, UTF-16, or the default, UTF-8)
# 3. allows you to “pretty-print” your JSON data, nicely nesting values within values at different indentation levels

In [16]:
basic_entry = {}
basic_entry['id'] = 256
basic_entry['title'] = 'Dive into history, 2009 edition'
basic_entry['tags'] = ('diveintopython', 'docbook', 'html')
basic_entry['published'] = True
basic_entry['comments_link'] = None

In [17]:
import json
with open('basic.json', mode='w', encoding='utf-8') as f:
    json.dump(basic_entry, f)

In [18]:
with open('basic-pretty.json', mode='w', encoding='utf-8') as f:
    json.dump(basic_entry, f, indent=2) 

In [27]:
# json doesn't support tuple and bytes.

###### Serializing datatypes unsupported by json ######

class CustomSerializer:
    def to_json(python_object):
        if isinstance(python_object, bytes):
            return {'__class__': 'bytes',
                    '__value__': list(python_object)}
        
        if isinstance(python_object, time.struct_time):
            return {'__class__': 'time.asctime',
                    '__value__': time.asctime(python_object)}
        
        raise TypeError(repr(python_object) + ' is not JSON serializable')
    
    def from_json(json_object):                                   
        if '__class__' in json_object:                            
            if json_object['__class__'] == 'time.asctime':
                return time.strptime(json_object['__value__'])    
            if json_object['__class__'] == 'bytes':
                return bytes(json_object['__value__'])            
        return json_object

In [28]:
with open('entry.json', 'w', encoding='utf-8') as f:
    json.dump(entry, f, default=CustomSerializer.to_json)

In [30]:
import json
with open('entry.json', mode='r', encoding='utf-8') as f:
    entry_json = json.load(f, object_hook=CustomSerializer.from_json)

entry_json

{'title': 'Dive into history, 2009 edition',
 'article_link': 'http://diveintomark.org/archives/2009/03/27/dive-into-history-2009-edition',
 'comments_link': None,
 'internal_id': b'\xde\xd5\xb4\xf8',
 'tags': ['diveintopython', 'docbook', 'html'],
 'published': True,
 'published_date': [2009, 3, 27, 22, 20, 42, 4, 86, -1]}