# Pickle

This is an in depth go-through of each core function under the <i>pickle</i> module.

Constants

In [75]:

import pickle

# an integer, the highest protocol version available. This value can be passed as a protocol 
# value to functions dump() and dumps() as well as the Pickler constructor
print(pickle.HIGHEST_PROTOCOL)

# an integer, the default protocol version used for pickling. May be less than HIGHEST_PROTOCOL.
# Currently the default protocol is 4, first introduced in Python 3.4 and incompatible with 
# previous versions
print(pickle.DEFAULT_PROTOCOL)

pickle_list = [pickle.ADDITEMS, pickle.APPEND, pickle.APPENDS, pickle.BINBYTES, pickle.BINBYTES8,
                pickle.BINFLOAT, pickle.BINGET, pickle.BININT, pickle.BININT1, pickle.BININT2, 
               pickle.BINPERSID, pickle.BINPUT, pickle.BINSTRING, pickle.BINUNICODE, pickle.BINUNICODE8
              ]
for const in pickle_list:
    print(const)


4
3
b'\x90'
b'a'
b'e'
b'B'
b'\x8e'
b'G'
b'h'
b'J'
b'K'
b'M'
b'Q'
b'q'
b'T'
b'X'
b'\x8d'


pickle.dump(obj, file, protocol=None, *, fix_imports=True, buffer_callback=None) <br><br>

Write the pickled representation of the object obj to the open file object file. This is equivalent to Pickler(file, protocol).dump(obj).

pickle.load(file, *, fix_imports=True, encoding="ASCII", errors="strict", buffers=None) <br><br>

Read the pickled representation of an object from the open file object file and return the reconstituted object hierarchy specified therein. This is equivalent to Unpickler(file).load().

In [76]:

import pickle

mixed_list = [
    0, 
    1.234567890, 
    'cat', 
    [0.1, 0.2, 0.3], 
    {'a': ord('a'), 'b': ord('b'), 'c': ord('c')}
]
filename = 'mixed.txt'
outfile = open(file=filename, mode='wb')
pickle.dump(obj=mixed_list, file=outfile)
outfile.close()
infile = open(file=filename, mode='rb')
new_mixed_list = pickle.load(file=infile, fix_imports=True, encoding='ASCII', errors='strict')
print(new_mixed_list)


[0, 1.23456789, 'cat', [0.1, 0.2, 0.3], {'a': 97, 'b': 98, 'c': 99}]


In [77]:

class House:
    """Simple model for a house"""
    def __init__(self, area, height, number_of_floors, furniture=None):
        """Metadata and furniture of house"""
        self.area = area
        self.height = height
        self.number_of_floors = number_of_floors
        self.furniture=furniture
    
    def get_area(self):
        """Get area of house"""
        return self.area
    
    def get_height(self):
        """Get height of house"""
        return self.height
        
    def get_num_floors(self):
        """Get number of floors"""
        return self.number_of_floors
    
    def get_furnitures(self):
        """Print out all furnitures"""
        for furniture in self.furniture:
            print(furniture)
        
    def has_furniture(self, furniture):
        """Check for existence of furniture"""
        return furniture in self.furniture


In [78]:

import pickle

house = House(area=3600, height=20, number_of_floors=2, furniture=['Dinner Table', 'Bed', 'Sofa'])
print(house)
house.get_furnitures()
filename = 'house.txt'
with open(file=filename, mode='wb') as outfile:
    pickle.dump(obj=house, file=outfile)
    outfile.close()

with open(file=filename, mode='rb') as infile:
    new_house = pickle.load(file=infile, fix_imports=True, encoding='ascii', errors='strict')
    infile.close()
print(new_house)
new_house.get_furnitures()


<__main__.House object at 0x11f07e710>
Dinner Table
Bed
Sofa
<__main__.House object at 0x11f904b10>
Dinner Table
Bed
Sofa


pickle.loads(data, *, fix_imports=True, encoding="ASCII", errors="strict", buffers=None) <br><br>
Return the reconstituted object hierarchy of the pickled representation data of an object. data must be a bytes-like object.

In [79]:

import pickle

one_hundred = range(100)
print(one_hundred)
one_hundred_bytes = bytes(one_hundred)
print(one_hundred_bytes)
pickle.loads(bytearray(one_hundred_bytes), fix_imports=True, encoding='bytes', errors='strict')


range(0, 100)
b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abc'


UnpicklingError: invalid load key, '\x00'.

class pickle.Pickler(file, protocol=None, *, fix_imports=True, buffer_callback=None) <br><br>
This takes a binary file for writing a pickle data stream.

The optional protocol argument, an integer, tells the pickler to use the given protocol; supported protocols are 0 to HIGHEST_PROTOCOL. If not specified, the default is DEFAULT_PROTOCOL. If a negative number is specified, HIGHEST_PROTOCOL is selected.

The file argument must have a write() method that accepts a single bytes argument. It can thus be an on-disk file opened for binary writing, an io.BytesIO instance, or any other custom object that meets this interface.

If fix_imports is true and protocol is less than 3, pickle will try to map the new Python 3 names to the old module names used in Python 2, so that the pickle data stream is readable with Python 2.

If buffer_callback is None (the default), buffer views are serialized into file as part of the pickle stream.

If buffer_callback is not None, then it can be called any number of times with a buffer view. If the callback returns a false value (such as None), the given buffer is out-of-band; otherwise the buffer is serialized in-band, i.e. inside the pickle stream.

It is an error if buffer_callback is not None and protocol is None or smaller than 5.

dump(obj) <br><br>

Write the pickled representation of obj to the open file object given in the constructor.

persistent_id(obj) <br><br>
Do nothing by default. This exists so a subclass can override it.

If persistent_id() returns None, obj is pickled as usual. Any other value causes Pickler to emit the returned value as a persistent ID for obj. The meaning of this persistent ID should be defined by Unpickler.persistent_load(). Note that the value returned by persistent_id() cannot itself have a persistent ID.

dispatch_table <br><br>
A pickler object’s dispatch table is a registry of reduction functions of the kind which can be declared using copyreg.pickle(). It is a mapping whose keys are classes and whose values are reduction functions. A reduction function takes a single argument of the associated class and should conform to the same interface as a __reduce__() method.

By default, a pickler object will not have a dispatch_table attribute, and it will instead use the global dispatch table managed by the copyreg module. However, to customize the pickling for a specific pickler object one can set the dispatch_table attribute to a dict-like object. Alternatively, if a subclass of Pickler has a dispatch_table attribute then this will be used as the default dispatch table for instances of that class.

reducer_override(self, obj) <br><br>
Special reducer that can be defined in Pickler subclasses. This method has priority over any reducer in the dispatch_table. It should conform to the same interface as a __reduce__() method, and can optionally return NotImplemented to fallback on dispatch_table-registered reducers to pickle obj.

For a detailed example, see Custom Reduction for Types, Functions, and Other Objects.

In [84]:

import pickle
from pickle import Pickler
import random
import os

filename = 'mixed.txt'
file = open(file=filename, mode='wb')
p = Pickler(file=file, protocol=pickle.HIGHEST_PROTOCOL, fix_imports=True)

mixed_list = [True, False, 1, 0, '1', '0', bytes([x for x in random.sample(range(100), 10)])]
p.dump(mixed_list)


class pickle.Unpickler(file, *, fix_imports=True, encoding="ASCII", errors="strict", buffers=None) <br><br>
This takes a binary file for reading a pickle data stream.

The protocol version of the pickle is detected automatically, so no protocol argument is needed.

The argument file must have three methods, a read() method that takes an integer argument, a readinto() method that takes a buffer argument and a readline() method that requires no arguments, as in the io.BufferedIOBase interface. Thus file can be an on-disk file opened for binary reading, an io.BytesIO object, or any other custom object that meets this interface.

The optional arguments fix_imports, encoding and errors are used to control compatibility support for pickle stream generated by Python 2. If fix_imports is true, pickle will try to map the old Python 2 names to the new names used in Python 3. The encoding and errors tell pickle how to decode 8-bit string instances pickled by Python 2; these default to ‘ASCII’ and ‘strict’, respectively. The encoding can be ‘bytes’ to read these 8-bit string instances as bytes objects. Using encoding='latin1' is required for unpickling NumPy arrays and instances of datetime, date and time pickled by Python 2.

If buffers is None (the default), then all data necessary for deserialization must be contained in the pickle stream. This means that the buffer_callback argument was None when a Pickler was instantiated (or when dump() or dumps() was called).

If buffers is not None, it should be an iterable of buffer-enabled objects that is consumed each time the pickle stream references an out-of-band buffer view. Such buffers have been given in order to the buffer_callback of a Pickler object.

load() <br><br>
Read the pickled representation of an object from the open file object given in the constructor, and return the reconstituted object hierarchy specified therein. Bytes past the pickled representation of the object are ignored.

persistent_load(pid) <br><br>
Raise an UnpicklingError by default.

If defined, persistent_load() should return the object specified by the persistent ID pid. If an invalid persistent ID is encountered, an UnpicklingError should be raised.

See Persistence of External Objects for details and examples of uses.

find_class(module, name) <br><br>
Import module if necessary and return the object called name from it, where the module and name arguments are str objects. Note, unlike its name suggests, find_class() is also used for finding functions.

Subclasses may override this to gain control over what type of objects and how they can be loaded, potentially reducing security risks. Refer to Restricting Globals for details.

In [116]:

import pickle
from pickle import Unpickler

print(mixed_list)
filename = 'mixed.txt'
file = open(file=filename, mode='rb')
up = Unpickler(file=file, fix_imports=True, encoding='ASCII', errors='strict')
loaded_mixed_list = up.load()
print(loaded_mixed_list)

from pickle import UnpicklingError
try:
#     up.persistent_load(pid=0)
    pass
except UnpicklingError or AttributeError:
    print('persistent_load called')

print(up.find_class('pickle', 'load'))


[True, False, 1, 0, '1', '0', b'@!]0\x1fRbO7&']
[True, False, 1, 0, '1', '0', b'#\x1dPC?\x0b\x01 9c']
<built-in function load>


class pickle.PickleBuffer(buffer) <br><br>
A wrapper for a buffer representing picklable data. buffer must be a buffer-providing object, such as a bytes-like object or a N-dimensional array.

PickleBuffer is itself a buffer provider, therefore it is possible to pass it to other APIs expecting a buffer-providing object, such as memoryview.

PickleBuffer objects can only be serialized using pickle protocol 5 or higher. They are eligible for out-of-band serialization.

raw() <br><br>
Return a memoryview of the memory area underlying this buffer. The returned object is a one-dimensional, C-contiguous memoryview with format B (unsigned bytes). BufferError is raised if the buffer is neither C- nor Fortran-contiguous.

release() <br><br>
Release the underlying buffer exposed by the PickleBuffer object.

In [124]:
%pip install pickle

[31mERROR: Could not find a version that satisfies the requirement pickle (from versions: none)[0m
[31mERROR: No matching distribution found for pickle[0m
Note: you may need to restart the kernel to use updated packages.


In [125]:

import pickle
from pickle import PickleBuffer

# note that PickleBuffer is new in Python 3.8.3


ImportError: cannot import name 'PickleBuffer' from 'pickle' (/usr/local/anaconda3/lib/python3.7/pickle.py)