### Exemples de sauvegarde des données / modèles

### Exemple 1 : sauvegarde d'un modèle scikit-learn avec pickle, puis avec joblib

In [1]:
# --- Imports used by this cell ----------------------------------------------
import pickle

# joblib is imported as a standalone package; the original author noted that
# the `sklearn.externals.joblib` import path changed in newer scikit-learn.
import joblib
from sklearn import datasets, svm

# --- Train a small classifier so there is something to persist --------------
iris = datasets.load_iris()
X = iris.data
y = iris.target

clf = svm.SVC(gamma='auto')
clf.fit(X, y)
# Repr of the fitted estimator as recorded when this notebook was written:
#   SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
#       decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
#       max_iter=-1, probability=False, random_state=None, shrinking=True,
#       tol=0.001, verbose=False)

# --- Option 1: pickle, entirely in memory -----------------------------------
# NOTE: only unpickle data you trust; loading a pickle can run arbitrary code.
s = pickle.dumps(clf)     # serialise the fitted model to a bytes object
clf2 = pickle.loads(s)    # rebuild a model from those bytes

# The restored model is expected to predict the true label of the first sample.
print(clf2.predict(X[:1]))   # prints: [0]
print(y[0])                  # prints: 0

# --- Option 2: joblib, on disk ----------------------------------------------
joblib.dump(clf, 'filename.pkl')     # relative path: written to the working directory
clf = joblib.load('filename.pkl')    # `clf` now refers to the model read back from disk


[0]
0


### Un exemple plus complet

In [2]:
import gzip
import os
from tempfile import mkdtemp

import joblib
import numpy as np

# Scratch directory that receives every file written by this cell.
# mkdtemp() does not remove it automatically: call shutil.rmtree(savedir)
# once the files are no longer needed.
savedir = mkdtemp()
filename = os.path.join(savedir, 'test.pkl')

# Object to persist: plain Python containers mixed with a NumPy array.
to_persist = [('a', [1, 2, 3]), ('b', np.arange(10))]


def check_roundtrip(loaded, expected=None):
    """Assert that a reloaded object matches what was saved, then return it.

    Parameters
    ----------
    loaded : list of (key, value) pairs
        Object returned by ``joblib.load``.
    expected : list of (key, value) pairs, optional
        Reference object; defaults to ``to_persist``.

    Returns
    -------
    The ``loaded`` object, unchanged, so the call can wrap ``joblib.load``.

    Raises
    ------
    AssertionError
        If the number of entries, a key, or a value differs.
    """
    if expected is None:
        expected = to_persist
    assert len(loaded) == len(expected), "number of entries changed"
    for (got_key, got_value), (want_key, want_value) in zip(loaded, expected):
        assert got_key == want_key, "key changed during the round trip"
        # Element-wise comparison: `==` on NumPy arrays is ambiguous in a
        # boolean context, so plain list equality cannot be used here.
        np.testing.assert_array_equal(got_value, want_value)
    return loaded


# --- Dump / load by file name -----------------------------------------------
joblib.dump(to_persist, filename)
restored = check_roundtrip(joblib.load(filename))
# restored == [('a', [1, 2, 3]), ('b', array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))]

# --- Dump / load through an already opened file object ----------------------
with open(filename, 'wb') as fo:
    joblib.dump(to_persist, fo)

with open(filename, 'rb') as fo:
    restored = check_roundtrip(joblib.load(fo))

# --- Compression ------------------------------------------------------------
# compress=True requests compression without naming a method; joblib then
# uses zlib.  This creates "test.pkl.compressed".
joblib.dump(to_persist, filename + '.compressed', compress=True)
check_roundtrip(joblib.load(filename + '.compressed'))

# The compression method can also be selected just by the file extension:
# '.z' (zlib), '.gz', '.bz2', '.xz' or '.lzma'.  The last two rely on the
# lzma module, available from Python 3.3 on.
joblib.dump(to_persist, filename + '.z')
check_roundtrip(joblib.load(filename + '.z'))

# Explicit (method, level) tuples: gzip and bz2, both at compression level 3.
for suffix, method in (('.gz', 'gzip'), ('.bz2', 'bz2')):
    joblib.dump(to_persist, filename + suffix, compress=(method, 3))
    check_roundtrip(joblib.load(filename + suffix))

# --- Compressing yourself with a standard-library file object ---------------
# Dump into a gzip.GzipFile opened with compression level 3 (this overwrites
# the '.gz' file written just above), then read it back the same way.
with gzip.GzipFile(filename + '.gz', 'wb', compresslevel=3) as fo:
    joblib.dump(to_persist, fo)

with gzip.GzipFile(filename + '.gz', 'rb') as fo:
    restored = check_roundtrip(joblib.load(fo))

# Last expression of the cell: display the object read back from disk.
restored
