# Finalize Your Model with pickle
Pickle is the standard way of serializing objects in Python. You can use the pickle operation to serialize your ML algorithms and save the serialized format to a file. You can later load this file to deserialize your model and use it to make new predictions.

In [3]:
# Save Model Using pickle
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from pickle import dump
from pickle import load

# Read the CSV, labelling its nine columns explicitly
filename = 'pima-indians-diabetes.data.csv'
names = [
    'preg', 'plas', 'pres', 'skin', 'test',
    'mass', 'pedi', 'age', 'class',
]
dataframe = read_csv(filename, names=names)

# Columns 0-7 are the model inputs; column 8 ('class') is the target
array = dataframe.values
X, Y = array[:, :8], array[:, 8]

# Hold out 33% of the rows for evaluation; the fixed random_state keeps the
# split identical from run to run
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.33, random_state=7
)

# Train a logistic regression classifier on the training rows
# (fit() returns the estimator itself, so the chained form is equivalent)
model = LogisticRegression().fit(X_train, Y_train)

# Save the fitted model to disk.
# The `with` block closes the file handle as soon as dump() returns, so the
# pickle is fully flushed to disk before any later cell reads it back.
filename = 'finalized_model.sav'
with open(filename, 'wb') as model_file:
    dump(model, model_file)

In [4]:
# Load the model back from disk.
# NOTE: unpickling can execute arbitrary code, so only load files you trust.
# The `with` block guarantees the read handle is closed after loading.
with open('finalized_model.sav', 'rb') as model_file:
    loaded_model = load(model_file)

# Score the restored model on the held-out test split
result = loaded_model.score(X_test, Y_test)
print(result)

0.755905511811


# Finalize Your Model with Joblib
The Joblib library is part of the SciPy ecosystem and provides utilities for pipelining Python jobs. It provides utilities for saving and loading Python objects that make use of NumPy data structures efficiently. This can be useful for some machine learning algorithms that require a lot of parameters or store the entire dataset (e.g. k-Nearest Neighbors).

In [13]:
#Save Model Using joblib
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23. Prefer the standalone joblib package and fall back to the vendored
# copy only on old installations that do not have it.
try:
    from joblib import dump
    from joblib import load
except ImportError:
    from sklearn.externals.joblib import dump
    from sklearn.externals.joblib import load

# Read the CSV, labelling its nine columns explicitly
filename = 'pima-indians-diabetes.data.csv'
names = [
    'preg', 'plas', 'pres', 'skin', 'test',
    'mass', 'pedi', 'age', 'class',
]
dataframe = read_csv(filename, names=names)

# Columns 0-7 are the model inputs; column 8 ('class') is the target
array = dataframe.values
X, Y = array[:, :8], array[:, 8]

# Hold out 33% of the rows for evaluation, seeded so the split is repeatable
seed = 7
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.33, random_state=seed
)

# Train a logistic regression classifier on the training rows
# (fit() returns the estimator itself, so the chained form is equivalent)
model = LogisticRegression().fit(X_train, Y_train)

# Persist the fitted model; this dump() is given the destination path directly.
# It is left as the cell's final expression so its return value is displayed.
filename = 'finalized_joblib_model.sav'
dump(model, filename)

['finalized_joblib_model.sav']

In [14]:
# Load the joblib-saved model back from disk and score it on the held-out data.
# The score must come from loaded_model2 (the object restored on the line
# below), not from loaded_model, which is the pickle-restored model from the
# earlier cell; otherwise the joblib round-trip is never actually evaluated.
loaded_model2 = load('finalized_joblib_model.sav')
result = loaded_model2.score(X_test, Y_test)
print(result)

0.755905511811
