# 持久化加载模型
1. 通过 pickle 来序列化和反序列化机器学习的模型
2. 通过 joblib 来序列化和反序列化机器学习的模型

In [6]:
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

import warnings
warnings.filterwarnings("ignore")
# Load the dataset from CSV; column names are supplied explicitly via `names`.
filename = '../data/pima_data.csv'
names = [
    'preg', 'plas', 'pres', 'skin',
    'test', 'mass', 'pedi', 'age',
    'class',
]
data = read_csv(filename, names=names)
# Preview the first five rows as the cell's output.
data.head(n=5)

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [12]:
# Serialize and deserialize a machine-learning model with pickle.
import os
from pickle import dump
from pickle import load

# Columns 0-7 are used as features, column 8 as the target.
array = data.values
X = array[:, 0:8]
Y = array[:, 8]
test_size = 0.33
seed = 4
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=test_size, random_state=seed)
# Train the model
model = LogisticRegression()
model.fit(X_train, Y_train)

# Save the model
model_file = 'model/finalized_model.sav'
# open() does not create missing directories, so make sure the target exists.
os.makedirs(os.path.dirname(model_file), exist_ok=True)
with open(model_file, 'wb') as model_f:
    dump(model, model_f)

# Load the model
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open(model_file, 'rb') as model_f:
    loaded_model = load(model_f)

# Evaluate the restored model on the held-out split; the file handle is no
# longer needed at this point, so scoring happens outside the `with` block.
result = loaded_model.score(X_test, Y_test)
print("算法评估结果：%.3f%%" % (result * 100))

算法评估结果：80.315%


In [13]:
# Serialize and deserialize a machine-learning model with joblib.
import os

# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed
# in 0.23; joblib is a standalone package that scikit-learn depends on.
# Fall back to the vendored copy only on old scikit-learn installs.
try:
    from joblib import dump
    from joblib import load
except ImportError:
    from sklearn.externals.joblib import dump
    from sklearn.externals.joblib import load

# Columns 0-7 are used as features, column 8 as the target.
array = data.values
X = array[:, 0:8]
Y = array[:, 8]
test_size = 0.33
seed = 4
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=test_size, random_state=seed)
# Train the model
model = LogisticRegression()
model.fit(X_train, Y_train)

# Save the model
model_file = 'model/finalized_model_joblib.sav'
# open() does not create missing directories, so make sure the target exists.
os.makedirs(os.path.dirname(model_file), exist_ok=True)
with open(model_file, 'wb') as model_f:
    dump(model, model_f)

# Load the model
# NOTE: joblib files are pickle-based; only load files you trust.
with open(model_file, 'rb') as model_f:
    loaded_model = load(model_f)

# Evaluate the restored model on the held-out split; the file handle is no
# longer needed at this point, so scoring happens outside the `with` block.
result = loaded_model.score(X_test, Y_test)
print("算法评估结果：%.3f%%" % (result * 100))

算法评估结果：80.315%
