# Save and Load a Model

In [1]:
# import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [2]:
# Load the fish-toxicity data.
# Column names are passed explicitly through `names`, so the first line of the
# file is read as data rather than as a header row.
_headers = [
    'CIC0',
    'SM1',
    'GATS1i',
    'NdsCH',
    'Ndssc',
    'MLOGP',
    'response',
]

# The fields in this file are separated by semicolons, not commas.
df = pd.read_csv(
    'https://raw.githubusercontent.com/PacktWorkshops/The-Data-Science-Workshop/master/Chapter06/Dataset/qsar_fish_toxicity.csv',
    sep=';',
    names=_headers,
)

In [3]:
# Preview the first rows to check that the column names line up with the data.
df.head()

Unnamed: 0,CIC0,SM1,GATS1i,NdsCH,Ndssc,MLOGP,response
0,3.26,0.829,1.676,0,1,1.453,3.77
1,2.189,0.58,0.863,0,0,1.348,3.115
2,2.125,0.638,0.831,0,0,1.348,3.531
3,3.027,0.331,1.472,1,0,1.807,3.51
4,2.094,0.827,0.86,0,0,1.886,5.39


In [4]:
# Separate the predictor columns from the target column.
# `labels` is selected with a one-element list so it stays two-dimensional.
features = df.drop(columns=['response']).values
labels = df.loc[:, ['response']].values

# First hold out 20% of the rows as an evaluation pool ...
X_train, X_eval, y_train, y_eval = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=0,
)

# ... then divide that pool into validation and test sets. No test_size is
# given here, so train_test_split's default proportion applies.
X_val, X_test, y_val, y_test = train_test_split(
    X_eval,
    y_eval,
    random_state=0,
)

In [5]:
# Instantiate an unfitted LinearRegression estimator with its default settings.
model = LinearRegression()

In [6]:
# Fit the model on the training split only (X_train, y_train).
# The call is left as the cell's last expression so the fitted estimator is displayed.
model.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [7]:
# Use the fitted model to predict on the validation dataset.
y_pred = model.predict(X_val)

## import functions for saving and loading models

In [9]:
# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed in
# 0.23, so import the standalone `joblib` package first and only fall back to
# the vendored copy on old scikit-learn installs where `joblib` is not available.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib



### save the model

In [10]:
# Serialise the fitted model to ./model.joblib (relative to the working directory).
joblib.dump(model, './model.joblib')

['./model.joblib']

### load the model

In [11]:
# Restore the saved model from disk into a separate object.
# NOTE: joblib.load unpickles the file, so only load files from a trusted source.
m2 = joblib.load('./model.joblib')

### make a prediction

In [12]:
# Predict on the same validation data with the reloaded model so the results
# can be compared with y_pred.
m2_preds = m2.predict(X_val)

### compare model predictions

In [14]:
# Put both sets of predictions side by side, each flattened to 1-D so it forms
# a single column, and show the first rows for a visual comparison.
ys = pd.DataFrame({
    'predicted': y_pred.ravel(),
    'm2': m2_preds.ravel(),
})
ys.head()

Unnamed: 0,predicted,m2
0,4.155885,4.155885
1,6.398238,6.398238
2,5.183181,5.183181
3,3.771333,3.771333
4,4.593059,4.593059
