In [1]:
import numpy as np
import pandas as pd
from sklearn import linear_model

In [2]:
# Read the training data from a CSV file located in the current working directory.
df = pd.read_csv('homeprices.csv')
# Preview the first rows to confirm the 'area' and 'price' columns loaded as expected.
df.head()

Unnamed: 0,area,price
0,2600,550000
1,3000,565000
2,3200,610000
3,3600,680000
4,4000,725000


In [3]:
# Ordinary least-squares regression of 'price' on the single feature 'area'.
# The feature selection uses double brackets so it stays a 2-D DataFrame,
# which is the shape fit() expects for its input matrix.
features = df[['area']]
target = df['price']
model = linear_model.LinearRegression()
model.fit(features, target)

LinearRegression()

In [4]:
# Fitted slope: one coefficient, because the model was trained on the single feature 'area'.
model.coef_

array([135.78767123])

In [5]:
# Fitted intercept: the predicted price when area is 0.
model.intercept_

180616.43835616432

In [6]:
# Predict the price for an area of 5000. The query is wrapped in a DataFrame
# with the same 'area' column that fit() saw, so scikit-learn (>= 1.0) does not
# emit the "X does not have valid feature names" warning raised for bare lists.
model.predict(pd.DataFrame({'area': [5000]}))

array([859554.79452055])

## Save Model To a File Using Python Pickle

In [7]:
import pickle

In [8]:
# Serialize the fitted estimator to the file 'model_pickle'.
# Binary write mode ('wb') is required because pickle produces bytes.
with open('model_pickle', 'wb') as model_file:
    pickle.dump(model, model_file)

## Load Saved Model Using Pickle

In [9]:
# Rebuild the estimator from 'model_pickle' (binary read mode, 'rb').
# NOTE: pickle.load can execute arbitrary code, so only unpickle files you trust.
with open('model_pickle', 'rb') as model_file:
    mp = pickle.load(model_file)

In [10]:
# Slope of the unpickled model; it should equal model.coef_ from the original fit.
mp.coef_

array([135.78767123])

In [11]:
# Intercept of the unpickled model; it should equal model.intercept_ from the original fit.
mp.intercept_

180616.43835616432

In [12]:
# Predict with the unpickled model for an area of 5000. The estimator was fitted
# on a DataFrame with an 'area' column, so the query is passed as a DataFrame
# with that same column; a bare nested list triggers scikit-learn's (>= 1.0)
# "X does not have valid feature names" warning.
mp.predict(pd.DataFrame({'area': [5000]}))

array([859554.79452055])

## Save Trained Model Using joblib

### See "Model persistence" in the scikit-learn documentation

According to the scikit-learn documentation, joblib can be more efficient than pickle when a model contains large NumPy arrays. Functionally, both provide the same ability to save and load a model.

In [13]:
# Legacy import path, kept for reference only: `sklearn.externals.joblib` was
# deprecated in scikit-learn 0.21 and removed in 0.23. Use `import joblib` instead.
# from sklearn.externals import joblib

In [14]:
import joblib

In [15]:
# Persist the fitted estimator to 'model_joblib'; the call returns the list of file names written.
joblib.dump(model, 'model_joblib')

['model_joblib']

## Load Saved Model Using joblib

In [16]:
# Rebuild the estimator from 'model_joblib'.
# NOTE: joblib.load relies on pickle, so only load files from sources you trust.
mj = joblib.load('model_joblib')

In [17]:
# Slope of the joblib-restored model; it should equal model.coef_ from the original fit.
mj.coef_

array([135.78767123])

In [18]:
# Intercept of the joblib-restored model; it should equal model.intercept_ from the original fit.
mj.intercept_

180616.43835616432

In [19]:
# Predict with the joblib-restored model for an area of 5000. The estimator was
# fitted on a DataFrame with an 'area' column, so the query is passed as a
# DataFrame with that same column; a bare nested list triggers scikit-learn's
# (>= 1.0) "X does not have valid feature names" warning.
mj.predict(pd.DataFrame({'area': [5000]}))

array([859554.79452055])