## Import packages

In [1]:
from sklearn import datasets, ensemble
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import joblib
import json

## Load the data


Attribute information for the diabetes dataset is documented at https://scikit-learn.org/stable/datasets/toy_dataset.html#diabetes-dataset

In [2]:
# Load scikit-learn's diabetes dataset and separate the feature matrix
# from the regression target.
diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target

## Data preprocessing: train/test split and hyperparameters

In [3]:
# Hold out 10% of the samples as a test set; the fixed seed keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=13, test_size=0.1)

In [4]:
# Keyword arguments later unpacked into the gradient boosting regressor.
params = dict(
    n_estimators=500,
    max_depth=4,
    min_samples_split=5,
    learning_rate=0.01,
    loss="squared_error",
)

## Train model

In [5]:
# Build the gradient boosting regressor from the hyperparameters in `params`.
# random_state is pinned because scikit-learn randomly permutes the features
# at every split: without a fixed seed, ties between equally good splits can
# be broken differently on each run, so the fitted model (and every metric
# derived from it) would not be reproducible after Restart & Run All.
model = ensemble.GradientBoostingRegressor(**params, random_state=13)
model.fit(X_train, y_train)

## Evaluate model

In [6]:
# Score the fitted model on the held-out test set and report the MSE
# rounded to four decimal places.
mse = mean_squared_error(y_true=y_test, y_pred=model.predict(X_test))
print(f"The mean squared error (MSE) on test set: {mse:.4f}")

The mean squared error (MSE) on test set: 3015.4474


## Save model

In [7]:
# Serialize the fitted model to 'model.joblib' (path is relative to the
# current working directory). Left as the last expression so the list of
# written file names is displayed as the cell output.
joblib.dump(value=model, filename='model.joblib')

['model.joblib']

## Load model

In [8]:
# Restore the model from disk, rebinding `model` to the deserialized object.
# NOTE: joblib.load is pickle-based and can execute arbitrary code, so only
# load files from a trusted source (here, the file written by this notebook).
model = joblib.load(filename='model.joblib')

In [28]:
# Predict for the first test sample only; the one-row slice keeps the
# 2-D (n_samples, n_features) shape that predict() expects.
model.predict(X_test[:1])

array([237.27208926])

In [29]:
# Show the feature vector that was passed to predict(), as a one-element
# list holding the first test row.
list(X_test[:1])

[array([-0.00551455, -0.04464164,  0.05630715, -0.03665608, -0.04835136,
        -0.04296262, -0.07285395,  0.03799897,  0.05078203,  0.0569118 ])]