# Run H2O on a local machine

In [None]:
import h2o
# Initialise the H2O client with default settings (no arguments are passed).
# Every later cell talks to H2O through this session.
h2o.init()

## Import the data 

In [None]:
# Remote CSV file containing the loans data used by the rest of this notebook.
input_csv = "https://raw.githubusercontent.com/PacktPublishing/Machine-Learning-at-Scale-with-H2O/main/chapt3/loans-lite.csv"

# Load the CSV into H2O, then show the frame's dimensions as the cell output.
loans = h2o.import_file(path=input_csv)
loans.dim

In [None]:
# Preview the top of the imported frame (default number of rows) to
# sanity-check the import before any cleaning.
loans.head()

## Clean the data

In [None]:
# Replace the "bad_loan" column with its factor (categorical) version.
# NOTE(review): presumably needed so the model trained below treats the
# response as a class label rather than a number — confirm.
loans["bad_loan"] = loans["bad_loan"].asfactor()

## Engineer new features from the original data

In [None]:
# Derive two new categorical columns from the "issue_d" date column:
# its year and its month, each converted to a factor.
# The original "issue_d" column is left in the frame; it is excluded from the
# predictor list later on.
loans["issue_d_year"] = loans["issue_d"].year().asfactor()
loans["issue_d_month"] = loans["issue_d"].month().asfactor()

## Model training

In [None]:
# Split the loans frame three ways with a fixed seed. Two ratios are given
# (0.7 and 0.15) and three frames are unpacked, so the third frame (test)
# receives the remaining rows.
train, validate, test = loans.split_frame(
    ratios=[0.7, 0.15],
    seed=1,
)

In [None]:
# Model inputs: every column of the frame except the response ("bad_loan")
# and the raw date column ("issue_d").
predictors = [
    column
    for column in loans.col_names
    if column not in ("bad_loan", "issue_d")
]

In [None]:
from h2o.estimators import H2OXGBoostEstimator
# XGBoost settings: 20 trees, 5 cross-validation folds, and a fixed seed.
param = dict(ntrees=20, nfolds=5, seed=12345)
model = H2OXGBoostEstimator(**param)

# Fit the estimator on the training split to predict "bad_loan" from the
# predictor columns; the validation split is supplied as the validation frame.
model.train(x=predictors, y="bad_loan", training_frame=train, validation_frame=validate)

## Evaluate and explain the model

In [None]:
# Score the trained model against the test split (data not passed to
# train()) and display the resulting metrics as the cell output.
perf = model.model_performance(test_data=test)
perf

In [None]:
# requires matplotlib
# Build model explanations against the test split, restricted to the
# "varimp" explanation only, and display the result as the cell output.
explain = model.explain(test,include_explanations="varimp")
explain

## Export the model scoring artifact

In [None]:
import os

# replace with your local path
# The path is given to download_mojo as a plain string. A leading "~" is
# normally turned into the home directory by a shell, not by Python, so it is
# expanded explicitly here; paths without "~" pass through unchanged.
# NOTE(review): H2O does not appear to expand "~" itself, in which case the
# unexpanded form would create a directory literally named "~" under the
# current working directory — confirm against the H2O version in use.
mojo_destination = os.path.expanduser("~/download-destination-path")
model.download_mojo(mojo_destination)

## Shut down the cluster

In [None]:
# Shut down the H2O cluster this session is using.
# NOTE(review): presumably the frames and model created above live in that
# cluster, so this cell should be run last — confirm.
h2o.cluster().shutdown()