In [None]:
###: Importing H2O Library

In [None]:
import h2o

In [None]:
###: Initializing H2O

In [None]:
# Initialize the H2O session; every later h2o.* call in this notebook relies on it.
h2o.init()

In [None]:
###: Getting H2O cluster status

In [None]:
# Print the status of the H2O cluster this session is attached to.
h2o.cluster().show_status()

In [None]:
###: Importing Dataset

In [None]:
# Source CSVs for the house-price example: `df` is the labelled training data,
# `test` is a separate frame used for scoring at the end of the notebook.
train_csv_url = "https://raw.githubusercontent.com/Avkash/mldl/master/data/house_price_train.csv"
test_csv_url = "https://raw.githubusercontent.com/Avkash/mldl/master/data/house_price_test.csv"

df = h2o.import_file(train_csv_url)
test = h2o.import_file(test_csv_url)

In [None]:
###: Understanding Dataset

In [None]:
# Display H2O's summary description of the training frame.
df.describe()

In [None]:
###: Understanding Dataset with pylab library

In [None]:
import pylab as pl

# Histogram of every column of the training frame (converted to pandas first).
# DataFrame.hist() draws on a new figure of its own, so the size must be given
# to hist() directly; a separate pl.figure(figsize=...) call would only leave
# behind an empty, unused figure while the histograms keep the default size.
df.as_data_frame().hist(figsize=(20, 10))
pl.show()

In [None]:
###: Getting a list of columns in our dataset

In [None]:
# Show the column names of the training frame.
df.col_names

In [None]:
###: Setting Response column

In [None]:
# Name of the target column; it is passed as `y` when AutoML is trained.
response = "medv"

In [None]:
###: Splitting the dataset into train and validation sets

In [None]:
# Hold out roughly 10% of the rows for validation. The split is random, so a
# fixed seed (the same value used for AutoML) keeps the partition, and every
# metric computed from it, reproducible across kernel restarts.
train, valid = df.split_frame(ratios=[0.9], seed=1234)
print(train.shape)
print(valid.shape)

In [None]:
###: Setting all columns/features into a local variable

In [None]:
# Start from every column name of the training frame; the response column
# still has to be excluded before this list is used as the predictor set.
features = df.col_names

In [None]:
###: Removing the response column so that only predictor features remain for model building

In [None]:
# Drop the response column from the predictor list. The list is rebuilt rather
# than mutated with features.remove(response): remove() raises ValueError once
# the response is already gone, which made this cell fail when re-run.
features = [col for col in features if col != response]
print(features)

In [None]:
###: Importing H2O AutoML Library

In [None]:
from h2o.automl import H2OAutoML

In [None]:
###: Setting H2O AutoML model configuration (model cap, seed, runtime budget, project name)

In [None]:
# AutoML run configuration:
#   max_models / max_runtime_secs - upper bounds on the search (per the H2O
#       docs the run stops at whichever limit is reached first)
#   seed         - fixed for reproducibility
#   project_name - identifier of the AutoML project; it is used as an H2O id,
#       so it must not contain spaces (recent h2o releases reject ids with
#       illegal characters), hence underscores instead of "House Price AutoML"
house_price_automl = H2OAutoML(
    max_models=100,
    seed=1234,
    max_runtime_secs=60,
    project_name="House_Price_AutoML",
)

In [None]:
###: Training H2O AutoML 

In [None]:
# Run the AutoML search: predictors in `x`, target column in `y`, fitting on
# the training split and supplying the held-out split as the validation frame.
house_price_automl.train(
    x=features,
    y=response,
    training_frame=train,
    validation_frame=valid,
)

In [None]:
###: Getting the list of models created by AutoML in given time

In [None]:
# Display the leaderboard of models produced by the AutoML run.
house_price_automl.leaderboard

In [None]:
###: Getting the best model or Leader model from the AutoML leader board

In [None]:
# Keep a handle on the leader model of the AutoML run; all following cells use it.
house_price_leader = house_price_automl.leader

In [None]:
###: Understanding best Model (Leader Model) overall performance based on validation data

In [None]:
# Show the leader model's performance metrics for the validation frame (valid=True).
house_price_leader.model_performance(valid=True)

In [None]:
# R^2 of the leader model on the validation frame.
valid_metrics = house_price_leader.model_performance(valid=True)
valid_metrics.r2()

In [None]:
###: Plotting the AutoML Leader Model scoring history (metric selected automatically)

In [None]:
# Plot the leader model, letting H2O pick the metric (metric='AUTO').
# NOTE(review): plotting relies on the model having a scoring history - confirm
# this holds for whichever model type ends up as the leader.
house_price_leader.plot(metric='AUTO')

In [None]:
###: Generating Variable Importance Plot for the Leader Model

In [None]:
# Plot the leader model's variable importances.
# NOTE(review): some model types (e.g. stacked ensembles) may not expose
# variable importances - confirm for the chosen leader.
house_price_leader.varimp_plot()

In [None]:
###: H2O AutoML leader model scoring history

In [None]:
# Display the leader model's scoring history. scoring_history() is the
# supported accessor; score_history() is only a deprecated alias for it.
house_price_leader.scoring_history()

In [None]:
###: Performing predictions on the test dataset with the AutoML Leader model

In [None]:
# Score the test frame with the leader model; the predictions are the cell output.
house_price_leader.predict(test)

In [None]:
###: Getting the R2 metric from the test data using AutoML Leader Model

In [None]:
# R^2 of the leader model on the separate test frame.
# NOTE(review): computing metrics requires the response column to be present
# in `test` - confirm the test CSV includes it.
test_metrics = house_price_leader.model_performance(test_data=test)
test_metrics.r2()