# H2O Use Case - Predictive Maintenance

- Data source: UCI SECOM dataset (semiconductor manufacturing process data) — https://archive.ics.uci.edu/ml/datasets/SECOM
- H2O Basics: train a default Gradient Boosting Machine (GBM) for binary classification.

In [None]:
# Attach the h2o package quietly (no start-up banner)
suppressPackageStartupMessages(
  library("h2o")
)

In [None]:
# Launch a local H2O cluster (or attach to one that is already running).
# nthreads = -1 asks H2O to use all available CPU cores.
h2o.init(
  nthreads = -1
)

In [None]:
# Read the SECOM CSV into the cluster and store it under the key "h_secom"
h_secom <- h2o.importFile(
  path = "secom.csv",
  destination_frame = "h_secom"
)

In [None]:
# List every column name of the imported frame
colnames(x = h_secom)

In [None]:
# Inspect the "Classification" column as imported (before any conversion)
summary(
  h_secom$Classification,
  exact_quantiles = TRUE
)

In [None]:
# "Classification" is imported as a numeric column.
# Recode it as a factor so that it holds categorical values instead.
h_secom$Classification <- as.factor(
  h_secom$Classification
)

In [None]:
# Inspect "Classification" once more, now that it is a factor
summary(
  h_secom$Classification,
  exact_quantiles = TRUE
)

In [None]:
# Response column (y) and predictor columns (x):
# every column except the response is used as a feature
target <- "Classification"
features <- setdiff(
  x = colnames(h_secom),
  y = target
)
print(features)

In [None]:
# Randomly partition the frame into a training part and a test part.
# The seed makes the partition reproducible; the row counts are only
# approximately 70% / 30% (see the dim() output in the next cell).
h_split <- h2o.splitFrame(
  data = h_secom,
  ratios = 0.7,
  seed = 1234
)
h_train <- h_split[[1]] # first part, ~70% of rows
h_test <- h_split[[2]] # remainder, ~30% of rows

In [None]:
# Rows x columns of the training frame
dim(h_train)
# Rows x columns of the test frame
dim(h_test)

In [None]:
# Summarise the target column in the training frame ...
summary(
  h_train$Classification,
  exact_quantiles = TRUE
)
# ... and in the test frame
summary(
  h_test$Classification,
  exact_quantiles = TRUE
)

In [None]:
# Fit a Gradient Boosting Machine on the training frame.
# Only the seed is set (for reproducibility); all other
# hyper-parameters are left at their H2O defaults.
model <- h2o.gbm(
  x = features,
  y = target,
  training_frame = h_train,
  seed = 1234
)

In [None]:
# Display the summary of the fitted model
summary(object = model)

In [None]:
# Evaluate the model on the held-out test frame
h2o.performance(
  model = model,
  newdata = h_test
)

# Making Predictions

In [None]:
# Score the test frame with the fitted model
yhat_test <- h2o.predict(
  object = model,
  newdata = h_test
)

In [None]:
# Preview the first 10 rows of the predictions
head(yhat_test, n = 10)