In [11]:
import h2o
from h2o.estimators.deeplearning import H2ODeepLearningEstimator

# Input files, given relative to the notebook's working directory.
# Each dataset (2 and 3) comes as a pair: a class file and a data file.
_DATASET_ROOT = 'Datasets'
CLASS_2_DIR = _DATASET_ROOT + '/breast-2-class.txt'
DATA_2_DIR = _DATASET_ROOT + '/breast-2-data.txt'
CLASS_3_DIR = _DATASET_ROOT + '/breast-3-class.txt'
DATA_3_DIR = _DATASET_ROOT + '/breast-3-data.txt'

In [12]:
# Connect to (or start) an H2O cluster, then clear every object it currently holds
h2o.init()
h2o.remove_all()

# (path, header) pairs in load order. All files are parsed as tab-separated;
# the class files are read with header=-1 and the data files with header=1.
_sources = (
    (CLASS_2_DIR, -1),
    (DATA_2_DIR, 1),
    (CLASS_3_DIR, -1),
    (DATA_3_DIR, 1),
)
breast_2_class, breast_2_data, breast_3_class, breast_3_data = [
    h2o.import_file(path=source_path, header=header_flag, sep='\t')
    for source_path, header_flag in _sources
]

Checking whether there is an H2O instance running at http://localhost:54321..... not found.
Attempting to start a local H2O server...
; OpenJDK 64-Bit Server VM (Zulu 8.20.0.5-win64) (build 25.121-b15, mixed mode)
  Starting server from C:\Users\andyg\Anaconda3\envs\tensorflow\lib\site-packages\h2o\backend\bin\h2o.jar
  Ice root: C:\Users\andyg\AppData\Local\Temp\tmp46e2xadk
  JVM stdout: C:\Users\andyg\AppData\Local\Temp\tmp46e2xadk\h2o_andyg_started_from_python.out
  JVM stderr: C:\Users\andyg\AppData\Local\Temp\tmp46e2xadk\h2o_andyg_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321... successful.


0,1
H2O cluster uptime:,01 secs
H2O cluster timezone:,Australia/Sydney
H2O data parsing timezone:,UTC
H2O cluster version:,3.20.0.6
H2O cluster version age:,1 month and 20 days
H2O cluster name:,H2O_from_python_andyg_3lpnza
H2O cluster total nodes:,1
H2O cluster free memory:,3.523 Gb
H2O cluster total cores:,12
H2O cluster allowed cores:,12


Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%


In [13]:
# Data preparation and manipulation
def transpose_frame(h2o_df, column_name):
    """Transpose an H2OFrame (via pandas) and assign names to the new columns.

    Args:
        h2o_df: Frame to transpose; it is pulled into pandas with
            ``as_data_frame(use_pandas=True)``.
        column_name: Selects how the transposed columns are named:
            ``'class'``      - the columns are set to ``['class']``, so the
                               transposed frame must have exactly one column.
            ``'categories'`` - the first transposed row becomes the column
                               names, then the row labelled ``'categories'``
                               is dropped.
            anything else    - columns are named ``C0``, ``C1``, ...

    Returns:
        A new ``h2o.H2OFrame`` built from the transposed pandas frame.
    """
    transposed = h2o_df.as_data_frame(use_pandas=True).transpose()

    if column_name == 'categories':
        # Promote the first row to column names, then remove the row whose
        # index label is `column_name`.
        # NOTE(review): presumably that is the same first row - confirm.
        transposed.columns = transposed.iloc[0]
        transposed = transposed.drop(labels=column_name, axis=0)
    elif column_name == 'class':
        transposed.columns = [column_name]
    else:
        column_count = len(transposed.columns)
        transposed.columns = ['C{}'.format(position) for position in range(column_count)]

    return h2o.H2OFrame(transposed)

def randomize_frame(h2o_df, seed=None):
    """Return a new H2OFrame containing the rows of ``h2o_df`` in shuffled order.

    Args:
        h2o_df: Frame to shuffle; it is pulled into pandas with
            ``as_data_frame(use_pandas=True)``.
        seed: Optional value forwarded to ``DataFrame.sample(random_state=...)``.
            The default ``None`` gives a different order on each call; pass an
            int for a repeatable shuffle.

    Returns:
        A new ``h2o.H2OFrame`` built from the shuffled rows, with the pandas
        index reset to 0..n-1.
    """
    pd_df = h2o_df.as_data_frame(use_pandas=True)
    # DataFrame.sample returns a NEW frame rather than shuffling in place.
    # The result has to be kept, otherwise the rows go back out unshuffled.
    shuffled = pd_df.sample(frac=1, random_state=seed).reset_index(drop=True)
    return h2o.H2OFrame(shuffled)

# Transpose each class/data frame and name its columns
breast_2_class_t = transpose_frame(breast_2_class, 'class')
breast_2_data_t = transpose_frame(breast_2_data, 'categories')
breast_3_class_t = transpose_frame(breast_3_class, 'class')
breast_3_data_t = transpose_frame(breast_3_data, 'categories')

# Per dataset: append the class column to the data columns.
# Then stack the two datasets row-wise into a single frame.
breast_2 = randomize_frame(breast_2_data_t.cbind(breast_2_class_t))
breast_3 = randomize_frame(breast_3_data_t.cbind(breast_3_class_t))
breast = randomize_frame(breast_2.rbind(breast_3))

# Response column, and every other column as a predictor
label = 'class'
features = list(breast.names)
features.remove(label)

# Make the response categorical ONCE, on the combined frame, before splitting.
# Both splits then inherit the same factor column instead of being converted
# independently.
breast[label] = breast[label].asfactor()

# Fixed seed so the 75/25 train/validation split is the same on every run
SPLIT_SEED = 1234
train, valid = breast.split_frame(ratios=[0.75], seed=SPLIT_SEED)

  data = _handle_python_lists(python_obj.as_matrix().tolist(), -1)[1]


Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%


In [14]:
# Model properties and training
# Four hidden layers (190, 63, 21, 7), rectifier-with-dropout activation,
# 50 epochs and an input dropout ratio of 0.1.
_dl_params = {
    'activation': "rectifier_with_dropout",
    'hidden': [190, 63, 21, 7],
    'epochs': 50,
    'input_dropout_ratio': 0.1,
}
model = H2ODeepLearningEstimator(**_dl_params)
model.train(
    x=features,
    y=label,
    training_frame=train,
    validation_frame=valid,
)

deeplearning Model Build progress: |██████████████████████████████████████| 100%


In [17]:
# Model predictions and validation
# Score the validation frame and show the predictions it produced
predictions = model.predict(valid)
print(predictions)

# Confusion matrix computed on the same validation frame
validation_confusion = model.confusion_matrix(valid)
print(validation_confusion)

deeplearning prediction progress: |███████████████████████████████████████| 100%


predict,p0,p1,p2
1,0.248431,0.681521,0.0700479
1,0.161127,0.820739,0.0181336
1,0.225486,0.748883,0.0256309
1,0.316113,0.429582,0.254305
1,0.102561,0.893471,0.00396788
1,0.271666,0.497467,0.230867
1,0.340223,0.430269,0.229508
1,0.0808501,0.917064,0.00208545
1,0.0804313,0.91741,0.00215902
1,0.151865,0.833144,0.0149919



Confusion Matrix: Row labels: Actual class; Column labels: Predicted class



0,1,2,3,4
0.0,1.0,2.0,Error,Rate
9.0,2.0,0.0,0.1818182,2 / 11
1.0,22.0,0.0,0.0434783,1 / 23
6.0,1.0,0.0,1.0,7 / 7
16.0,25.0,0.0,0.2439024,10 / 41



