## Save and Load H2O Ensemble Models

To load and use an H2O Ensemble model, you must also load its base learners. In this example, we will save an ensemble and its base learners to S3 so that we can load them at any time in a new H2O instance.

This notebook assumes:
1. You have already trained an H2O ensemble model called 'ensemble' whose model id is 'ensemble_id'
2. You have a Python list of the base model ids called 'baseList'


In [None]:
import os
from boto.s3.connection import S3Connection
from boto.s3.key import Key

In [None]:
# Function to upload a file to S3

def upload_file_to_s3(myFile):
    def get_bucket():
        access=  # Enter your access key
        secret=  # Enter your secret key
        conn = S3Connection(access,secret)
        b = conn.get_bucket('YOUR-BUCKET-NAME-GOES-HERE',validate=False)  # Enter your bucket name 
        return b

    s3_bucket = get_bucket()
    k = Key(s3_bucket)    
    k.key = myFile
    k.set_contents_from_filename(myFile)
    k.make_public()
    successMessage = "Uploaded %s to S3."%(myFile)    
    return successMessage

In [None]:
# Save every base learner of the ensemble locally, then push each saved file to S3

for model_id in baseList:                            # baseList holds the ids of the models used in the ensemble
    myFile = h2o.save_model(h2o.get_model(model_id), force=True)   # Fetch the model by id and save it
    upload_file_to_s3(myFile)                        # Send the saved model file to s3
    print("Uploaded " + str(model_id))

In [None]:
# Local directory of the last saved base model; the csv of model ids is written next to it.
myPath = os.path.dirname(os.path.abspath(myFile))
filename = '/'.join([myPath, 'uploadBaseList.csv'])

# Store the model ids in a dataframe, write it out as a csv, then upload that csv to s3.
uploadBaseList = pd.DataFrame(baseList)
uploadBaseList.to_csv(filename, header=None)
upload_file_to_s3(filename)
print("Uploaded " + str(filename))

# Save the ensemble itself and upload it as well.
# If your model is called something different, change 'ensemble' to your model name.
ensembleModelFile = h2o.save_model(ensemble, force=True)
upload_file_to_s3(ensembleModelFile)
print("Uploaded " + str(ensembleModelFile))

In [None]:
def pull_file_from_s3(key):
    def get_bucket():            
        access=  # Enter your access key
        secret=  # Enter your secret key
        conn = S3Connection(access,secret)
        b = conn.get_bucket('YOUR-BUCKET-NAME-GOES-HERE',validate=False)  # Enter your bucket name 
        return b

    s3_bucket = get_bucket()
    payload = s3_bucket.get_key(key)
    local_file = payload.get_contents_to_filename(key)
    return key

In [None]:
# Read the csv containing the list of model ids straight from its S3 URL into a pandas dataframe
# Replace the bucket name and adjust the aws path to match where you uploaded the file

baseModelids = pd.read_csv(
    'https://s3-us-west-1.amazonaws.com/YOUR-BUCKET-NAME-GOES-HERE/home/jupyter/uploadBaseList.csv',
    sep=',',
    header=None,
)



In [None]:
# Fetch every base learner listed in the csv from S3 and load it into H2O.
# Column 1 of the dataframe holds the model ids.
for model_id in baseModelids.iloc[:, 1]:
    model_path = '/home/jupyter/' + str(model_id)
    pull_file_from_s3(model_path)
    h2o.load_model(model_path)
    print('Loaded: ' + str(model_id))


In [None]:
# Fetch the saved ensemble from s3; the helper returns the path it downloaded to
ensemble_key = '/home/jupyter/Predict-Churn/ensemble_id'
downloaded_model = pull_file_from_s3(ensemble_key)

# Load the downloaded ensemble into the running H2O instance
myEnsmblePredictor = h2o.load_model(downloaded_model)
