# Train with RAPIDS

In [1]:
from azureml.core import Workspace

# Load the workspace described by the local config file; the bare `ws`
# on the last line makes the workspace object the cell's displayed output.
ws = Workspace.from_config(path='~/code/default.json')
ws

Workspace.create(name='default', subscription_id='6560575d-fa06-4e7d-95fb-f962e74efd7a', resource_group='azureml-examples')

In [2]:
import git
from pathlib import Path

# Repository root: search upward from the current directory for the enclosing git repo
prefix = Path(git.Repo('.', search_parent_directories=True).working_tree_dir)

# Training script: file name and the directory (inside the repo) that holds it
script_name = 'train.py'
script_dir = prefix / 'code' / 'models' / 'rapids'

# Azure ML settings -- the environment and the experiment share one name
compute_target = "gpu-cluster"
environment_name = experiment_name = "rapids-airline-example"

In [3]:
# Show the training script's source in the notebook. Path.read_text() opens,
# reads and closes the file, so no file handle is left open behind the cell
# (the previous bare open(...).read() never closed it).
print((script_dir / script_name).read_text())

#
# Copyright (c) 2019-2020, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import argparse
import os
import time

import numpy as np
import pandas as pd
import cudf
import cuml

from cuml import RandomForestClassifier as cuRF
from cuml.preprocessing.model_selection import train_test_split
from cuml.metrics.accuracy import accuracy_score

from rapids_csp_azure import RapidsCloudML, PerfTimer
from azureml.core.run import Run

run = Run.get_context()

def main():
    parser =

In [8]:
from azureml.core import ScriptRunConfig, Experiment, Environment, Dataset
from azureml.core.runconfig import MpiConfiguration

# File dataset pointing at the "airline-20m" container on Azure blob storage.
# The commented-out line below is the alternative, larger "airline-10years" data.
ds = Dataset.File.from_files(
    path='https://airlinedataset.blob.core.windows.net/airline-20m/*'
)
# ds = Dataset.File.from_files('https://airlinedataset.blob.core.windows.net/airline-10years/*') # larger data

# Command-line arguments handed to the training script: the dataset (as a
# mount) for --data_dir and the bin count for --n_bins.
arguments = [
    '--data_dir', ds.as_mount(),
    '--n_bins', 32,
]

# Dockerfile text used as the base of the run environment. It starts from a
# RAPIDS 0.15 image and, in a single RUN step, installs fuse and the
# OpenMPI/SSH packages, runs `source activate rapids`, then pip-installs the
# azureml packages.
# NOTE: this is a regular (non-raw) string, so every backslash-newline pair is
# consumed by Python as a line continuation -- the RUN command reaches Docker
# as one long line, not as backslash-continued lines.
dockerfile = """
FROM rapidsai/rapidsai:0.15-cuda10.2-runtime-ubuntu18.04-py3.7
RUN apt-get update && \
apt-get install -y fuse && \
apt-get install -y openmpi-bin openmpi-common openssh-client openssh-server libopenmpi2 libopenmpi-dev && \
source activate rapids && \
pip install azureml-sdk && \
pip install azureml-dataprep && \
pip install azureml-widgets
"""

# Azure ML environment built from the Dockerfile text above instead of a named base image.
env = Environment(environment_name)
env.docker.enabled = True
# base_image is cleared before base_dockerfile is assigned; keep this order.
# NOTE(review): the two settings look mutually exclusive -- confirm against the SDK docs.
env.docker.base_image = None
env.docker.base_dockerfile = dockerfile
# NOTE(review): presumably tells Azure ML to use the Python packages already
# present in the image rather than building its own environment -- confirm.
env.python.user_managed_dependencies = True

# MPI launch settings: a single node running two processes
distr_config = MpiConfiguration(node_count=1, process_count_per_node=2)

# Everything the job needs: which script to run, with what arguments,
# in which environment, on which compute target, and how it is distributed.
src = ScriptRunConfig(
    source_directory=script_dir,
    script=script_name,
    arguments=arguments,
    compute_target=compute_target,
    environment=env,
    distributed_job_config=distr_config,
)

# Submit under the named experiment; the bare `run` displays the run as the cell output.
run = Experiment(workspace=ws, name=experiment_name).submit(config=src)
run

Experiment,Id,Type,Status,Details Page,Docs Page
rapids-airline-example,rapids-airline-example_1601494194_1354c796,azureml.scriptrun,Starting,Link to Azure Machine Learning studio,Link to Documentation


In [9]:
from azureml.widgets import RunDetails

# Render the Azure ML run-details widget for the run held in `run`.
RunDetails(run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [None]:
# Wait for the submitted run to complete; show_output=True requests that the
# run's output be shown in the cell while waiting.
run.wait_for_completion(show_output=True)