In [1]:
import os

import pandas as pd
from azureml.core import Workspace, Experiment, Run
from azureml.train.automl import AutoMLConfig

In [2]:
# Connection details for the blob container that housing.csv is read from.
# Each value is taken from an environment variable when one is set, so the
# account key does not have to be saved inside the notebook. When the variable
# is absent the placeholder is used and must be replaced by hand before running.
storage_account_name = os.environ.get("AZURE_STORAGE_ACCOUNT", "<Storage account name>")
storage_account_key = os.environ.get("AZURE_STORAGE_KEY", "<Storage account key>")
container = os.environ.get("AZURE_STORAGE_CONTAINER", "<Container name>")

In [3]:
# Store the storage account key in the Spark session configuration under the
# per-account "fs.azure.account.key.<account>.blob.core.windows.net" setting.
spark.conf.set(
    "fs.azure.account.key.{0}.blob.core.windows.net".format(storage_account_name),
    storage_account_key,
)

In [4]:
# Read housing.csv from the blob container into a Spark DataFrame. The reader
# is told the file has a header row, to infer column types, and that fields
# are comma-delimited.
data = (
    spark.read
    .options(header="true", inferSchema="true", delimiter=",")
    .csv("wasbs://{0}@{1}.blob.core.windows.net/housing.csv".format(container, storage_account_name))
)

# Print a preview of the loaded rows as a quick sanity check.
data.show()

In [5]:
df = data.toPandas()

In [6]:
df = pd.get_dummies(df, columns=["ocean_proximity"])

In [7]:
df = df.dropna(axis="rows")

In [8]:
# Load the Azure ML workspace from the config.json file looked up under
# /dbfs/FileStore.
ws = Workspace.from_config(
    path="/dbfs/FileStore",
    _file_name="config.json",
)

In [9]:
settings = {
    "iteration_timeout_minutes": 2,
    "iterations": 20,
    "primary_metric": 'spearman_correlation',
    "preprocess": True,
    "n_cross_validations": 5
}

In [10]:
automl_config = AutoMLConfig(
    task='regression',
    iteration_timeout_minutes=1,
    iterations=20,
    primary_metric='r2_score',
    training_data=df,
    label_column_name="median_house_value",
    n_cross_validations=5)

In [11]:
# Experiment named "house-prices" in the loaded workspace; the AutoML run is
# submitted to it.
experiment = Experiment(workspace=ws, name="house-prices")

In [12]:
# Submit the AutoML configuration to the experiment and keep the returned run
# object; show_output=True requests that progress be shown while it runs.
local_run = experiment.submit(
    config=automl_config,
    show_output=True,
)