In [1]:
import azureml.core
from azureml.core import Workspace, Experiment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core import Dataset
from azureml.core.experiment import Experiment
from azureml.train.automl import AutoMLConfig

import pandas as pd
import numpy as np

from datetime import datetime
from dateutil.relativedelta import relativedelta
from pandas import read_csv
from numpy import set_printoptions
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression


import logging
import os

In [2]:
# Load the raw Titanic passenger data from the local data folder.
titanic = pd.read_csv("./data/titanic.csv")

In [3]:
# Preview the first rows of the raw frame (left as the cell's last expression
# so the notebook renders it).
titanic.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [4]:
# Build the training frame by removing the columns that are not passed on to
# AutoML; drop() returns a new DataFrame, so `titanic` itself is unchanged.
df = titanic.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])

# Persist the reduced frame without the index column; this file is the one
# uploaded to the datastore further down.
df.to_csv(path_or_buf="./data/prepped_titanic.csv", index=False)

In [5]:
# Preview the prepared frame to confirm the dropped columns are gone.
df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,male,22.0,1,0,7.25,S
1,1,1,female,38.0,1,0,71.2833,C
2,1,3,female,26.0,0,0,7.925,S
3,1,1,female,35.0,1,0,53.1,S
4,0,3,male,35.0,0,0,8.05,S


In [6]:
# Obtain the Azure ML workspace handle via Workspace.from_config(); `ws` is
# reused below for the datastore, the compute target and the experiment.
ws = Workspace.from_config()

In [7]:
# Upload the prepared CSV to the workspace's default datastore, replacing any
# earlier copy under the "data" folder.
def_blob_store = ws.get_default_datastore()
def_blob_store.upload_files(
    files=["./data/prepped_titanic.csv"],
    target_path="data",
    overwrite=True,
)

# Expose the datastore file as a tabular dataset, then register it in the
# workspace (a new version is created on every run of this cell).
titanic_data = Dataset.Tabular.from_delimited_files(
    path=def_blob_store.path('./data/prepped_titanic.csv')
)
titanic_data = titanic_data.register(
    workspace=ws,
    name='titanic_data',
    create_new_version=True,
)

Uploading an estimated of 1 files
Uploading ./data/prepped_titanic.csv
Uploaded ./data/prepped_titanic.csv, 1 files out of an estimated total of 1
Uploaded 1 files


In [8]:
# Name of the AmlCompute cluster that the AutoML run is submitted to.
aml_compute_target = "mdl-cluster"

try:
    # Look the cluster up by name; ComputeTargetException is treated as
    # "no such cluster yet".
    aml_compute = AmlCompute(workspace=ws, name=aml_compute_target)
except ComputeTargetException:
    print("creating new compute target")

    # Single-node cluster that may scale down to zero nodes.
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_D2_V2",
        min_nodes=0,
        max_nodes=1,
    )
    aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)
    # Block until provisioning has finished (20 minute timeout).
    aml_compute.wait_for_completion(
        show_output=True,
        min_node_count=None,
        timeout_in_minutes=20,
    )
else:
    print("found existing compute target.")

print("Azure Machine Learning Compute attached")

found existing compute target.
Azure Machine Learning Compute attached


In [9]:
# Tunable AutoML options; unpacked into AutoMLConfig as keyword arguments.
automl_settings = dict(
    iteration_timeout_minutes=1,   # per-iteration time limit
    iterations=25,                 # number of iterations to run
    primary_metric='accuracy',     # metric used to rank models
    featurization='auto',
    verbosity=logging.INFO,
    n_cross_validations=4,
)

In [10]:
# Classification run that predicts the 'Survived' column of the registered
# dataset on the named cluster; the remaining options come from
# automl_settings.
automl_config = AutoMLConfig(
    task='classification',
    label_column_name='Survived',
    training_data=titanic_data,
    compute_target=aml_compute_target,
    enable_early_stopping=False,
    path='./data',
    debug_log='automated_ml_errors.log',
    **automl_settings
)

In [11]:
# Submit the AutoML configuration under a named experiment and stream the
# run's progress into the cell output.
experiment = Experiment(workspace=ws, name="titanic-experiment-auto-prepped")
remote_run = experiment.submit(config=automl_config, show_output=True)

Submitting remote run.
No run_configuration provided, running on mdl-cluster with default configuration
Running on remote compute: mdl-cluster


Experiment,Id,Type,Status,Details Page,Docs Page
titanic-experiment-auto-prepped,AutoML_f4dd21ab-369c-4c4a-b740-343964146698,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

****************************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       DONE
DESCRIPTION:  If the missing values are expected, let the run complete. Otherwise cancel the current run and use a script to customize the handling of missing feature values that may be more appropriate based on the data type and business req

In [12]:
# NOTE(review): this import lives here rather than in the import cell at the
# top of the notebook.
from azureml.widgets import RunDetails
# Render the monitoring widget for the AutoML run submitted above.
RunDetails(remote_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…