In [None]:
# Load Azure Libraries
from azureml.core import Workspace, Dataset, Datastore, Experiment
from azureml.core.compute import ComputeTarget
from azureml.train.automl import AutoMLConfig
from azureml.train.automl.run import AutoMLRun
from azureml.widgets import RunDetails

# Load Non-Azure Libraries
import pandas as pd
import numpy as np

In [None]:
# Report the version of the Azure ML SDK available in this kernel
import azureml.core
print(f"You are currently using version {azureml.core.VERSION} of the Azure ML SDK")

In [None]:
# Retrieve your workspace from a saved workspace configuration.
# No name or other values are filled in here: from_config() is called
# without arguments.
# NOTE(review): presumably this reads a config.json located in or above the
# notebook's working directory - confirm it is present before running.
ws = Workspace.from_config()

In [None]:
# Look up the existing compute cluster that will run AutoML remotely
compute_name = "compute-cluster"
compute_target = ComputeTarget(workspace=ws, name=compute_name)

In [None]:
# Retrieve a specific Datastore by name (change the name below to use a
# different one), and also retrieve the workspace's default Datastore.
# `datastore` (the default) is the one the dataset registration cells write to.
my_datastore_name = "workspaceblobstore"
my_datastore = Datastore.get(ws, my_datastore_name)
datastore = Datastore.get_default(ws)

In [None]:
# Already registered? Fetch the latest version of the Dataset by its name
dataset_name = "Titanic Training Data"
dataset = Dataset.get_by_name(
    workspace=ws,
    name=dataset_name,
    version="latest",
)

In [None]:
# If you have not registered your Dataset, use the following code

# First, upload the titanic.csv to your Jupyter Notebook
titanic_data_raw = pd.read_csv('titanic.csv')

# Drop columns superfluous to training
titanic_data = titanic_data_raw.drop(
    columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])

# Register the Titanic data as a Dataset to your Datastore and keep the
# returned dataset in `dataset`. The later cells read from `dataset`, so
# without this assignment they fail with a NameError whenever this cell is
# used instead of the "already registered" retrieval cell.
dataset_name = "Titanic Training Data"
dataset = Dataset.Tabular.register_pandas_dataframe(
    dataframe=titanic_data,
    target=datastore,
    name=dataset_name,
)

# Show the registered dataset as the cell output
dataset

In [None]:
# Preview the first 10 records of the dataset as a pandas DataFrame
(
    dataset
    .take(10)
    .to_pandas_dataframe()
)

In [None]:
# Pull the full dataset into pandas, then show the rounded mean Age per Sex
dfRaw = dataset.to_pandas_dataframe()
dfRaw.groupby(['Sex'])['Age'].mean().round()

In [None]:
# Replace null Age values with the rounded mean Age of the passenger's Sex.
# The means are computed from the data rather than typed in by hand (the
# earlier constants 31 for 'male' and 28 for 'female' were presumably copied
# from the output of the mean-Age cell), so they cannot go stale if the
# dataset changes, and any Sex value present in the data is handled.
# transform('mean') returns one value per row, aligned to dfRaw's index, so
# fillna only touches rows whose Age is missing.
mean_age_by_sex = dfRaw.groupby('Sex')['Age'].transform('mean').round()
dfRaw['Age'] = dfRaw['Age'].fillna(mean_age_by_sex)


In [None]:
# Bin the Age column into 4 mutually exclusive 0/1 indicator columns:
#   BinUnder15: Age < 15
#   Bin15to34 : 15 <= Age < 35
#   Bin35to60 : 35 <= Age <= 60
#   BinOver60 : Age > 60
# Each boundary is stated exactly once. The previous `> 14` / `> 34` / `< 61`
# comparisons put a non-integer Age such as 14.5, 34.5 or 60.5 into two bins
# at the same time; whole-number ages land in the same bin as before.
# Rows with a missing Age get 0 in every bin.
dfRaw['BinUnder15'] = np.where(dfRaw.Age < 15, 1, 0)
dfRaw['Bin15to34'] = np.where((dfRaw.Age >= 15)
                              & (dfRaw.Age < 35), 1, 0)
dfRaw['Bin35to60'] = np.where((dfRaw.Age >= 35)
                              & (dfRaw.Age <= 60), 1, 0)
dfRaw['BinOver60'] = np.where(dfRaw.Age > 60, 1, 0)

In [None]:
# The Age bins replace the raw Age column, so leave Age out of the final frame
df = dfRaw.drop(columns=['Age'])

In [None]:
# Register the transformed Titanic DataFrame as a Dataset on your Datastore
Dataset.Tabular.register_pandas_dataframe(
    dataframe=df,
    target=datastore,
    name="Titanic Transformed",
)

In [None]:
# Point `dataset` at the latest version of the transformed Titanic Dataset
dataset_name = "Titanic Transformed"
dataset = Dataset.get_by_name(
    workspace=ws,
    name=dataset_name,
    version="latest",
)

In [None]:
# Name the AutoML Titanic classification experiment and bind it to the workspace
experiment_name = "Titanic-Transformed-Classification"
exp = Experiment(ws, experiment_name)

In [None]:
# Variables describing the AutoML run
task = "classification"          # kind of model to train
target_column = "Survived"       # label column in the training data
primary_metric = "accuracy"      # metric the run optimizes
featurization = "auto"           # featurization mode

In [None]:
# Count the distinct (non-null) values in the target column
num_classes = df[target_column].nunique(dropna=True)

In [None]:
# Build the AutoML configuration.
# Run-control options are collected in a dict and unpacked into the call
# alongside the task, data and compute arguments.
automl_settings = {
    "experiment_timeout_minutes": 15,
    "enable_early_stopping": True,
    "n_cross_validations": 5,
    "model_explainability": True,
    "enable_stack_ensemble": True,
    "enable_voting_ensemble": True,
}

config = AutoMLConfig(
    task=task,
    primary_metric=primary_metric,
    num_classes=num_classes,
    featurization=featurization,
    compute_target=compute_target,
    training_data=dataset,
    label_column_name=target_column,
    **automl_settings,
)

In [None]:
# Submit the configuration to the experiment (with run output shown) to train
# the AutoML Titanic classification model, then display the run details widget
AutoML_run = exp.submit(config, show_output=True)
run_widget = RunDetails(AutoML_run)
run_widget.show()

In [None]:
# Register the trained AutoML model to your AMLS Workspace
model_name = "Titanic-Transformed-Classification-AutoML"
description = "Best AutoML Classification Run using Transformed Titanic Data."
tags = dict(project="Titanic", creator="your name")

AutoML_run.register_model(
    model_name=model_name,
    description=description,
    tags=tags,
)

In [None]:
# Register a model from the same AutoML run, this time selected by
# norm_macro_recall instead of accuracy
model_name = "Titanic-Transformed-Classification-AutoML-NMR"
description = "Best AutoML Classification Run using Transformed Titanic Data."
tags = {
    "project": "Titanic",
    "creator": "your name",
    "metric": "Norm Macro Recall",
}
AutoML_run.register_model(
    model_name=model_name,
    description=description,
    tags=tags,
    metric="norm_macro_recall",
)
