<a href="https://colab.research.google.com/github/Jushef/Azure-AutoML/blob/main/AutoML%20Jupyter%20Notebook/Titanic_Classification_AutoML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Cargar librerías Azure
from azureml.core import Workspace, Dataset, Datastore, Experiment
from azureml.core.compute import ComputeTarget
from azureml.train.automl import AutoMLConfig
from azureml.train.automl.run import AutoMLRun
from azureml.widgets import RunDetails

# Cargar bibliotecas que no son de Azure
import pandas as pd
import numpy as np

In [None]:
# Report which release of the Azure ML SDK this kernel has loaded
import azureml.core

sdk_version = azureml.core.VERSION
print(f"You are currently using version {sdk_version} of the Azure ML SDK")

You are currently using version 1.37.0 of the Azure ML SDK


In [None]:
# Retrieve your workspace. from_config() is called without arguments, so the
# workspace is resolved from whatever default configuration the SDK locates.
ws = Workspace.from_config()

In [None]:
# Look up the existing compute target that will run AutoML remotely
compute_name = 'automl-compute'
compute_target = ComputeTarget(workspace=ws, name=compute_name)

In [None]:
# Fetch the workspace's default datastore, then look one up explicitly by name.
# Replace the quoted, lowercase name below with your own datastore's name.
datastore = Datastore.get_default(workspace=ws)
my_datastore_name = 'workspaceblobstore'
my_datastore = Datastore.get(workspace=ws, datastore_name=my_datastore_name)

In [None]:
# If the dataset is already registered, fetch its latest version by name
dataset_name = "Titanic Training Data"
dataset = Dataset.get_by_name(
    workspace=ws,
    name=dataset_name,
    version='latest',
)

In [None]:
# Preview the dataset: take the first 10 records and show them as a pandas frame
preview = dataset.take(10)
preview.to_pandas_dataframe()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked
0,False,3,male,22.0,1,0,A/5 21171,7.25,S
1,True,1,female,38.0,1,0,PC 17599,71.2833,C
2,True,3,female,26.0,0,0,STON/O2. 3101282,7.925,S
3,True,1,female,35.0,1,0,113803,53.1,S
4,False,3,male,35.0,0,0,373450,8.05,S
5,False,3,male,,0,0,330877,8.4583,Q
6,False,1,male,54.0,0,0,17463,51.8625,S
7,False,3,male,2.0,3,1,349909,21.075,S
8,True,3,female,27.0,0,2,347742,11.1333,S
9,True,2,female,14.0,1,0,237736,30.0708,C


In [None]:
# Materialize the full dataset and show the rounded mean age for each sex
dfRaw = dataset.to_pandas_dataframe()
mean_age = dfRaw.groupby('Sex')['Age'].mean()
mean_age.round()

Sex
female    28.0
male      31.0
Name: Age, dtype: float64

In [None]:
# Replace missing ages with the rounded mean age of the passenger's sex.
# The fill values are computed from the data instead of being hard-coded: the
# previous literals (31 for male, 28 for female) were hand-copied from the
# output of the cell above and would silently go stale if the registered
# dataset changed. transform('mean') yields one value per row, aligned to
# dfRaw's index, so fillna only touches the rows where Age is missing.
# Rows whose Sex is itself missing get no group mean and keep a null Age.
mean_age_by_sex = dfRaw.groupby('Sex')['Age'].transform('mean').round()
dfRaw['Age'] = dfRaw['Age'].fillna(mean_age_by_sex)

In [None]:
# Encode Age as four mutually exclusive 0/1 band indicators.
# Each boundary is written once (>= on the lower edge, < or <= on the upper),
# so a fractional age lands in exactly one band. The earlier paired bounds
# (e.g. "< 15" together with "> 14") flagged values such as 14.5, 34.5 and
# 60.5 in two bands at once. Whole-number ages are classified as before.
# A null Age fails every comparison and therefore gets 0 in all four columns.
age = dfRaw['Age']
dfRaw['BinUnder15'] = np.where(age < 15, 1, 0)
dfRaw['Bin15to34'] = np.where((age >= 15) & (age < 35), 1, 0)
dfRaw['Bin35to60'] = np.where((age >= 35) & (age <= 60), 1, 0)
dfRaw['BinOver60'] = np.where(age > 60, 1, 0)

In [None]:
# Age has been encoded into the band columns, so drop the raw column
df = dfRaw.drop(columns=['Age'])

In [None]:
# Upload the transformed Titanic frame to the datastore and register it as a dataset
Dataset.Tabular.register_pandas_dataframe(
    dataframe=df,
    target=datastore,
    name="Titanic Transformed",
)

Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to managed-dataset/d11c06a5-e00e-4820-8d58-1c8ba5571442/
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.


{
  "source": [
    "('workspaceblobstore', 'managed-dataset/d11c06a5-e00e-4820-8d58-1c8ba5571442/')"
  ],
  "definition": [
    "GetDatastoreFiles",
    "ReadParquetFile",
    "DropColumns"
  ],
  "registration": {
    "id": "efd849a2-b905-4b52-8495-0c62406f3c00",
    "name": "Titanic Transformed",
    "version": 1,
    "workspace": "Workspace.create(name='automl-example-workspace', subscription_id='0aad4149-65b5-42c8-9b25-ce17f2fed264', resource_group='auto-ml-example-resource-group')"
  }
}

In [None]:
# Fetch the latest version of the registered, transformed Titanic dataset
dataset_name = "Titanic Transformed"
dataset = Dataset.get_by_name(
    workspace=ws,
    name=dataset_name,
    version='latest',
)

In [None]:
# Name the Titanic classification AutoML experiment and bind it to the workspace
experiment_name = 'Titanic-Transformed-Classification'
exp = Experiment(ws, experiment_name)

In [None]:
# Settings shared by the AutoML run configured below
task = "classification"
target_column = "Survived"
primary_metric = "accuracy"
featurization = "auto"

In [None]:
# Count the distinct (non-null) values in the target column
label_values = df[target_column]
num_classes = label_values.nunique()

In [None]:
# Assemble the AutoML configuration from a settings mapping
automl_settings = {
    # What to learn and how to score it
    'task': task,
    'primary_metric': primary_metric,
    'num_classes': num_classes,
    'featurization': featurization,
    # Where to run and what data to train on
    'compute_target': compute_target,
    'training_data': dataset,
    'label_column_name': target_column,
    # Budget and validation
    'experiment_timeout_minutes': 15,
    'enable_early_stopping': True,
    'n_cross_validations': 5,
    # Explainability and ensembling
    'model_explainability': True,
    'enable_stack_ensemble': True,
    'enable_voting_ensemble': True,
}
config = AutoMLConfig(**automl_settings)

In [None]:
# Submit the AutoML Titanic classification run (streaming its output),
# then display the run-details widget for it
AutoML_run = exp.submit(config, show_output=True)
run_widget = RunDetails(AutoML_run)
run_widget.show()

Submitting remote run.
No run_configuration provided, running on automl-compute with default configuration
Running on remote compute: automl-compute


Experiment,Id,Type,Status,Details Page,Docs Page
Titanic-Transformed-Classification,AutoML_cc29e750-576b-419e-b29f-01ef0d30de52,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: ModelSelection. Beginning model selection.

********************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

********************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       DONE
DESCRIPTION:  If the missing values are expected, let the run complete. Otherwise cancel the current run and use a script to customize the handling of missing featu

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

In [None]:
# Register the trained AutoML model in the Azure ML workspace
model_name = 'Titanic-Transformed-Classification-AutoML'
description = 'Best AutoML Classification Run using Transformed Titanic Data.'
tags = dict(project="Titanic", creator="your name")

AutoML_run.register_model(
    model_name=model_name,
    description=description,
    tags=tags,
)

Model(workspace=Workspace.create(name='automl-example-workspace', subscription_id='0aad4149-65b5-42c8-9b25-ce17f2fed264', resource_group='auto-ml-example-resource-group'), name=Titanic-Transformed-Classification-AutoML, id=Titanic-Transformed-Classification-AutoML:1, version=1, tags={'project': 'Titanic', 'creator': 'your name'}, properties={})

In [None]:
# Register the trained AutoML model again, this time selecting by a metric
# other than accuracy (norm_macro_recall)
model_name = 'Titanic-Transformed-Classification-AutoML-NMR'
description = 'Best AutoML Classification Run using Transformed Titanic Data.'
tags = dict(project="Titanic", creator="your name", metric="Norm Macro Recall")
AutoML_run.register_model(
    model_name=model_name,
    description=description,
    tags=tags,
    metric='norm_macro_recall',
)

Model(workspace=Workspace.create(name='automl-example-workspace', subscription_id='0aad4149-65b5-42c8-9b25-ce17f2fed264', resource_group='auto-ml-example-resource-group'), name=Titanic-Transformed-Classification-AutoML-NMR, id=Titanic-Transformed-Classification-AutoML-NMR:1, version=1, tags={'project': 'Titanic', 'creator': 'your name', 'metric': 'Norm Macro Recall'}, properties={})