# Auto ML Telecom Churn

In [1]:
import azureml.core
from azureml.core import Workspace, Dataset

# Connect to the Azure ML workspace. from_config() is called with no arguments,
# so it relies on its default lookup (presumably a local config.json — confirm).
ws = Workspace.from_config()

In [9]:
import pandas as pd

# Resolve the workspace's default datastore first; it is the upload target
# for the dataset registration below.
datastore = ws.get_default_datastore()

# Read the raw customer CSV (path is relative to the notebook's working directory).
telcom_churn_data = pd.read_csv("telcom_customer_data.csv")

# Upload the DataFrame to the datastore and register it in the workspace
# under the name 'telecom_churn_ds'.
telecom_churn_ds = Dataset.Tabular.register_pandas_dataframe(
    dataframe=telcom_churn_data,
    target=datastore,
    name='telecom_churn_ds',
)


Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to managed-dataset/17bafadd-ed36-440d-86ac-a787f123d646/
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.


## Crear clúster de entrenamiento

In [3]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "my-ml-cluster"

try:
    # Reuse the compute target if one with this name already exists in the workspace.
    training_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # If it doesn't already exist, create it (scales up to 4 nodes).
    # Provisioning errors are deliberately NOT caught here: printing the error
    # and carrying on would leave `training_cluster` undefined, and the failure
    # would only resurface in a later cell as an unrelated-looking NameError.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_DS11_V2',
        max_nodes=4,
    )
    training_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
    # Block until provisioning finishes, streaming progress to the cell output.
    training_cluster.wait_for_completion(show_output=True)

Found existing cluster, use it.


## Crear Configuración de Experimento Auto ML

In [None]:
!pip install --upgrade azureml-sdk[automl]

In [10]:
from azureml.train.automl import AutoMLConfig

# Automated ML configuration for the churn classifier: a two-class
# classification task optimised for weighted AUC.
automl_config = AutoMLConfig(
    name='Telecom Churn Clasificacion',
    task='classification',
    primary_metric='AUC_weighted',
    # Data: the registered tabular dataset, with 'churn' as the label column.
    training_data=telecom_churn_ds,
    label_column_name='churn',
    num_classes=2,
    n_cross_validations=3,
    featurization='auto',
    # Compute and budget: run on the training cluster, up to 4 iterations in
    # parallel, for at most 4 hours, with early stopping enabled.
    compute_target=training_cluster,
    max_concurrent_iterations=4,
    experiment_timeout_hours=4,
    enable_early_stopping=True,
)

## Correr Experimento

In [11]:
from azureml.core.experiment import Experiment

# Experiment handle in the workspace under which the AutoML run is recorded.
automl_experiment = Experiment(
    workspace=ws,
    name='Telecom-Churn-Clasificacion-Experimento',
)

# Submit the AutoML configuration, then block until the run finishes while
# streaming its progress into the cell output. The wait call stays the last
# expression so its return value is displayed as the cell result.
automl_run = automl_experiment.submit(config=automl_config)
automl_run.wait_for_completion(show_output=True)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
Telecom-Churn-Clasificacion-Experimento,AutoML_1a81f24e-234a-4a0c-ab39-0c288de6304b,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


Experiment,Id,Type,Status,Details Page,Docs Page
Telecom-Churn-Clasificacion-Experimento,AutoML_1a81f24e-234a-4a0c-ab39-0c288de6304b,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: DatasetEvaluation. Gathering dataset statistics.
Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: ModelSelection. Beginning model selection.

********************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

********************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       DONE
DESCRIPTION:  If the missing values are expected, let the run complete. Otherwise cancel the curren

{'runId': 'AutoML_1a81f24e-234a-4a0c-ab39-0c288de6304b',
 'target': 'my-ml-cluster',
 'status': 'Completed',
 'startTimeUtc': '2023-02-18T15:26:35.509684Z',
 'endTimeUtc': '2023-02-18T16:15:33.939297Z',
 'services': {},
   'message': 'No scores improved over last 10 iterations, so experiment stopped early. This early stopping behavior can be disabled by setting enable_early_stopping = False in AutoMLConfig for notebook/python SDK runs.'}],
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'AUC_weighted',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '3',
  'target': 'my-ml-cluster',
  'DataPrepJsonString': '{\\"training_data\\": {\\"datasetId\\": \\"10feb94d-5b0f-4a95-a884-79782e381fd8\\"}, \\"datasets\\": 0}',
  'EnableSubsampling': None,
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'classification',
  'dependencies_versions': '{"azureml-w

## Obtener Mejor Modelo

In [12]:
# get_output() yields the best child run together with its fitted model.
best_run, fitted_model = automl_run.get_output()
print(best_run)

# Print every metric recorded on the best run, one "name value" pair per line.
print('\nBest Run Metrics:')
best_run_metrics = best_run.get_metrics()
for metric_name, metric in best_run_metrics.items():
    print(metric_name, metric)

Run(Experiment: Telecom-Churn-Clasificacion-Experimento,
Id: AutoML_1a81f24e-234a-4a0c-ab39-0c288de6304b_37,
Type: azureml.scriptrun,
Status: Completed)

Best Run Metrics:
AUC_micro 0.697560671643788
recall_score_macro 0.6429883410762379
precision_score_macro 0.6430264691638545
average_precision_score_weighted 0.6897595691066308
AUC_weighted 0.6974576102619424
precision_score_weighted 0.6430938587661021
average_precision_score_macro 0.6896895390676617
log_loss 0.6295686587346672
recall_score_micro 0.6429099681289007
precision_score_micro 0.6429099681289007
weighted_accuracy 0.6428316240578211
balanced_accuracy 0.6429883410762379
f1_score_micro 0.6429099681289007
matthews_correlation 0.2860148055624767
accuracy 0.6429099681289007
f1_score_weighted 0.6428857209438844
AUC_macro 0.6974576102619424
norm_macro_recall 0.28597668215247585
f1_score_macro 0.6428912249989417
average_precision_score_micro 0.6899481238798982
recall_score_weighted 0.6429099681289007
confusion_matrix aml://artifactId

## Registrar Mejor Modelo

In [13]:
# Register the best run's model artifact in the workspace, tagging it with its
# training context and recording the headline metrics as properties.
# The call is left as the cell's last expression so the returned Model is displayed.
best_run.register_model(
    model_name='automl_telecom_churn',
    model_path='outputs/model.pkl',
    tags={'Training context': 'Auto ML'},
    properties={
        'AUC': best_run_metrics['AUC_weighted'],
        'Accuracy': best_run_metrics['accuracy'],
    },
)



Model(workspace=Workspace.create(name='telecom_churn_azure', subscription_id='030feb6f-715f-420c-90a9-4d556309931c', resource_group='telecom_churn_azure'), name=automl_wine_classification_model, id=automl_wine_classification_model:1, version=1, tags={'Training context': 'Auto ML'}, properties={'AUC': '0.6974576102619424', 'Accuracy': '0.6429099681289007'})

## Eliminar clúster de entrenamiento

In [14]:
# Delete the compute target held in `training_cluster` (the cluster that was
# passed as compute_target to the AutoML configuration).
training_cluster.delete()