<a href="https://colab.research.google.com/github/FranciscoOcampoPredictiva/azureml_course/blob/main/Lecture_4_Train_ML_Model_for_Webservice_Deployment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Step 1 - Installation and Setup


In [None]:
# Install azureml SDK package
! pip install -q azureml-sdk

In [None]:
# Import the Azure ML classes used in this notebook
from azureml.core import Workspace, Dataset, Experiment

In [None]:
# Build the workspace object from the connection details in the config file
workspace_config_path = '/content/config.json'
ws = Workspace.from_config(path=workspace_config_path)

Performing interactive authentication. Please follow the instructions on the terminal.




Interactive authentication successfully completed.


# Step 2 - Accessing the Input Dataset

In [None]:
# Fetch the dataset called 'Churn' from the workspace
churn_dataset_name = 'Churn'
input_dataset = Dataset.get_by_name(workspace=ws, name=churn_dataset_name)

# Step 3 - Create an Experiment

In [None]:
# Experiment object bound to the workspace
experiment_name = 'Webservice-Experiment'
experiment = Experiment(workspace=ws, name=experiment_name)

# Open a run under this experiment; later cells log a metric to it and complete it
new_run = experiment.start_logging()

# Step 4 - ML Model Training

In [None]:
import pandas as pd

# Pull the contents of the input dataset into a pandas DataFrame
df = input_dataset.to_pandas_dataframe()

In [None]:
# Preview the first five rows of the loaded data
df.head(5)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [None]:
# Define X and Y
# Features: every column except the last one.
X = df.iloc[:, :-1]
# Target: the last column, selected as a 1-D Series (`-1`, not the slice `-1:`).
# The slice form yields a one-column DataFrame, i.e. a column vector, which makes
# scikit-learn warn at fit time that a 1-D array of shape (n_samples,) was expected.
Y = df.iloc[:, -1]

In [None]:
# One-hot encode the categorical feature columns into indicator columns
X = pd.get_dummies(data=X)

In [None]:
# Preview the first five rows of the encoded feature matrix
X.head(5)

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_France,Geography_Germany,Geography_Spain,Gender_Female,Gender_Male
0,619,42,2,0.0,1,1,1,101348.88,1,0,0,1,0
1,608,41,1,83807.86,1,0,1,112542.58,0,0,1,1,0
2,502,42,8,159660.8,3,1,0,113931.57,1,0,0,1,0
3,699,39,1,0.0,2,0,0,93826.63,1,0,0,1,0
4,850,43,2,125510.82,1,1,1,79084.1,0,0,1,1,0


In [None]:
# Keep the encoded feature column index; a later cell saves it together with the model
train_dummy_cols = X.columns

In [None]:
# Display the stored column index
train_dummy_cols

Index(['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember', 'EstimatedSalary', 'Geography_France',
       'Geography_Germany', 'Geography_Spain', 'Gender_Female', 'Gender_Male'],
      dtype='object')

In [None]:
# Hold out 30% of the rows as a test set, stratifying on Y, with a fixed seed
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    stratify=Y,
    random_state=0,
)

In [None]:
# Build and train the ML model
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(random_state=0)
# Keep the result of fit() under its own name; it is what gets saved to disk later
trained_model = classifier.fit(X=X_train, y=Y_train)

  after removing the cwd from sys.path.


In [None]:
# Predicted class labels for the test set
Y_pred = classifier.predict(X_test)

# Probability scores: take column index 1 of the per-class probability matrix
class_probabilities = classifier.predict_proba(X_test)
Y_prob = class_probabilities[:, 1]

In [None]:
# Compare test-set predictions with the true labels: confusion matrix and accuracy
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_true=Y_test, y_pred=Y_pred)
score = accuracy_score(y_true=Y_test, y_pred=Y_pred)

In [None]:
# Print the confusion matrix, then the accuracy, each on its own line
print(cm, score, sep='\n')

[[2293   96]
 [ 341  270]]
0.8543333333333333


# Step 5 - Log the primary metric

In [None]:
# Record the accuracy on the open run under the metric name "accuracy"
new_run.log(name="accuracy", value=score)

# Step 6 - Saving the transformations and models

In [None]:
import os

import joblib

model_file = './outputs/models.pkl'

# Make sure the target folder exists first: writing the file fails with
# FileNotFoundError if './outputs' is missing (e.g. on a fresh environment).
os.makedirs(os.path.dirname(model_file), exist_ok=True)

# Save the training column index and the fitted model together in a single file.
# Left as the cell's last expression so the written path is displayed.
joblib.dump(value=[train_dummy_cols, trained_model], filename=model_file)

['./outputs/models.pkl']

In [None]:
# Mark the run as completed
new_run.complete()

In [None]:
# List the experiment's runs; each entry's repr includes its run Id
[run for run in experiment.get_runs()]

[Run(Experiment: Webservice-Experiment,
 Id: 2263be5b-1103-49a4-ad4f-6a5c5b5bd461,
 Type: None,
 Status: Completed)]