In [None]:
# Install packages

!pip install pycaret==2.3.6
!pip install scikit-optimize

In [None]:
# Connect to the Azure ML workspace via Workspace.from_config()

from azureml.core import Workspace

ws = Workspace.from_config()

# Show the workspace identity, one field per line
print('\n'.join(
    str(field)
    for field in (ws.name, ws.resource_group, ws.location, ws.subscription_id)
))

In [None]:
# Look up the datastore registered as 'mydatastore' in the current workspace.
# NOTE(review): the "Get data" cell rebinds `datastore` to the workspace default
# datastore, so this handle is not the one the datasets are read from - confirm intent.
from azureml.core import Datastore

datastore = Datastore.get(workspace=ws, datastore_name='mydatastore')

In [None]:
# Load the train / test CSV snapshots from the workspace default datastore

from azureml.core import Dataset

datastore = ws.get_default_datastore()

# Each dataset is defined by a (datastore, relative path) pair
train_set = Dataset.Tabular.from_delimited_files(
    path=[(datastore, 'trainset/03-11-2022_050400_UTC/trainset_9000.csv')]
)
test_set = Dataset.Tabular.from_delimited_files(
    path=[(datastore, 'testset/03-11-2022_050451_UTC/testset_1000.csv')]
)

In [None]:
# Tabular dataset to pandas dataframe

import pandas as pd

# The names `train_set` / `test_set` are kept because later cells use them.
# The conversion is guarded so that re-running this cell (when the variables
# already hold DataFrames) does not fail with AttributeError.
if not isinstance(train_set, pd.DataFrame):
    train_set = train_set.to_pandas_dataframe()
if not isinstance(test_set, pd.DataFrame):
    test_set = test_set.to_pandas_dataframe()

**Setting up Environment in PyCaret**

The setup() function initializes the environment in pycaret and creates the transformation pipeline to prepare the data for modeling and deployment. setup() must be called before executing any other function in pycaret. It takes two mandatory parameters: a pandas dataframe and the name of the target column. All other parameters are optional and are used to customize the pre-processing pipeline.

In [None]:
# Setting up PyCaret environment
# Explicit imports (instead of `import *`) make it clear where each PyCaret
# function used in this notebook comes from.
from pycaret.classification import (
    compare_models,
    create_model,
    finalize_model,
    plot_model,
    predict_model,
    save_model,
    setup,
    tune_model,
)

setup_clf = setup(
    data=train_set,
    target='Diabetic',
    train_size=0.95,                    # 95% for training, remaining 5% is the hold-out sample
    session_id=123,                     # fixed seed for reproducibility
    # --- pre-processing ---
    preprocess=True,
    normalize=True,
    transformation=True,
    ignore_low_variance=True,
    remove_multicollinearity=True,
    multicollinearity_threshold=0.95,
    ignore_features=['PatientID'],      # identifier column, excluded from modeling
    trigonometry_features=True,
    feature_selection=True,
    # --- cross-validation ---
    fold_strategy='kfold',
    fold=10,
    fold_shuffle=True,
    # --- run behaviour / logging ---
    silent=True,
    log_experiment=True,
    experiment_name='diabetic prediction',
)

**Compare Models** 

Once the setup is executed, we can use compare_models to briefly evaluate the performance of all the models in the model library of PyCaret. This function trains all the models available in the model library. The output prints a score grid with Accuracy, AUC, Recall, Precision, F1, Kappa, and MCC (averaged across folds), with the number of folds determined by the fold parameter.

In [None]:
# Train every model in the library and rank the score grid by accuracy
compare_models(
    sort='Accuracy',
    cross_validation=True,
)

**Create Model**

The next step is to create a model with the selected algorithm using the create_model() function.

In [None]:
# Train a LightGBM classifier with default hyperparameters

lightgbm = create_model(estimator='lightgbm')

**Tune a Model**

When a model is created using the create_model() function it uses the default hyperparameters. In order to tune hyperparameters, the tune_model() function is used.

In [None]:
# Hyperparameter search for the LightGBM model:
# 100 iterations of Bayesian search (scikit-optimize), optimizing accuracy

tuned_lightgbm = tune_model(
    lightgbm,
    n_iter=100,
    optimize='Accuracy',
    search_library='scikit-optimize',
    search_algorithm='bayesian',
)

**Plot a Model**

Before model finalization, the plot_model() function can be used to analyze the performance across different aspects such as the ROC/AUC curve, precision-recall curve, confusion matrix, learning curve, feature importance, etc. This function takes a trained model object and returns a plot based on the test / hold-out set.


In [None]:
# No `plot` argument is given, so PyCaret's default plot type is drawn
plot_model(estimator=tuned_lightgbm)

In [None]:
# Precision-recall curve
plot_model(estimator=tuned_lightgbm, plot='pr')

In [None]:
# Confusion matrix
plot_model(estimator=tuned_lightgbm, plot='confusion_matrix')

In [None]:
# Learning curve
plot_model(estimator=tuned_lightgbm, plot='learning')

In [None]:
# Feature importance
plot_model(estimator=tuned_lightgbm, plot='feature')

In [None]:
# Classification report
plot_model(estimator=tuned_lightgbm, plot='class_report')

**Predict on test / hold-out Sample**

The hold-out (test) sample consists of the remaining 5% of the data, based on the train_size (0.95) defined in setup().

Now, using our final trained model stored in the tuned_lightgbm variable we will predict the hold-out sample and evaluate the metrics to see if they are materially different than the CV results.

In [None]:
# Score the hold-out sample with the tuned model (no `data` argument is passed)

predict_model(estimator=tuned_lightgbm)

**Finalize Model for Deployment**

Model finalization is the last step in the experiment. 

A normal machine learning workflow in PyCaret starts with setup(), followed by comparing all models using compare_models() and shortlisting a few candidate models (based on the metric of interest) to perform several modeling techniques such as hyperparameter tuning, ensembling, stacking etc. 
This workflow will eventually lead you to the best model for use in making predictions on new and unseen data. 
The finalize_model() function fits the model onto the complete dataset including the test/hold-out sample (5% in this case). 
The purpose of this function is to train the model on the complete dataset before it is deployed in production.

In [None]:
# Refit the tuned model on the complete dataset, hold-out sample included

final_lightgbm = finalize_model(estimator=tuned_lightgbm)

In [None]:
# Print the finalized Light Gradient Boosting Machine model and its parameters for deployment

print(final_lightgbm)

**Caution**

Once the model is finalized using finalize_model(), the entire dataset including the test/hold-out set is used for training. 

As such, if the model is used for predictions on the hold-out set after finalize_model() is used, the information grid printed will be misleading as you are trying to predict on the same data that was used for modeling.

In [None]:
# Hold-out prediction with the finalized model. The metrics are misleading here
# because the hold-out rows were part of the data used by finalize_model().
predict_model(estimator=final_lightgbm);

**Predict on unseen data**

The predict_model() function is also used to predict on the unseen dataset.

In [None]:
# Score the separate test dataframe with the finalized model
predict_model(estimator=final_lightgbm, data=test_set)

**Saving the model**

In [None]:
# Persist the finalized model locally as a pkl file named 'lgbm'

save_model(model=final_lightgbm, model_name='lgbm')

**Save model on azure blob (deploy finalized model), and load model to get prediction**

In [None]:
# Model deploying on azure blob storage ---->>>> deploy finalized model <<<<----
import os
from getpass import getpass

from pycaret.classification import deploy_model, load_model

# SECURITY: the storage connection string must never be committed to the notebook.
# It is taken from the AZURE_STORAGE_CONNECTION_STRING environment variable when
# that is already set, otherwise it is prompted for without echoing the input.
# The account key that was previously hardcoded in this cell has been exposed
# and must be rotated in the Azure portal.
connect_str = os.environ.get('AZURE_STORAGE_CONNECTION_STRING') or getpass(
    'Azure Storage connection string: '
)
if not connect_str:
    raise ValueError('An Azure Storage connection string is required to deploy the model.')

# Set the variable explicitly so the value is available for the deployment call below.
os.environ['AZURE_STORAGE_CONNECTION_STRING'] = connect_str

deploy_model(
    model=final_lightgbm,
    model_name='lgbm',
    platform='azure',
    authentication={'container': 'models'},
)

In [None]:
# Fetch the deployed model back from the 'models' blob container
import os

from pycaret.classification import load_model

# Make the connection string available through the environment before loading
# (presumably this is where PyCaret's Azure backend reads it from - confirm).
os.environ['AZURE_STORAGE_CONNECTION_STRING'] = connect_str

loaded_model = load_model(
    model_name='lgbm',
    platform='azure',
    authentication={'container': 'models'},
)

In [None]:
from pycaret.classification import predict_model

# Score the test dataframe with the model loaded from blob storage
predictions = predict_model(estimator=loaded_model, data=test_set)

# NOTE(review): this second call passes no `data`; per the caution earlier in this
# notebook, hold-out metrics are misleading for a finalized model. Presumably
# `predictions` is the result that should be inspected - confirm.
predict_model(estimator=loaded_model)

**Deploy on Azure as Web Service**

In [None]:
# Register the locally saved model file in the Azure ML workspace
import sklearn

from azureml.core import Model, Workspace
from azureml.core.resource_configuration import ResourceConfiguration

ws = Workspace.from_config()

model = Model.register(
    workspace=ws,
    model_name='lgbm',                              # name of the registered model in the workspace
    model_path='lgbm.pkl',                          # local file uploaded and registered as the model
    model_framework=Model.Framework.SCIKITLEARN,    # framework recorded for the model
    model_framework_version=sklearn.__version__,    # scikit-learn version of the current kernel
    tags={'area': 'classification', 'type': 'classification'},
)

print(f'Name: {model.name}')
print(f'Version: {model.version}')

In [None]:
%%writefile lgbmscore.py

# Create scoring script

import json
import pickle
import numpy as np
import pandas as pd
import os
import joblib
from azureml.core.model import Model

from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType


def init():
    """Load the model file into the module-level ``model`` variable.

    The directory is read from the AZUREML_MODEL_DIR environment variable and
    joined with 'lgbm.pkl'; the object returned by joblib.load is stored in the
    global ``model`` that run() calls predict() on.
    """
    global model
    # Change the filename below if the model was registered from a different file.
    model_dir = os.environ.get('AZUREML_MODEL_DIR')
    model = joblib.load(os.path.join(model_dir, 'lgbm.pkl'))


# One-row example of the request payload: column names and dtypes handed to the
# input schema decorator on run().
input_sample = pd.DataFrame({
    column: pd.Series([0], dtype=dtype)
    for column, dtype in (
        ("PatientID", "int64"),
        ("Pregnancies", "int64"),
        ("PlasmaGlucose", "int64"),
        ("DiastolicBloodPressure", "int64"),
        ("TricepsThickness", "int64"),
        ("SerumInsulin", "int64"),
        ("BMI", "float64"),
        ("DiabetesPedigree", "float64"),
        ("Age", "int64"),
    )
})

# Example of the response: an integer array. Use the data type that reflects
# the expected result.
output_sample = np.array([0])

# NOTE(review): enforce_shape is not passed to the parameter types below, so the
# library default applies; pass enforce_shape=False explicitly if variable-shape
# input must be accepted - confirm.
@input_schema('data', PandasParameterType(input_sample))
@output_schema(NumpyParameterType(output_sample))
def run(data):
    """Predict with the global ``model`` for the given input frame.

    Prints the input columns, the input type and the raw prediction, then
    returns the prediction converted with ``tolist()``. If anything raises,
    the exception message is returned as a string instead of propagating.
    """
    try:
        print("input_data....")
        print(data.columns)
        print(type(data))
        result = model.predict(data)
        print("result.....")
        print(result)
        # Any return type works as long as it can be serialized to JSON.
        return result.tolist()
    except Exception as e:
        return str(e)

In [None]:
# Create environment configuration for inference

# Imported here so this cell does not rely on the registration cell having run.
import sklearn

from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.model import InferenceConfig

environment = Environment('my-env')
environment.python.conda_dependencies = CondaDependencies.create(pip_packages=[
    'azureml-defaults',
    'inference-schema[numpy-support]',
    'joblib',
    'numpy',
    'pandas',
    # Pinned to the version installed at the top of this notebook: the pickled
    # model must be loaded with the same pycaret version that created it.
    # Keep in sync with the install cell.
    'pycaret==2.3.6',
    'scikit-learn=={}'.format(sklearn.__version__),
])

inference_config = InferenceConfig(entry_script='./lgbmscore.py', environment=environment)

In [None]:
# Deploy the registered model as an Azure ML web service
from pycaret.classification import deploy_model, load_model

service_name = 'lgbm-cls'

# overwrite=True is passed so an existing service with the same name is replaced
service = Model.deploy(
    workspace=ws,
    name=service_name,
    models=[model],
    inference_config=inference_config,
    overwrite=True,
)
service.wait_for_deployment(show_output=True)