[Reference](https://blog.devops.dev/deploying-an-ml-model-is-easy-with-mlflow-and-aws-sagemaker-step-by-step-tutorial-d2bde5701d04)

In [1]:
# Use the explicit %pip magic so the package is installed into the running kernel's
# environment even when IPython automagic is disabled or a variable named `pip` exists.
# The recorded output below resolved to mlflow 2.7.1; pin that version if exact
# reproduction of this run is required.
%pip install mlflow

Collecting mlflow
  Downloading mlflow-2.7.1-py3-none-any.whl (18.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.5/18.5 MB[0m [31m57.0 MB/s[0m eta [36m0:00:00[0m
Collecting databricks-cli<1,>=0.8.7 (from mlflow)
  Downloading databricks_cli-0.18.0-py2.py3-none-any.whl (150 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m150.3/150.3 kB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
Collecting gitpython<4,>=2.1.0 (from mlflow)
  Downloading GitPython-3.1.38-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.6/190.6 kB[0m [31m20.6 MB/s[0m eta [36m0:00:00[0m
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.12.0-py3-none-any.whl (226 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.0/226.0 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting docker<7,>=4.0.0 (from mlflow)
  Downloading docker-6.1.3-py3-none-any.whl (148 kB)
[2K     [90m━━━━━━

In [2]:
import mlflow
import mlflow.sklearn

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
# Build a synthetic two-class dataset: 50,000 samples with 3 features, all informative.
# random_state is fixed so the generated data is the same on every run.
X, y = make_classification(n_samples=50000, n_features=3, n_informative=3, n_redundant=0, n_classes=2, random_state=1)

# Hold out 30% of the samples for evaluation; the split is seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Select the MLflow experiment that the run below is recorded under.
# The recorded log output shows MLflow creating it when it does not exist yet.
mlflow.set_experiment('classification_model')

# Everything logged inside this block is attached to a single MLflow run.
with mlflow.start_run(run_name='My model experiment') as run:

    # Hyperparameters for the logistic regression (c is passed as C below)
    c = 0.1
    solver = 'liblinear'

    # Log the hyperparameters to MLflow so the run records how the model was configured
    mlflow.log_param('c', c)
    mlflow.log_param('solver', solver)

    # Train the model on the training split and predict labels for the held-out split
    lr = LogisticRegression(C = c, solver = solver)
    lr.fit(X_train, y_train)
    predictions = lr.predict(X_test)

    # Log the fitted model to MLflow under the artifact path 'logistic-regression-model'
    mlflow.sklearn.log_model(lr, 'logistic-regression-model')

    # Log model performance to MLflow.
    # NOTE(review): this is a classifier; assuming the two classes are encoded as 0/1
    # (TODO confirm), MSE over predicted labels equals the misclassification rate.
    # An accuracy-style metric would be more conventional here.
    mse = mean_squared_error(y_test, predictions)
    mlflow.log_metric('mse', mse)
    print('mse: %f' % mse)

2023/10/17 10:18:03 INFO mlflow.tracking.fluent: Experiment with name 'classification_model' does not exist. Creating a new experiment.


mse: 0.106600




In [4]:
# !mlflow ui

In [5]:
# !pip install awscli
# !pip install boto3

In [6]:
# from mlflow.deployments import get_deploy_client

# region = 'us-east-2'
# # You can run "aws sts get-caller-identity" to get your AWS ID
# aws_id = '243729631277'
# # Use ARN from the role we created in AWS with the full permission to Sagemaker
# arn = 'arn:aws:iam::*******(*******:user/sagemaker_deploy_user'
# # Choose a name for the application to be deployed
# app_name = 'OK_model_application'
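# # The model URI is shown in "mlflow ui" as "logged_model". NOTE(review): its last segment should be the artifact path passed to log_model ('logistic-regression-model'), not the experiment name - confirm before running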
# model_uri = f'runs:/47f04d7e6dfc41658e7b7472a501f183/classification_model'
# # tag_id is equal to the mlflow version. It is used in Docker url
# tag_id = '2.5.0'


# image_url = aws_id + '.dkr.ecr.' + region + '.amazonaws.com/mlflow-pyfunc:' + tag_id


# config = dict(
#     execution_role_arn=arn,
#     bucket_name="New-s3-bucket",
#     image_url=image_url,
#     region_name="us-east-2",
#     archive=False,
#     instance_type="ml.t2.medium",
#     instance_count=1,
#     synchronous=True,
#     timeout_seconds=3600,
#     variant_name="prod-variant-1",
#     tags={"training_timestamp": "2023-08-31"},
# )

# client = get_deploy_client("sagemaker")

# client.create_deployment(
#     # Any name works; this is the deployment name that will appear in SageMaker
#     "my-deployment-attemp",
#     model_uri=model_uri,
#     flavor="python_function",
#     config=config,
# )

In [7]:
import pandas as pd
import numpy as np
import json
import boto3

# Endpoint name and AWS region used by the SageMaker helper functions in this cell.
# These are plain module-level assignments: a `global` declaration at module scope
# is a no-op, so none is needed for functions to read these names.
app_name = 'my-deployment-logisticRegression'
region = 'us-east-2'

def check_status(app_name):
    """Return the status of the SageMaker endpoint named ``app_name``.

    A boto3 ``sagemaker`` client is created for the module-level ``region``,
    the endpoint is described, and the ``EndpointStatus`` field of the
    response is returned unchanged.
    """
    sagemaker = boto3.client('sagemaker', region_name=region)
    return sagemaker.describe_endpoint(EndpointName=app_name)['EndpointStatus']

def query_endpoint(app_name, input_json):
    """Send a JSON payload to a SageMaker endpoint and return the parsed response.

    Args:
        app_name: Name of the SageMaker endpoint to invoke.
        input_json: Request body, already serialized as JSON (str or bytes);
            it is sent with content type ``application/json``.

    Returns:
        The endpoint's response body parsed from JSON into Python objects.

    Raises:
        json.JSONDecodeError: If the response body is not valid JSON.
        Errors raised by boto3 while invoking the endpoint propagate unchanged.
    """
    # The client is created in the module-level `region`.
    client = boto3.session.Session().client('sagemaker-runtime', region_name=region)

    response = client.invoke_endpoint(
        EndpointName=app_name,
        Body=input_json,
        ContentType='application/json',
    )

    # JSON is UTF-8 encoded; decoding as ASCII would raise UnicodeDecodeError on
    # any non-ASCII character in an otherwise valid response.
    preds = json.loads(response['Body'].read().decode('utf-8'))
    print('Received response: {}'.format(preds))
    return preds

# Check endpoint status
print('Application status is {}'.format(check_status(app_name)))

# Build a small 2x3 test array to send to the model (3 columns, like the training features).
# A seeded generator keeps the request payload identical across Restart & Run All,
# so the endpoint's responses can be compared between runs.
rng = np.random.default_rng(42)
arr_predict = rng.standard_normal((2, 3))

# Convert the array to the pandas "split" dict layout (index / columns / data)
query_input = pd.DataFrame(arr_predict).to_dict(orient='split')
print(query_input)

# Wrap the frame under the "dataframe_split" key
# (presumably the input layout the deployed MLflow model server expects - verify
# against the MLflow version baked into the serving image).
data = {"dataframe_split": query_input}

# Serialize to UTF-8 encoded JSON bytes for the request body
byte_data = json.dumps(data).encode('utf-8')

# Invoke the endpoint and show the parsed predictions
predictions = query_endpoint(app_name=app_name, input_json=byte_data)
print(predictions)