# Model Evaluation & Artifacts Generation
## Evaluate model metrics & Create artiacts for trained models  

In [2]:
import os
import sagemaker
import logging
import boto3
import time
import pandas as pd
import json
import botocore
from botocore.exceptions import ClientError


# ========================== low-level service client of the boto3 session ==========================
config = botocore.config.Config(user_agent_extra='bedissj-1699438736259')


sm = boto3.client(service_name='sagemaker', 
                  config=config)

sm_runtime = boto3.client('sagemaker-runtime',
                          config=config)

sess = sagemaker.Session(sagemaker_client=sm,
                         sagemaker_runtime_client=sm_runtime)

bucket = sess.default_bucket()
role = sagemaker.get_execution_role()
region = sess.boto_region_name


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


## 1. Evaluate & Register custom `SKLearn` Models

![Model_evaluation](./img/aws_model_evaluation.png)


### 1.1 Configure model evaluation as a SageMaker Processing Job

#### 1.1.1 Configure Processing Parameters

In [3]:
# ========================== Processing parameters ==========================
FRAMEWORK_VERSION = '1.2-1'
instance_type = 'ml.t3.medium'
instance_count = 1

model_s3_uri = 's3://{}/sagemaker-scikit-lea-240604-1642-002-59cbda6b/output/'.format(bucket)
test_data_s3_uri = 's3://{}/sagemaker-scikit-learn-2024-03-06-21-05-51-569/output/bank-churn-test/'.format(bucket)


#### 1.1.2 Configure Pocessing Inputs & Outputs

In [4]:
from sagemaker.processing import ProcessingInput, ProcessingOutput


input_model = '/opt/ml/processing/input/model'
input_data = '/opt/ml/processing/input/data'
output_data = '/opt/ml/processing/output'


# ========================== Processing inputs ==========================
processing_inputs = [
    #### Trained model ####
    ProcessingInput(input_name= 'model',
                    source=model_s3_uri,
                    destination=input_model),
    
    #### Test subset ####
    ProcessingInput(input_name='testSubset',
                    source=test_data_s3_uri,
                    destination=input_data)
]


# ========================== Processing outputs ==========================
processing_outputs = [
    ProcessingOutput(output_name='metrics',
                     source=output_data,
                     s3_upload_mode='EndOfJob')

]


# ========================== Processing arguments ==========================
processing_args=[
    '--input-model', str(input_model),
    '--input-data', str(input_data),
    '--output-data', str(output_data)
]


#### 1.1.3 Instanciate `SKLearn` Processor

In [5]:
from sagemaker.sklearn.processing import SKLearnProcessor


evaluation_processor = SKLearnProcessor(
    framework_version=FRAMEWORK_VERSION,
    instance_type=instance_type,
    instance_count=instance_count,
    env={'AWS_DEFAULT_REGION': region},
    role=role,
)


#### 1.1.4 Run processor to perform model evaluation

In [6]:
evaluation_processor.run(
    code='src/evaluate_metrics.py',
    inputs=processing_inputs,
    outputs=processing_outputs,
    arguments=processing_args,
    wait=False
)


INFO:sagemaker:Creating processing-job with name sagemaker-scikit-learn-2024-06-20-08-01-15-372


#### 1.1.5 Review Processing Job

In [15]:
from pprint import pprint


pprint(evaluation_processor.latest_job.describe(), compact=True)

{'AppSpecification': {'ContainerArguments': ['--input-model',
                                             '/opt/ml/processing/input/model',
                                             '--input-data',
                                             '/opt/ml/processing/input/data',
                                             '--output-data',
                                             '/opt/ml/processing/output'],
                      'ContainerEntrypoint': ['python3',
                                              '/opt/ml/processing/input/code/evaluate_metrics.py'],
                      'ImageUri': '659782779980.dkr.ecr.eu-west-3.amazonaws.com/sagemaker-scikit-learn:1.2-1-cpu-py3'},
 'CreationTime': datetime.datetime(2024, 6, 20, 8, 1, 15, 805000, tzinfo=tzlocal()),
 'Environment': {'AWS_DEFAULT_REGION': 'eu-west-3'},
 'LastModifiedTime': datetime.datetime(2024, 6, 20, 8, 17, 30, 820000, tzinfo=tzlocal()),
 'ProcessingEndTime': datetime.datetime(2024, 6, 20, 8, 17, 30, 437000, tzinfo

#### 1.1.6 Review model metrics

In [16]:
fig_name = 'confusion_roc_auc.png'
dic_name = 'evaluation.json'


metrics_s3_uri = evaluation_processor.latest_job.outputs[0].destination
metrics_jpg_s3_uri = os.path.join(metrics_s3_uri, fig_name)
metrics_dic_s3_uri = os.path.join(metrics_s3_uri, dic_name)


print(metrics_jpg_s3_uri)
print(metrics_dic_s3_uri)

s3://sagemaker-eu-west-3-668303144976/sagemaker-scikit-learn-2024-06-20-08-01-15-372/output/metrics/confusion_roc_auc.png
s3://sagemaker-eu-west-3-668303144976/sagemaker-scikit-learn-2024-06-20-08-01-15-372/output/metrics/evaluation.json


In [20]:
!aws s3 cp 's3://sagemaker-eu-west-3-668303144976/sagemaker-scikit-learn-2024-06-20-08-01-15-372/output/metrics/metrics/confusion_roc_auc.png' './metrics/confusion.jpg'

download: s3://sagemaker-eu-west-3-668303144976/sagemaker-scikit-learn-2024-06-20-08-01-15-372/output/metrics/metrics/confusion_roc_auc.png to metrics/confusion.jpg


In [21]:
!aws s3 cp 's3://sagemaker-eu-west-3-668303144976/sagemaker-scikit-learn-2024-06-20-08-01-15-372/output/metrics/metrics/confusion_roc_auc.png' './metrics/eval.json'

download: s3://sagemaker-eu-west-3-668303144976/sagemaker-scikit-learn-2024-06-20-08-01-15-372/output/metrics/metrics/confusion_roc_auc.png to metrics/eval.json


![Model_evaluation](./metrics/confusion.jpg)


In [None]:
from pprint import pprint

dic_corrected_s3_uri = "s3://sagemaker-eu-west-3-668303144976/sagemaker-scikit-learn-2024-06-20-08-01-15-372/output/metrics/metrics/evaluation.json"
evaluation_json = sagemaker.s3.S3Downloader.read_file(dic_corrected_s3_uri)


pprint(json.loads(evaluation_json))

{'metrics': {'accuracy': {'value': 1.0}, 'f1_score': {'value': 1.0}}}


### 1.2 Register model to prepare for inference

![Model_evaluation](./img/aws_model_hosting.png)


In [112]:
from sagemaker.workflow.parameters import ParameterString, ParameterInteger


model_approval_status = ParameterString(
    name='ApprovalStatus',
    default_value='PendingManualApproval'
)


deploy_instance_type = ParameterString(
    name='DeployInstanceType',
    default_value='ml.m5.large'
)


deploy_instance_count = ParameterInteger(
    name='DeployInstanceCount',
    default_value=1
)


In [113]:
import time
 
    
model_package_group_name = f'Bank-Churn-prediction-{int(time.time())}'
print(model_package_group_name)


Bank-Churn-prediction-1718877096


In [114]:
from sagemaker.model_metrics import ModelMetrics, MetricsSource


model_metrics = ModelMetrics(
    model_statistics=MetricsSource(
        s3_uri="s3://sagemaker-eu-west-3-668303144976/sagemaker-scikit-learn-2024-06-20-08-01-15-372/output/metrics/metrics/evaluation.json",
        content_type="text/csv"
    )
)


print(model_metrics.model_statistics)

<sagemaker.model_metrics.MetricsSource object at 0x7f44c1fcfd90>


In [115]:
from sagemaker.sklearn.model import SKLearnModel


PY_VERSION= 'py3'

sklearn_model = SKLearnModel(
    entry_point='src/inference.py',
    model_data=model_s3_uri+'model.tar.gz',
    framework_version=FRAMEWORK_VERSION,
    py_version=PY_VERSION,
    role=role,  
) 

In [116]:
sklearn_model.register(
    content_types=["text/csv"],
    response_types=["text/csv"],
    #inference_instances=[deploy_instance_type],
    #transform_instances=[deploy_instance_type],
    model_package_group_name=model_package_group_name,
    model_metrics=model_metrics,
    #approval_status=model_approval_status
)

INFO:sagemaker.image_uris:Defaulting to only supported image scope: cpu.


<sagemaker.model.ModelPackage at 0x7f44c1fcfd60>

### 1.3 Create endpoint to perform inference

In [117]:
sklearn_model.create(
    instance_type=instance_type
)


INFO:sagemaker:Creating model with name: sagemaker-scikit-learn-2024-06-20-09-53-00-399


In [None]:
sklearn_model.deploy(
    initial_instance_count=instance_count,
    instance_type='ml.m5.large'
)


INFO:sagemaker:Creating model with name: sagemaker-scikit-learn-2024-06-20-10-13-02-301
INFO:sagemaker:Creating endpoint-config with name sagemaker-scikit-learn-2024-06-20-10-13-03-362
INFO:sagemaker:Creating endpoint with name sagemaker-scikit-learn-2024-06-20-10-13-03-362


--------------------

### Cleanup resources
___