# Deserialize Artifact and Load Into Memory

### The Mission

This is a **very basic** continuous validation test - which acts more as a playground for you to do miscellaneous testing - that you may append to an CI testing pipeline. 
Within this test model or pipeline binary (which has been uploaded to COS in the train models notebook [train_models.ipynb](https://github.com/IBM/MLOps-CPD/blob/master/train_models.ipynb)) is downloaded. 
We then try to deserialize it and confirm it is found properly in-memory by invoking its predict method on the head of the test data.

When using this asset to construct an MLOps pipeline for a more complex Machine Learning task, we suggest you test your model/pipeline more rigorously. 
To scratch the surface on what this could look like, you may test more methods of your model/pipeline object or maybe even set a footprint threshold for the binary's filesize or the memory footprint after deserialization.

Happy testing! 🥳

In [12]:
import os
import sys
import pickle
from ibm_watson_studio_pipelines import WSPipelines
from botocore.client import Config
from ibm_botocore.client import Config
import ibm_boto3

In [13]:
import credentials  # for local development

In [14]:
## Get Cloud API Key from Global Pipeline Parameters
CLOUD_API_KEY = os.getenv('cloud_api_key')

# At this point, the filename of the binary is hardcoded. 
# With increasing complexity of your MLOps flow, you may want to add the filename as a pipeline parameter.
FILENAME = "model_pipeline.pkl"

In [15]:
## Retrieve cos credentials from pipeline parameters
import json

# Get json from environment and convert to string
project_cos_credentials = json.loads(os.environ['project_cos_credentials'])
mlops_cos_credentials = json.loads(os.getenv('mlops_cos_credentials'))

## PROJECT COS 
AUTH_ENDPOINT = project_cos_credentials['AUTH_ENDPOINT']
ENDPOINT_URL = project_cos_credentials['ENDPOINT_URL']
API_KEY_COS = project_cos_credentials['API_KEY']
#BUCKET_PROJECT_COS = project_cos_credentials['BUCKET']

## MLOPS COS
ENDPOINT_URL_MLOPS = mlops_cos_credentials['ENDPOINT_URL']
API_KEY_MLOPS = mlops_cos_credentials['API_KEY']
CRN_MLOPS = mlops_cos_credentials['CRN']
BUCKET_MLOPS = mlops_cos_credentials['BUCKET']

### Download model pipeline and deserialize

In [37]:
# Reusable methods for COS operations and reading
def read_data_from_mlops_cos(key):
    def __iter__(self): return 0

    MLOPS_DATA_STORE_client = ibm_boto3.client(
        service_name='s3',
        ibm_api_key_id=API_KEY_MLOPS,
        ibm_service_instance_id=CRN_MLOPS,
        ibm_auth_endpoint=AUTH_ENDPOINT,
        config=Config(signature_version='oauth'),
        endpoint_url=ENDPOINT_URL_MLOPS)

    body = MLOPS_DATA_STORE_client.get_object(Bucket=BUCKET_MLOPS, Key=key)['Body']
    # add missing __iter__ method, so pandas accepts body as file-like object
    if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType(__iter__, body)

    gcf_df = pd.read_csv(body)
    return gcf_df


def download_file_cos(local_file_name, key):
    cos = ibm_boto3.client(service_name='s3',
                             ibm_api_key_id=API_KEY_MLOPS,
                             ibm_service_instance_id=CRN_MLOPS,
                             ibm_auth_endpoint=AUTH_ENDPOINT,
                             config=Config(signature_version='oauth'),
                             endpoint_url=ENDPOINT_URL_MLOPS)
    try:
        res = cos.download_file(Bucket=BUCKET_MLOPS, Key=key, Filename=local_file_name)
    except Exception as e:
        print(Exception, e)
    else:
        print('File Downloaded')


def load_model(key, filename):
    download_file_cos(key, filename)
    with open(filename, "rb") as f:
        print(sys.getsizeof(f))
        artifact = pickle.load(f)
    return artifact


def get_file_size_cos(local_file_name, key):
    cos = ibm_boto3.resource(service_name='s3',
                             ibm_api_key_id=API_KEY_MLOPS,
                             ibm_service_instance_id=CRN_MLOPS,
                             ibm_auth_endpoint=AUTH_ENDPOINT,
                             config=Config(signature_version='oauth'),
                             endpoint_url=ENDPOINT_URL_MLOPS)
    try:
       # size = cos.ObjectSummary(local_file_name, key).size ### this doesnt work for some reason
        size = cos.Bucket(BUCKET_MLOPS).Object(key).content_length
    except Exception as e:
        print(Exception, e)
    return size

In [38]:
import pandas as pd

# Download test_data from cos and take first five rows
test_data = read_data_from_mlops_cos('test_tfr.csv').head(5)

# Drop only 'Risk'
X_test = test_data.drop(list(test_data.columns)[-1:], axis=1)

In [39]:
# Import the corresponding module/classifier for the module to ensure successful deserialization and subsequent usability
from lightgbm import LGBMClassifier

# Download pickled model from Cloud Object Storage and deserialize
model_pipeline = load_model(FILENAME, FILENAME)

File Downloaded
4264


In [40]:
memory_footprint = get_file_size_cos(FILENAME, FILENAME)
binary_cos_footprint = sys.getsizeof(model_pipeline)
binary_deserializable = False

try:
    # Try to predict the output variable for five rows of test data
    model_pipeline.predict(X_test)
    binary_deserializable = True
except Exception as e:
    # Could make prediction. There might be a fault in model (de)serialization or in the model itself.
    print("Could not score. Possibly because model could not be loaded into memory?")
    print(e)

## Save your test results to pipeline

In [None]:
params = {}
params['deserializable'] = binary_deserializable
params['memory_footprint'] = memory_footprint
params['binary_cos_footprint'] = binary_cos_footprint

In [None]:
pipelines_client = WSPipelines.from_apikey(apikey=CLOUD_API_KEY)
pipelines_client.store_results(params)