# 1st variant: Load locally

In [26]:
import pickle
import joblib

In [27]:
fname = '../trained_models/model_6.pkl'
model_1 = joblib.load(open(fname, 'rb'))

In [28]:
model_1

Pipeline(steps=[('dtypeselector', DTypeSelector(dtypes='number')),
                ('corrfilterhightotalcorrelation',
                 CorrFilterHighTotalCorrelation()),
                ('knnimputer', KNNImputer()), ('robustscaler', RobustScaler()),
                ('randomforestclassifier', RandomForestClassifier())])

-----

# 2nd variant: Download in memory from S3

In [9]:
import boto3
from io import BytesIO

In [10]:
# Connect to S3 bucket with the higher-level object-oriented service access
s3 = boto3.resource('s3')

In [11]:

# You could also write to an in-memory BytesIO object, which acts like a file but doesn't actually touch a disk.
with BytesIO() as file:
    s3.Bucket("ml-models-niels").download_fileobj('model_6.pkl', file)
    file.seek(0)
    model_2 = joblib.load(file)

# model = joblib.load(open('model_1.joblib', 'rb'))

In [12]:
model_2

Pipeline(steps=[('dtypeselector', DTypeSelector(dtypes='number')),
                ('corrfilterhightotalcorrelation',
                 CorrFilterHighTotalCorrelation()),
                ('knnimputer', KNNImputer()), ('robustscaler', RobustScaler()),
                ('randomforestclassifier', RandomForestClassifier())])

----

# 3rd variant: Load model from S3

In [23]:
BUCKET_NAME = 'ml-models-niels'
MODEL_FILE_NAME = 'model_6.pkl'

In [25]:
import pickle
import boto3

s3 = boto3.resource('s3')
my_pickle = joblib.load(s3.Bucket(BUCKET_NAME).Object(MODEL_FILE_NAME).get()['Body'].read())

ValueError: embedded null byte

In [13]:
import boto3
import pickle

BUCKET_NAME = 'ml-models-niels'
MODEL_FILE_NAME = 'model_6.pkl'

S3 = boto3.client('s3', region_name='eu-central-1')

In [17]:
S3.get_object(Bucket=BUCKET_NAME, Key=MODEL_FILE_NAME)

{'ResponseMetadata': {'RequestId': 'E8AB2F27E0407B93',
  'HostId': 'CXtBPlbsmqQn1PZeHcwPSu6U0pZLQFUZbmYCuPNXA+lv8ssRUamTPZkr5WfI9Om10TCa5rFmorA=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'CXtBPlbsmqQn1PZeHcwPSu6U0pZLQFUZbmYCuPNXA+lv8ssRUamTPZkr5WfI9Om10TCa5rFmorA=',
   'x-amz-request-id': 'E8AB2F27E0407B93',
   'date': 'Wed, 14 Oct 2020 11:33:31 GMT',
   'last-modified': 'Tue, 13 Oct 2020 19:35:07 GMT',
   'etag': '"814c22f434c90993f4917e808b83ada6"',
   'x-amz-version-id': 'gHxmVhGylrUO5NCAEB0Gt4CxfkJSYGqb',
   'accept-ranges': 'bytes',
   'content-type': 'binary/octet-stream',
   'content-length': '856525',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'AcceptRanges': 'bytes',
 'LastModified': datetime.datetime(2020, 10, 13, 19, 35, 7, tzinfo=tzutc()),
 'ContentLength': 856525,
 'ETag': '"814c22f434c90993f4917e808b83ada6"',
 'VersionId': 'gHxmVhGylrUO5NCAEB0Gt4CxfkJSYGqb',
 'ContentType': 'binary/octet-stream',
 'Metadata': {},
 'Body': <botocore.response.Streami

In [22]:
S3.get_object(Bucket=BUCKET_NAME, Key=MODEL_FILE_NAME)['Body']

<botocore.response.StreamingBody at 0x13a067110>

In [None]:
model = joblib.load(open('trained_models/model_1.joblib', 'rb'))

In [21]:
model_3 = joblib.load(open(S3.get_object(Bucket=BUCKET_NAME, Key=MODEL_FILE_NAME)['Body'].read(), 'rb'))

ValueError: embedded null byte

In [18]:
def memoize(f):
    memo = {}

    def helper(x):
        if x not in memo:
            memo[x] = f(x)
        return memo[x]

    return helper


@memoize
def load_model(key):
    response = S3.get_object(Bucket=BUCKET_NAME, Key=key)
    model_str = response['Body'].read()

    model = pickle.loads(model_str)

    return model

In [None]:
def load_model(key):    
    # Load model from S3 bucket
    response = S3.get_object(Bucket=BUCKET_NAME, Key=key)
    # Load pickle model
    model_str = response['Body'].read()     
    model = pickle.loads(model_str)     
    
    return model

In [6]:
import json
import boto3
from io import BytesIO
import joblib
import pickle

BUCKET_NAME = 'ml-models-niels'
MODEL_FILE_NAME = 'model_6.pkl'

S3 = boto3.client('s3', region_name='eu-central-1')


# def get_latest_obj(input_bucket):
#     """
#     This function gets the last modified file from an S3 bucket.
#     :param input_bucket: S3 bucket
#     :return: key of the last modified file from the S3 bucket
#     """
#     get_last_modified = lambda obj: int(obj.last_modified.strftime('%s'))
#     objs = [obj for obj in sorted(list(input_bucket.objects.all()), key=get_last_modified)]
#     return objs[-1].key

In [7]:
S3

<botocore.client.S3 at 0x12bc18bd0>

In [8]:
def load_model(key):    
    # Load model from S3 bucket
    response = S3.get_object(Bucket=BUCKET_NAME, Key=key)
    # Load pickle model
    model_str = response['Body'].read()     
    #model = pickle.loads(model_str)
    model =joblib.load(open(model_str, 'rb'))
    
    return model

In [9]:
load_model(MODEL_FILE_NAME)

ValueError: embedded null byte

----

# 4th variant: Download model from s3 to local disk and load subsequently

In [33]:
import boto3
import botocore

BUCKET_NAME = 'ml-models-niels' 
KEY = 'model_6.pkl'
FILE_NAME = '../trained_models/NIELS_model_1.pkl'

s3 = boto3.resource('s3')

try:
    s3.Bucket(BUCKET_NAME).download_file(KEY, FILE_NAME)
except botocore.exceptions.ClientError as e:
    if e.response['Error']['Code'] == "404":
        print("The object does not exist.")
    else:
        raise

In [34]:
fname = FILE_NAME
model_4 = joblib.load(open(fname, 'rb'))

In [35]:
model_4

Pipeline(steps=[('dtypeselector', DTypeSelector(dtypes='number')),
                ('corrfilterhightotalcorrelation',
                 CorrFilterHighTotalCorrelation()),
                ('knnimputer', KNNImputer()), ('robustscaler', RobustScaler()),
                ('randomforestclassifier', RandomForestClassifier())])