In [None]:
# Install python-dotenv into the kernel's environment, then load settings from
# the ".env" file next to this notebook.
%pip install python-dotenv
from dotenv import load_dotenv
# NOTE(review): presumably this is what supplies AWS_ROLE_ARN, which a later
# cell reads from the environment — confirm the .env file defines it.
load_dotenv(".env")

In [None]:
from pathlib import Path

import pandas as pd
import numpy as np

# Source interaction log (read-only) and the down-sampled copy written for the
# local processing job further down in this notebook.
ratings_data_path = "../pipelines/recommendations/data/user_item_interactions.csv.gz"
input_data_path = "./data/user_item_interactions.csv.gz"

# Upper bound on the number of rows kept, and the seed that makes the sample
# identical across "Restart Kernel -> Run All" executions.
SAMPLE_SIZE = 10000
SAMPLE_SEED = 42

data_types = {"user_id": str, "item_id": str, "rating": np.float32}

interactions_full = pd.read_csv(
    ratings_data_path,
    compression="gzip",
    dtype=data_types,
    parse_dates=["date"],
)

# Clamp n to the number of available rows: DataFrame.sample raises ValueError
# when asked for more rows than exist (without replacement).
user_item_interactions = interactions_full.sample(
    n=min(SAMPLE_SIZE, len(interactions_full)),
    axis=0,
    random_state=SAMPLE_SEED,
)
# The full frame is no longer needed; release it so it does not linger in the kernel.
del interactions_full

# to_csv does not create missing directories, so make sure ./data exists first.
Path(input_data_path).parent.mkdir(parents=True, exist_ok=True)
user_item_interactions.to_csv(input_data_path, compression="gzip", index=False)

user_item_interactions.head(3)

In [10]:
import shlex
import subprocess

# Flip to True to (re)build the sklearn:latest image used by the cells below.
BUILD = False
# Flip to True to build without Docker's layer cache.
# (The name is spelled "CASH"; it is kept unchanged so existing references keep working.)
CLEAR_CASH = False

build_cmd = "docker build -t sklearn:latest -f docker/sklearn/Dockerfile ."

if CLEAR_CASH:
    build_cmd += " --no-cache"

if BUILD:
    # check=True raises CalledProcessError when the build exits non-zero, so a
    # broken image build stops the notebook here instead of being silently
    # ignored. The command runs from ../../, which is the build context for ".".
    subprocess.run(shlex.split(build_cmd), cwd='../../', check=True)

In [None]:
import numpy as np
import os
from sagemaker.local import LocalSession
from sagemaker.estimator import Estimator

# Session object shared by every SageMaker call in this notebook; its config is
# replaced with a local-mode setting that enables `local_code`.
local_mode_config = {'local': {'local_code': True}}
sagemaker_session = LocalSession()
sagemaker_session.config = local_mode_config

# IAM role ARN taken from the environment; this is None when the variable is unset.
role = os.getenv("AWS_ROLE_ARN")

In [None]:
from sagemaker.processing import ScriptProcessor
from sagemaker.processing import ProcessingInput
from sagemaker.processing import ProcessingOutput

# Processing job definition: runs a Python script inside the locally built
# sklearn:latest image using the local-mode session.
processor = ScriptProcessor(
    image_uri='sklearn:latest',
    role=role,
    instance_count=1,
    instance_type="local",
    sagemaker_session=sagemaker_session,
    command=['python3'],
)

# The sampled interactions file is made available to the script under
# /opt/ml/processing/input/data inside the container.
user_item_interactions_input = ProcessingInput(
    source=input_data_path,
    input_name="user_item_interactions",
    destination="/opt/ml/processing/input/data",
)

# Every artifact is collected from its own sub-directory of the container's
# output root, and the output is named after that sub-directory. Declaring the
# names once keeps the five outputs from drifting apart.
PROCESSING_OUTPUT_ROOT = "/opt/ml/processing/output"
processing_output_names = [
    "user_item_matrix",
    "item_similarity_matrix",
    "test_data",
    "user_encoder",
    "item_encoder",
]

processor.run(
    code="./preprocess.py",
    inputs=[user_item_interactions_input],
    outputs=[
        ProcessingOutput(
            output_name=output_name,
            source=f"{PROCESSING_OUTPUT_ROOT}/{output_name}",
        )
        for output_name in processing_output_names
    ],
    # Block until the job finishes and stream its logs into the cell output.
    wait=True,
    logs=True,
)

In [None]:
# Training job definition: same locally built image and local-mode session as
# the processing step, with ./train.py as the entry point.
estimator_settings = {
    "image_uri": 'sklearn:latest',
    "role": role,
    "instance_count": 1,
    "instance_type": 'local',
    "entry_point": './train.py',
    "sagemaker_session": sagemaker_session,
}
estimator = Estimator(**estimator_settings)

# Launch training; no input channels are passed.
estimator.fit()

In [None]:
from sagemaker.base_serializers import NumpySerializer
from sagemaker.base_deserializers import NumpyDeserializer

# Model wrapping the trained estimator, served through inference.py from this directory.
model = estimator.create_model(entry_point="inference.py", source_dir="./")

# Requests and responses both travel as int32 NumPy arrays in .npy format.
serializer = NumpySerializer(dtype=np.int32, content_type='application/x-npy')
# NOTE(review): allow_pickle=True lets the deserializer unpickle objects from the
# response body; acceptable only while the endpoint is trusted — confirm it is needed.
deserializer = NumpyDeserializer(dtype=np.int32, allow_pickle=True, accept="application/x-npy")

# Stand up a single local endpoint and keep the predictor handle for later cells.
deploy_settings = {
    "initial_instance_count": 1,
    "instance_type": 'local',
    "serializer": serializer,
    "deserializer": deserializer,
    "endpoint_name": "test-endpoint",
}
predictor = model.deploy(**deploy_settings)

In [None]:
# Smoke-test the endpoint with a two-element array and display the response.
# NOTE(review): presumably the values are encoded user ids — verify against inference.py.
sample_request = np.array([1, 2])
result = predictor.predict(sample_request)
result

In [None]:
# Tear down the endpoint created by the deploy cell so it does not keep running
# after the notebook is finished.
predictor.delete_endpoint()