In [41]:
# Create the directory that the %%writefile cells below write into.
# `-p` also creates the parent and makes re-running this cell a no-op.
!mkdir -p pipelines/code

In [42]:
import os
import sys
from sagemaker import Session

# Put the parent directory on the import path (presumably so the `pipeliner`
# imports below resolve -- confirm against the repo layout). The membership
# check keeps repeated runs of this cell from appending duplicates.
module_path = os.path.abspath(os.pardir)
if not any(entry == module_path for entry in sys.path):
    sys.path.append(module_path)

from pipeliner.exceptions import SagemakerSessionException
from pipeliner.sagemaker.session import create_pipeline_session
from pipeliner.sagemaker.pipeline import PipelineFactory

In [43]:
%%writefile pipelines/code/user_item_similarity_matrix_transformer.py

import numpy as np
import pandas as pd

from pipeliner.recommendations.transformer import (
    UserItemMatrixTransformer,
    SimilarityTransformer,
)
from sklearn.pipeline import Pipeline as SKPipeline

# Column dtypes enforced when reading the ratings CSV.
data_types = {"user_id": str, "item_id": str, "rating": np.float64}


def parse_args(argv=None):
    """Parse the command-line options that configure the similarity step.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, i.e. whatever the processing job passes
            to the script.

    Returns:
        argparse.Namespace with ``kind`` and ``metric`` attributes, which are
        forwarded unchanged to ``SimilarityTransformer``.
    """
    import argparse  # function-scope import keeps this block self-contained

    parser = argparse.ArgumentParser(
        description="Build a similarity matrix from user/item ratings."
    )
    # NOTE(review): the defaults below are assumptions -- the original script
    # referenced `kind` and `metric` without ever defining them. Confirm the
    # values against what SimilarityTransformer actually accepts.
    parser.add_argument(
        "--kind",
        default="user",
        help="Value passed as SimilarityTransformer(kind=...).",
    )
    parser.add_argument(
        "--metric",
        default="cosine",
        help="Value passed as SimilarityTransformer(metric=...).",
    )
    return parser.parse_args(argv)


if __name__ == "__main__":
    args = parse_args()

    base_dir = "/opt/ml/processing"
    data_path = "user_item_ratings.csv"

    # Read the ratings with explicit dtypes so the id columns stay strings.
    user_item_ratings = pd.read_csv(f"{base_dir}/{data_path}", dtype=data_types)

    transformer = SKPipeline(
        [
            ("user_item", UserItemMatrixTransformer()),
            ("similarity", SimilarityTransformer(kind=args.kind, metric=args.metric)),
        ]
    )

    # NOTE(review): transform() is called without a prior fit(); this assumes
    # both transformers are stateless -- confirm, or switch to fit_transform().
    similarity_matrix = transformer.transform(user_item_ratings)

    similarity_matrix.to_csv(
        f"{base_dir}/user_item_similarity_matrix.csv", header=True, index=False
    )

Overwriting pipelines/code/user_item_similarity_matrix_transformer.py


In [44]:
%%writefile pipelines/code/recommender_pipeline.py
import sagemaker
from sagemaker import ScriptProcessor
from sagemaker.workflow.pipeline_context import LocalPipelineSession
from sagemaker.workflow.parameters import ParameterString
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.steps import ProcessingStep

from pipeliner.factory import SagemakerPipelineFactory


class RecommenderPipeline(SagemakerPipelineFactory):
    """Factory that assembles the recommender SageMaker pipeline.

    The pipeline has a single processing step that runs the user-item
    similarity transformer script in the scikit-learn container image.
    """

    # Set by create(): True when the supplied session is a LocalPipelineSession.
    local: bool

    def create(
        self,
        role: str,
        name: str,
        session: sagemaker.Session,
    ) -> Pipeline:
        """Build the pipeline definition.

        Args:
            role: Role passed to the ScriptProcessor that runs the step.
            name: Name given to the resulting Pipeline.
            session: Session used to look up the region for the container
                image and attached to both the processor and the pipeline.
                A LocalPipelineSession switches the default instance type
                to ``"local"``.

        Returns:
            A Pipeline containing one ProcessingStep and exposing an
            ``InstanceType`` parameter.
        """
        self.local = isinstance(session, LocalPipelineSession)

        # Exposed as a pipeline parameter so it can be overridden per execution.
        instance_type = ParameterString(
            name="InstanceType",
            default_value="local" if self.local else "ml.m5.large",
        )

        image_uri = sagemaker.image_uris.retrieve(
            framework="sklearn",
            region=session.boto_region_name,
            version="1.2-1",
        )

        processor = ScriptProcessor(
            image_uri=image_uri,
            command=["python3"],
            instance_type=instance_type,
            instance_count=1,
            role=role,
            sagemaker_session=session,
        )

        processing_step = ProcessingStep(
            name="user-item-transformer",
            step_args=processor.run(
                # Must match the script written by the notebook's %%writefile
                # cell; the previous value ("pipelines/code/transform.py") is
                # not produced anywhere in the notebook. The path is resolved
                # relative to the current working directory.
                code="pipelines/code/user_item_similarity_matrix_transformer.py",
            ),
        )

        return Pipeline(
            name=name,
            steps=[processing_step],
            sagemaker_session=session,
            parameters=[instance_type],
        )

Overwriting pipelines/code/recommender_pipeline.py
