In [None]:
import boto3
import sagemaker.session
import sagemaker
from sagemaker.workflow.parameters import (
    ParameterInteger,
    ParameterString
)
from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.sklearn.estimator import SKLearn
from sagemaker.processing import ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep, TrainingStep

## Set up the environment

In [None]:
# Name of the model package group intended for model registration.
# NOTE(review): "Logistrical" looks like a typo, but the string identifies a
# resource, so it is kept as-is — confirm before renaming.
model_package_group_name = "MedicalLogistricalRegressionPackageGroup"

# Region configured for the default boto3 session.
region = boto3.Session().region_name

# Execution role that the processing and training jobs below run under.
role = sagemaker.get_execution_role()

# Shared SageMaker session and the default S3 bucket it reports.
sagemaker_session = sagemaker.session.Session()
default_bucket = sagemaker_session.default_bucket()

## Constants

In [None]:
# S3 bucket holding the raw input data.
BUCKET = "sagemaker-medical-logistical-regression-data-storage"

# Object key of the input spreadsheet inside BUCKET.
DATA_KEY = "data.xlsx"

# Name of the label column (Russian for "complications: yes/no").
TARGET_COLUMN = "осложнения есть/нет"

## Define a Preprocessing Step

In [None]:
# Version string handed to the scikit-learn processor and estimator as
# `framework_version`.
framework_version = "0.23-1"

# Full S3 URI of the input spreadsheet, built from the notebook constants.
data_location = "s3://{}/{}".format(BUCKET, DATA_KEY)

# --- Pipeline parameters (each has a default and a pipeline-level name) ---

# Instance type and count for the preprocessing job.
processing_instance_type = ParameterString(name="ProcessingInstanceType", default_value="ml.m5.xlarge")
processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1)

# Approval status parameter; not referenced by any step defined so far in
# this notebook.
model_approval_status = ParameterString(name="ModelApprovalStatus", default_value="PendingManualApproval")

# Location of the raw data consumed by the preprocessing step.
input_data = ParameterString(name="InputData", default_value=data_location)

# Label column name; not yet passed to any step defined in this notebook.
target_column = ParameterString(name="TargetColumn", default_value=TARGET_COLUMN)

In [None]:
# Processor that runs the preprocessing script; its instance type and count
# come from the pipeline parameters so they can be overridden per execution.
sklearn_processor = SKLearnProcessor(
    role=role,
    sagemaker_session=sagemaker_session,
    framework_version=framework_version,
    instance_count=processing_instance_count,
    instance_type=processing_instance_type,
)

In [None]:
# Preprocessing step: the `InputData` parameter is mounted at
# /opt/ml/processing/input, and ./scripts/preprocessing.py is run against it.
# Four named outputs are declared, each collected from a directory of the same
# name under /opt/ml/train/.
# NOTE(review): presumably preprocessing.py writes the feature/label splits to
# exactly these directories — verify against the script.
step_process = ProcessingStep(
    name="PreprocessingStep",
    processor=sklearn_processor,
    code="./scripts/preprocessing.py",
    inputs=[
        ProcessingInput(source=input_data, destination="/opt/ml/processing/input"),
    ],
    outputs=[
        ProcessingOutput(output_name=split, source=f"/opt/ml/train/{split}")
        for split in ("X_train", "X_test", "y_train", "y_test")
    ],
)

## Define a Training step

In [None]:
# The three names below were referenced by the estimator but never defined
# anywhere in the notebook, so this cell raised NameError on a fresh kernel.

# Instance type for the training job, exposed as a pipeline parameter in the
# same way as the processing instance type.
training_instance_type = ParameterString(
    name="TrainingInstanceType",
    default_value="ml.m5.xlarge",
)

# S3 prefix under the session's default bucket for training output.
# TODO(review): placeholder prefix — adjust to the project's storage layout.
training_output_path = f"s3://{default_bucket}/medical-logistic-regression/training-output"

# Name/Regex pairs used to extract metrics from the training job's logs.
# TODO(review): placeholder pattern — it must match what train.py actually
# prints, otherwise no metric is captured.
metric_definitions = [
    {"Name": "accuracy", "Regex": r"accuracy: ([0-9\.]+)"},
]

# Scikit-learn estimator that runs `train.py` from a packaged source archive.
# NOTE(review): the source_dir prefix ("tests/wines/train") looks copied from
# another project — confirm that the archive there contains this project's
# train.py.
# NOTE(review): the variable name `sklearn` would shadow the scikit-learn
# package if it were ever imported in this notebook; it is kept because the
# training step refers to it by this name.
sklearn = SKLearn(
    entry_point='train.py',
    source_dir=f's3://{default_bucket}/tests/wines/train/sourcedir.tar.gz',
    framework_version=framework_version,
    instance_type=training_instance_type,
    enable_sagemaker_metrics=True,
    metric_definitions=metric_definitions,
    role=role,
    output_path=training_output_path,
    sagemaker_session=sagemaker_session,
    # Hyperparameters can be supplied here, e.g. hyperparameters={"key": value}.
)

In [None]:
from sagemaker import TrainingInput

# Outputs declared by the preprocessing step, addressed by output name.
processing_outputs = step_process.properties.ProcessingOutputConfig.Outputs

# Training step. The original looked up an output named "train", but the
# preprocessing step only declares "X_train", "X_test", "y_train" and
# "y_test", so that reference could never resolve. The training features and
# labels are wired in as two channels named after the outputs they come from.
# NOTE(review): this assumes train.py reads the "X_train" and "y_train"
# channels — confirm against the script, and add the test splits as further
# channels if it also evaluates.
# NOTE(review): the step name "AbaloneTrain" looks carried over from another
# example; it is left unchanged because other definitions may refer to it.
step_train = TrainingStep(
    name="AbaloneTrain",
    estimator=sklearn,
    inputs={
        channel: TrainingInput(
            s3_data=processing_outputs[channel].S3Output.S3Uri,
            content_type="text/csv",
        )
        for channel in ("X_train", "y_train")
    },
)


## Define a RegisterModel Step to Create a Model Package

## Define a Condition Step to Verify Model Accuracy

## Create a pipeline