# ML data preparation, training, and inference using MLflow


In [None]:
!pip install --quiet mlflow==2.13.2 sagemaker-mlflow==0.1.0

In [None]:
from sagemaker_ui_helper import Project, ClientConfig
import mlflow

# Region used to build the project client configuration.
region = 'us-east-1'

client_config = ClientConfig(region=region)
project = Project(config=client_config)

# Retrieve provisioned mlflow tracking server arn from the project
tracking_server_arn = project.mlflow_tracking_server_arn

# A bare expression is only echoed when it is the last statement of a cell,
# so print the ARN explicitly to actually show it.
print(f"MLflow tracking server ARN: {tracking_server_arn}")

# Send every subsequent mlflow.log_* call to the project's tracking server,
# grouped under a single named experiment.
mlflow.set_tracking_uri(tracking_server_arn)
mlflow.set_experiment("SageMaker SDS learning series")

In [None]:
import sagemaker
from sagemaker import get_execution_role
from sagemaker.inputs import TrainingInput
from sagemaker.estimator import Estimator
import boto3
import pandas as pd
import numpy as np
import os

## Set up SageMaker execution role and session

In [None]:

# One SageMaker session is shared by the upload, training and deployment steps.
session = sagemaker.Session()

# IAM role that the training job and the endpoint will run under.
role = get_execution_role()

# Region of the default boto3 session; used later to look up the algorithm image.
region = boto3.Session().region_name

# Replace with your S3 bucket name if needed
bucket = session.default_bucket()


## Step 1: Generate synthetic training data

In [None]:

# Fixed seed so that every run regenerates exactly the same dataset.
np.random.seed(42)

# 100 samples of a single feature drawn uniformly from [0, 10).
x = 10 * np.random.rand(100, 1)

# Standard-normal noise, one value per sample.
noise = np.random.randn(100, 1)

# Linear ground truth (slope 2, intercept 3) perturbed by the noise.
y = 2 * x + 3 + noise

# Label goes in the first column and no header row is written.
# NOTE(review): this is the CSV layout SageMaker built-in algorithms expect
# for training input — confirm against the Linear Learner docs.
train_data = pd.DataFrame({"label": y.ravel(), "feature_1": x.ravel()})
train_data_path = "train.csv"
train_data.to_csv(train_data_path, header=False, index=False)

# Keep a copy of the exact training file alongside the MLflow run.
mlflow.log_artifact(train_data_path)

## Step 2: Upload training data to S3

In [None]:
# Step 2: Upload the local training file to S3.
# upload_data returns the S3 URI of the uploaded object, so capture it instead of
# hard-coding a path that could drift from where the file actually landed.
s3_train_path = session.upload_data(
    path=train_data_path,
    bucket=bucket,  # bucket resolved from the SageMaker session earlier in the notebook
    key_prefix='folder',
)
print(f"Training data uploaded to: {s3_train_path}")

# Record the dataset location so the run can be traced back to its input.
mlflow.log_param("s3_train_path", s3_train_path)

## Step 3: Set up the SageMaker Linear Learner estimator

In [None]:
# Estimator for the SageMaker built-in Linear Learner algorithm.
linear_learner = Estimator(
    # Look up the Linear Learner container image for the current region.
    image_uri=sagemaker.image_uris.retrieve('linear-learner', region),
    role=role,
    instance_count=1,
    instance_type='ml.m5.large',  # Adjust as needed
    # Must be a real s3:// URI; model artifacts are written below this prefix.
    # Derived from the session bucket so the cell runs without manual edits.
    output_path=f"s3://{bucket}/folder/output",
    sagemaker_session=session,
)




## Set hyperparameters for Linear Learner

In [None]:
# Single source of truth for the hyperparameters, so the values sent to the
# estimator and the values recorded in MLflow cannot diverge.
linear_learner_hyperparameters = {
    "predictor_type": 'regressor',
    # Or an appropriate value less than 100 (number of records)
    "mini_batch_size": 10,
}

linear_learner.set_hyperparameters(**linear_learner_hyperparameters)

mlflow.log_params(linear_learner_hyperparameters)


## Step 4: Set up training input

In [None]:
# Describe the training channel: where the CSV lives in S3 and its content type.
train_input = TrainingInput(content_type='text/csv', s3_data=s3_train_path)

## Step 5: Train the model

In [None]:
# Launch the training job, feeding the CSV through the "train" channel.
linear_learner.fit(inputs={'train': train_input})

## Step 6: Deploy the model to an endpoint for inference

In [None]:
# Deploy the trained model to an endpoint backed by a single instance;
# the returned predictor is used to call the endpoint in the next step.
predictor = linear_learner.deploy(instance_type='ml.m5.large', initial_instance_count=1)

## Step 7: Make predictions using the deployed endpoint

In [None]:
import json
import psutil

test_data = [[6.5], [7.0], [8.5]]  # Test data for prediction

# Pass the data to the predictor as a CSV payload: one line per record,
# feature values separated by commas.
payload = '\n'.join(','.join(map(str, row)) for row in test_data)
response = predictor.predict(payload, initial_args={'ContentType': 'text/csv'})  # Specify content-type as text/csv

# Parse the predictions out of the JSON response body.
predictions = json.loads(response.decode('utf-8'))['predictions']
print("Predictions:", predictions)

# Log one metric per returned prediction (prediction_1, prediction_2, ...).
# mlflow.log_metric requires a numeric value, so an entry without a 'score'
# is skipped rather than logged as None, which would raise.
for position, prediction in enumerate(predictions, start=1):
    score = prediction.get('score')
    if score is not None:
        mlflow.log_metric(f"prediction_{position}", score)

# Log system metrics. These are sampled by psutil on the machine running this
# notebook, not on the endpoint instance.
cpu_usage = psutil.cpu_percent(interval=1)  # blocks for 1 second while sampling
memory_usage = psutil.virtual_memory().percent
disk_usage = psutil.disk_usage('/').percent

mlflow.log_metric("cpu_usage", cpu_usage)
mlflow.log_metric("memory_usage", memory_usage)
mlflow.log_metric("disk_usage", disk_usage)
    


In [None]:
# Delete the endpoint created by the deployment step.
predictor.delete_endpoint()

# Remove the local copy of the training data.
os.remove(train_data_path)

# The MLflow run was started implicitly by the first mlflow.log_* call; close it
# explicitly so it does not stay open for the lifetime of the notebook kernel.
mlflow.end_run()

print("SageMaker Linear Learner example completed successfully.")