In [1]:
import pandas as pd
import numpy as np
import sys
import gc

import os
sys.path.append(os.path.abspath(".."))

In [2]:
import s3fs
from typing import List

from utils.common import *
from config.params import *
from preprocessing.transform import transform, tracking_transforming_input
from preprocessing.intervals import get_interval_from_transformed

In [3]:
from preprocessing.prepare_clustering_data import *

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [4]:
from training.models import *
from training.visualize import *

In [5]:
from pyarrow.dataset import field

In [6]:
import sagemaker
from sagemaker import get_execution_role

In [7]:
import matplotlib.pyplot as plt
import seaborn as sns

In [8]:
# get the latest saved data from the MLflow run
import mlflow
from mlflow.tracking import MlflowClient

In [9]:
from sklearn.cluster import KMeans
from datetime import datetime
from pathlib import Path

In [10]:
import sagemaker
import boto3
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput

In [11]:
from io import StringIO

In [12]:
from sagemaker.model import Model
from sagemaker.transformer import Transformer

In [13]:
# MLflow client handle.
# NOTE(review): created before mlflow.set_tracking_uri(...) is called further down and not
# referenced again in the visible cells — confirm it is still needed.
client = MlflowClient()

In [14]:
# Define session, role, and region so we can
# perform any SageMaker tasks we need.
# These three names are read as globals by training_model() defined below.
sagemaker_session = sagemaker.Session()
role = get_execution_role()
region = sagemaker_session.boto_region_name

In [15]:
# Provide the ARN of the tracking server that you want to track your training job with.
# NOTE(review): the ARN (including region and account id) is hard-coded; adjust it when the
# notebook is run against a different account or tracking server.
tracking_server_arn = 'arn:aws:sagemaker:ap-southeast-1:771463264346:mlflow-tracking-server/mlflow-RCF-server'

In [16]:
# Route all subsequent mlflow.* calls in this notebook to the tracking server configured above.
mlflow.set_tracking_uri(tracking_server_arn)

In [17]:
# Experiment that receives the runs opened later with mlflow.start_run(...).
experiment_name = "4. Training RCF Model"
mlflow.set_experiment(experiment_name)

<Experiment: artifact_location='s3://s3-assetcare-bucket/mlflow_server/20', creation_time=1745547487760, experiment_id='20', last_update_time=1745547487760, lifecycle_stage='active', name='4. Training RCF Model', tags={}>

### functions

In [18]:
def get_value(run_id, keys):
    """Read selected logged parameters from an MLflow run.

    Parameters:
    - run_id: ID of the MLflow run to query
    - keys: iterable of parameter names to look up

    Returns:
    - list with one entry per key, in the same order as `keys`; each entry
      is obtained with `params.get(key)`, so a missing key is not an error
    """
    # All parameters logged on the run
    logged_params = mlflow.get_run(run_id).data.params

    # Pick out the requested ones, preserving the order of `keys`
    values = []
    for key in keys:
        values.append(logged_params.get(key))
    return values

In [19]:
def detect_value_intervals(df: pd.DataFrame) -> pd.DataFrame:
    """
    Collapse a time series into runs of consecutive identical values.

    Parameters:
    - df: DataFrame with 'value' and 'time_utc'

    Returns:
    - df_interval: DataFrame, ordered by start_time, with the columns:
        - value: the value held throughout the run
        - segment: running number of the run (1-based, in time order)
        - start_time: first timestamp of the run
        - end_time: last timestamp of the run
        - count: number of rows in the run
    """
    ordered = df.copy()

    # Work on real datetimes, in chronological order
    ordered['time_utc'] = pd.to_datetime(ordered['time_utc'])
    ordered = ordered.sort_values('time_utc').reset_index(drop=True)

    # A row opens a new run whenever its value differs from the previous row's value
    ordered['value_shift'] = ordered['value'].shift()
    ordered['new_segment'] = ordered['value'].ne(ordered['value_shift'])

    # The cumulative sum of the "run starts here" flags numbers the runs
    ordered['segment'] = ordered['new_segment'].cumsum()

    # One output row per run, then back into chronological order
    per_run = ordered.groupby(['value', 'segment']).agg(
        start_time=('time_utc', 'min'),
        end_time=('time_utc', 'max'),
        count=('time_utc', 'count'),
    )
    return (
        per_run
        .reset_index()
        .sort_values('start_time')
        .reset_index(drop=True)
    )


In [20]:
def prepare_cluster_interval(
    run_id,
    cluster_nr,

):
    """Load one cluster of a saved clustering result and derive its value intervals.

    Parameters:
    - run_id: MLflow run whose params "saved result destination" and "n_clusters" are read
    - cluster_nr: value of the 'speed_cluster' column to keep

    Returns:
    - tuple (run_id, saved_result, nr_clusters, shape of the filtered frame,
      intervals computed by detect_value_intervals on the filtered frame)
    """
    # where the clustered result was stored, and how many clusters it has
    saved_result, nr_clusters = get_value(
        run_id=run_id,
        keys=["saved result destination", "n_clusters"],
    )

    # keep only the rows assigned to the requested cluster
    clustered = pd.read_parquet(saved_result)
    in_cluster = clustered[clustered["speed_cluster"] == cluster_nr]

    return (
        run_id,
        saved_result,
        nr_clusters,
        in_cluster.shape,
        detect_value_intervals(in_cluster),
    )

In [21]:
def filter_by_time_intervals(df_new: pd.DataFrame, df_interval: pd.DataFrame) -> pd.DataFrame:
    """
    Keep the rows of `df_new` whose timestamp falls inside any of the given intervals.

    Parameters:
    - df_new: DataFrame containing a 'time_utc' column
    - df_interval: DataFrame containing 'start_time' and 'end_time' (both bounds inclusive)

    Returns:
    - df_filtered: the matching rows, concatenated interval by interval with
      fully identical rows dropped; an empty frame with the columns of
      `df_new` when `df_interval` has no rows
    """
    candidates = df_new.copy()
    candidates['time_utc'] = pd.to_datetime(candidates['time_utc'])
    timestamps = candidates['time_utc']

    # One slice of matching rows per interval, in interval order
    pieces = [
        candidates[
            (timestamps >= interval['start_time']) &
            (timestamps <= interval['end_time'])
        ]
        for _, interval in df_interval.iterrows()
    ]

    if not pieces:
        return pd.DataFrame(columns=candidates.columns)

    # Merge the slices; overlapping intervals can yield the same row twice
    return pd.concat(pieces, ignore_index=True).drop_duplicates()


In [22]:
# not being used yet
def prepare_analog_filtered_interval(
    df_intervals,
    tag_analog
):
    """Print the transform run names and parquet paths that belong to an analog tag.

    Parameters:
    - df_intervals: currently unused (reserved for the filtering step that is
      still commented out at the end of the body)
    - tag_analog: analog tag name used to build the run names

    Returns:
    - None; the run names and file paths are only printed.
    """
    # one run name per entry of the global `date_folders`
    run_names = []
    for date_folder in date_folders:
        run_names.append(f"Transforming_{tag_analog}_{date_folder}")
    print(run_names)

    # resolve each run name to the parquet file it produced - shows which monthly
    # datasets cover this training set
    filepaths = []
    for run_name in run_names:
        filepaths.append(
            get_param(
                experiment_name=experiment_name_RegularInterval,
                experiment_run=run_name,
                keyword="destination_parquet_file",
            )
        )
    print(filepaths)

    # Not implemented yet - intended continuation:
    # df_analog = read_df(lst_training_paths=filepaths)
    # print(df_analog.shape)
    # # filter analog input with speed cluster range
    # df_filtered = filter_by_time_intervals(df_new=df_analog, df_interval=df_intervals)

    # return run_names, filepaths, df_analog.shape, df_filtered

In [23]:
def shingle(data, shingle_size=10):
    """Turn a 1-D sequence into overlapping sliding windows ("shingles").

    Row i of the result holds data[i : i + shingle_size], so an input of
    length n yields n - shingle_size + 1 rows.

    Parameters:
    - data: pandas Series / DataFrame (flattened to 1-D) or any 1-D sequence
    - shingle_size: window length, must be >= 1

    Returns:
    - DataFrame with columns feature_0 ... feature_{shingle_size-1}

    Raises:
    - ValueError: if shingle_size < 1 or the data has fewer than shingle_size points
    """
    # Convert pandas objects (and plain sequences) to a flat NumPy array
    if isinstance(data, (pd.Series, pd.DataFrame)):
        data = data.values.flatten()
    else:
        data = np.asarray(data)

    # A window of length 0 would silently produce a frame without feature columns
    if shingle_size < 1:
        raise ValueError(f"shingle_size must be >= 1, got {shingle_size}.")

    num_data = len(data)

    # Not enough points for even a single window
    if num_data < shingle_size:
        raise ValueError(f"Dữ liệu quá nhỏ ({num_data} điểm), cần ít nhất {shingle_size} điểm.")
    print("num data and shingle size:", num_data, shingle_size)

    # Vectorised sliding window; the view is read-only, so copy it into an
    # ordinary writable array before handing it to pandas
    shingled_data = np.lib.stride_tricks.sliding_window_view(data, shingle_size).copy()

    # Name the columns explicitly
    column_names = [f"feature_{i}" for i in range(shingle_size)]
    df_shingled = pd.DataFrame(shingled_data, columns=column_names)

    return df_shingled

In [24]:
# save to ...
def save_dataframe_to_s3_in_batches(
    df: pd.DataFrame, 
    s3_path_prefix: str, 
    header,
    batch_size: int = 100000,
):
    """
    Save a DataFrame to S3 in multiple CSV files, each with a batch of rows.

    Files are named part_0000.csv, part_0001.csv, ... under the prefix.
    An empty DataFrame writes no file at all.

    Parameters:
    - df: DataFrame to save
    - s3_path_prefix: Full S3 path prefix (e.g. "s3://bucket/folder1/folder2/");
      a trailing "/" is appended when missing
    - header: forwarded unchanged to DataFrame.to_csv(header=...)
    - batch_size: Number of rows per file, must be >= 1

    Raises:
    - ValueError: if batch_size < 1
    """
    # Reject sizes that would divide by zero (0) or silently write nothing (< 0)
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}.")

    import s3fs

    # Make sure the prefix ends with a separator so file names can simply be appended
    if not s3_path_prefix.endswith('/'):
        s3_path_prefix += '/'
    
    # s3fs writes the files directly to S3
    fs = s3fs.S3FileSystem()

    total_rows = len(df)
    total_batches = (total_rows + batch_size - 1) // batch_size  # ceiling division

    for i in range(total_batches):
        # .iloc makes the positional row slicing explicit regardless of the index type
        batch_df = df.iloc[i * batch_size: (i + 1) * batch_size]
        file_name = f"part_{i:04d}.csv"
        full_s3_path = f"{s3_path_prefix}{file_name}"

        # Upload this batch
        with fs.open(full_s3_path, 'w') as f:
            batch_df.to_csv(f, index=False, header=header)
        print(f"Saved batch {i+1}/{total_batches} to {full_s3_path}")


In [25]:
def training_model(
    s3_input_train,
    output_path,
    feature_dim,
    num_samples_per_tree,
    num_trees,
    instance_count=2,
    instance_type="ml.m5.xlarge",
):
    """Train a SageMaker Random Cut Forest model on CSV data stored in S3.

    Relies on the notebook globals `region`, `role` and `sagemaker_session`.

    Parameters:
    - s3_input_train: S3 prefix holding the training CSV files
    - output_path: S3 location where the model artifact is written
    - feature_dim: number of feature columns, passed as the `feature_dim` hyperparameter
    - num_samples_per_tree: passed as the `num_samples_per_tree` hyperparameter
    - num_trees: passed as the `num_trees` hyperparameter
    - instance_count: number of training instances (default 2, the previous fixed value)
    - instance_type: training instance type (default "ml.m5.xlarge", the previous fixed value)

    Returns:
    - (rcf_estimator, model_path): the fitted Estimator and its `model_data` location

    NOTE(review): the input channel uses distribution="ShardedByS3Key"; when the
    prefix holds fewer files than `instance_count`, some instances may receive
    no data — confirm, and consider instance_count=1 for single-file datasets.
    """
    # Resolve the Random Cut Forest container image for the current region
    rcf_container = sagemaker.image_uris.retrieve("randomcutforest", region)

    # Configure the SageMaker Estimator
    rcf_estimator = Estimator(
        image_uri=rcf_container,
        role=role,
        instance_count=instance_count,  # raise for very large datasets
        instance_type=instance_type,
        output_path=output_path,
        sagemaker_session=sagemaker_session,
        enable_network_isolation=True,
    )

    # Training hyperparameters
    rcf_estimator.set_hyperparameters(
        # shingle_size=10,
        feature_dim=feature_dim,  # number of feature columns
        num_samples_per_tree=num_samples_per_tree,
        num_trees=num_trees,
    )

    # Input channel: shard the S3 files across instances and stream them
    train_input = TrainingInput(
        s3_input_train,
        distribution="ShardedByS3Key",  # split the data by S3 object
        content_type="text/csv",
        input_mode="Pipe",  # Pipe mode streams the data from S3
    )

    # Train the model (blocks until the job finishes)
    rcf_estimator.fit({"train": train_input}, wait=True)

    # Location of the trained model artifact
    model_path = rcf_estimator.model_data

    return rcf_estimator, model_path

#### params

In [26]:
# motor = "DWA"
# motor = "DWB"
# motor = "DWC"
# motors

In [27]:
# MLflow run IDs to process; the "n_clusters" param of each run is read further below.
# NOTE(review): presumably these are the speed-clustering runs — confirm.
lst_run_id = [
    "783994e1b9304cf8b9eaaf1e6a16e8c9",
    "997aac8f6ea5461f9dc836e1a7987976",
    "d3586df2258b430998ec7f0d88822418"
    # "d3586df2258b430998ec7f0d88822418",
    # "997aac8f6ea5461f9dc836e1a7987976",
]

In [28]:
# tag_name_analog

In [29]:
# DWA
# Analog tag names; the training loop below builds one model per tag for every
# (run_id, cluster_nr) pair.
tag_name_analog = [
    "DWA_DSU_DC_VOLTAGE",
    "AUXILIARY_HPU_AI_PRESSURE_VALUE"
    # "...",
    # "..."
]

### Read the total number of clusters per run and write the info to a .csv file

In [30]:
# for run_id in lst_run_id:
#     print(get_value(run_id=run_id, keys=["n_clusters"]))

In [31]:
# Build one {"run_id", "cluster_nr"} record per cluster of every run in lst_run_id.
rows = []
for run_id in lst_run_id:
    # get_value returns a list with one entry per requested key; the single entry is
    # converted with int(...) before being used as a count.
    n_clusters = int(get_value(run_id=run_id, keys=["n_clusters"])[0])
    # Cluster numbers run from 1 to n_clusters inclusive.
    # NOTE(review): assumes the 'speed_cluster' labels are 1-based — confirm.
    for cluster_nr in range(1, n_clusters + 1):
        rows.append({"run_id": run_id, "cluster_nr": cluster_nr})
rows

[{'run_id': '783994e1b9304cf8b9eaaf1e6a16e8c9', 'cluster_nr': 1},
 {'run_id': '783994e1b9304cf8b9eaaf1e6a16e8c9', 'cluster_nr': 2},
 {'run_id': '783994e1b9304cf8b9eaaf1e6a16e8c9', 'cluster_nr': 3},
 {'run_id': '783994e1b9304cf8b9eaaf1e6a16e8c9', 'cluster_nr': 4},
 {'run_id': '783994e1b9304cf8b9eaaf1e6a16e8c9', 'cluster_nr': 5},
 {'run_id': '997aac8f6ea5461f9dc836e1a7987976', 'cluster_nr': 1},
 {'run_id': '997aac8f6ea5461f9dc836e1a7987976', 'cluster_nr': 2},
 {'run_id': '997aac8f6ea5461f9dc836e1a7987976', 'cluster_nr': 3},
 {'run_id': '997aac8f6ea5461f9dc836e1a7987976', 'cluster_nr': 4},
 {'run_id': '997aac8f6ea5461f9dc836e1a7987976', 'cluster_nr': 5},
 {'run_id': 'd3586df2258b430998ec7f0d88822418', 'cluster_nr': 1},
 {'run_id': 'd3586df2258b430998ec7f0d88822418', 'cluster_nr': 2},
 {'run_id': 'd3586df2258b430998ec7f0d88822418', 'cluster_nr': 3},
 {'run_id': 'd3586df2258b430998ec7f0d88822418', 'cluster_nr': 4},
 {'run_id': 'd3586df2258b430998ec7f0d88822418', 'cluster_nr': 5}]

In [32]:
# Create the DataFrame (it is written to a file in the following cells)
df = pd.DataFrame(rows)
df.head()

Unnamed: 0,run_id,cluster_nr
0,783994e1b9304cf8b9eaaf1e6a16e8c9,1
1,783994e1b9304cf8b9eaaf1e6a16e8c9,2
2,783994e1b9304cf8b9eaaf1e6a16e8c9,3
3,783994e1b9304cf8b9eaaf1e6a16e8c9,4
4,783994e1b9304cf8b9eaaf1e6a16e8c9,5


##### save the run_id and mapping cluster number

In [33]:
# Write the run_id / cluster_nr table to a file named "cluster_by_runID" (no extension);
# this is the path read back below through `filepath`.
# NOTE(review): re-running this cell overwrites any manual edits made to that file.
df.to_csv("cluster_by_runID", index=False)

In [35]:
# Second copy of the same table, this time with a ".csv" extension.
# NOTE(review): the read-back cell below uses "cluster_by_runID" (no extension), not this file.
df.to_csv("cluster_by_runID.csv", index=False)

In [34]:
# (rows, columns) of the table that was written out
df.shape

(15, 2)

### read info from csv file


After editing the "cluster_by_runID" file, run the following cells to train an RCF model for each run_id / cluster pair listed in that file.

In [36]:
# Path of the (possibly hand-edited) run_id / cluster_nr table written above
filepath = "cluster_by_runID"

In [37]:
# Reload the table from disk; note that this rebinds `df`, replacing the frame built
# from `rows` earlier in the notebook.
df = pd.read_csv(filepath)
df.head()

Unnamed: 0,run_id,cluster_nr
0,783994e1b9304cf8b9eaaf1e6a16e8c9,1
1,783994e1b9304cf8b9eaaf1e6a16e8c9,2
2,783994e1b9304cf8b9eaaf1e6a16e8c9,3
3,d3586df2258b430998ec7f0d88822418,3
4,d3586df2258b430998ec7f0d88822418,4


In [38]:
# (rows, columns) of the table that was read back — one training pass per row and analog tag
df.shape

(6, 2)

### training for all run_id and cluster number

In [50]:
# Train one RCF model for every (run_id, cluster_nr) row of `df` and every analog tag.
# Each combination gets its own timestamped S3 folders and its own MLflow run.
for row in df.itertuples(index=False):
    run_id = row.run_id
    cluster_nr = row.cluster_nr

    # Cluster intervals and metadata depend only on (run_id, cluster_nr), so they are
    # computed once here instead of once per analog tag.
    _, saved_result, nr_clusters, df_shape, df_intervals = prepare_cluster_interval(
        run_id=run_id,
        cluster_nr=cluster_nr,
    )

    for tag_analog in tag_name_analog:
        print(run_id, cluster_nr, tag_analog)

        # 1. Prepare paths and parameters
        current_time = get_current_timestamp_string()
        s3_input_filtered = f"s3://s3-assetcare-bucket/features_store/training_data/read_to_evaluated/{tag_analog}_{current_time}/"
        s3_input_scored = f"s3://s3-assetcare-bucket/features_store/training_data/read_to_scored/{tag_analog}_{current_time}/"
        s3_input_train = f"s3://s3-assetcare-bucket/features_store/training_data/read_to_train/{tag_analog}_{current_time}/"
        output_path = f"s3://s3-assetcare-bucket/features_store/models/{tag_analog}_{current_time}/"

        # 2. Build run names and file paths
        run_names = [f"Transforming_{tag_analog}_{date_folder}" for date_folder in date_folders]
        filepaths = [get_param(
            experiment_name=experiment_name_RegularInterval,
            experiment_run=run_name,
            keyword="destination_parquet_file",
        ) for run_name in run_names]

        # 3. Read and filter analog data
        df_analog = read_df(lst_training_paths=filepaths)
        df_filtered = filter_by_time_intervals(df_new=df_analog, df_interval=df_intervals)

        # Put the rows in chronological order BEFORE saving or shingling.
        # sort_values returns a new frame, so the result has to be assigned; sorting first
        # also keeps the saved evaluation rows in the same order as the shingled windows.
        df_filtered['time_utc'] = pd.to_datetime(df_filtered['time_utc'])
        df_filtered = df_filtered.sort_values(by='time_utc', kind='stable').reset_index(drop=True)

        # save filtered data to get evaluation later
        save_dataframe_to_s3_in_batches(df=df_filtered, s3_path_prefix=s3_input_filtered, header=False)

        print(df_filtered.shape)
        gc.collect()

        # 4. Time-based feature engineering
        df_filtered['time_diff'] = df_filtered['time_utc'].diff().dt.total_seconds()

        average_time_diff = round(df_filtered['time_diff'].mean(), 0)
        if pd.isna(average_time_diff):
            # fewer than two rows: no time difference available, fall back to 10
            average_time_diff = 10

        # NOTE(review): the window length is taken from the mean sampling interval in
        # seconds — confirm this heuristic. It is clamped to at least 1 because a mean
        # below 0.5 s would otherwise give a window (and feature_dim) of 0.
        shingle_size = max(1, int(average_time_diff))

        # 5. Check if the dataset is large enough for shingling
        if len(df_filtered["value"]) <= shingle_size:
            print(f"SKIPPING: {tag_analog} cluster {cluster_nr} - not enough data for shingling (len={len(df_filtered['value'])}, required={shingle_size})")
            with mlflow.start_run(run_name=f"{tag_analog}_cluster-{cluster_nr}"):
                mlflow.log_param("Speed dataset", run_id)
                mlflow.log_param("Analog Tag name", tag_analog)
                mlflow.log_param("Cluster Nr", cluster_nr)
                mlflow.log_param("Reason", "Not enough data for shingling")
                mlflow.log_param("Data length", len(df_filtered["value"]))
                mlflow.log_param("Required shingle size", shingle_size)
            continue  # Skip training and full logging

        # 6. Create shingled dataset (columns reordered so the highest feature index comes first)
        shingled_df = shingle(df_filtered["value"], shingle_size=shingle_size)
        shingled_df = shingled_df[sorted(shingled_df.columns, key=lambda x: int(x.split("_")[1]), reverse=True)]
        shingled_df = shingled_df.astype(np.float32).round(10)

        # 7. Save datasets to S3 for inference and training
        save_dataframe_to_s3_in_batches(df=shingled_df, s3_path_prefix=s3_input_scored, header=False)

        # NOTE(review): the training CSV gets an extra all-zero "scores" column, so it has
        # shingle_size + 1 columns while feature_dim=shingle_size is passed to the
        # estimator below — confirm the RCF container accepts this layout.
        shingled_df["scores"] = 0
        shingled_df = shingled_df.astype(np.float32).round(10)
        save_dataframe_to_s3_in_batches(df=shingled_df, s3_path_prefix=s3_input_train, header=False)

        print(tag_analog)

        # 8. Train the RCF model
        rcf_estimator, model_path = training_model(
            s3_input_train=s3_input_train,
            output_path=output_path,
            feature_dim=shingle_size,
            num_samples_per_tree=1024,
            num_trees=300
        )
        print("========================")

        # 9. Log model metadata to MLflow
        with mlflow.start_run(run_name=f"{tag_analog}_cluster-{cluster_nr}"):
            mlflow.log_param("Speed dataset", run_id)
            mlflow.log_param("Analog Tag name", tag_analog)
            mlflow.log_param("Cluster Nr", cluster_nr)
            mlflow.log_param("saved_result", saved_result)
            mlflow.log_param("df_shape", df_shape)
            mlflow.log_param("df_intervals_shape", df_intervals.shape)
            mlflow.log_param("run_names", run_names)
            # log the resolved parquet paths (previously run_names was logged here by mistake)
            mlflow.log_param("filepaths", filepaths)
            mlflow.log_param("Average time different", average_time_diff)
            mlflow.log_param("Shingled size", shingled_df.shape)
            mlflow.log_param("Dataset path: S3 Input Scored", s3_input_scored)
            mlflow.log_param("Dataset path: S3 Input Train", s3_input_train)
            mlflow.log_param("Dataset path: S3 Input Evaluation", s3_input_filtered)
            mlflow.log_param("Model Path", model_path)
            mlflow.log_param("Output Path", output_path)


783994e1b9304cf8b9eaaf1e6a16e8c9 5 DWA_DSU_DC_VOLTAGE
(0, 2)
SKIPPING: DWA_DSU_DC_VOLTAGE cluster 5 - not enough data for shingling (len=0, required=10)
🏃 View run DWA_DSU_DC_VOLTAGE_cluster-5 at: https://ap-southeast-1.experiments.sagemaker.aws/#/experiments/20/runs/319749096d62498a8cbe3689c9b15bfe
🧪 View experiment at: https://ap-southeast-1.experiments.sagemaker.aws/#/experiments/20
783994e1b9304cf8b9eaaf1e6a16e8c9 5 AUXILIARY_HPU_AI_PRESSURE_VALUE
(0, 2)
SKIPPING: AUXILIARY_HPU_AI_PRESSURE_VALUE cluster 5 - not enough data for shingling (len=0, required=10)
🏃 View run AUXILIARY_HPU_AI_PRESSURE_VALUE_cluster-5 at: https://ap-southeast-1.experiments.sagemaker.aws/#/experiments/20/runs/b123b7b113df4df68dd305a33fb66f19
🧪 View experiment at: https://ap-southeast-1.experiments.sagemaker.aws/#/experiments/20
997aac8f6ea5461f9dc836e1a7987976 1 DWA_DSU_DC_VOLTAGE
Saved batch 1/1 to s3://s3-assetcare-bucket/features_store/training_data/read_to_evaluated/DWA_DSU_DC_VOLTAGE_20250509_015330/par

INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.
INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: 1.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating training-job with name: randomcutforest-2025-05-09-01-53-57-606


Saved batch 1/1 to s3://s3-assetcare-bucket/features_store/training_data/read_to_train/DWA_DSU_DC_VOLTAGE_20250509_015330/part_0000.csv
DWA_DSU_DC_VOLTAGE
2025-05-09 01:54:02 Starting - Starting the training job...
2025-05-09 01:54:18 Starting - Preparing the instances for training...
2025-05-09 01:54:57 Downloading - Downloading the training image............
2025-05-09 01:56:58 Training - Training image download completed. Training in progress...[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
  if num_device is 1 and 'dist' not in kvstore:[0m
  if cons['type'] is 'ineq':[0m
  if len(self.X_min) is not 0:[0m
[35mDocker entrypoint called with argument(s): train[0m
[35mRunning default environment configuration script[0m
  if num_device is 1 and 'dist' not in kvstore:[0m
  if cons['type'] is 'ineq':[0m
  if len(self.X_min) is not 0:[0m
[35m[05/09/2025 01:57:11 INFO 140191653799744] Reading default configuratio

INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.
INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: 1.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating training-job with name: randomcutforest-2025-05-09-01-58-38-800


Saved batch 1/1 to s3://s3-assetcare-bucket/features_store/training_data/read_to_train/DWA_DSU_DC_VOLTAGE_20250509_015759/part_0000.csv
DWA_DSU_DC_VOLTAGE
2025-05-09 01:58:38 Starting - Starting the training job...
2025-05-09 01:59:01 Starting - Preparing the instances for training...
2025-05-09 01:59:39 Downloading - Downloading the training image............
2025-05-09 02:01:31 Training - Training image download completed. Training in progress.[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
  if num_device is 1 and 'dist' not in kvstore:[0m
  if cons['type'] is 'ineq':[0m
  if len(self.X_min) is not 0:[0m
[34m[05/09/2025 02:01:41 INFO 140589865432896] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-conf.json: {'num_samples_per_tree': 256, 'num_trees': 100, 'force_dense': 'true', 'eval_metrics': ['accuracy', 'precision_recall_fscore'], 'epochs': 1, 'mini_batch

INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.
INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: 1.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating training-job with name: randomcutforest-2025-05-09-02-11-36-194


Saved batch 6/6 to s3://s3-assetcare-bucket/features_store/training_data/read_to_train/DWA_DSU_DC_VOLTAGE_20250509_020231/part_0005.csv
DWA_DSU_DC_VOLTAGE
2025-05-09 02:11:39 Starting - Starting the training job...
2025-05-09 02:11:56 Starting - Preparing the instances for training...
2025-05-09 02:12:33 Downloading - Downloading the training image...............
2025-05-09 02:14:55 Training - Training image download completed. Training in progress..[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
  if num_device is 1 and 'dist' not in kvstore:[0m
  if cons['type'] is 'ineq':[0m
  if len(self.X_min) is not 0:[0m
[34m[05/09/2025 02:15:07 INFO 140464904902464] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-conf.json: {'num_samples_per_tree': 256, 'num_trees': 100, 'force_dense': 'true', 'eval_metrics': ['accuracy', 'precision_recall_fscore'], 'epochs': 1, 'mini_b

INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.
INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: 1.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating training-job with name: randomcutforest-2025-05-09-02-16-34-154


Saved batch 1/1 to s3://s3-assetcare-bucket/features_store/training_data/read_to_train/DWA_DSU_DC_VOLTAGE_20250509_021601/part_0000.csv
DWA_DSU_DC_VOLTAGE
2025-05-09 02:16:34 Starting - Starting the training job...
2025-05-09 02:16:56 Starting - Preparing the instances for training...
2025-05-09 02:17:34 Downloading - Downloading the training image............
2025-05-09 02:19:36 Training - Training image download completed. Training in progress...[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
  if num_device is 1 and 'dist' not in kvstore:[0m
  if cons['type'] is 'ineq':[0m
  if len(self.X_min) is not 0:[0m
[35mDocker entrypoint called with argument(s): train[0m
[35mRunning default environment configuration script[0m
  if num_device is 1 and 'dist' not in kvstore:[0m
  if cons['type'] is 'ineq':[0m
  if len(self.X_min) is not 0:[0m
[34m[05/09/2025 02:19:48 INFO 139631313999680] Reading default configuratio

INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.
INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: 1.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating training-job with name: randomcutforest-2025-05-09-02-21-19-913


Saved batch 1/1 to s3://s3-assetcare-bucket/features_store/training_data/read_to_train/DWA_DSU_DC_VOLTAGE_20250509_022039/part_0000.csv
DWA_DSU_DC_VOLTAGE
2025-05-09 02:21:21 Starting - Starting the training job...
2025-05-09 02:21:55 Downloading - Downloading input data...
2025-05-09 02:22:15 Downloading - Downloading the training image.........
2025-05-09 02:23:41 Training - Training image download completed. Training in progress..[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
  if num_device is 1 and 'dist' not in kvstore:[0m
  if cons['type'] is 'ineq':[0m
  if len(self.X_min) is not 0:[0m
[34m[05/09/2025 02:23:52 INFO 139656355383104] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-conf.json: {'num_samples_per_tree': 256, 'num_trees': 100, 'force_dense': 'true', 'eval_metrics': ['accuracy', 'precision_recall_fscore'], 'epochs': 1, 'mini_batch_size': 1000,

INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.
INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: 1.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating training-job with name: randomcutforest-2025-05-09-02-25-09-333


Saved batch 1/1 to s3://s3-assetcare-bucket/features_store/training_data/read_to_train/DWA_DSU_DC_VOLTAGE_20250509_022442/part_0000.csv
DWA_DSU_DC_VOLTAGE
2025-05-09 02:25:11 Starting - Starting the training job...
2025-05-09 02:25:44 Downloading - Downloading input data...
2025-05-09 02:26:04 Downloading - Downloading the training image.........
2025-05-09 02:27:20 Training - Training image download completed. Training in progress.[35mDocker entrypoint called with argument(s): train[0m
[35mRunning default environment configuration script[0m
  if num_device is 1 and 'dist' not in kvstore:[0m
  if cons['type'] is 'ineq':[0m
  if len(self.X_min) is not 0:[0m
[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
  if num_device is 1 and 'dist' not in kvstore:[0m
  if cons['type'] is 'ineq':[0m
  if len(self.X_min) is not 0:[0m
[34m[05/09/2025 02:27:34 INFO 139870005184320] Reading default configuration from /opt/amaz