# Example usage notebook
Objects and functions in this notebook are called with all of their parameters spelled out to illustrate their capabilities. Most of the parameters have default values in the implementation.

## Imports

In [1]:
import os
import sys

# Put the directory the notebook was started in on the import path, then move
# the working directory two levels up (presumably the repository root - confirm)
# so that the relative data paths used further down resolve.
# NOTE: the chdir is relative, so re-running this cell climbs two more levels;
# restart the kernel before executing it again.
sys.path.append(os.getcwd())
os.chdir("../..")

import pandas as pd
import mlflow

# Raise the pandas display limits so wide and long frames are not truncated.
for option_name, option_value in (
    ("display.max_columns", 200),
    ("display.max_rows", 300),
):
    pd.set_option(option_name, option_value)

# Connection settings for the services used by this example, exported as
# environment variables. These are demo values for a local deployment.
os.environ.update(
    {
        # S3 credentials and endpoint read by MLflow
        "AWS_ACCESS_KEY_ID": "minioadmin",
        "AWS_SECRET_ACCESS_KEY": "minioadmin",
        "MLFLOW_S3_ENDPOINT_URL": "http://10.152.183.156:9000",
        # Redis
        "REDIS_PASSWORD": "redis",
        "REDIS_HOST": "10.152.183.169",
        "REDIS_PORT": "6379",
        # InfluxDB
        "INFLUXDB_HOST": "10.152.183.219",
        "INFLUXDB_PORT": "80",
        "INFLUXDB_USER": "admin",
        "INFLUXDB_PASS": "admin_pass",
    }
)

In [2]:
import os
import json
from typing import Literal
import pandas as pd

from inference_model.preprocessing.preprocess import PreprocessData
from inference_model.training.trainer import Trainer, S6GTrainer
from inference_model.training.optuna_optimizer import LGBOptunaOptimizer

import dill
import numpy as np
from inference_model.utils import dill_dump, dill_load
from inference_model.training.utils import flatten_dict, get_or_create_experiment
from sklearn.model_selection import train_test_split

from pprint import pprint
import ast
import time
import redis

# Workaround: the horizontal scrollbar does not work for wide outputs in
# JupyterLab, so inject a CSS rule that makes every output area a flex container.
from IPython.display import display, HTML
display(HTML("<style>.jp-OutputArea-output {display:flex}</style>"))

## Train a test model

In [3]:
def load_new_dataset(num: int, data_dir: str = "tools/vehicle/datasets/ateca_R4_2.0l_TDI/"):
    """Load one recorded vehicle CSV and turn it into a labelled example frame.

    Args:
        num: Position of the recording in the built-in file list (negative
            values index from the end, as with a normal list).
        data_dir: Directory that contains the recordings. Defaults to the
            location used by this example, relative to the working directory.

    Returns:
        A DataFrame whose signal columns are floats and whose column names
        contain only letters, digits and underscores, plus:
        ``timestamp`` (datetime), ``class`` (1 for index labels up to 100,
        otherwise 0) and ``vehicle_id`` (constant ``"123abc"``).

    Raises:
        IndexError: If ``num`` is outside the built-in file list.
    """
    data_sets = [
        "DS1_stopped_with_ignition_on_22Feb24_115812.csv",
        "DS1_stopped_with_ignition_on_25Jan24_124019.csv",
        "DS1_stopped_with_ignition_on_25Jan24_151531.csv",
        "DS1_stopped_with_ignition_on_25Mar24_153740.CSV",
        "DS2_national_road_90km_h_max_25Jan24_153019.csv",
        "DS2_national_road_90km_h_max_25Mar24_133516.CSV",
        "DS3_highway_120km_h_max_22Feb24_121145.csv",
        "DS3_highway_120km_h_max_25Mar24_154857.csv"
    ]
    if not -len(data_sets) <= num < len(data_sets):
        raise IndexError(
            f"num={num} is out of range; {len(data_sets)} data sets are available"
        )
    file = os.path.join(data_dir, data_sets[num])

    df = pd.read_csv(file)
    # Drop the two unnamed filler columns and the first row, which is not a
    # measurement (presumably a units/sub-header line - TODO confirm).
    df = df.drop(columns=["Unnamed: 0", "Unnamed: 25"]).drop(index=0)

    # keep only the first timestamp column
    timestamp_columns = [col for col in df.columns if col.startswith("STAMP")]
    df["timestamp"] = df["STAMP"]
    df = df.drop(columns=timestamp_columns)

    # Convert every signal (and the timestamp) to float BEFORE writing the
    # synthetic values below, so numbers are never written into text columns.
    df = df.astype(float)

    # Synthetic labels for the example: rows with index label <= 100 form the
    # positive class and get an artificial load value of 100.
    df["class"] = 0
    df["vehicle_id"] = "123abc"
    df.loc[:100, ["class"]] = 1
    df.loc[:100, ["Normed load value"]] = 100

    # Remove special characters from column names
    df.columns = df.columns.str.replace('[^A-Za-z0-9]+', '_', regex=True)

    # add some "reasonable" timestamp for testing: the recorded offsets are
    # placed on a time axis that starts one hour before now
    df["timestamp"] = pd.to_datetime(time.time() - 3600 + df["timestamp"], unit="s")
    return df

def load_old_dataset(path: str = "data/log_tiguan_27_mar_dac.txt"):
    """Load the older log format and turn it into a labelled example frame.

    The file must contain a Python literal: a list of single-key dicts, each
    mapping a signal name to a list of records with the keys ``"ts_millis:"``
    and ``"value"``.

    Args:
        path: Location of the log file. Defaults to the file used by this
            example, relative to the working directory.

    Returns:
        A DataFrame with one column per signal (rows with any missing signal
        removed), plus ``class`` (1 for index labels up to 100, otherwise 0),
        ``vehicle_id`` (constant ``"123abc"``) and ``timestamp`` (datetime).
    """
    with open(path) as f:
        # literal_eval only accepts Python literals, it does not execute code
        data = ast.literal_eval(f.read())

    df = pd.DataFrame()
    for signal in data:
        signal_name = list(signal)[0]
        values = (
            pd.DataFrame(signal[signal_name])
            .sort_values(by="ts_millis:", ascending=True)["value"]
            .rename(signal_name)
        )
        # Signals are joined one at a time on their index labels; this keeps
        # the row order on which the label slice below depends.
        df = pd.concat([df, values], axis=1)

    df.dropna(inplace=True)

    # Synthetic labels for the example: rows with index label <= 100 form the
    # positive class and get an artificial load value of 100.
    df["class"] = 0
    df["vehicle_id"] = "123abc"
    df.loc[:100, ["class"]] = 1
    df.loc[:100, ["engine_load"]] = 100

    # add some "reasonable" timestamp for testing: one second per index step,
    # starting one hour before now. The clock is read once so that the spacing
    # between rows is exactly the difference of their index labels.
    start = time.time() - 3600
    df["timestamp"] = pd.to_datetime([start + ix for ix in df.index], unit="s")
    return df

In [4]:
# 1. Get/create some example data.
# `num` selects one recording from the file list inside load_new_dataset.

# Alternative source in the older log format; uncomment to use it instead.
# df_pd = load_old_dataset()
df_pd = load_new_dataset(num=1)

In [5]:
# Roles of the columns in the example frame.
target_col = "class"
id_cols = ["vehicle_id", "timestamp"]
cat_cols = []

# Whatever is not an identifier, a categorical feature or the target is
# treated as a continuous feature.
non_feature_cols = [*id_cols, *cat_cols, target_col]
cont_cols = df_pd.drop(columns=non_feature_cols).columns.tolist()

# Categorical features are cast to strings (nothing to cast while cat_cols is empty).
df_pd[cat_cols] = df_pd[cat_cols].astype(str)

In [6]:
# Two-step stratified split: first hold out `valid_size` of the rows, then cut
# that hold-out into validation and test parts using `test_size`.
valid_size = 0.2
test_size = 0.5
random_state = 1

df_train, df_holdout = train_test_split(
    df_pd,
    test_size=valid_size,
    stratify=df_pd[target_col],
    random_state=random_state,
)
df_valid, df_test = train_test_split(
    df_holdout,
    test_size=test_size,
    stratify=df_holdout[target_col],
    random_state=random_state,
)

In [7]:
prepare_data = PreprocessData(
    id_cols=id_cols,
    target_col=target_col,
    cat_cols=cat_cols,
    cont_cols=cont_cols,
)
# Fit on the training split only: fitting on the full frame would let the
# preprocessor see validation and test rows (data leakage).
_ = prepare_data.fit(df=df_train)

In [8]:
# Task definition shared by the hyper-parameter optimizer and the trainer,
# written once so the two cannot drift apart.
task_settings = {"objective": "binary", "n_class": 2}

optimizer = LGBOptunaOptimizer(**task_settings)

# The plain `Trainer` imported above was the previously used alternative
# (presumably a drop-in replacement - confirm before swapping).
trainer = S6GTrainer(
    cat_cols=prepare_data.cat_cols,
    target_col=prepare_data.target_col,
    id_cols=id_cols,
    optimizer=optimizer,
    preprocessors=[prepare_data],
    **task_settings,
)

# Tune and train, keeping the returned metrics for inspection.
metrics_dict = trainer.fit(df_train=df_train, df_valid=df_valid, df_test=df_test)

[I 2024-08-07 12:09:05,154] A new study created in memory with name: LightGBMTuner
feature_fraction, val_score: inf:   0%|          | 0/7 [00:00<?, ?it/s]

Training until validation scores don't improve for 50 rounds


feature_fraction, val_score: 0.000003:  14%|#4        | 1/7 [00:01<00:06,  1.11s/it][I 2024-08-07 12:09:06,272] Trial 0 finished with value: 2.736732655192808e-06 and parameters: {'feature_fraction': 0.4}. Best is trial 0 with value: 2.736732655192808e-06.
feature_fraction, val_score: 0.000003:  14%|#4        | 1/7 [00:01<00:06,  1.11s/it]

Early stopping, best iteration is:
[136]	valid_0's binary_logloss: 2.73673e-06
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[123]	valid_0's binary_logloss: 3.04492e-06


feature_fraction, val_score: 0.000003:  29%|##8       | 2/7 [00:01<00:02,  1.74it/s][I 2024-08-07 12:09:06,471] Trial 1 finished with value: 3.044920910157273e-06 and parameters: {'feature_fraction': 1.0}. Best is trial 0 with value: 2.736732655192808e-06.
feature_fraction, val_score: 0.000003:  29%|##8       | 2/7 [00:01<00:02,  1.74it/s]

Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[122]	valid_0's binary_logloss: 2.62603e-06


feature_fraction, val_score: 0.000003:  43%|####2     | 3/7 [00:01<00:01,  2.26it/s][I 2024-08-07 12:09:06,759] Trial 2 finished with value: 2.626032357902686e-06 and parameters: {'feature_fraction': 0.8}. Best is trial 2 with value: 2.626032357902686e-06.
feature_fraction, val_score: 0.000003:  43%|####2     | 3/7 [00:01<00:01,  2.26it/s]

Training until validation scores don't improve for 50 rounds


feature_fraction, val_score: 0.000003:  57%|#####7    | 4/7 [00:01<00:01,  2.79it/s][I 2024-08-07 12:09:06,987] Trial 3 finished with value: 3.0142284746906754e-06 and parameters: {'feature_fraction': 0.8999999999999999}. Best is trial 2 with value: 2.626032357902686e-06.
feature_fraction, val_score: 0.000003:  57%|#####7    | 4/7 [00:01<00:01,  2.79it/s]

Early stopping, best iteration is:
[140]	valid_0's binary_logloss: 3.01423e-06
Training until validation scores don't improve for 50 rounds


feature_fraction, val_score: 0.000003:  71%|#######1  | 5/7 [00:02<00:00,  3.01it/s][I 2024-08-07 12:09:07,274] Trial 4 finished with value: 2.5088073440362348e-06 and parameters: {'feature_fraction': 0.7}. Best is trial 4 with value: 2.5088073440362348e-06.
feature_fraction, val_score: 0.000003:  71%|#######1  | 5/7 [00:02<00:00,  3.01it/s]

Early stopping, best iteration is:
[125]	valid_0's binary_logloss: 2.50881e-06
Training until validation scores don't improve for 50 rounds


feature_fraction, val_score: 0.000002:  86%|########5 | 6/7 [00:02<00:00,  3.10it/s][I 2024-08-07 12:09:07,578] Trial 5 finished with value: 2.4087932119840814e-06 and parameters: {'feature_fraction': 0.6}. Best is trial 5 with value: 2.4087932119840814e-06.
feature_fraction, val_score: 0.000002:  86%|########5 | 6/7 [00:02<00:00,  3.10it/s]

Early stopping, best iteration is:
[168]	valid_0's binary_logloss: 2.40879e-06
Training until validation scores don't improve for 50 rounds


feature_fraction, val_score: 0.000002: 100%|##########| 7/7 [00:02<00:00,  2.63it/s][I 2024-08-07 12:09:08,076] Trial 6 finished with value: 2.3026435797335585e-06 and parameters: {'feature_fraction': 0.5}. Best is trial 6 with value: 2.3026435797335585e-06.
feature_fraction, val_score: 0.000002: 100%|##########| 7/7 [00:02<00:00,  2.39it/s]


Early stopping, best iteration is:
[163]	valid_0's binary_logloss: 2.30264e-06


num_leaves, val_score: 0.000002:   0%|          | 0/20 [00:00<?, ?it/s]

Training until validation scores don't improve for 50 rounds


num_leaves, val_score: 0.000002:   5%|5         | 1/20 [00:00<00:05,  3.17it/s][I 2024-08-07 12:09:08,471] Trial 7 finished with value: 2.3026435797335585e-06 and parameters: {'num_leaves': 166}. Best is trial 7 with value: 2.3026435797335585e-06.
num_leaves, val_score: 0.000002:   5%|5         | 1/20 [00:00<00:05,  3.17it/s]

Early stopping, best iteration is:
[127]	valid_0's binary_logloss: 2.30264e-06
Training until validation scores don't improve for 50 rounds


num_leaves, val_score: 0.000002:  10%|#         | 2/20 [00:00<00:07,  2.36it/s][I 2024-08-07 12:09:08,972] Trial 8 finished with value: 2.3026435797335585e-06 and parameters: {'num_leaves': 92}. Best is trial 7 with value: 2.3026435797335585e-06.
num_leaves, val_score: 0.000002:  10%|#         | 2/20 [00:00<00:07,  2.36it/s]

Early stopping, best iteration is:
[161]	valid_0's binary_logloss: 2.30264e-06
Training until validation scores don't improve for 50 rounds


num_leaves, val_score: 0.000002:  15%|#5        | 3/20 [00:02<00:13,  1.27it/s][I 2024-08-07 12:09:10,253] Trial 9 finished with value: 2.302643579733558e-06 and parameters: {'num_leaves': 234}. Best is trial 9 with value: 2.302643579733558e-06.
num_leaves, val_score: 0.000002:  15%|#5        | 3/20 [00:02<00:13,  1.27it/s]

Early stopping, best iteration is:
[141]	valid_0's binary_logloss: 2.30264e-06
Training until validation scores don't improve for 50 rounds


num_leaves, val_score: 0.000002:  20%|##        | 4/20 [00:03<00:18,  1.15s/it][I 2024-08-07 12:09:11,953] Trial 10 finished with value: 2.302643579733558e-06 and parameters: {'num_leaves': 253}. Best is trial 9 with value: 2.302643579733558e-06.
num_leaves, val_score: 0.000002:  20%|##        | 4/20 [00:03<00:18,  1.15s/it]

Early stopping, best iteration is:
[132]	valid_0's binary_logloss: 2.30264e-06
Training until validation scores don't improve for 50 rounds


num_leaves, val_score: 0.000002:  25%|##5       | 5/20 [00:04<00:14,  1.02it/s][I 2024-08-07 12:09:12,577] Trial 11 finished with value: 2.302643579733558e-06 and parameters: {'num_leaves': 243}. Best is trial 9 with value: 2.302643579733558e-06.
num_leaves, val_score: 0.000002:  25%|##5       | 5/20 [00:04<00:14,  1.02it/s]

Early stopping, best iteration is:
[169]	valid_0's binary_logloss: 2.30264e-06
Training until validation scores don't improve for 50 rounds


num_leaves, val_score: 0.000002:  30%|###       | 6/20 [00:04<00:10,  1.28it/s][I 2024-08-07 12:09:12,974] Trial 12 finished with value: 2.3026435797335585e-06 and parameters: {'num_leaves': 254}. Best is trial 9 with value: 2.302643579733558e-06.
num_leaves, val_score: 0.000002:  30%|###       | 6/20 [00:04<00:10,  1.28it/s]

Early stopping, best iteration is:
[150]	valid_0's binary_logloss: 2.30264e-06
Training until validation scores don't improve for 50 rounds


num_leaves, val_score: 0.000002:  35%|###5      | 7/20 [00:05<00:08,  1.53it/s][I 2024-08-07 12:09:13,364] Trial 13 finished with value: 2.302643579733558e-06 and parameters: {'num_leaves': 184}. Best is trial 9 with value: 2.302643579733558e-06.
num_leaves, val_score: 0.000002:  35%|###5      | 7/20 [00:05<00:08,  1.53it/s]

Early stopping, best iteration is:
[146]	valid_0's binary_logloss: 2.30264e-06
Training until validation scores don't improve for 50 rounds


num_leaves, val_score: 0.000002:  40%|####      | 8/20 [00:05<00:06,  1.95it/s][I 2024-08-07 12:09:13,570] Trial 14 finished with value: 2.32688351902294e-06 and parameters: {'num_leaves': 4}. Best is trial 9 with value: 2.302643579733558e-06.
num_leaves, val_score: 0.000002:  40%|####      | 8/20 [00:05<00:06,  1.95it/s]

Early stopping, best iteration is:
[131]	valid_0's binary_logloss: 2.32688e-06
Training until validation scores don't improve for 50 rounds


num_leaves, val_score: 0.000002:  45%|####5     | 9/20 [00:05<00:04,  2.25it/s][I 2024-08-07 12:09:13,870] Trial 15 finished with value: 2.3026435797335585e-06 and parameters: {'num_leaves': 210}. Best is trial 9 with value: 2.302643579733558e-06.
num_leaves, val_score: 0.000002:  45%|####5     | 9/20 [00:05<00:04,  2.25it/s]

Early stopping, best iteration is:
[126]	valid_0's binary_logloss: 2.30264e-06
Training until validation scores don't improve for 50 rounds


num_leaves, val_score: 0.000002:  50%|#####     | 10/20 [00:06<00:03,  2.50it/s][I 2024-08-07 12:09:14,167] Trial 16 finished with value: 2.3026435797335585e-06 and parameters: {'num_leaves': 112}. Best is trial 9 with value: 2.302643579733558e-06.
num_leaves, val_score: 0.000002:  50%|#####     | 10/20 [00:06<00:03,  2.50it/s]

Early stopping, best iteration is:
[138]	valid_0's binary_logloss: 2.30264e-06
Training until validation scores don't improve for 50 rounds


num_leaves, val_score: 0.000002:  55%|#####5    | 11/20 [00:06<00:03,  2.66it/s][I 2024-08-07 12:09:14,489] Trial 17 finished with value: 2.3026435797335585e-06 and parameters: {'num_leaves': 254}. Best is trial 9 with value: 2.302643579733558e-06.
num_leaves, val_score: 0.000002:  55%|#####5    | 11/20 [00:06<00:03,  2.66it/s]

Early stopping, best iteration is:
[133]	valid_0's binary_logloss: 2.30264e-06
Training until validation scores don't improve for 50 rounds


num_leaves, val_score: 0.000002:  60%|######    | 12/20 [00:06<00:03,  2.66it/s][I 2024-08-07 12:09:14,866] Trial 18 finished with value: 2.302643579733558e-06 and parameters: {'num_leaves': 201}. Best is trial 9 with value: 2.302643579733558e-06.
num_leaves, val_score: 0.000002:  60%|######    | 12/20 [00:06<00:03,  2.66it/s]

Early stopping, best iteration is:
[154]	valid_0's binary_logloss: 2.30264e-06
Training until validation scores don't improve for 50 rounds


num_leaves, val_score: 0.000002:  65%|######5   | 13/20 [00:07<00:02,  2.82it/s][I 2024-08-07 12:09:15,174] Trial 19 finished with value: 2.3026435797335585e-06 and parameters: {'num_leaves': 144}. Best is trial 9 with value: 2.302643579733558e-06.
num_leaves, val_score: 0.000002:  65%|######5   | 13/20 [00:07<00:02,  2.82it/s]

Early stopping, best iteration is:
[127]	valid_0's binary_logloss: 2.30264e-06
Training until validation scores don't improve for 50 rounds


num_leaves, val_score: 0.000002:  70%|#######   | 14/20 [00:07<00:02,  2.19it/s][I 2024-08-07 12:09:15,866] Trial 20 finished with value: 2.3026435797335585e-06 and parameters: {'num_leaves': 58}. Best is trial 9 with value: 2.302643579733558e-06.
num_leaves, val_score: 0.000002:  70%|#######   | 14/20 [00:07<00:02,  2.19it/s]

Early stopping, best iteration is:
[127]	valid_0's binary_logloss: 2.30264e-06
Training until validation scores don't improve for 50 rounds


num_leaves, val_score: 0.000002:  75%|#######5  | 15/20 [00:08<00:02,  2.11it/s][I 2024-08-07 12:09:16,378] Trial 21 finished with value: 2.3026435797335585e-06 and parameters: {'num_leaves': 230}. Best is trial 9 with value: 2.302643579733558e-06.
num_leaves, val_score: 0.000002:  75%|#######5  | 15/20 [00:08<00:02,  2.11it/s]

Early stopping, best iteration is:
[127]	valid_0's binary_logloss: 2.30264e-06
Training until validation scores don't improve for 50 rounds


num_leaves, val_score: 0.000002:  80%|########  | 16/20 [00:08<00:01,  2.38it/s][I 2024-08-07 12:09:16,673] Trial 22 finished with value: 2.3026435797335585e-06 and parameters: {'num_leaves': 224}. Best is trial 9 with value: 2.302643579733558e-06.
num_leaves, val_score: 0.000002:  80%|########  | 16/20 [00:08<00:01,  2.38it/s]

Early stopping, best iteration is:
[128]	valid_0's binary_logloss: 2.30264e-06
Training until validation scores don't improve for 50 rounds


num_leaves, val_score: 0.000002:  85%|########5 | 17/20 [00:08<00:01,  2.60it/s][I 2024-08-07 12:09:16,975] Trial 23 finished with value: 2.302643579733558e-06 and parameters: {'num_leaves': 255}. Best is trial 9 with value: 2.302643579733558e-06.
num_leaves, val_score: 0.000002:  85%|########5 | 17/20 [00:08<00:01,  2.60it/s]

Early stopping, best iteration is:
[151]	valid_0's binary_logloss: 2.30264e-06
Training until validation scores don't improve for 50 rounds


num_leaves, val_score: 0.000002:  90%|######### | 18/20 [00:09<00:00,  2.78it/s][I 2024-08-07 12:09:17,277] Trial 24 finished with value: 2.3026435797335585e-06 and parameters: {'num_leaves': 177}. Best is trial 9 with value: 2.302643579733558e-06.
num_leaves, val_score: 0.000002:  90%|######### | 18/20 [00:09<00:00,  2.78it/s]

Early stopping, best iteration is:
[128]	valid_0's binary_logloss: 2.30264e-06
Training until validation scores don't improve for 50 rounds


num_leaves, val_score: 0.000002:  95%|#########5| 19/20 [00:09<00:00,  2.91it/s][I 2024-08-07 12:09:17,582] Trial 25 finished with value: 2.3026435797335585e-06 and parameters: {'num_leaves': 228}. Best is trial 9 with value: 2.302643579733558e-06.
num_leaves, val_score: 0.000002:  95%|#########5| 19/20 [00:09<00:00,  2.91it/s]

Early stopping, best iteration is:
[133]	valid_0's binary_logloss: 2.30264e-06
Training until validation scores don't improve for 50 rounds


num_leaves, val_score: 0.000002: 100%|##########| 20/20 [00:09<00:00,  2.83it/s][I 2024-08-07 12:09:17,960] Trial 26 finished with value: 2.3026435797335585e-06 and parameters: {'num_leaves': 200}. Best is trial 9 with value: 2.302643579733558e-06.
num_leaves, val_score: 0.000002: 100%|##########| 20/20 [00:09<00:00,  2.04it/s]


Early stopping, best iteration is:
[128]	valid_0's binary_logloss: 2.30264e-06


bagging, val_score: 0.000002:   0%|          | 0/10 [00:00<?, ?it/s]

Training until validation scores don't improve for 50 rounds


bagging, val_score: 0.000002:  10%|#         | 1/10 [00:00<00:02,  3.33it/s][I 2024-08-07 12:09:18,277] Trial 27 finished with value: 1.8217289583391978e-06 and parameters: {'bagging_fraction': 0.9186870037877966, 'bagging_freq': 5}. Best is trial 27 with value: 1.8217289583391978e-06.
bagging, val_score: 0.000002:  10%|#         | 1/10 [00:00<00:02,  3.33it/s]

Early stopping, best iteration is:
[131]	valid_0's binary_logloss: 1.82173e-06
Training until validation scores don't improve for 50 rounds


bagging, val_score: 0.000002:  20%|##        | 2/10 [00:00<00:03,  2.10it/s][I 2024-08-07 12:09:18,874] Trial 28 finished with value: 2.30608955175627e-06 and parameters: {'bagging_fraction': 0.937169398578819, 'bagging_freq': 5}. Best is trial 27 with value: 1.8217289583391978e-06.
bagging, val_score: 0.000002:  20%|##        | 2/10 [00:00<00:03,  2.10it/s]

Early stopping, best iteration is:
[150]	valid_0's binary_logloss: 2.30609e-06
Training until validation scores don't improve for 50 rounds


bagging, val_score: 0.000002:  30%|###       | 3/10 [00:01<00:04,  1.62it/s][I 2024-08-07 12:09:19,661] Trial 29 finished with value: 3.6023232764931705e-06 and parameters: {'bagging_fraction': 0.813748871073336, 'bagging_freq': 7}. Best is trial 27 with value: 1.8217289583391978e-06.
bagging, val_score: 0.000002:  30%|###       | 3/10 [00:01<00:04,  1.62it/s]

Early stopping, best iteration is:
[155]	valid_0's binary_logloss: 3.60232e-06
Training until validation scores don't improve for 50 rounds


bagging, val_score: 0.000002:  40%|####      | 4/10 [00:02<00:03,  1.62it/s][I 2024-08-07 12:09:20,274] Trial 30 finished with value: 8.015241021691184e-06 and parameters: {'bagging_fraction': 0.42798127832343386, 'bagging_freq': 1}. Best is trial 27 with value: 1.8217289583391978e-06.
bagging, val_score: 0.000002:  40%|####      | 4/10 [00:02<00:03,  1.62it/s]

Early stopping, best iteration is:
[287]	valid_0's binary_logloss: 8.01524e-06
Training until validation scores don't improve for 50 rounds


bagging, val_score: 0.000002:  50%|#####     | 5/10 [00:02<00:02,  1.72it/s][I 2024-08-07 12:09:20,792] Trial 31 finished with value: 2.092841100136325e-06 and parameters: {'bagging_fraction': 0.9833295811414526, 'bagging_freq': 4}. Best is trial 27 with value: 1.8217289583391978e-06.
bagging, val_score: 0.000002:  50%|#####     | 5/10 [00:02<00:02,  1.72it/s]

Early stopping, best iteration is:
[151]	valid_0's binary_logloss: 2.09284e-06
Training until validation scores don't improve for 50 rounds


bagging, val_score: 0.000002:  60%|######    | 6/10 [00:03<00:02,  1.63it/s][I 2024-08-07 12:09:21,473] Trial 32 finished with value: 2.17285755574527e-06 and parameters: {'bagging_fraction': 0.9745245955764804, 'bagging_freq': 4}. Best is trial 27 with value: 1.8217289583391978e-06.
bagging, val_score: 0.000002:  60%|######    | 6/10 [00:03<00:02,  1.63it/s]

Early stopping, best iteration is:
[149]	valid_0's binary_logloss: 2.17286e-06
Training until validation scores don't improve for 50 rounds


bagging, val_score: 0.000002:  70%|#######   | 7/10 [00:03<00:01,  1.83it/s][I 2024-08-07 12:09:21,876] Trial 33 finished with value: 2.302643579733558e-06 and parameters: {'bagging_fraction': 0.9979870799775385, 'bagging_freq': 4}. Best is trial 27 with value: 1.8217289583391978e-06.
bagging, val_score: 0.000002:  70%|#######   | 7/10 [00:03<00:01,  1.83it/s]

Early stopping, best iteration is:
[170]	valid_0's binary_logloss: 2.30264e-06
Training until validation scores don't improve for 50 rounds


bagging, val_score: 0.000002:  80%|########  | 8/10 [00:04<00:01,  1.89it/s][I 2024-08-07 12:09:22,369] Trial 34 finished with value: 2.3026435797335585e-06 and parameters: {'bagging_fraction': 0.9990371675667474, 'bagging_freq': 4}. Best is trial 27 with value: 1.8217289583391978e-06.
bagging, val_score: 0.000002:  80%|########  | 8/10 [00:04<00:01,  1.89it/s]

Early stopping, best iteration is:
[138]	valid_0's binary_logloss: 2.30264e-06
Training until validation scores don't improve for 50 rounds


bagging, val_score: 0.000002:  90%|######### | 9/10 [00:04<00:00,  2.02it/s][I 2024-08-07 12:09:22,787] Trial 35 finished with value: 1.7345669270148782e-06 and parameters: {'bagging_fraction': 0.8333419593827704, 'bagging_freq': 5}. Best is trial 35 with value: 1.7345669270148782e-06.
bagging, val_score: 0.000002:  90%|######### | 9/10 [00:04<00:00,  2.02it/s]

Early stopping, best iteration is:
[136]	valid_0's binary_logloss: 1.73457e-06
Training until validation scores don't improve for 50 rounds


bagging, val_score: 0.000002: 100%|##########| 10/10 [00:05<00:00,  2.17it/s][I 2024-08-07 12:09:23,171] Trial 36 finished with value: 1.612267360647842e-06 and parameters: {'bagging_fraction': 0.867927872040359, 'bagging_freq': 5}. Best is trial 36 with value: 1.612267360647842e-06.
bagging, val_score: 0.000002: 100%|##########| 10/10 [00:05<00:00,  1.92it/s]


Early stopping, best iteration is:
[136]	valid_0's binary_logloss: 1.61227e-06


feature_fraction_stage2, val_score: 0.000002:   0%|          | 0/6 [00:00<?, ?it/s]

Training until validation scores don't improve for 50 rounds


feature_fraction_stage2, val_score: 0.000002:  17%|#6        | 1/6 [00:00<00:01,  2.60it/s][I 2024-08-07 12:09:23,564] Trial 37 finished with value: 1.612267360647842e-06 and parameters: {'feature_fraction': 0.5479999999999999}. Best is trial 37 with value: 1.612267360647842e-06.
feature_fraction_stage2, val_score: 0.000002:  17%|#6        | 1/6 [00:00<00:01,  2.60it/s]

Early stopping, best iteration is:
[134]	valid_0's binary_logloss: 1.61227e-06
Training until validation scores don't improve for 50 rounds


feature_fraction_stage2, val_score: 0.000002:  33%|###3      | 2/6 [00:00<00:01,  2.53it/s][I 2024-08-07 12:09:23,967] Trial 38 finished with value: 1.612267360647842e-06 and parameters: {'feature_fraction': 0.484}. Best is trial 37 with value: 1.612267360647842e-06.
feature_fraction_stage2, val_score: 0.000002:  33%|###3      | 2/6 [00:00<00:01,  2.53it/s]

Early stopping, best iteration is:
[134]	valid_0's binary_logloss: 1.61227e-06
Training until validation scores don't improve for 50 rounds


feature_fraction_stage2, val_score: 0.000002:  50%|#####     | 3/6 [00:01<00:01,  2.84it/s][I 2024-08-07 12:09:24,268] Trial 39 finished with value: 2.6859464095164043e-06 and parameters: {'feature_fraction': 0.42}. Best is trial 37 with value: 1.612267360647842e-06.
feature_fraction_stage2, val_score: 0.000002:  50%|#####     | 3/6 [00:01<00:01,  2.84it/s]

Early stopping, best iteration is:
[124]	valid_0's binary_logloss: 2.68595e-06
Training until validation scores don't improve for 50 rounds


feature_fraction_stage2, val_score: 0.000002:  67%|######6   | 4/6 [00:01<00:00,  3.01it/s][I 2024-08-07 12:09:24,570] Trial 40 finished with value: 1.612267360647842e-06 and parameters: {'feature_fraction': 0.516}. Best is trial 37 with value: 1.612267360647842e-06.
feature_fraction_stage2, val_score: 0.000002:  67%|######6   | 4/6 [00:01<00:00,  3.01it/s]

Early stopping, best iteration is:
[131]	valid_0's binary_logloss: 1.61227e-06
Training until validation scores don't improve for 50 rounds


feature_fraction_stage2, val_score: 0.000002:  83%|########3 | 5/6 [00:02<00:00,  1.87it/s][I 2024-08-07 12:09:25,461] Trial 41 finished with value: 1.6122673606478415e-06 and parameters: {'feature_fraction': 0.45199999999999996}. Best is trial 41 with value: 1.6122673606478415e-06.
feature_fraction_stage2, val_score: 0.000002:  83%|########3 | 5/6 [00:02<00:00,  1.87it/s]

Early stopping, best iteration is:
[168]	valid_0's binary_logloss: 1.61227e-06
Training until validation scores don't improve for 50 rounds


feature_fraction_stage2, val_score: 0.000002: 100%|##########| 6/6 [00:02<00:00,  2.19it/s][I 2024-08-07 12:09:25,770] Trial 42 finished with value: 1.6414877741727898e-06 and parameters: {'feature_fraction': 0.58}. Best is trial 41 with value: 1.6122673606478415e-06.
feature_fraction_stage2, val_score: 0.000002: 100%|##########| 6/6 [00:02<00:00,  2.31it/s]


Early stopping, best iteration is:
[133]	valid_0's binary_logloss: 1.64149e-06


regularization_factors, val_score: 0.000002:   0%|          | 0/20 [00:00<?, ?it/s]

Training until validation scores don't improve for 50 rounds


regularization_factors, val_score: 0.000002:   5%|5         | 1/20 [00:00<00:03,  5.21it/s][I 2024-08-07 12:09:25,980] Trial 43 finished with value: 1.5021034465362997e-06 and parameters: {'lambda_l1': 5.763676248133896e-07, 'lambda_l2': 0.0002788392339773973}. Best is trial 43 with value: 1.5021034465362997e-06.
regularization_factors, val_score: 0.000002:   5%|5         | 1/20 [00:00<00:03,  5.21it/s]

Early stopping, best iteration is:
[132]	valid_0's binary_logloss: 1.5021e-06
Training until validation scores don't improve for 50 rounds


regularization_factors, val_score: 0.000002:  10%|#         | 2/20 [00:00<00:03,  5.04it/s][I 2024-08-07 12:09:26,182] Trial 44 finished with value: 1.5087074180044877e-06 and parameters: {'lambda_l1': 5.318989555350645e-07, 'lambda_l2': 0.0002849497788090724}. Best is trial 43 with value: 1.5021034465362997e-06.
regularization_factors, val_score: 0.000002:  10%|#         | 2/20 [00:00<00:03,  5.04it/s]

Early stopping, best iteration is:
[149]	valid_0's binary_logloss: 1.50871e-06
Training until validation scores don't improve for 50 rounds


regularization_factors, val_score: 0.000001:  15%|#5        | 3/20 [00:00<00:03,  4.92it/s][I 2024-08-07 12:09:26,450] Trial 45 finished with value: 1.4632140642699954e-06 and parameters: {'lambda_l1': 2.9165437804140206e-07, 'lambda_l2': 0.00024383457900198087}. Best is trial 45 with value: 1.4632140642699954e-06.
regularization_factors, val_score: 0.000001:  15%|#5        | 3/20 [00:00<00:03,  4.92it/s]

Early stopping, best iteration is:
[132]	valid_0's binary_logloss: 1.46321e-06
Training until validation scores don't improve for 50 rounds


regularization_factors, val_score: 0.000001:  20%|##        | 4/20 [00:00<00:03,  4.30it/s][I 2024-08-07 12:09:26,670] Trial 46 finished with value: 1.5015985028201733e-06 and parameters: {'lambda_l1': 2.774828158177569e-07, 'lambda_l2': 0.00027871632832015544}. Best is trial 45 with value: 1.4632140642699954e-06.
regularization_factors, val_score: 0.000001:  20%|##        | 4/20 [00:00<00:03,  4.30it/s]

Early stopping, best iteration is:
[132]	valid_0's binary_logloss: 1.5016e-06
Training until validation scores don't improve for 50 rounds


regularization_factors, val_score: 0.000001:  25%|##5       | 5/20 [00:01<00:03,  4.48it/s][I 2024-08-07 12:09:26,876] Trial 47 finished with value: 1.4820977449467571e-06 and parameters: {'lambda_l1': 3.345083587194761e-07, 'lambda_l2': 0.0002608629928279968}. Best is trial 45 with value: 1.4632140642699954e-06.
regularization_factors, val_score: 0.000001:  25%|##5       | 5/20 [00:01<00:03,  4.48it/s]

Early stopping, best iteration is:
[144]	valid_0's binary_logloss: 1.4821e-06
Training until validation scores don't improve for 50 rounds


regularization_factors, val_score: 0.000001:  30%|###       | 6/20 [00:01<00:03,  4.62it/s][I 2024-08-07 12:09:27,080] Trial 48 finished with value: 1.4971337845622793e-06 and parameters: {'lambda_l1': 2.9092409952035696e-07, 'lambda_l2': 0.0002746145779678195}. Best is trial 45 with value: 1.4632140642699954e-06.
regularization_factors, val_score: 0.000001:  30%|###       | 6/20 [00:01<00:03,  4.62it/s]

Early stopping, best iteration is:
[153]	valid_0's binary_logloss: 1.49713e-06
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[139]	valid_0's binary_logloss: 1.45252e-06


regularization_factors, val_score: 0.000001:  35%|###5      | 7/20 [00:01<00:02,  4.74it/s][I 2024-08-07 12:09:27,278] Trial 49 finished with value: 1.4525188456193324e-06 and parameters: {'lambda_l1': 2.9415328026214427e-07, 'lambda_l2': 0.00034581565250441413}. Best is trial 49 with value: 1.4525188456193324e-06.
regularization_factors, val_score: 0.000001:  35%|###5      | 7/20 [00:01<00:02,  4.74it/s]

Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[164]	valid_0's binary_logloss: 1.26074e-06


regularization_factors, val_score: 0.000001:  40%|####      | 8/20 [00:01<00:02,  4.24it/s][I 2024-08-07 12:09:27,569] Trial 50 finished with value: 1.2607382393996627e-06 and parameters: {'lambda_l1': 1.9973782903271662e-07, 'lambda_l2': 0.00036520656813362546}. Best is trial 50 with value: 1.2607382393996627e-06.
regularization_factors, val_score: 0.000001:  45%|####5     | 9/20 [00:01<00:02,  4.41it/s][I 2024-08-07 12:09:27,775] Trial 51 finished with value: 1.2687777335577554e-06 and parameters: {'lambda_l1': 1.9029351215039642e-07, 'lambda_l2': 0.00037361771850758375}. Best is trial 50 with value: 1.2607382393996627e-06.
regularization_factors, val_score: 0.000001:  45%|####5     | 9/20 [00:01<00:02,  4.41it/s]

Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[168]	valid_0's binary_logloss: 1.26878e-06


regularization_factors, val_score: 0.000001:  50%|#####     | 10/20 [00:02<00:02,  4.58it/s][I 2024-08-07 12:09:27,975] Trial 52 finished with value: 1.4939932599635608e-06 and parameters: {'lambda_l1': 1.9378958918727942e-07, 'lambda_l2': 0.0002718552016649665}. Best is trial 50 with value: 1.2607382393996627e-06.
regularization_factors, val_score: 0.000001:  50%|#####     | 10/20 [00:02<00:02,  4.58it/s]

Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[133]	valid_0's binary_logloss: 1.49399e-06
Training until validation scores don't improve for 50 rounds


regularization_factors, val_score: 0.000001:  55%|#####5    | 11/20 [00:02<00:01,  4.66it/s][I 2024-08-07 12:09:28,180] Trial 53 finished with value: 1.3379608714370438e-06 and parameters: {'lambda_l1': 1.6451966279925413e-07, 'lambda_l2': 0.00041445003806608074}. Best is trial 50 with value: 1.2607382393996627e-06.
regularization_factors, val_score: 0.000001:  55%|#####5    | 11/20 [00:02<00:01,  4.66it/s]

Early stopping, best iteration is:
[146]	valid_0's binary_logloss: 1.33796e-06
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[144]	valid_0's binary_logloss: 1.38103e-06


regularization_factors, val_score: 0.000001:  60%|######    | 12/20 [00:02<00:01,  4.75it/s][I 2024-08-07 12:09:28,381] Trial 54 finished with value: 1.3810301109180486e-06 and parameters: {'lambda_l1': 9.730996801182648e-08, 'lambda_l2': 0.0005627904498645886}. Best is trial 50 with value: 1.2607382393996627e-06.
regularization_factors, val_score: 0.000001:  60%|######    | 12/20 [00:02<00:01,  4.75it/s]

Training until validation scores don't improve for 50 rounds


regularization_factors, val_score: 0.000001:  65%|######5   | 13/20 [00:03<00:03,  2.10it/s][I 2024-08-07 12:09:29,473] Trial 55 finished with value: 2.280444200125636e-06 and parameters: {'lambda_l1': 4.691856188470086e-08, 'lambda_l2': 0.0984078888153355}. Best is trial 50 with value: 1.2607382393996627e-06.
regularization_factors, val_score: 0.000001:  65%|######5   | 13/20 [00:03<00:03,  2.10it/s]

Did not meet early stopping. Best iteration is:
[1000]	valid_0's binary_logloss: 2.28044e-06
Training until validation scores don't improve for 50 rounds


regularization_factors, val_score: 0.000001:  70%|#######   | 14/20 [00:04<00:03,  1.74it/s][I 2024-08-07 12:09:30,278] Trial 56 finished with value: 4.131588561008407e-05 and parameters: {'lambda_l1': 0.03125532116063038, 'lambda_l2': 0.01730269625906255}. Best is trial 50 with value: 1.2607382393996627e-06.
regularization_factors, val_score: 0.000001:  70%|#######   | 14/20 [00:04<00:03,  1.74it/s]

Early stopping, best iteration is:
[748]	valid_0's binary_logloss: 4.13159e-05
Training until validation scores don't improve for 50 rounds


regularization_factors, val_score: 0.000001:  75%|#######5  | 15/20 [00:04<00:02,  2.02it/s][I 2024-08-07 12:09:30,589] Trial 57 finished with value: 1.6101175754963528e-06 and parameters: {'lambda_l1': 2.0658709570602858e-08, 'lambda_l2': 1.195302210852152e-08}. Best is trial 50 with value: 1.2607382393996627e-06.
regularization_factors, val_score: 0.000001:  75%|#######5  | 15/20 [00:04<00:02,  2.02it/s]

Early stopping, best iteration is:
[132]	valid_0's binary_logloss: 1.61012e-06
Training until validation scores don't improve for 50 rounds


regularization_factors, val_score: 0.000001:  80%|########  | 16/20 [00:05<00:01,  2.17it/s][I 2024-08-07 12:09:30,966] Trial 58 finished with value: 1.392852523385062e-06 and parameters: {'lambda_l1': 7.744926991878488e-05, 'lambda_l2': 6.890638137606572e-06}. Best is trial 50 with value: 1.2607382393996627e-06.
regularization_factors, val_score: 0.000001:  80%|########  | 16/20 [00:05<00:01,  2.17it/s]

Early stopping, best iteration is:
[175]	valid_0's binary_logloss: 1.39285e-06
Training until validation scores don't improve for 50 rounds


regularization_factors, val_score: 0.000001:  85%|########5 | 17/20 [00:05<00:01,  2.26it/s][I 2024-08-07 12:09:31,368] Trial 59 finished with value: 1.6349366419871983e-06 and parameters: {'lambda_l1': 3.7433374518309295e-05, 'lambda_l2': 1.8596123708446957e-06}. Best is trial 50 with value: 1.2607382393996627e-06.
regularization_factors, val_score: 0.000001:  85%|########5 | 17/20 [00:05<00:01,  2.26it/s]

Early stopping, best iteration is:
[163]	valid_0's binary_logloss: 1.63494e-06
Training until validation scores don't improve for 50 rounds


regularization_factors, val_score: 0.000001:  90%|######### | 18/20 [00:05<00:00,  2.31it/s][I 2024-08-07 12:09:31,775] Trial 60 finished with value: 1.6214915766613955e-06 and parameters: {'lambda_l1': 2.086313115430124e-05, 'lambda_l2': 8.598292059388441e-06}. Best is trial 50 with value: 1.2607382393996627e-06.
regularization_factors, val_score: 0.000001:  90%|######### | 18/20 [00:05<00:00,  2.31it/s]

Early stopping, best iteration is:
[164]	valid_0's binary_logloss: 1.62149e-06
Training until validation scores don't improve for 50 rounds


regularization_factors, val_score: 0.000001:  95%|#########5| 19/20 [00:06<00:00,  2.38it/s][I 2024-08-07 12:09:32,169] Trial 61 finished with value: 1.4618894006765968e-06 and parameters: {'lambda_l1': 4.729897696100894e-06, 'lambda_l2': 0.0035347538263388258}. Best is trial 50 with value: 1.2607382393996627e-06.
regularization_factors, val_score: 0.000001:  95%|#########5| 19/20 [00:06<00:00,  2.38it/s]

Early stopping, best iteration is:
[221]	valid_0's binary_logloss: 1.46189e-06
Training until validation scores don't improve for 50 rounds


regularization_factors, val_score: 0.000001: 100%|##########| 20/20 [00:06<00:00,  2.42it/s][I 2024-08-07 12:09:32,564] Trial 62 finished with value: 2.0518105931435447e-06 and parameters: {'lambda_l1': 8.663601962089021e-06, 'lambda_l2': 0.005493038387145207}. Best is trial 50 with value: 1.2607382393996627e-06.
regularization_factors, val_score: 0.000001: 100%|##########| 20/20 [00:06<00:00,  2.95it/s]


Early stopping, best iteration is:
[176]	valid_0's binary_logloss: 2.05181e-06


min_child_samples, val_score: 0.000001:   0%|          | 0/5 [00:00<?, ?it/s]

Training until validation scores don't improve for 50 rounds


min_child_samples, val_score: 0.000001:  20%|##        | 1/5 [00:01<00:04,  1.18s/it][I 2024-08-07 12:09:33,762] Trial 63 finished with value: 4.156526415183335e-06 and parameters: {'min_child_samples': 100}. Best is trial 63 with value: 4.156526415183335e-06.
min_child_samples, val_score: 0.000001:  20%|##        | 1/5 [00:01<00:04,  1.18s/it]

Early stopping, best iteration is:
[340]	valid_0's binary_logloss: 4.15653e-06
Training until validation scores don't improve for 50 rounds


min_child_samples, val_score: 0.000001:  40%|####      | 2/5 [00:01<00:02,  1.19it/s][I 2024-08-07 12:09:34,358] Trial 64 finished with value: 1.3664556456410175e-06 and parameters: {'min_child_samples': 10}. Best is trial 64 with value: 1.3664556456410175e-06.
min_child_samples, val_score: 0.000001:  40%|####      | 2/5 [00:01<00:02,  1.19it/s]

Early stopping, best iteration is:
[137]	valid_0's binary_logloss: 1.36646e-06
Training until validation scores don't improve for 50 rounds


min_child_samples, val_score: 0.000001:  60%|######    | 3/5 [00:02<00:01,  1.36it/s][I 2024-08-07 12:09:34,976] Trial 65 finished with value: 1.2682356737149834e-06 and parameters: {'min_child_samples': 25}. Best is trial 65 with value: 1.2682356737149834e-06.
min_child_samples, val_score: 0.000001:  60%|######    | 3/5 [00:02<00:01,  1.36it/s]

Early stopping, best iteration is:
[149]	valid_0's binary_logloss: 1.26824e-06
Training until validation scores don't improve for 50 rounds


min_child_samples, val_score: 0.000001:  80%|########  | 4/5 [00:02<00:00,  1.53it/s][I 2024-08-07 12:09:35,553] Trial 66 finished with value: 1.4670301362813648e-06 and parameters: {'min_child_samples': 5}. Best is trial 65 with value: 1.2682356737149834e-06.
min_child_samples, val_score: 0.000001:  80%|########  | 4/5 [00:02<00:00,  1.53it/s]

Early stopping, best iteration is:
[133]	valid_0's binary_logloss: 1.46703e-06
Training until validation scores don't improve for 50 rounds


min_child_samples, val_score: 0.000001: 100%|##########| 5/5 [00:03<00:00,  1.95it/s][I 2024-08-07 12:09:35,763] Trial 67 finished with value: 1.1149564171476875e-06 and parameters: {'min_child_samples': 50}. Best is trial 67 with value: 1.1149564171476875e-06.
min_child_samples, val_score: 0.000001: 100%|##########| 5/5 [00:03<00:00,  1.57it/s]

Early stopping, best iteration is:
[172]	valid_0's binary_logloss: 1.11496e-06





In [9]:
# Peek at the first rows of `df_test` (pandas shows 5 rows by default).
df_test.head()

Unnamed: 0,Vehicle_speed,Time_since_engine_start,Normed_load_value,Accelerator_pedal_position,Engine_torque,Oil_fill_level,Engine_oil_temperature,Fuel_level,Fuel_consumption,Brake_pressure,Engaged_gear_raw_signal_Bits_0_7,Efficiency_of_the_SCR_catalytic_converter,timestamp,class,vehicle_id
36,0.0,120.0,100.0,14.5,40.4,30.044,13.5,9.0,0.82,-0.03,0.0,0.0043,2024-08-07 11:09:54.837124109,1,123abc
126,0.0,255.0,27.8,14.5,36.5,29.924,19.8,9.0,0.73,-0.03,0.0,0.04614,2024-08-07 11:12:09.387124062,0,123abc
102,0.0,219.0,29.8,14.5,38.8,29.804,18.5,9.0,0.77,-0.03,0.0,0.00906,2024-08-07 11:11:33.537124157,0,123abc
81,0.0,187.0,100.0,14.5,39.6,29.864,17.1,9.0,0.8,-0.03,0.0,0.05289,2024-08-07 11:11:02.127124071,1,123abc
426,0.0,705.0,25.1,14.5,30.5,35.524,29.8,8.0,0.6,-0.03,0.0,0.08121,2024-08-07 11:19:39.907124043,0,123abc


In [10]:
# Round-trip the trainer through a dill file: write it out, then rebind
# `trainer` to the freshly deserialized copy.
trainer_dill_path = "lgbm_trainer.dill"
dill_dump(trainer_dill_path, trainer)
trainer = dill_load(trainer_dill_path)

In [10]:
# Pretty-print the metrics dictionary; `metrics_dict` must already be defined
# by an earlier cell in this kernel session.
pprint(metrics_dict)

{'cls_report': {'0': {'f1-score': 1.0,
                      'precision': 1.0,
                      'recall': 1.0,
                      'support': 35},
                '1': {'f1-score': 1.0,
                      'precision': 1.0,
                      'recall': 1.0,
                      'support': 10},
                'accuracy': 1.0,
                'macro avg': {'f1-score': 1.0,
                              'precision': 1.0,
                              'recall': 1.0,
                              'support': 45},
                'weighted avg': {'f1-score': 1.0,
                                 'precision': 1.0,
                                 'recall': 1.0,
                                 'support': 45}},
 'cm': [[35, 0], [0, 10]],
 'prec_rec_curve': [[0.2222222222222222, 1.0, 1.0],
                    [1.0, 1.0, 0.0],
                    [0.0, 1.0]]}


In [11]:
# Recompute the metrics on `df_test` through the trainer, this time passing
# with_dynamic_binary_threshold=True, and pretty-print the result.
pprint(trainer.compute_metrics(df_test, with_dynamic_binary_threshold=True))

{'cls_report': {'0': {'f1-score': 1.0,
                      'precision': 1.0,
                      'recall': 1.0,
                      'support': 35},
                '1': {'f1-score': 1.0,
                      'precision': 1.0,
                      'recall': 1.0,
                      'support': 10},
                'accuracy': 1.0,
                'macro avg': {'f1-score': 1.0,
                              'precision': 1.0,
                              'recall': 1.0,
                              'support': 45},
                'weighted avg': {'f1-score': 1.0,
                                 'precision': 1.0,
                                 'recall': 1.0,
                                 'support': 45}},
 'cm': [[35, 0], [0, 10]],
 'prec_rec_curve': [[0.2222222222222222, 1.0, 1.0],
                    [1.0, 1.0, 0.0],
                    [0.0, 1.0]]}


In [12]:
# Inspect the trainer's `ready` flag.
trainer.ready

True

In [15]:
# Explicitly set the `ready` flag.
# NOTE(review): the preceding cell already displays True, so this assignment
# looks redundant — confirm whether it is still needed.
trainer.ready = True

In [16]:
# Predict on the test rows with the `class` column dropped.
trainer.predict(df_test.drop(columns=["class"]))

In [18]:
# Publish the test rows on the Redis channel "idneo_v2x" as one JSON document.
# Work on a copy so `df_test` itself keeps its original timestamps.
df_test_redis = df_test.copy()
# NOTE(review): the reason for the 2 h shift is not stated here — presumably a
# local-time/UTC correction; confirm.
df_test_redis["timestamp"] = df_test_redis["timestamp"] - pd.Timedelta(hours=2)
df_test_redis_json = (
    df_test_redis
    .drop(columns=["class"])
    .reset_index(drop=True)
    .to_json(orient="split")
)
redisClient = redis.Redis(
    host=os.environ["REDIS_HOST"],
    port=os.environ["REDIS_PORT"],
    password=os.environ["REDIS_PASSWORD"],
)

# Pause for two seconds before publishing.
time.sleep(2)
# PUBLISH returns the number of subscribers that received the message.
redisClient.publish("idneo_v2x", df_test_redis_json)

3

# Applicable only to `Trainer` object NOT `S6GTrainer`

The SUCCESS6G project requires subscribing to an edge Redis channel to ingest the data and forwarding the predictions directly to a central InfluxDB database. Both the Redis and the InfluxDB clients hold socket objects, which cannot be pickled, so logging such a model with MLflow fails with the following error: `Full serialization error: cannot pickle 'socket' object`

Workaround: [create a custom Kserve model](https://kserve.github.io/website/master/modelserving/v1beta1/custom/custom_model/#implement-custom-model-using-kserve-api)

## Mlflow
* tracking
* model registration in Minio
* [permanent experiment deletion](https://mlflow.org/docs/2.11.3/tracking/artifacts-stores.html?highlight=delete%20experiment#deletion-behavior)
```
# in python
mlflow.get_experiment_by_name("success6g_lgbm").experiment_id
# in cmd
mlflow experiments delete -x 1
```

In [23]:
# Connection settings for the MLflow tracking server and its S3-compatible
# (MinIO) artifact store.
mlflow_host = "10.152.183.54"
mlflow_host_url = "mlflow.mlflow.svc.cluster.local"
mlflow_port = "5000"
# NOTE(review): these credentials are committed in plain text and repeat the
# values already set in the first cell; prefer supplying them from outside
# the notebook.
os.environ.update(
    {
        "AWS_ACCESS_KEY_ID": "minioadmin",
        "AWS_SECRET_ACCESS_KEY": "minioadmin",
        "MLFLOW_S3_ENDPOINT_URL": "http://10.152.183.156:9000",
    }
)

# Point the client at the tracking server, then select the "success6g"
# experiment (the returned Experiment is shown as the cell output).
mlflow.set_tracking_uri(f"http://{mlflow_host}:{mlflow_port}")
experiment_id = get_or_create_experiment("success6g")
mlflow.set_experiment(experiment_id=experiment_id)

<Experiment: artifact_location='s3://mlflow/3', creation_time=1716388845676, experiment_id='3', last_update_time=1716388845676, lifecycle_stage='active', name='success6g', tags={}>

In [24]:
# MLflow metrics must be scalar numbers (int/float), so after flattening drop
# the list-valued entries: the confusion matrix and the precision-recall curve.
metrics_dict_flattened = flatten_dict(metrics_dict)
for list_valued_key in ("cm", "prec_rec_curve"):
    del metrics_dict_flattened[list_valued_key]

In [19]:
# Log one MLflow run: best hyper-parameters, flattened metrics, descriptive
# tags, and the trainer itself as a registered pyfunc model.
run_name = "test"
with mlflow.start_run(
    experiment_id=experiment_id, run_name=run_name, nested=True
) as run:
    mlflow.log_params(trainer.optimizer.best)
    mlflow.log_metrics(metrics_dict_flattened)

    # Log tags
    mlflow.set_tags(
        tags={
            "project": "SUCCESS6G",
            "optimizer_engine": "optuna",
            # Fixed typo ("ligtgbm") so runs can be filtered by model family.
            "model_family": "lightgbm",
            "feature_set_version": 1,
        }
    )
    # Log figure - for future fun
    # mlflow.log_figure(figure=correlation_plot, artifact_file="correlation_plot.png")

    # The same name is used for the artifact folder inside the run and for the
    # entry in the model registry.
    artifact_path = "success6g_model"
    registered_model_name = "success6g_model"
    mlflow.pyfunc.log_model(
        python_model=trainer,
        artifact_path=artifact_path,
        registered_model_name=registered_model_name,
    )
    # Keep the artifact URI around: later cells load and serve the model from it.
    model_uri = mlflow.get_artifact_uri(artifact_path)
    print(f"Run ID:\n{run.info.run_id}\nModel uri:\n{model_uri}")

## Testing of predictions

### Example data

In [30]:
# First two rows of `df_pd`; the prediction checks below use these rows as input.
df_pd.iloc[:2]

Unnamed: 0,engine_load,engine_coolant_temp,engine_speed,vehicle_speed,intake_air_temp,maf,throttle_position,fuel_rg_pressure,barometric_pressure,control_voltage,class,car_id
0,100.0,17.0,904.5,0.0,10.0,12.55,83.14,37270.0,101.0,0.06,1,123abc
1,100.0,17.0,906.0,0.0,11.0,12.36,83.14,37800.0,101.0,14.56,1,123abc


In [31]:
# for testing using mlflow inference service
request_dict = {
    "engine_load": [100.0, 100.0],
    "engine_coolant_temp": [17.0, 17.0],
    "engine_speed": [904.5, 906.0],
    "vehicle_speed": [0.0, 0.0],
    "intake_air_temp": [10.0, 11.0],
    "maf": [12.55, 12.36],
    "throttle_position": [83.14, 83.14],
    "fuel_rg_pressure": [37270.0, 37800.0],
    "barometric_pressure": [101.0, 101.0],
    "control_voltage": [0.06, 14.56],
    "class": [1, 1],
    "car_id": ["123abc", "123abc"],
}

### Trainer

In [32]:
# Predict with the in-memory trainer on the two example rows (`class` column dropped).
# NOTE(review): the displayed `class` predictions are ~14.27 while the `class`
# column shown for these rows is 1 — confirm this is the intended output scale.
trainer.predict(df=df_pd.iloc[:2].drop(columns=["class"]), context={})

Unnamed: 0,class
0,14.269467
1,14.269467


### Downloaded Trainer

In [33]:
# Artifact URI of the model logged in the MLflow run above.
model_uri

's3://mlflow/3/24fedb96c5004767800fa32a3220c8dd/artifacts/success6g_model'

In [34]:
# Load the logged model back from the artifact store through the generic
# MLflow pyfunc interface.
loaded_trainer = mlflow.pyfunc.load_model(model_uri)

Downloading artifacts:   0%|          | 0/9 [00:00<?, ?it/s]

In [35]:
# Same two example rows as for the in-memory trainer; the predictions of the
# downloaded model should match those.
loaded_trainer.predict(df_pd.iloc[:2].drop(columns=["class"]))

Unnamed: 0,class
0,14.269467
1,14.269467


### Downloaded/Served Trainer
* i.e. testing [model locally](https://mlflow.org/docs/latest/deployment/deploy-model-to-kubernetes/tutorial.html?highlight=kserve#step-6-testing-model-serving-locally)

In [36]:
# Artifact URI of the model that the next cell serves locally.
model_uri

's3://mlflow/3/24fedb96c5004767800fa32a3220c8dd/artifacts/success6g_model'

In [None]:
! mlflow models serve -m "s3://mlflow/3/24fedb96c5004767800fa32a3220c8dd/artifacts/success6g_model" --env-manager local -p 5000

Downloading artifacts: 100%|████████████████████| 1/1 [00:00<00:00, 2088.80it/s]
2024/05/22 14:45:07 INFO mlflow.models.flavor_backend_registry: Selected backend for flavor 'python_function'
Downloading artifacts: 100%|██████████████████████| 9/9 [00:00<00:00, 69.72it/s]
2024/05/22 14:45:07 INFO mlflow.pyfunc.backend: === Running command 'exec gunicorn --timeout=60 -b 127.0.0.1:5000 -w 1 ${GUNICORN_CMD_ARGS} -- mlflow.pyfunc.scoring_server.wsgi:app'
[2024-05-22 14:45:07 +0000] [17971] [INFO] Starting gunicorn 22.0.0
[2024-05-22 14:45:07 +0000] [17971] [INFO] Listening at: http://127.0.0.1:5000 (17971)
[2024-05-22 14:45:07 +0000] [17971] [INFO] Using worker: sync
[2024-05-22 14:45:07 +0000] [17972] [INFO] Booting worker with pid: 17972


```
root@jupyter-5uperpalo:~/assignment# curl -X POST -H "Content-Type:application/json" --data '{"inputs": {"engine_load": [100.0 ,100.0], "engine_coolant_temp": [17.0, 17.0], "engine_speed": [904.5, 906.0], "vehicle_speed": [0.0, 0.0], "intake_air_temp": [10.0, 11.0], "maf": [12.55, 12.36], "throttle_position": [83.14, 83.14], "fuel_rg_pressure": [37270.0, 37800.0], "barometric_pressure": [101.0, 101.0], "control_voltage": [0.06, 14.56], "car_id": ["123abc", "123abc"]}}' http://127.0.0.1:5000/invocations
{"predictions": [{"class": 14.26946728558632}, {"class": 14.26946728558632}]}
```

## Model deployment using Kserve
* https://mlflow.org/docs/latest/deployment/deploy-model-to-kubernetes/tutorial.html?highlight=kserve#step-7-deploying-the-model-to-kserve

### Using `model_uri` 

* Since MinIO is configured as the artifact store, I assumed KServe could load the model directly from `model_uri` - I was **WRONG!** (see the failures at the end of this procedure)

procedure:
<details><summary>make sure the kserve is able to reach minio</summary>

*  by [defining s3-secret and attaching it to service account](https://kserve.github.io/website/0.7/modelserving/storage/s3/s3/#create-s3-secret)
* the created ServiceAccount must be in the same namespace as the InferenceService we are going to create, [see e.g. this Stack Overflow answer](https://stackoverflow.com/a/70977798)
* `If an application does not specify a serviceAccountName, it uses the "default" service account.`[e.g. here](https://kubernetes.io/docs/reference/access-authn-authz/rbac/#role-binding-examples)

```
apiVersion: v1
kind: Secret
metadata:
  name: s3creds
  namespace: mlflow-kserve-success6g
  annotations:
     serving.kserve.io/s3-endpoint: 10.152.183.156:9000 # replace with your s3 endpoint e.g minio-service.kubeflow:9000
     serving.kserve.io/s3-usehttps: "0" # by default 1, if testing with minio you can set to 0
     # serving.kserve.io/s3-region: "us-east-2"
     serving.kserve.io/s3-useanoncredential: "false" # omitting this is the same as false, if true will ignore provided credential and use anonymous credentials
type: Opaque
stringData: # use `stringData` for raw credential string or `data` for base64 encoded string
  AWS_ACCESS_KEY_ID: "minioadmin"
  AWS_SECRET_ACCESS_KEY: "minioadmin"
```

`kubectl apply -f create-s3-secret.yaml` (the `mlflow-kserve-success6g` namespace must already exist - create it first, see the next step)

</details>
<details><summary>create namespace</summary>

```
kubectl create namespace mlflow-kserve-success6g
namespace/mlflow-kserve-success6g created
```

</details>
<details><summary>specify configuration of the deployment in the file `mlflow-kserve-success6g_model_uri.yaml`</summary>

```
apiVersion: "serving.kserve.io/v1beta1"
kind: "InferenceService"
metadata:
  name: "mlflow-success6g"
  namespace: "mlflow-kserve-success6g"
spec:
  predictor:
    model:
      modelFormat:
        name: mlflow
      protocolVersion: v2
      storageUri: "s3://mlflow/1/777cf64c922149a4b77c85987865deb0/artifacts/success6g_model"
```

</details>
<details><summary>deploy</summary>

```
kubectl apply -f mlflow-kserve-success6g_model_uri.yaml
inferenceservice.serving.kserve.io/mlflow-success6g created
```
</details>
<details><summary>check the status of the deployment</summary>

`kubectl get inferenceservice mlflow-success6g -n mlflow-kserve-success6g`

</details>
<details><summary>specify testing json in `test-input.json`</summary>

```
{
    "inputs": [
      {
        "name": "input",
        "shape": [13],
        "datatype": "FP32",
        "data": {
			"engine_load": [100.0 ,100.0],
			"engine_coolant_temp": [17.0, 17.0],
			"engine_speed": [904.5, 906.0],
			"vehicle_speed": [0.0, 0.0],
			"intake_air_temp": [10.0, 11.0],
			"maf": [12.55, 12.36],
			"throttle_position": [83.14, 83.14],
			"fuel_rg_pressure": [37270.0, 37800.0],
			"barometric_pressure": [101.0, 101.0],
			"control_voltage": [0.06, 14.56],
			"car_id": ["123abc", "123abc"]
			}
      }
    ]
}
```

</details>
<details><summary>test json</summary>

```
SERVICE_HOSTNAME=$(kubectl get inferenceservice mlflow-success6g -n mlflow-kserve-success6g -o jsonpath='{.status.url}' | cut -d "/" -f 3)
curl -v \
  -H "Host: ${SERVICE_HOSTNAME}" \
  -H "Content-Type: application/json" \
  -d @./test-input.json \
  http://${INGRESS_HOST}:${INGRESS_PORT}/v2/models/mlflow-success6g/infer
```

</details>
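<details><summary>but after all the trouble this can still fail if the model was developed in a different Python version/environment than the one KServe is using (in the log below the load finally fails because the project package `inference_model` is not installed in the serving image)</summary>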

```
pmulinka@saiacheron:~/kubernetes/kserve$ kubectl logs -p mlflow-success6g-predictor-00001-deployment-74ccf4fb5d-lktz9 -n mlflow-kserve-success6g
Defaulted container "kserve-container" out of: kserve-container, queue-proxy, storage-initializer (init)
Environment tarball not found at '/mnt/models/environment.tar.gz'
Environment not found at './envs/environment'
2024-05-16 15:32:55,293 [mlserver.parallel] DEBUG - Starting response processing loop...
2024-05-16 15:32:55,298 [mlserver.rest] INFO - HTTP server running on http://0.0.0.0:8080
INFO:     Started server process [1]
INFO:     Waiting for application startup.
2024-05-16 15:32:55,411 [mlserver.metrics] INFO - Metrics server running on http://0.0.0.0:8082
2024-05-16 15:32:55,412 [mlserver.metrics] INFO - Prometheus scraping endpoint can be accessed on http://0.0.0.0:8082/metrics
INFO:     Started server process [1]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
2024-05-16 15:32:57,613 [mlserver.grpc] INFO - gRPC server running on http://0.0.0.0:9000
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit)
INFO:     Uvicorn running on http://0.0.0.0:8082 (Press CTRL+C to quit)
2024/05/16 15:32:58 WARNING mlflow.pyfunc: Detected one or more mismatches between the model's dependencies and the current Python environment:
 - mlflow (current: 2.3.1, required: mlflow==2.12.2)
 - cryptography (current: 38.0.4, required: cryptography==40.0.2)
 - cytoolz (current: uninstalled, required: cytoolz==0.12.0)
 - defusedxml (current: uninstalled, required: defusedxml==0.7.1)
 - distributed (current: uninstalled, required: distributed==2023.5.1)
 - lightgbm (current: 3.3.5, required: lightgbm==4.3.0)
 - lz4 (current: uninstalled, required: lz4==4.3.2)
 - numpy (current: 1.23.5, required: numpy==1.24.3)
 - optuna-integration (current: uninstalled, required: optuna-integration==3.6.0)
 - pandas (current: 2.0.1, required: pandas==2.2.2)
 - pyarrow (current: 11.0.0, required: pyarrow==12.0.0)
 - redis (current: uninstalled, required: redis==5.0.4)
 - scipy (current: 1.9.1, required: scipy==1.10.1)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.
2024/05/16 15:32:58 WARNING mlflow.pyfunc: The version of Python that the model was saved in, `Python 3.10.11`, differs from the version of Python that is currently running, `Python 3.8.16`, and may be incompatible
2024-05-16 15:32:58,967 [mlserver] INFO - Couldn't load model 'mlflow-success6g'. Model will be removed from registry.
2024-05-16 15:32:58,967 [mlserver.parallel] ERROR - An error occurred processing a model update of type 'Load'.
Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/site-packages/mlserver/parallel/worker.py", line 158, in _process_model_update
    await self._model_registry.load(model_settings)
  File "/opt/conda/lib/python3.8/site-packages/mlserver/registry.py", line 293, in load
    return await self._models[model_settings.name].load(model_settings)
  File "/opt/conda/lib/python3.8/site-packages/mlserver/registry.py", line 148, in load
    await self._load_model(new_model)
  File "/opt/conda/lib/python3.8/site-packages/mlserver/registry.py", line 165, in _load_model
    model.ready = await model.load()
  File "/opt/conda/lib/python3.8/site-packages/mlserver_mlflow/runtime.py", line 155, in load
    self._model = mlflow.pyfunc.load_model(model_uri)
  File "/opt/conda/lib/python3.8/site-packages/mlflow/pyfunc/__init__.py", line 596, in load_model
    model_impl = importlib.import_module(conf[MAIN])._load_pyfunc(data_path)
  File "/opt/conda/lib/python3.8/site-packages/mlflow/pyfunc/model.py", line 291, in _load_pyfunc
    python_model = cloudpickle.load(f)
ModuleNotFoundError: No module named 'inference_model'
2024-05-16 15:32:58,970 [mlserver] INFO - Couldn't load model 'mlflow-success6g'. Model will be removed from registry.
2024-05-16 15:32:58,975 [mlserver.parallel] ERROR - An error occurred processing a model update of type 'Unload'.
Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/site-packages/mlserver/parallel/worker.py", line 160, in _process_model_update
    await self._model_registry.unload_version(
  File "/opt/conda/lib/python3.8/site-packages/mlserver/registry.py", line 302, in unload_version
    await model_registry.unload_version(version)
  File "/opt/conda/lib/python3.8/site-packages/mlserver/registry.py", line 201, in unload_version
    model = await self.get_model(version)
  File "/opt/conda/lib/python3.8/site-packages/mlserver/registry.py", line 237, in get_model
    raise ModelNotFound(self._name, version)
mlserver.errors.ModelNotFound: Model mlflow-success6g not found
2024-05-16 15:32:58,976 [mlserver] ERROR - Some of the models failed to load during startup!
Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/site-packages/mlserver/server.py", line 125, in start
    await asyncio.gather(
  File "/opt/conda/lib/python3.8/site-packages/mlserver/registry.py", line 293, in load
    return await self._models[model_settings.name].load(model_settings)
  File "/opt/conda/lib/python3.8/site-packages/mlserver/registry.py", line 148, in load
    await self._load_model(new_model)
  File "/opt/conda/lib/python3.8/site-packages/mlserver/registry.py", line 161, in _load_model
    model = await callback(model)
  File "/opt/conda/lib/python3.8/site-packages/mlserver/parallel/registry.py", line 152, in load_model
    loaded = await pool.load_model(model)
  File "/opt/conda/lib/python3.8/site-packages/mlserver/parallel/pool.py", line 74, in load_model
    await self._dispatcher.dispatch_update(load_message)
  File "/opt/conda/lib/python3.8/site-packages/mlserver/parallel/dispatcher.py", line 123, in dispatch_update
    return await asyncio.gather(
  File "/opt/conda/lib/python3.8/site-packages/mlserver/parallel/dispatcher.py", line 138, in _dispatch_update
    return await self._dispatch(worker_update)
  File "/opt/conda/lib/python3.8/site-packages/mlserver/parallel/dispatcher.py", line 146, in _dispatch
    return await self._wait_response(internal_id)
  File "/opt/conda/lib/python3.8/site-packages/mlserver/parallel/dispatcher.py", line 152, in _wait_response
    inference_response = await async_response
mlserver.parallel.errors.WorkerError: builtins.ModuleNotFoundError: No module named 'inference_model'
2024-05-16 15:32:58,978 [mlserver.parallel] INFO - Waiting for shutdown of default inference pool...
2024-05-16 15:32:59,163 [mlserver.parallel] INFO - Shutdown of default inference pool complete
2024-05-16 15:32:59,163 [mlserver.grpc] INFO - Waiting for gRPC server shutdown
2024-05-16 15:32:59,169 [mlserver.grpc] INFO - gRPC server shutdown complete
INFO:     Shutting down
INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [1]
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [1]
```

</details>
<details><summary>and some possible readiness probe issues</summary>

* apparently I am not alone in this, e.g. [here](https://stackoverflow.com/questions/62569747/ml-model-pod-keeps-restarting-in-seldon-deployment)
```
pmulinka@saiacheron:~/kubernetes/kserve$ kubectl describe pod mlflow-success6g-predictor-00001-deployment-74ccf4fb5d-lktz9 -n mlflow-kserve-success6g
Events:
  Type     Reason     Age                    From     Message
  ----     ------     ----                   ----     -------
  Warning  Unhealthy  4m11s (x268 over 34m)  kubelet  Readiness probe failed: Get "http://10.1.4.204:8012/": context deadline exceeded (Client.Timeout exceeded while awaiting headers)
```
</details>

### Using docker image
<details><summary>create docker</summary>

```
sudo apt-get install python3.10-venv
python3.10 -m venv python310venv
source python310venv/bin/activate
pip install mlflow
pip install boto3
export AWS_ACCESS_KEY_ID=minioadmin
export AWS_SECRET_ACCESS_KEY=minioadmin
export MLFLOW_S3_ENDPOINT_URL=http://10.152.183.156:9000

mlflow models build-docker -m s3://mlflow/1/777cf64c922149a4b77c85987865deb0/artifacts/success6g_model -n 5uperpalo/mlflow-success6g --enable-mlserver
```
</details>
<details><summary>generate a token and log in to DockerHub</summary>

[Generate a DockerHub token](https://hub.docker.com/settings/security?generateToken=true) and log in with it:
```
(python310venv) pmulinka@iesc-gpu3:~/scripts$ docker login  -u 5uperpalo
Password:
WARNING! Your password will be stored unencrypted in /home/pmulinka/.docker/config.json.
Configure a credential helper to remove this warning. See
https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
```
</details>
<details><summary>push the image to DockerHub</summary>

```
(python310venv) pmulinka@iesc-gpu3:~/scripts$ docker push 5uperpalo/mlflow-success6g
Using default tag: latest
The push refers to repository [docker.io/5uperpalo/mlflow-success6g]
e5f76b582f6c: Pushed
624821c51019: Pushed
93256a85bb86: Pushed
2f62dcdb5ae7: Pushed
1146ddf03b42: Pushed
639ddc87d13f: Pushed
4f18e59d57c4: Pushed
5761e01a334a: Pushed
f31b6bde159a: Pushed
c25ef1a18d31: Pushed
40a774d6610c: Pushed
a2b215b4258b: Pushed
b56a9203d2a0: Pushed
2536dbfb2294: Pushed
67ea405a7d49: Pushed
a81c3606ee5c: Pushed
4a1518ebc26e: Mounted from library/ubuntu
latest: digest: sha256:57f653a3c8d63519c07dccffc6eb12dba5006db07ebcd869467ce752fb1b0bda size: 3899
```
</details>

<details><summary>specify the configuration of the deployment in the file <code>mlflow-kserve-success6g_docker_image.yaml</code></summary>

```
apiVersion: "serving.kserve.io/v1beta1"
kind: "InferenceService"
metadata:
  name: "mlflow-success6g"
  namespace: "mlflow-kserve-success6g"
spec:
  predictor:
    containers:
      - name: "mlflow-success6g"
        image: "5uperpalo/mlflow-success6g"
        ports:
          - containerPort: 8080
            protocol: TCP
        # env:
        #   - name: PROTOCOL
        #     value: "v2"
        resources:
          limits:
            cpu: 2
            memory: 8Gi
          requests:
            cpu: 2
            memory: 8Gi
```
</details>
<details><summary>deploy</summary>

```
pmulinka@saiacheron:~/kubernetes/kserve$ kubectl apply -f mlflow-kserve-success6g_docker_image.yaml
inferenceservice.serving.kserve.io/mlflow-success6g created
```
</details>
<details><summary>check status of deployment</summary>

```
pmulinka@saiacheron:~$ kubectl get inferenceservice -n mlflow-kserve-success6g
NAME               URL                                                                 READY   PREV   LATEST   PREVROLLEDOUTREVISION   LATESTREADYREVISION                AGE
mlflow-success6g   http://mlflow-success6g.mlflow-kserve-success6g.svc.cluster.local   False          100                              mlflow-success6g-predictor-00001   6m4s

pmulinka@saiacheron:~$ kubectl get pods -n mlflow-kserve-success6g
NAME                                                           READY   STATUS    RESTARTS        AGE
mlflow-success6g-predictor-00001-deployment-8694fffb55-9wnb7   2/2     Running   1 (2m14s ago)   4m47s


pmulinka@saiacheron:~$ kubectl get inferenceservice mlflow-success6g -n mlflow-kserve-success6g -o yaml
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  annotations:
    kubectl.kubernetes.io/last-applied-configuration: |
      {"apiVersion":"serving.kserve.io/v1beta1","kind":"InferenceService","metadata":{"annotations":{},"name":"mlflow-success6g","namespace":"mlflow-kserve-success6g"},"spec":{"predictor":{"containers":[{"image":"5uperpalo/mlflow-success6g","name":"mlflow-success6g","ports":[{"containerPort":8080,"protocol":"TCP"}],"resources":{"limits":{"cpu":2,"memory":"8Gi"},"requests":{"cpu":2,"memory":"8Gi"}}}]}}}
  creationTimestamp: "2024-05-28T14:33:05Z"
  finalizers:
  - inferenceservice.finalizers
  generation: 1
  name: mlflow-success6g
  namespace: mlflow-kserve-success6g
  resourceVersion: "76010518"
  uid: b9573ff3-0f67-4650-a0c7-51941a0fffe2
spec:
  predictor:
    containers:
    - image: 5uperpalo/mlflow-success6g
      name: kserve-container
      ports:
      - containerPort: 8080
        protocol: TCP
      resources:
        limits:
          cpu: "2"
          memory: 8Gi
        requests:
          cpu: "2"
          memory: 8Gi
status:
  address:
    url: http://mlflow-success6g.mlflow-kserve-success6g.svc.cluster.local
  components:
    predictor:
      address:
        url: http://mlflow-success6g-predictor.mlflow-kserve-success6g.svc.cluster.local
      latestCreatedRevision: mlflow-success6g-predictor-00001
      latestReadyRevision: mlflow-success6g-predictor-00001
      latestRolledoutRevision: mlflow-success6g-predictor-00001
      traffic:
      - latestRevision: true
        percent: 100
        revisionName: mlflow-success6g-predictor-00001
      url: http://mlflow-success6g-predictor.mlflow-kserve-success6g.svc.cluster.local
  conditions:
  - lastTransitionTime: "2024-05-28T14:36:11Z"
    reason: Predictor ingress not created
    status: "False"
    type: IngressReady
  - lastTransitionTime: "2024-05-28T14:36:11Z"
    reason: PredictorConfigurationReady not ready
    severity: Info
    status: "False"
    type: LatestDeploymentReady
  - lastTransitionTime: "2024-05-28T14:36:11Z"
    message: 'Revision "mlflow-success6g-predictor-00001" failed with message: Container
      failed with: container exited with no error.'
    reason: RevisionFailed
    severity: Info
    status: "False"
    type: PredictorConfigurationReady
  - lastTransitionTime: "2024-05-28T14:36:11Z"
    message: Revision "mlflow-success6g-predictor-00001" failed to become ready.
    reason: RevisionMissing
    status: "False"
    type: PredictorReady
  - lastTransitionTime: "2024-05-28T14:36:11Z"
    message: Revision "mlflow-success6g-predictor-00001" failed to become ready.
    reason: RevisionMissing
    severity: Info
    status: "False"
    type: PredictorRouteReady
  - lastTransitionTime: "2024-05-28T14:36:11Z"
    reason: Predictor ingress not created
    status: "False"
    type: Ready
  - lastTransitionTime: "2024-05-28T14:36:11Z"
    reason: PredictorRouteReady not ready
    severity: Info
    status: "False"
    type: RoutesReady
  modelStatus:
    copies:
      failedCopies: 0
      totalCopies: 1
    states:
      activeModelState: Loaded
      targetModelState: Pending
    transitionStatus: InProgress
  observedGeneration: 1
  url: http://mlflow-success6g.mlflow-kserve-success6g.svc.cluster.local

pmulinka@saiacheron:~/kubernetes/kserve$ kubectl get events -n mlflow-kserve-success6g
LAST SEEN   TYPE      REASON          OBJECT                                                             MESSAGE
2m28s       Warning   BackOff         pod/mlflow-success6g-predictor-00001-deployment-8694fffb55-9wnb7   Back-off restarting failed container kserve-container in pod mlflow-success6g-predictor-00001-deployment-8694fffb55-9wnb7_mlflow-kserve-success6g(1ad2d6da-3a40-4b14-956a-0faee3ca9b3c)
2m28s       Warning   InternalError   revision/mlflow-success6g-predictor-00001                          failed to update PA "mlflow-success6g-predictor-00001": Operation cannot be fulfilled on podautoscalers.autoscaling.internal.knative.dev "mlflow-success6g-predictor-00001": the object has been modified; please apply your changes to the latest version and try again

```
</details>

<details><summary>debugging</summary>

```
# https://kserve.github.io/website/0.10/developer/debug/
pmulinka@saiacheron:~/kubernetes/kserve$ kubectl get ksvc -n mlflow-kserve-success6g
NAME                         URL                                                                           LATESTCREATED                      LATESTREADY                        READY   REASON
mlflow-success6g-predictor   http://mlflow-success6g-predictor.mlflow-kserve-success6g.svc.cluster.local   mlflow-success6g-predictor-00001   mlflow-success6g-predictor-00001   False   RevisionMissing
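# RevisionMissing Error
# NOTE: the outer `kubectl get revision` below has no `-n mlflow-kserve-success6g` (only the inner
# `kubectl get configuration` does), so it queries the current default namespace; the NotFound that
# follows is probably caused by that rather than by the revision really being absent.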
pmulinka@saiacheron:~/kubernetes/kserve$ kubectl get revision $(kubectl get configuration mlflow-success6g-predictor -n mlflow-kserve-success6g --output jsonpath="{.status.latestCreatedRevisionName}")
Error from server (NotFound): revisions.serving.knative.dev "mlflow-success6g-predictor-00001" not found
pmulinka@saiacheron:~/kubernetes/kserve$ kubectl get pod -l serving.kserve.io/inferenceservice=mlflow-success6g -n mlflow-kserve-success6g
NAME                                                           READY   STATUS             RESTARTS        AGE
mlflow-success6g-predictor-00001-deployment-8694fffb55-9wnb7   0/2     CrashLoopBackOff   8 (4m51s ago)   43m
pmulinka@saiacheron:~/kubernetes/kserve$ kubectl get pod -l serving.kserve.io/inferenceservice=mlflow-success6g -n mlflow-kserve-success6g
NAME                                                           READY   STATUS    RESTARTS       AGE
mlflow-success6g-predictor-00001-deployment-8694fffb55-9wnb7   2/2     Running   9 (7m7s ago)   45m
pmulinka@saiacheron:~/kubernetes/kserve$ kubectl logs -l model=mlflow-success6g -n mlflow-kserve-success6g -c storage-initializer
No resources found in mlflow-kserve-success6g namespace.
pmulinka@saiacheron:~/kubernetes/kserve$ kubectl delete inferenceservice mlflow-success6g -n mlflow-kserve-success6g
inferenceservice.serving.kserve.io "mlflow-success6g" deleted
```
</details>