In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
import numpy as np

train_df = pd.read_csv(r"C:\Users\Alex\Desktop\STAY_SMARTER\adaptive_ARC\data\3d_print_train.csv")
test_df = pd.read_csv(r"C:\Users\Alex\Desktop\STAY_SMARTER\adaptive_ARC\data\3d_print_test.csv")

y = train_df[['height', 'width']]
X = train_df.drop(['height', 'width'], axis=1)

cat_features = X.select_dtypes(include='object').columns.tolist()
num_features = X.select_dtypes(exclude='object').columns.tolist()

X_train_raw, X_val_raw, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

encoder = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(sparse_output=False, handle_unknown='ignore'), cat_features)
    ],
    remainder='passthrough'  
)

X_train = encoder.fit_transform(X_train_raw)
X_val = encoder.transform(X_val_raw)

X_train = X_train.astype('float32')
X_val = X_val.astype('float32')
y_train = y_train.astype('float32')
y_val = y_val.astype('float32')


In [4]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 295 entries, 0 to 294
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   shape_type      295 non-null    object 
 1   material        295 non-null    object 
 2   infill_density  295 non-null    int64  
 3   layer_height    295 non-null    float64
 4   color           295 non-null    object 
 5   print_quality   295 non-null    object 
 6   height          295 non-null    float64
 7   width           295 non-null    float64
dtypes: float64(3), int64(1), object(4)
memory usage: 18.6+ KB


### Learn

In [9]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping
import keras_tuner as kt
import io
import matplotlib.pyplot as plt

### Архитектура довольно простая -> повышение диапазона параметров -> повышение сложности модели

In [None]:
class MyHyperModel(kt.HyperModel):
    def build(self, hp):
        model = Sequential()
        num_layers = hp.Int("num_layers", 2, 5)
        model.add(Dense(units=hp.Int("units_1", 8, 128, step=8), activation="relu", input_shape=(X_train.shape[1],)))
        model.add(Dropout(hp.Float("dropout_1", 0.1, 0.5, step=0.1)))
        for i in range(1, num_layers):
            model.add(Dense(units=hp.Int(f"units_{i+1}", 8, 128, step=8), activation="relu"))
            model.add(Dropout(hp.Float(f"dropout_{i+1}", 0.1, 0.5, step=0.1)))
        model.add(Dense(y_train.shape[1], activation="linear",
                        kernel_regularizer=regularizers.l2(hp.Float("l2_reg", 1e-6, 1e-2, sampling="log"))))
        model.compile(optimizer=Adam(
            learning_rate=hp.Float("learning_rate", 1e-5, 1e-2, sampling="log")),
            loss="mae")
        return model

    def fit(self, hp, model, *args, **kwargs):
        return model.fit(*args, batch_size=hp.Choice("batch_size", [4, 8, 16, 32]), **kwargs)

tuner = kt.Hyperband(
    MyHyperModel(),
    objective="val_loss",
    max_epochs=300,
    factor=3,
    directory="FINAL_DL",
    project_name="hyperband",
    overwrite=True
)
early_stop = EarlyStopping(monitor="val_loss", patience=15, restore_best_weights=True)

tuner.search(X_train, y_train, validation_data=(X_val, y_val), epochs=100, callbacks=[early_stop], verbose=0)

best_trial = tuner.oracle.get_best_trials(1)[0]
checkpoint_path = tuner._get_checkpoint_fname(best_trial.trial_id)
best_hps = tuner.get_best_hyperparameters(1)[0]
best_model = tuner.hypermodel.build(best_hps)

# history = best_model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=500, callbacks=[early_stop])
#------------------Сохранение весов(вариативно)
# best_model.load_weights(checkpoint_path)
# temp_dir = tempfile.mkdtemp()
# model_path = os.path.join(temp_dir, "model.h5")
# scaler_path = os.path.join(temp_dir, "scaler.pkl")
# target_scaler_path = os.path.join(temp_dir, "target_scaler.pkl")
# plot_path = os.path.join(temp_dir, "training_history.png")
# zip_path = os.path.join(temp_dir, "training_artifacts.zip")

# best_model.save(model_path)
# pickle.dump(transformer, open(scaler_path, "wb"))
# pickle.dump(target_scaler, open(target_scaler_path, "wb"))

# plt.figure()
# plt.plot(history.history['loss'], label='Training Loss')
# plt.plot(history.history['val_loss'], label='Validation Loss')
# plt.xlabel('Epochs')
# plt.ylabel('Loss')
# plt.title('Training History')
# plt.legend()
# plt.grid(True)
# plt.tight_layout()
# plt.savefig(plot_path)
# plt.close()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **

Epoch 1/500


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - loss: 61.3208 - val_loss: 47.0215
Epoch 2/500
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 47.3012 - val_loss: 33.1650
Epoch 3/500
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 35.3563 - val_loss: 30.4680
Epoch 4/500
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 33.4577 - val_loss: 30.8729
Epoch 5/500
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 31.3930 - val_loss: 29.8682
Epoch 6/500
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 32.3489 - val_loss: 29.9432
Epoch 7/500
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 32.2593 - val_loss: 29.7785
Epoch 8/500
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 31.0855 - val_loss: 29.7961
Epoch 9/500
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

### Metrics+MlFlow

In [12]:
! pip install mlflow==2.9.2


Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting mlflow==2.9.2
  Downloading mlflow-2.9.2-py3-none-any.whl.metadata (13 kB)
Collecting databricks-cli<1,>=0.8.7 (from mlflow==2.9.2)
  Downloading databricks_cli-0.18.0-py2.py3-none-any.whl.metadata (4.0 kB)
Collecting entrypoints<1 (from mlflow==2.9.2)
  Downloading entrypoints-0.4-py3-none-any.whl.metadata (2.6 kB)
Collecting gitpython<4,>=2.1.0 (from mlflow==2.9.2)
  Downloading gitpython-3.1.45-py3-none-any.whl.metadata (13 kB)
Collecting protobuf<5,>=3.12.0 (from mlflow==2.9.2)
  Downloading protobuf-4.25.8-cp310-abi3-win_amd64.whl.metadata (541 bytes)
Collecting pytz<2024 (from mlflow==2.9.2)
  Downloading pytz-2023.4-py2.py3-none-any.whl.metadata (22 kB)
Collecting packaging<24 (from mlflow==2.9.2)
  Downloading packaging-23.2-py3-none-any.whl.metadata (3.2 kB)
Collecting importlib-metadata!=4.7.0,<8,>=3.7.0 (from mlflow==2.9.2)
  Downloading importlib_metadata-7.2.1-py3-none-any.whl.metadata (4.6

  You can safely remove it manually.
  You can safely remove it manually.
  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
datasets 3.6.0 requires pyarrow>=15.0.0, but you have pyarrow 14.0.2 which is incompatible.


In [1]:
import os
import mlflow
os.environ['USER'] = 'Alex Razuvaev'
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("Dl_Adaptive")


<Experiment: artifact_location='file:///G:/mlFlow_data/artefacts/188302031316848441', creation_time=1754080474671, experiment_id='188302031316848441', last_update_time=1754080474671, lifecycle_stage='active', name='Dl_Adaptive', tags={}>

In [15]:
import os
import mlflow
import mlflow.tensorflow
import matplotlib.pyplot as plt
import io

with mlflow.start_run():
    mlflow.set_tag("created_by", "Alex")

    history = best_model.fit(X_train, y_train, validation_data=(X_val, y_val),
                             epochs=500, callbacks=[early_stop], verbose=0)

    for epoch in range(len(history.history["loss"])):
        mlflow.log_metric("train_loss", history.history["loss"][epoch], step=epoch)
        mlflow.log_metric("val_loss", history.history["val_loss"][epoch], step=epoch)

    mlflow.tensorflow.log_model(best_model, artifact_path="model")

    fig, ax = plt.subplots()
    ax.plot(history.history['loss'], label='Training Loss')
    ax.plot(history.history['val_loss'], label='Validation Loss')
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Loss')
    ax.set_title('Training History')
    ax.legend()
    ax.grid(True)
    plt.tight_layout()

    img_buf = io.BytesIO()
    plt.savefig(img_buf, format='png')
    img_buf.seek(0)
    plt.close(fig)

    with open("temp_plot.png", "wb") as f:
        f.write(img_buf.read())
    mlflow.log_artifact("temp_plot.png", artifact_path="plots")
    os.remove("temp_plot.png")  





### Prediction

In [None]:
import mlflow.tensorflow
import pandas as pd
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

model_uri = "runs:/6f50e2259eee4cfa8e30f3727a881c21/model" # запуск по run_id 
model_uri = "models:/DL_adaptive/latest" # запуск по имени регистр. модели
loaded_model = mlflow.tensorflow.load_model(model_uri)

X = train_df.drop(['height', 'width'], axis=1)

cat_features = X.select_dtypes(include='object').columns.tolist()
num_features = X.select_dtypes(exclude='object').columns.tolist()

single_row_raw = X.iloc[[0]] 

encoder = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(sparse_output=False, handle_unknown='ignore'), cat_features)
    ],
    remainder='passthrough'
)

encoder.fit(X)

X_single = encoder.transform(single_row_raw)
X_single = X_single.astype('float32')

prediction = loaded_model.predict(X_single)
print("🔮 Prediction:", prediction)


  latest = client.get_latest_versions(name, None if stage is None else [stage])


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step
🔮 Prediction: [[68.59556 63.77798]]
