In [47]:
import keras_tuner as kt
import mlflow
import mlflow.tensorflow
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
import numpy as np
# Fail fast unless the installed TensorFlow reports a version string that
# begins with "2" — the rest of the notebook is written against the 2.x API.
if tf.__version__[:1] != '2':
    raise ImportError("This script requires TensorFlow 2.x")
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from dotenv import load_dotenv

# Load environment variables from the .env file.
# Only `load_dotenv` is imported (the `dotenv` module name itself is not bound),
# so it is called directly; one call is sufficient. Left as the cell's last
# expression so its boolean result is displayed.
load_dotenv()


True

In [None]:


# Tracking server (hosted on DagsHub) and the experiment that the runs started
# later in this notebook are recorded under.
MLFLOW_TRACKING_URI = "https://dagshub.com/MavinashR88/AutoTaxAI-End-to-End-Tax-Prediction-with-MLflow-DVC-and-Docker.mlflow"
MLFLOW_EXPERIMENT_NAME = "hyperparameter tuning"

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
# Kept as the last expression so the resulting Experiment object is displayed.
mlflow.set_experiment(MLFLOW_EXPERIMENT_NAME)



2025/06/15 16:58:50 INFO mlflow.tracking.fluent: Experiment with name 'hyperparameter tuning' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/6927c95704e74f4c8e57d365320a06a9', creation_time=1750024730748, experiment_id='0', last_update_time=1750024730748, lifecycle_stage='active', name='hyperparameter tuning', tags={}>

In [30]:
# Read the raw housing CSV and discard every row that has a missing value.
data = pd.read_csv("../data/raw/HousingData.csv").dropna()

# TAX is the regression target; every other column is used as an input feature.
y = data['TAX'].values
X = data.drop(columns="TAX").values

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the inputs. The scaler is fitted on the training rows only and
# the same fitted statistics are then applied to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)


In [31]:
def build_model(hp):
    """Build and compile the regression network for one tuner trial.

    The network is three identical hidden stages (Dense(64) followed by
    Dropout) and a single linear output unit. The input width is read from
    the notebook-level ``X_train`` array.

    Hyperparameters requested from ``hp``:
        - "activation": hidden-layer activation, "relu" or "tanh"
        - "dropout": dropout rate, one of 0.1, 0.3, 0.5
        - "optimizer": "adam" or "rmsprop"

    Args:
        hp: hyperparameter container supplied by the tuner; only its
            ``Choice`` method is used here.

    Returns:
        The compiled model (MSE loss, MAE reported as a metric).
    """
    n_features = X_train.shape[1]

    # One value per hyperparameter name, shared by all three hidden stages.
    hidden_activation = hp.Choice("activation", ["relu", "tanh"])
    dropout_rate = hp.Choice("dropout", [0.1, 0.3, 0.5])

    net = keras.Sequential()
    net.add(layers.Input(shape=(n_features,)))

    for _ in range(3):
        net.add(layers.Dense(64, activation=hidden_activation))
        net.add(layers.Dropout(dropout_rate))

    # Single linear unit: the model predicts one continuous value.
    net.add(layers.Dense(1, activation='linear'))

    net.compile(
        optimizer=hp.Choice("optimizer", ["adam", "rmsprop"]),
        loss="mean_squared_error",
        metrics=["mean_absolute_error"],
    )
    return net


In [32]:
# Callback handed to the search: watches validation loss, waits 5 epochs
# without improvement, and is configured to restore the best weights.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Random search over the hyperparameters declared in build_model, ranked by
# validation MAE: 10 trials, each executed once. Tuner state is kept under
# tuner_logs/tax_model_param_tuning.
tuner = kt.RandomSearch(
    hypermodel=build_model,
    objective="val_mean_absolute_error",
    max_trials=10,
    executions_per_trial=1,
    directory="tuner_logs",
    project_name="tax_model_param_tuning",
)

Reloading Tuner from tuner_logs/tax_model_param_tuning/tuner0.json


In [33]:
# Options passed through tuner.search for every trial's training run.
search_fit_options = {
    "validation_split": 0.2,   # fraction of the training data used for validation
    "epochs": 100,             # upper bound; early_stop may end a trial sooner
    "batch_size": 32,
    "callbacks": [early_stop],
    "verbose": 1,
}

# Run the whole search inside one MLflow run named "keras_tuner_search".
with mlflow.start_run(run_name="keras_tuner_search"):
    tuner.search(X_train, y_train, **search_fit_options)


Trial 10 Complete [00h 00m 12s]
val_mean_absolute_error: 58.27195358276367

Best val_mean_absolute_error So Far: 37.59938430786133
Total elapsed time: 00h 30m 20s
🏃 View run keras_tuner_search at: https://dagshub.com/MavinashR88/AutoTaxAI-End-to-End-Tax-Prediction-with-MLflow-DVC-and-Docker.mlflow/#/experiments/0/runs/1c48408724164e0380f4ace8b0f35968
🧪 View experiment at: https://dagshub.com/MavinashR88/AutoTaxAI-End-to-End-Tax-Prediction-with-MLflow-DVC-and-Docker.mlflow/#/experiments/0


In [41]:
# Take the top-ranked entry from the tuner: its hyperparameters and its model.
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
best_model = tuner.get_best_models(num_models=1)[0]

# Record the winning hyperparameters and the hold-out MAE in a run of their own.
# NOTE(review): the "keras_tuner_search" run is opened and closed by its own
# `with` block, so no parent run appears to be active here — presumably
# nested=True does not attach this run to it as a child; confirm.
with mlflow.start_run(run_name="best_model_final", nested=True):
    mlflow.log_params(best_hp.values)
    # The model is compiled with one loss and one metric, so evaluate()
    # is unpacked into (loss, MAE) on the held-out X_val / y_val split.
    loss, mae = best_model.evaluate(X_val, y_val, verbose=0)
    mlflow.log_metric("test_mae", mae)
    # Model artifact logging is currently disabled:
    # mlflow.keras.log_model(best_model, artifact_path="model")



🏃 View run best_model_final at: https://dagshub.com/MavinashR88/AutoTaxAI-End-to-End-Tax-Prediction-with-MLflow-DVC-and-Docker.mlflow/#/experiments/0/runs/cc7e100314234d5ba98881ca72e756bc
🧪 View experiment at: https://dagshub.com/MavinashR88/AutoTaxAI-End-to-End-Tax-Prediction-with-MLflow-DVC-and-Docker.mlflow/#/experiments/0
