In [3]:
import zipfile
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from mlflow import MlflowClient, tracking
import mlflow
# Connect to the MLflow Tracking Server
client = MlflowClient(tracking_uri="http://localhost:8080")

# Define paths and read data
zip_file_path = r"D:\SirWaqas\forest+fires.zip"
with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
    with zip_ref.open("forestfires.csv") as csv_file:
        df = pd.read_csv(csv_file)

# Preprocess data
df.month.replace(("jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec"), (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), inplace=True)
df.day.replace(("mon", "tue", "wed", "thu", "fri", "sat", "sun"), (1, 2, 3, 4, 5, 6, 7), inplace=True)
scale_columns = ["X", "Y", "month", "day", "FFMC", "DMC", "DC", "ISI", "temp", "RH", "wind", "rain"]
scaler = StandardScaler()
df[scale_columns] = scaler.fit_transform(df[scale_columns])

# Configure and build the model
X = df.drop("area", axis=1)
y = df["area"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=52)

# Start an MLflow experiment and log initial parameters
with mlflow.start_run():
    mlflow.log_param("n_features", X.shape[1])
    mlflow.log_param("epochs", 100)
    mlflow.log_param("batch_size", 13)

    model = Sequential([
        Dense(X.shape[1], input_dim=X.shape[1], activation="relu"),
        Dense(X.shape[1] // 2, activation="relu"),
        Dense(X.shape[1] // 2, activation="relu"),
        Dense(1)
    ])

    model.compile(optimizer="adam", loss="mean_squared_error")

    # Train the model and log metrics
    history = model.fit(
        X_train, y_train, validation_split=0.1, epochs=100, batch_size=13
    )
    mlflow.log_metric("train_loss", history.history["loss"][-1])
    mlflow.log_metric("val_loss", history.history["val_loss"][-1])

# Register the model and save metrics as artifacts
model.save("forest_fire_model.h5")
mlflow.log_artifact("forest_fire_model.h5", "mlflow_models")

# Evaluate the model and log additional metrics
mse_test = model.evaluate(X_test, y_test)
mlflow.log_metric("test_mse", mse_test)
y_pred = model.predict(X_test)
y_pred = y_pred.flatten()
r2 = r2_score(y_test, y_pred)
mlflow.log_metric("test_r2", r2)

# Load the model as a PyFunc model
loaded_model = mlflow.pyfunc.load_model("forest_fire_model.h5")

# Make predictions on a new DataFrame
new_data = pd.DataFrame({"X": 10, "Y": 20, "month": 5, "day": 2, "FFMC": 30, "DMC": 40, "DC": 50,
                         "ISI": 60, "temp": 70, "RH": 80, "wind": 90, "rain": 0})


Exception: Run with UUID b6e4584a48fd46d581867e8c8f27f3b1 is already active. To start a new run, first end the current run with mlflow.end_run(). To start a nested run, call start_run with nested=True