In [2]:
import mlflow
import mlflow.sklearn
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Enable scikit-learn autologging before any estimator is fitted so that the
# fit() call below is captured by MLflow.
mlflow.sklearn.autolog()

# Load the California housing regression dataset (features and target).
housing = fetch_california_housing()
X = housing.data
y = housing.target

# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model and record parameters, metric and model in a single MLflow run.
with mlflow.start_run():
    # Placeholder data-location params; the data itself comes from
    # fetch_california_housing(), not from these paths.
    mlflow.log_param("train-data-path", "example_train_data")
    mlflow.log_param("valid-data-path", "example_valid_data")

    # NOTE(review): no random_state is passed, so the fitted forest (and the
    # resulting RMSE) can differ from run to run — confirm this is intended.
    model = RandomForestRegressor()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_val)
    # Take the square root of the MSE explicitly instead of passing
    # `squared=False`: that keyword was deprecated in scikit-learn 1.4 and
    # removed in 1.6, so the original call fails on current releases.
    rmse = mean_squared_error(y_val, y_pred) ** 0.5
    mlflow.log_metric("rmse", rmse)

    # NOTE(review): autologging is enabled above and may already log the
    # fitted model; this explicit call is kept to preserve the original
    # behaviour — confirm whether both are needed.
    mlflow.sklearn.log_model(model, "model")

print(f"Logged run with RMSE: {rmse}")




Logged run with RMSE: 0.5046990342705129
