In [0]:
from sklearn.datasets import make_regression
import numpy as np
import pandas as pd

# Build a reproducible synthetic regression dataset: 1000 rows, 5 features
# (3 of them informative), noise=0.1, fixed seed.
X, y = make_regression(n_samples=1000, n_features=5, n_informative=3, noise=0.1, random_state=42)

# Wrap the feature matrix in a DataFrame with 1-based column names
# ("feature_1" ... "feature_N") and attach the target as an extra column.
feature_names = ["feature_" + str(position) for position in range(1, X.shape[1] + 1)]
df = pd.DataFrame(X, columns=feature_names).assign(target=y)

# Hold out 20% of the rows for testing; the seed keeps the split stable.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

In [0]:
from sklearn.linear_model import LinearRegression
import mlflow
from mlflow.models import infer_signature

# Enable autologging, but leave model logging to the explicit log_model call
# below. With log_models=True every run would store the model twice: once by
# autolog and once by the explicit call that also registers it.
mlflow.sklearn.autolog(log_models=False)

# Train inside a named run so everything logged here is grouped together
with mlflow.start_run(run_name="linear_regression") as run:
    # Train model
    lr = LinearRegression()
    lr.fit(X_train, y_train)

    # Infer the input/output schema from the training features and the
    # model's predictions on them
    signature = infer_signature(X_train, lr.predict(X_train))

    # Log the fitted model under "models" and register it in the model
    # registry as "LinearRegressionModel"; the first training row is stored
    # as the input example.
    mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path="models",
        registered_model_name="LinearRegressionModel",
        signature=signature,
        input_example=X_train[:1],
    )


In [0]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge

# Define hyperparameter grid
param_grid = {
    "alpha": [0.1, 0.5, 1.0],
    "solver": ["svd", "cholesky"],
}

# Perform a 5-fold cross-validated grid search inside its own named run
with mlflow.start_run(run_name="ridge_regression") as run:
    ridge = Ridge()
    grid_search = GridSearchCV(ridge, param_grid, cv=5)
    grid_search.fit(X_train, y_train)

    best_ridge = grid_search.best_estimator_

    # Log every winning hyperparameter (best_alpha and best_solver), not just
    # alpha, so the run fully describes the selected model.
    mlflow.log_params(
        {f"best_{name}": value for name, value in grid_search.best_params_.items()}
    )
    mlflow.log_metric("best_score", grid_search.best_score_)

    # Infer the schema from the model actually being logged instead of
    # reusing a `signature` variable left behind by another cell.
    ridge_signature = infer_signature(X_train, best_ridge.predict(X_train))

    # Log and register the best estimator, with an input example for parity
    # with the LinearRegression model logged earlier in the notebook.
    mlflow.sklearn.log_model(
        sk_model=best_ridge,
        artifact_path="models",
        registered_model_name="RidgeRegressionModel",
        signature=ridge_signature,
        input_example=X_train[:1],
    )


In [0]:
from mlflow.tracking import MlflowClient

# Client used to query the model registry
client = MlflowClient()

# Fetch all registered models and show the raw result
registered_models = client.search_registered_models()
print(registered_models)

# Print each model's name followed by one line per entry in latest_versions.
# NOTE(review): latest_versions / current_stage are stage-based fields; they
# may be empty or unsupported depending on the registry backend - confirm.
for model in registered_models:
    header = f'Model name: {model.name}'
    print(header)
    version_lines = [
        f'  Version: {version.version}, Stage: {version.current_stage}'
        for version in model.latest_versions
    ]
    for line in version_lines:
        print(line)

In [0]:
from mlflow.pyfunc import load_model

# Fully qualified registry name in catalog.schema.model_name form
model_name = "kevo_390014788141199.default.linearregressionmodel"

# Resolve the version through the "champion" alias.
# NOTE(review): no visible cell assigns this alias, so it presumably has to
# exist in the registry before this cell runs - confirm.
champion_uri = f"models:/{model_name}@champion"
loaded_model = load_model(champion_uri)

# Score the held-out test set and show the first five predictions
predictions = loaded_model.predict(X_test)
first_five = predictions[:5]
print("Predictions for first 5 samples:", first_five)

In [0]:
# Pair each feature name with the corresponding fitted coefficient
feature_coefficients = dict(zip(feature_names, lr.coef_))

# Log inside an explicit run. Calling mlflow.log_metric with no active run
# makes MLflow start one implicitly and leave it open, which then breaks any
# later mlflow.start_run(...) in the same kernel (e.g. re-running the training
# cells). The context manager guarantees the run is ended.
with mlflow.start_run(run_name="linear_regression_coefficients"):
    # One metric per coefficient, named "coeff_<feature>", sent in one call
    mlflow.log_metrics(
        {f"coeff_{feature}": coeff for feature, coeff in feature_coefficients.items()}
    )
    mlflow.log_metric("intercept", lr.intercept_)


In [0]:
import os
import shutil

# Local directory that receives the cloudpickle-serialized model
cloudpickle_model_dir = "models/cloudpickle/"

# save_model refuses to write into an existing, non-empty directory, so remove
# the output of any previous execution to keep this cell re-runnable.
if os.path.isdir(cloudpickle_model_dir):
    shutil.rmtree(cloudpickle_model_dir)

# Save the fitted LinearRegression model to disk using cloudpickle
mlflow.sklearn.save_model(
    sk_model=lr,
    path=cloudpickle_model_dir,
    serialization_format=mlflow.sklearn.SERIALIZATION_FORMAT_CLOUDPICKLE,
)