In this notebook, you should implement a first version of a working machine learning model to predict the age of an Abalone.

A few guidelines:
- The model does not have to be complex. A simple linear regression model is enough.
- You should use MLflow to track your experiments. You can use the MLflow UI to compare your experiments.
- Do not push any MLflow data to the repository. Only the code to run the experiments is interesting and should be pushed.

In [None]:
# Client
import mlflow
from mlflow import MlflowClient
client = MlflowClient()

In [1]:
import os
import mlflow
import mlflow.sklearn
import pandas as pd
from mlflow import MlflowClient
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import root_mean_squared_error
from sklearn.metrics import accuracy_score



# Set the experiment
mlflow.set_experiment("abalone-model")

# Enable MLflow autologging
#mlflow.sklearn.autolog()

# Start an MLflow run
with mlflow.start_run() as run:

    run_id = run.info.run_id

    mlflow.set_tags({
        "model": "Lasso Regression",
        "version": "1.0",
        "author": " ",
        "description": "Lasso regression model training and logging example"
    })

    # Load the dataset
    file_path = '../data/abalone.csv'
    df = pd.read_csv(file_path)

    #preprocessing

    # Separate features and target variable
    X = df.drop(columns='Rings')
    y = df['Rings']

    # Parameters to be logged
    alpha = 0.01
    test_size = 0.2
    random_state = 50

    # Preprocess the 'Sex' feature using one-hot encoding, and scale numeric features
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), ['Length', 'Diameter', 'Height', 'Whole weight', 
                                       'Shucked weight', 'Viscera weight', 'Shell weight']),
            ('cat', OneHotEncoder(), ['Sex'])
        ]
    )

    # Create a pipeline with Lasso regression model
    model = Lasso(alpha=alpha)
    pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('regressor', model)
    ])

    # Log parameters manually
    mlflow.log_param("alpha", alpha)
    mlflow.log_param("test_size", test_size)
    mlflow.log_param("random_state", random_state)

    # Split the data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    # Train the model
    pipeline.fit(X_train, y_train)

    # Predict on the test set
    y_pred = pipeline.predict(X_test)

    # Evaluate the model using Mean Squared Error
    rmse = root_mean_squared_error(y_test, y_pred)
    print(f'Root Mean Squared Error: {rmse:.2f}')

    # Log metrics manually
    mlflow.log_metric("rmse", rmse)

    # Log the model
    mlflow.sklearn.log_model(pipeline, "models")

    # Log an artifact (e.g., a text file with notes or additional information)
    artifact_dir = "artifact_dir"
    os.makedirs(artifact_dir, exist_ok=True)  # Create a directory if it doesn't exist
    
    artifact_path = os.path.join(artifact_dir, "notes.txt")
    with open(artifact_path, "w") as f:
        f.write(f"Run ID: {run_id}\n")
        f.write("Additional notes or insights can go here...\n")

    # Log the artifact
    mlflow.log_artifact(artifact_path)
    print(f"default artifacts URI: '{mlflow.get_artifact_uri()}'")

    # Optionally: print the run details
    print(f"Run ID: {run_id}")
    print(f"Model registered as: Lassomodel")
   

In [2]:
experiments = client.search_experiments()
experiments

In [3]:
result = mlflow.register_model(f"runs:/{run_id}/models", "lasso_model")

[<Experiment: artifact_location='file:///c:/Users/ZQili/Desktop/HEC/MLOps/xhec-mlops-project-student/notebooks/mlruns/0', creation_time=1729772862004, experiment_id='0', last_update_time=1729772862004, lifecycle_stage='active', name='Default', tags={}>]

In [8]:
# Set the experiment name
mlflow.set_experiment("abalone-model")

# Start a run
with mlflow.start_run() as run:
    run_id = run.info.run_id

    # Set tags for the run
    mlflow.set_tags({
        "model": "Lasso Regression",
        "version": "1.0",
        "author": "Your Name",  # Specify your name
        "description": "Lasso regression model training and logging example"
    })

    # Load the dataset
    file_path = '../data/abalone.csv'
    df = pd.read_csv(file_path)

    # Preprocessing
    # Separate features and target variable
    X = df.drop(columns='Rings')
    y = df['Rings']

    # Parameters to be logged
    alpha = 0.01
    test_size = 0.2
    random_state = 50

    # Preprocess the 'Sex' feature using one-hot encoding, and scale numeric features
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), ['Length', 'Diameter', 'Height', 'Whole weight',
                                        'Shucked weight', 'Viscera weight', 'Shell weight']),
            ('cat', OneHotEncoder(), ['Sex'])
        ]
    )

    # Create a pipeline with Lasso regression model
    model = Lasso(alpha=alpha)
    pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('regressor', model)
    ])

    # Log parameters manually
    mlflow.log_param("alpha", alpha)
    mlflow.log_param("test_size", test_size)
    mlflow.log_param("random_state", random_state)

    # Split the data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    # Train the model
    pipeline.fit(X_train, y_train)

    # Predict on the test set
    y_pred = pipeline.predict(X_test)

    # Evaluate the model using Mean Squared Error
    rmse = root_mean_squared_error(y_test, y_pred)
    logging.info(f'Root Mean Squared Error: {rmse:.2f}')

    # Log metrics manually
    mlflow.log_metric("rmse", rmse)

    # Log the model
    mlflow.sklearn.log_model(pipeline, "models")

    # Register the model in MLflow model registry
    mlflow.register_model(
        f"runs:/{run_id}/models",  # Path to the model
        "LassoRegressionModel"      # Name of the model in the registry
    )

# Optionally: print the run details
print(f"Run ID: {run_id}")
print(f"Model registered as: LassoRegressionModel")




Run ID: 520c439409214c32beddc97d1555960f
Model registered as: LassoRegressionModel


Registered model 'LassoRegressionModel' already exists. Creating a new version of this model...
Created version '2' of model 'LassoRegressionModel'.


In [9]:
experiment= mlflow.search_experiments()
experiment

[<Experiment: artifact_location='file:///c:/Users/ZQili/Desktop/HEC/MLOps/xhec-mlops-project-student/notebooks/mlruns/988574509241110565', creation_time=1729772911754, experiment_id='988574509241110565', last_update_time=1729772911754, lifecycle_stage='active', name='abalone-model', tags={}>,
 <Experiment: artifact_location='file:///c:/Users/ZQili/Desktop/HEC/MLOps/xhec-mlops-project-student/notebooks/mlruns/0', creation_time=1729772862004, experiment_id='0', last_update_time=1729772862004, lifecycle_stage='active', name='Default', tags={}>]

In [10]:
!mlflow ui --host 0.0.0.0 --port 5002

^C


In [None]:
experiments = client.search_experiments()
experiments

In [None]:
!mlflow ui --host 0.0.0.0 --port 5003
