In [1]:
!pip install mlflow
!zenml integration install mlflow

^C


In [2]:
from typing import Tuple

import mlflow
import mlflow.sklearn
import pandas as pd
from pydantic import BaseModel
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from zenml import pipeline, step

class IngestDataParameters(BaseModel):
    """Settings for the ingestion step of the pipeline.

    The ``data_ingest`` step reads ``dataset_name`` to decide which dataset
    to load.
    """

    # Name of the dataset to load; "diabetes" is the only value the
    # ingestion step checks for.
    dataset_name: str = "diabetes"

@step
def data_ingest(params: IngestDataParameters) -> pd.DataFrame:
    """Load the requested dataset into a single DataFrame.

    Args:
        params: Ingestion settings; ``params.dataset_name`` selects the
            dataset.

    Returns:
        A DataFrame with one column per feature plus a ``"target"`` column.

    Raises:
        ValueError: If ``params.dataset_name`` is not a supported dataset.
    """
    # Fail loudly on unknown names instead of reaching ``return df`` with
    # ``df`` never assigned (which surfaced as an UnboundLocalError).
    if params.dataset_name != "diabetes":
        raise ValueError(
            f"Unsupported dataset_name {params.dataset_name!r}; "
            "only 'diabetes' is available."
        )

    data = load_diabetes()
    df = pd.DataFrame(data.data, columns=data.feature_names)
    df["target"] = data.target
    return df

@step
def data_cleaning(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` without the rows that contain missing values."""
    complete_rows = df.dropna(axis=0, how="any")
    return complete_rows

@step
def model_train(
    df: pd.DataFrame,
) -> Tuple[LinearRegression, pd.DataFrame, pd.Series]:
    """Fit a linear regression on an 80/20 train/test split of ``df``.

    The return annotation is what tells ZenML that this step produces three
    separate outputs; the pipeline unpacks them as ``model, X_test, y_test``.

    Args:
        df: Feature columns plus a ``"target"`` column to predict.

    Returns:
        A tuple ``(model, X_test, y_test)``: the fitted estimator and the
        held-out features and targets for evaluation.
    """
    # Enabled before ``fit`` so the training call below can be captured.
    # NOTE(review): which MLflow run receives the logs depends on the
    # stack / experiment-tracker configuration — confirm.
    mlflow.sklearn.autolog()

    X = df.drop("target", axis=1)
    y = df["target"]
    # Fixed random_state keeps the split reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    model = LinearRegression()
    model.fit(X_train, y_train)

    return model, X_test, y_test

@step
def model_evaluate(
    model, X_test: pd.DataFrame, y_test: pd.Series
) -> Tuple[float, float]:
    """Score ``model`` on the held-out data and record the metrics.

    The return annotation is what tells ZenML that this step produces two
    separate outputs; the pipeline unpacks them as ``mse, r2``.

    Args:
        model: Fitted estimator; only its ``predict`` method is used.
        X_test: Held-out feature rows.
        y_test: True target values for ``X_test``.

    Returns:
        A tuple ``(mse, r2)`` of mean squared error and R² score.
    """
    y_pred = model.predict(X_test)
    # Cast to built-in floats so the values match the declared output type.
    mse = float(mean_squared_error(y_test, y_pred))
    r2 = float(r2_score(y_test, y_pred))

    mlflow.log_metric("mse", mse)
    mlflow.log_metric("r2", r2)

    print(f" MSE: {mse:.2f}")
    print(f" R² Score: {r2:.2f}")

    return mse, r2

@pipeline
def diabetes_pipeline(ingest_data_params: IngestDataParameters):
    """Chain the steps: ingest, clean, train, then evaluate.

    Args:
        ingest_data_params: Settings forwarded to the ingestion step.
    """
    raw_frame = data_ingest(params=ingest_data_params)
    tidy_frame = data_cleaning(df=raw_frame)
    fitted, holdout_features, holdout_targets = model_train(df=tidy_frame)
    mse, r2 = model_evaluate(
        model=fitted,
        X_test=holdout_features,
        y_test=holdout_targets,
    )


if __name__ == "__main__":
    # Run the pipeline with the default ingestion settings.
    default_params = IngestDataParameters()
    diabetes_pipeline(ingest_data_params=default_params)
