In [0]:
# First we import the libraries
from sklearn.datasets import fetch_california_housing
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import mlflow
import mlflow.sklearn
import numpy as np

In [0]:
# Fetch the California housing dataset that ships with scikit-learn.
housing = fetch_california_housing()
# Feature matrix as a DataFrame, with one named column per dataset feature.
x = pd.DataFrame(data=housing.data, columns=housing.feature_names)
# Regression target taken from the same dataset object.
y = housing.target

In [0]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [0]:
# Instantiate scikit-learn's LinearRegression with its default settings.
# The estimator is only constructed here; fitting happens in a later cell
# via model.fit(x_train, y_train).
model = LinearRegression()

In [0]:
# Track training, evaluation and the fitted model inside a single MLflow run.
with mlflow.start_run():
    # Fit on the training split, then predict the held-out split.
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    # Evaluation metrics on the test split; RMSE is the square root of MSE.
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Log every metric to the active run, one key per value
    # (dict insertion order fixes the logging order).
    run_metrics = {"mse": mse, "rmse": rmse, "mae": mae, "r2_score": r2}
    for metric_name, metric_value in run_metrics.items():
        mlflow.log_metric(metric_name, metric_value)

    # Log the fitted estimator with the run as "linear_regression_model".
    mlflow.sklearn.log_model(model, "linear_regression_model")

    # Echo the headline numbers in the cell output.
    print(f"RMSE: {rmse:.3f}")
    print(f"R2: {r2:.3f}")

In [0]:
# Fit the model on the training split and predict the held-out split.
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

# Evaluation metrics on the test split; RMSE is the square root of MSE.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Build a two-column ("Metric", "Value") Spark DataFrame for display.
# Values are cast to built-in floats first: np.sqrt returns a NumPy scalar,
# and Spark's schema inference is keyed on Python types, so NumPy scalars can
# be rejected ("not supported type: numpy.float64") depending on the version.
metrics_df = spark.createDataFrame(
    [
        ("RMSE", float(rmse)),
        ("R2", float(r2)),
        ("MAE", float(mae)),
        ("MSE", float(mse)),
    ],
    ["Metric", "Value"],
)
display(metrics_df)