## 2- MLflow Models

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pandas as pd
import pickle

# Download the California housing dataset through KaggleHub.
# The returned path is used as the directory that contains housing.csv.
import kagglehub
path = kagglehub.dataset_download("camnugent/california-housing-prices")
data_path = path + "/housing.csv"  # built once; used for loading AND for the logged param
df = pd.read_csv(data_path)

# Point MLflow at the tracking server and select the experiment for this run.
import mlflow
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("california-housing-exp200")

# Single source of truth for the hold-out fraction: the value logged to MLflow
# and the value passed to train_test_split can no longer drift apart.
test_size = 0.3

with mlflow.start_run():
    # Run metadata: who ran it, which file was used, and the run configuration.
    mlflow.set_tag("developer", "Reza")
    mlflow.log_param("data-path", data_path)
    params = {'TestSize': test_size, 'ScalingNormalization': False}
    mlflow.log_params(params)

    # Training the model
    # Rows containing any missing value are removed from df itself (in place),
    # so later cells that read df see the cleaned frame.
    df.dropna(inplace=True)
    # The target column is removed from the features; ocean_proximity is also
    # excluded (presumably because it is a non-numeric column -- confirm).
    features = df.drop(columns=['median_house_value', 'ocean_proximity'])
    target = df['median_house_value']
    xtrain, xtest, ytrain, ytest = train_test_split(
        features, target, test_size=test_size, random_state=42
    )
    model = LinearRegression()
    model.fit(xtrain, ytrain)

    # Score the fitted model on the held-out split.
    pred = model.predict(xtest)
    mse = mean_squared_error(ytest, pred)
    mae = mean_absolute_error(ytest, pred)
    r2 = r2_score(ytest, pred)

    # Log all evaluation metrics for the run in one call.
    mlflow.log_metrics({
        "mean_squared_error": mse,
        "mean_absolute_error": mae,
        "r2_score": r2,
    })

    # Store the fitted estimator with the run using MLflow's sklearn flavor.
    mlflow.sklearn.log_model(model, name="housing_linear_regression_2")


In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pandas as pd
import pickle

# Download the California housing dataset through KaggleHub.
# The returned path is used as the directory that contains housing.csv.
import kagglehub
path = kagglehub.dataset_download("camnugent/california-housing-prices")
data_path = path + "/housing.csv"  # built once; used for loading AND for the logged param
df = pd.read_csv(data_path)

# Point MLflow at the tracking server and select the experiment for this run.
import mlflow
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("california-housing-exp200")

# Single source of truth for the hold-out fraction: the value logged to MLflow
# and the value passed to train_test_split can no longer drift apart.
test_size = 0.4

with mlflow.start_run():
    # Run metadata: who ran it, which file was used, and the run configuration.
    mlflow.set_tag("developer", "Reza")
    mlflow.log_param("data-path", data_path)
    params = {'TestSize': test_size, 'ScalingNormalization': False}
    mlflow.log_params(params)

    # Training the model
    # Rows containing any missing value are removed from df itself (in place),
    # so later cells that read df see the cleaned frame.
    df.dropna(inplace=True)
    # The target column is removed from the features; ocean_proximity is also
    # excluded (presumably because it is a non-numeric column -- confirm).
    features = df.drop(columns=['median_house_value', 'ocean_proximity'])
    target = df['median_house_value']
    xtrain, xtest, ytrain, ytest = train_test_split(
        features, target, test_size=test_size, random_state=42
    )
    model = LinearRegression()
    model.fit(xtrain, ytrain)

    # Score the fitted model on the held-out split.
    pred = model.predict(xtest)
    mse = mean_squared_error(ytest, pred)
    mae = mean_absolute_error(ytest, pred)
    r2 = r2_score(ytest, pred)

    # Log all evaluation metrics for the run in one call.
    mlflow.log_metrics({
        "mean_squared_error": mse,
        "mean_absolute_error": mae,
        "r2_score": r2,
    })

    # Store the fitted estimator with the run using MLflow's sklearn flavor.
    mlflow.sklearn.log_model(model, name="housing_linear_regression_2")


In [None]:
# Reload a model from the MLflow tracking server and re-score it.
#
# NOTE(review): the id below is hard-coded; presumably it was copied from the
# tracking UI for one specific logged model -- replace it with the id of the
# model you want to evaluate.
modelid = "m-907c777a901545e482232ca3504aa045"
model_uri = f"models:/{modelid}"

# Load through the generic pyfunc interface rather than the sklearn flavor.
Load_model = mlflow.pyfunc.load_model(model_uri)

# xtest / ytest are not defined in this cell: they are whatever the most
# recently executed training cell left in the notebook namespace, so this MSE
# is only comparable to the logged one if that cell used the same split.
pred = Load_model.predict(xtest)
mse = mean_squared_error(y_true=ytest, y_pred=pred)

print("Mean Squared Error:", mse)