In [40]:
import pandas as pd
import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score
from sklearn.datasets import fetch_california_housing
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from urllib.parse import urlparse


In [41]:
housing=fetch_california_housing()
housing

{'data': array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
           37.88      , -122.23      ],
        [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
           37.86      , -122.22      ],
        [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
           37.85      , -122.24      ],
        ...,
        [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
           39.43      , -121.22      ],
        [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
           39.43      , -121.32      ],
        [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
           39.37      , -121.24      ]]),
 'target': array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894]),
 'frame': None,
 'target_names': ['MedHouseVal'],
 'feature_names': ['MedInc',
  'HouseAge',
  'AveRooms',
  'AveBedrms',
  'Population',
  'AveOccup',
  'Latitude',
  'Longitude'],
 'DESCR': '.. _california_housing_dataset:\n

In [42]:
df=pd.DataFrame(housing.data, columns=housing.feature_names)
df['MedianV']=housing.target
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedianV
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [43]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
MedInc,20640.0,3.870671,1.899822,0.4999,2.5634,3.5348,4.74325,15.0001
HouseAge,20640.0,28.639486,12.585558,1.0,18.0,29.0,37.0,52.0
AveRooms,20640.0,5.429,2.474173,0.846154,4.440716,5.229129,6.052381,141.909091
AveBedrms,20640.0,1.096675,0.473911,0.333333,1.006079,1.04878,1.099526,34.066667
Population,20640.0,1425.476744,1132.462122,3.0,787.0,1166.0,1725.0,35682.0
AveOccup,20640.0,3.070655,10.38605,0.692308,2.429741,2.818116,3.282261,1243.333333
Latitude,20640.0,35.631861,2.135952,32.54,33.93,34.26,37.71,41.95
Longitude,20640.0,-119.569704,2.003532,-124.35,-121.8,-118.49,-118.01,-114.31
MedianV,20640.0,2.068558,1.153956,0.14999,1.196,1.797,2.64725,5.00001


In [44]:
df.corr()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedianV
MedInc,1.0,-0.119034,0.326895,-0.06204,0.004834,0.018766,-0.079809,-0.015176,0.688075
HouseAge,-0.119034,1.0,-0.153277,-0.077747,-0.296244,0.013191,0.011173,-0.108197,0.105623
AveRooms,0.326895,-0.153277,1.0,0.847621,-0.072213,-0.004852,0.106389,-0.02754,0.151948
AveBedrms,-0.06204,-0.077747,0.847621,1.0,-0.066197,-0.006181,0.069721,0.013344,-0.046701
Population,0.004834,-0.296244,-0.072213,-0.066197,1.0,0.069863,-0.108785,0.099773,-0.02465
AveOccup,0.018766,0.013191,-0.004852,-0.006181,0.069863,1.0,0.002366,0.002476,-0.023737
Latitude,-0.079809,0.011173,0.106389,0.069721,-0.108785,0.002366,1.0,-0.924664,-0.14416
Longitude,-0.015176,-0.108197,-0.02754,0.013344,0.099773,0.002476,-0.924664,1.0,-0.045967
MedianV,0.688075,0.105623,0.151948,-0.046701,-0.02465,-0.023737,-0.14416,-0.045967,1.0


In [45]:
y=df['MedianV']
X=df.drop(columns=['MedianV'],axis=1)

In [46]:
X.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25


In [47]:
y.head()

0    4.526
1    3.585
2    3.521
3    3.413
4    3.422
Name: MedianV, dtype: float64

In [48]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,shuffle=True,random_state=1000)
print(f'The new dimension of the training set is {X_train.shape}')
print(f'The dimension of the Xtest is {X_test.shape}')

The new dimension of the training set is (16512, 8)
The dimension of the Xtest is (4128, 8)


In [49]:
#gridsearchcv

def hyperparameter(X_train,y_train,parameter_grid):
    rf=RandomForestRegressor()
    grd=GridSearchCV(estimator=rf,param_grid=parameter_grid,cv=5,n_jobs=-1,verbose=2,
                     scoring="neg_mean_squared_error")
    grd.fit(X_train,y_train)
    return grd
    

In [50]:
from mlflow.models import infer_signature

#log the experiment using mlflow

mlflow.set_experiment("House price prediction")

sign=infer_signature(X_train,y_train)

#hyperparameter grid

parameter_grid={
    'n_estimators':[10,100],
    'max_depth':[10,50],
    'min_samples_split':[2,5],
   'min_samples_leaf':[1,2],
}

## start mlflow experiment

with mlflow.start_run():
    #perform hyperparamter tuning 
    grd=hyperparameter(X_train,y_train,parameter_grid)
    
    #get the best model
    best_model=grd.best_estimator_
    
    ## evaluate the best model
    y_pred=best_model.predict(X_test)
    mse=mean_squared_error(y_test,y_pred)
    
    #log the parameters and metrics
    mlflow.log_param("best_n_estimator",grd.best_params_['n_estimators'])
    mlflow.log_param("best_max_depth",grd.best_params_['max_depth'])
    mlflow.log_param("best_min_samples_split",grd.best_params_['min_samples_split']) 
    mlflow.log_param("best_min_samples_leaf",grd.best_params_['min_samples_leaf'])
    mlflow.log_metric("mse",mse)
    
    
    #Tracking url
    mlflow.set_tracking_uri("http://127.0.0.1:5000")
    tracking_url_type_store=urlparse(mlflow.get_tracking_uri()).scheme
    
    if tracking_url_type_store!='file':
        mlflow.sklearn.log_model(best_model,"model",registered_model_name="Best rf model")
    else:
        mlflow.sklearn.log_model(best_model,"model",signature=sign)
        
    print(f"Best hyperparameters are: {grd.best_params_}")
    print(f'Mean squared error is :{mse}')
    

2025/01/18 18:56:24 INFO mlflow.tracking.fluent: Experiment with name 'House price prediction' does not exist. Creating a new experiment.


Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=10; total time=   0.8s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=10; total time=   0.8s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=10; total time=   0.8s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=10; total time=   0.8s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=10; total time=   0.9s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=5, n_estimators=10; total time=   0.9s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=5, n_estimators=10; total time=   0.9s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=5, n_estimators=10; total time=   0.9s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=5, n_estimators=10; total time=   0.9s
[CV] END max_depth=10, min_samples_leaf

Registered model 'Best rf model' already exists. Creating a new version of this model...
2025/01/18 18:57:24 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Best rf model, version 3
Created version '3' of model 'Best rf model'.
2025/01/18 18:57:24 INFO mlflow.tracking._tracking_service.client: 🏃 View run nimble-goose-111 at: http://127.0.0.1:5000/#/experiments/575762593548624050/runs/16a6f5d491c0485fa5e4e0742872c015.
2025/01/18 18:57:24 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/575762593548624050.


Best hyperparameters are: {'max_depth': 50, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}
Mean squared error is :0.23080822485024216


In [51]:
from mlflow.models import validate_serving_input

model_uri = 'runs:/3c3326fe3ea34d4ba8b7adf9751aa91a/model'

# The logged model does not contain an input_example.
# Manually generate a serving payload to verify your model prior to deployment.
from mlflow.models import convert_input_example_to_serving_input

# Define INPUT_EXAMPLE via assignment with your own input example to the model
# A valid input example is a data instance suitable for pyfunc prediction
serving_payload = convert_input_example_to_serving_input(X_train)

# Validate the serving payload works on the model
validate_serving_input(model_uri, serving_payload)

array([1.86174  , 3.6527801, 1.74044  , ..., 2.1153202, 3.1442102,
       1.27972  ])

In [52]:
import mlflow
logged_model = 'runs:/3c3326fe3ea34d4ba8b7adf9751aa91a/model'

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on a Pandas DataFrame.
import pandas as pd
y_pred=loaded_model.predict(pd.DataFrame(X_test))
# Create a DataFrame with results if needed
col = X_test.columns
results = pd.DataFrame(X_test, columns=col)
results['predictions'] =y_pred 
results['actual']=y_test

print(results)

        MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
11115   4.5000      23.0  4.835735   1.063401       912.0  2.628242     33.84   
17110  15.0001      40.0  8.584541   1.000000       577.0  2.787440     37.46   
14847   2.9318      26.0  5.406690   1.082746      2156.0  3.795775     32.68   
7263    2.1912      37.0  2.423077   1.128205       714.0  4.576923     33.99   
19411   5.7476      19.0  8.045356   1.172786      1412.0  3.049676     37.73   
...        ...       ...       ...        ...         ...       ...       ...   
9476    2.1625      18.0  5.488136   1.172881       904.0  3.064407     39.42   
16140   3.2396      52.0  4.021569   1.082353      1400.0  2.745098     37.78   
1187    2.6071      26.0  5.606695   1.066946       662.0  2.769874     39.44   
17543   2.8167      52.0  4.145985   1.017032      1336.0  3.250608     37.35   
11441   3.4679      18.0  3.848394   1.060241      2192.0  2.200803     33.73   

       Longitude  predictio

In [53]:
scaler=MinMaxScaler()


X_train_transformed=scaler.fit_transform(X_train)
X_test_transformed=scaler.transform(X_test)


In [54]:
import numpy as np
scaler_y = MinMaxScaler()
y_train_scaled = scaler_y.fit_transform(y_train.values.reshape(-1, 1))
y_test_scaled = scaler_y.transform(y_test.values.reshape(-1, 1))

In [55]:
def hyperparameter(X_train,y_train,parameter_grid):
    rf=RandomForestRegressor()
    grd=GridSearchCV(estimator=rf,param_grid=parameter_grid,cv=5,n_jobs=-1,verbose=2,
                     scoring="neg_mean_squared_error")
    grd.fit(X_train,y_train)
    return grd

In [56]:
from mlflow.models import infer_signature

mlflow.set_experiment("House price prediction")

sign=infer_signature(X_train_transformed,y_train_scaled)

#hyperparameter grid

parameter_grid={
    'n_estimators':[10,100],
    'max_depth':[10,50],
    'min_samples_split':[2,5],
   'min_samples_leaf':[1,2],
}

## start mlflow experiment

with mlflow.start_run():
    #perform hyperparamter tuning 
    grd=hyperparameter(X_train_transformed,y_train_scaled,parameter_grid)
    
    #get the best model
    best_model=grd.best_estimator_
    
    ## evaluate the best model
    y_pred=best_model.predict(X_test_transformed)
    mse=mean_squared_error(y_test_scaled,y_pred)
    
    #log the parameters and metrics
    mlflow.log_param("best_n_estimator",grd.best_params_['n_estimators'])
    mlflow.log_param("best_max_depth",grd.best_params_['max_depth'])
    mlflow.log_param("best_min_samples_split",grd.best_params_['min_samples_split']) 
    mlflow.log_param("best_min_samples_leaf",grd.best_params_['min_samples_leaf'])
    mlflow.log_metric("mse",mse)
    
    
    #Tracking url
    mlflow.set_tracking_uri("http://127.0.0.1:5000")
    tracking_url_type_store=urlparse(mlflow.get_tracking_uri()).scheme
    
    if tracking_url_type_store!='file':
        mlflow.sklearn.log_model(best_model,"model",registered_model_name="Best rf model")
    else:
        mlflow.sklearn.log_model(best_model,"model",signature=sign)
        
    print(f"Best hyperparameters are: {grd.best_params_}")
    print(f'Mean squared error is :{mse}')
    

Fitting 5 folds for each of 16 candidates, totalling 80 fits


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=10; total time=   0.8s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=10; total time=   0.8s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=10; total time=   0.8s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=10; total time=   0.8s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=10; total time=   0.7s


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=5, n_estimators=10; total time=   0.7s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=5, n_estimators=10; total time=   0.7s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=5, n_estimators=10; total time=   0.7s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=5, n_estimators=10; total time=   0.8s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=5, n_estimators=10; total time=   0.8s


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   7.5s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   7.5s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   7.5s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   7.6s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   7.6s


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


[CV] END max_depth=10, min_samples_leaf=2, min_samples_split=2, n_estimators=10; total time=   0.7s
[CV] END max_depth=10, min_samples_leaf=2, min_samples_split=2, n_estimators=10; total time=   0.7s
[CV] END max_depth=10, min_samples_leaf=2, min_samples_split=2, n_estimators=10; total time=   0.7s
[CV] END max_depth=10, min_samples_leaf=2, min_samples_split=2, n_estimators=10; total time=   0.7s
[CV] END max_depth=10, min_samples_leaf=2, min_samples_split=2, n_estimators=10; total time=   0.7s


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=5, n_estimators=100; total time=   7.3s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=5, n_estimators=100; total time=   7.3s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=5, n_estimators=100; total time=   7.4s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=5, n_estimators=100; total time=   7.4s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=5, n_estimators=100; total time=   7.4s


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


[CV] END max_depth=10, min_samples_leaf=2, min_samples_split=5, n_estimators=10; total time=   0.7s
[CV] END max_depth=10, min_samples_leaf=2, min_samples_split=5, n_estimators=10; total time=   0.7s
[CV] END max_depth=10, min_samples_leaf=2, min_samples_split=5, n_estimators=10; total time=   0.7s
[CV] END max_depth=10, min_samples_leaf=2, min_samples_split=5, n_estimators=10; total time=   0.7s
[CV] END max_depth=10, min_samples_leaf=2, min_samples_split=5, n_estimators=10; total time=   0.7s


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


[CV] END max_depth=10, min_samples_leaf=2, min_samples_split=2, n_estimators=100; total time=   6.8s
[CV] END max_depth=10, min_samples_leaf=2, min_samples_split=2, n_estimators=100; total time=   6.7s
[CV] END max_depth=10, min_samples_leaf=2, min_samples_split=2, n_estimators=100; total time=   6.8s
[CV] END max_depth=10, min_samples_leaf=2, min_samples_split=2, n_estimators=100; total time=   6.8s
[CV] END max_depth=10, min_samples_leaf=2, min_samples_split=2, n_estimators=100; total time=   6.9s


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


[CV] END max_depth=50, min_samples_leaf=1, min_samples_split=2, n_estimators=10; total time=   1.1s
[CV] END max_depth=50, min_samples_leaf=1, min_samples_split=2, n_estimators=10; total time=   1.1s
[CV] END max_depth=50, min_samples_leaf=1, min_samples_split=2, n_estimators=10; total time=   1.1s
[CV] END max_depth=50, min_samples_leaf=1, min_samples_split=2, n_estimators=10; total time=   1.1s
[CV] END max_depth=10, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=   6.7s
[CV] END max_depth=10, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=   6.7s
[CV] END max_depth=50, min_samples_leaf=1, min_samples_split=2, n_estimators=10; total time=   1.1s


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


[CV] END max_depth=10, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=   6.8s
[CV] END max_depth=10, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=   6.8s
[CV] END max_depth=10, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=   6.9s


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


[CV] END max_depth=50, min_samples_leaf=1, min_samples_split=5, n_estimators=10; total time=   1.0s
[CV] END max_depth=50, min_samples_leaf=1, min_samples_split=5, n_estimators=10; total time=   1.0s
[CV] END max_depth=50, min_samples_leaf=1, min_samples_split=5, n_estimators=10; total time=   1.0s
[CV] END max_depth=50, min_samples_leaf=1, min_samples_split=5, n_estimators=10; total time=   1.1s


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


[CV] END max_depth=50, min_samples_leaf=1, min_samples_split=5, n_estimators=10; total time=   1.0s


  return fit_method(estimator, *args, **kwargs)


[CV] END max_depth=50, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=  10.9s
[CV] END max_depth=50, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=  11.0s
[CV] END max_depth=50, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=  11.0s
[CV] END max_depth=50, min_samples_leaf=1, min_samples_split=5, n_estimators=100; total time=  10.0s
[CV] END max_depth=50, min_samples_leaf=1, min_samples_split=5, n_estimators=100; total time=  10.0s
[CV] END max_depth=50, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=  11.1s
[CV] END max_depth=50, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=  11.1s


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


[CV] END max_depth=50, min_samples_leaf=1, min_samples_split=5, n_estimators=100; total time=  10.0s
[CV] END max_depth=50, min_samples_leaf=1, min_samples_split=5, n_estimators=100; total time=  10.1s
[CV] END max_depth=50, min_samples_leaf=1, min_samples_split=5, n_estimators=100; total time=  10.1s


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


[CV] END max_depth=50, min_samples_leaf=2, min_samples_split=2, n_estimators=10; total time=   0.9s
[CV] END max_depth=50, min_samples_leaf=2, min_samples_split=2, n_estimators=10; total time=   1.0s
[CV] END max_depth=50, min_samples_leaf=2, min_samples_split=2, n_estimators=10; total time=   1.0s
[CV] END max_depth=50, min_samples_leaf=2, min_samples_split=2, n_estimators=10; total time=   1.0s
[CV] END max_depth=50, min_samples_leaf=2, min_samples_split=2, n_estimators=10; total time=   0.9s


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


[CV] END max_depth=50, min_samples_leaf=2, min_samples_split=5, n_estimators=10; total time=   0.9s
[CV] END max_depth=50, min_samples_leaf=2, min_samples_split=5, n_estimators=10; total time=   1.0s
[CV] END max_depth=50, min_samples_leaf=2, min_samples_split=5, n_estimators=10; total time=   0.9s
[CV] END max_depth=50, min_samples_leaf=2, min_samples_split=5, n_estimators=10; total time=   0.9s
[CV] END max_depth=50, min_samples_leaf=2, min_samples_split=5, n_estimators=10; total time=   0.9s


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


[CV] END max_depth=50, min_samples_leaf=2, min_samples_split=2, n_estimators=100; total time=  10.2s
[CV] END max_depth=50, min_samples_leaf=2, min_samples_split=2, n_estimators=100; total time=  10.2s
[CV] END max_depth=50, min_samples_leaf=2, min_samples_split=2, n_estimators=100; total time=  10.2s
[CV] END max_depth=50, min_samples_leaf=2, min_samples_split=2, n_estimators=100; total time=  10.3s
[CV] END max_depth=50, min_samples_leaf=2, min_samples_split=2, n_estimators=100; total time=  10.4s
[CV] END max_depth=50, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=  10.1s
[CV] END max_depth=50, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=  10.0s
[CV] END max_depth=50, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=  10.0s
[CV] END max_depth=50, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=  10.1s
[CV] END max_depth=50, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total tim

  return fit_method(estimator, *args, **kwargs)
Registered model 'Best rf model' already exists. Creating a new version of this model...
2025/01/18 18:58:22 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Best rf model, version 4
Created version '4' of model 'Best rf model'.
2025/01/18 18:58:22 INFO mlflow.tracking._tracking_service.client: 🏃 View run tasteful-smelt-331 at: http://127.0.0.1:5000/#/experiments/575762593548624050/runs/0aecf02a4d014c12aee68666f4050c8a.
2025/01/18 18:58:22 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/575762593548624050.


Best hyperparameters are: {'max_depth': 50, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 100}
Mean squared error is :0.009895870015058403


In [57]:
from mlflow.models import validate_serving_input

model_uri = 'runs:/7b813df80c3f468cba0a7739405ce678/model'

# The logged model does not contain an input_example.
# Manually generate a serving payload to verify your model prior to deployment.
from mlflow.models import convert_input_example_to_serving_input

# Define INPUT_EXAMPLE via assignment with your own input example to the model
# A valid input example is a data instance suitable for pyfunc prediction
serving_payload = convert_input_example_to_serving_input(X_train_transformed)

# Validate the serving payload works on the model
validate_serving_input(model_uri, serving_payload)

array([0.35507284, 0.73619025, 0.32102076, ..., 0.40674877, 0.63083657,
       0.21960974])

In [58]:
import mlflow
logged_model = 'runs:/7b813df80c3f468cba0a7739405ce678/model'

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on a Pandas DataFrame.
y_pred=loaded_model.predict(pd.DataFrame(X_test_transformed))


col = X_test.columns
results = pd.DataFrame(X_test_transformed, columns=col)
results['predictions'] =y_pred 
results['actual']=y_test

print(results)

        MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
0     0.275865  0.431373  0.028282   0.027251    0.025395  0.001558  0.138151   
1     1.000000  0.764706  0.054858   0.024741    0.016005  0.001686  0.522848   
2     0.167715  0.490196  0.032330   0.028017    0.060265  0.002497  0.014878   
3     0.116640  0.705882  0.011179   0.029816    0.019845  0.003126  0.154091   
4     0.361905  0.352941  0.051035   0.031581    0.039410  0.001897  0.551541   
...        ...       ...       ...        ...         ...       ...       ...   
4123  0.114660  0.333333  0.032907   0.031585    0.025171  0.001909  0.731137   
4124  0.188942  1.000000  0.022511   0.028001    0.039074  0.001652  0.556854   
4125  0.145322  0.490196  0.033748   0.027391    0.018388  0.001672  0.733262   
4126  0.159777  1.000000  0.023393   0.025416    0.037280  0.002059  0.511158   
4127  0.204687  0.333333  0.021283   0.027126    0.061274  0.001214  0.126461   

      Longitude  prediction