# testing the best model

In [1]:
import os 
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_absolute_error,r2_score,mean_squared_error
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from urllib.parse import urlparse
import mlflow 
import mlflow.sklearn
import numpy as np
from SurgeSense.constants import * 
from SurgeSense.utils.common import read_yaml, create_directories

In [2]:
%pwd 

'd:\\pythonProjects\\SurgeSense\\research'

In [3]:
import os 
os.chdir('../')
%pwd

'd:\\pythonProjects\\SurgeSense'

In [4]:
from dataclasses import dataclass

@dataclass(frozen=True)
class HyperOptParamsRandomForest:
    root_dir: Path
    train_data_path: Path 
    test_data_path: Path
    model_name: str
    n_estimators: list 
    max_depth: list 
    min_samples_split: int 
    target_column: str

In [5]:
# configuration 
class HyperOptParamsConfigManagerRandomForest:
    def __init__(
            self,
            config_file_path=CONFIG_FILE_PATH,
            params_file_path=PARAMS_FILE_PATH,
            schema_file_path=SCHEMA_FILE_PATH,
            ):
        self.config=read_yaml(config_file_path)
        self.params=read_yaml(params_file_path)
        self.schema=read_yaml(schema_file_path)
    
    def get_hyperopt_config(self)->HyperOptParamsRandomForest:
        config=self.config.model_trainer 
        params=self.params.Hyperopt_params.RANDOM_FOREST
        schema=self.schema.TARGET_COLUMN

        hypoeropt_config=HyperOptParamsRandomForest(
            root_dir=config.root_dir,
            train_data_path=config.train_data_path,
            test_data_path=config.test_data_path,
            model_name=config.model_name,
            n_estimators=params.n_estimators,
            max_depth=params.max_depth,
            min_samples_split=params.min_samples_split,
            target_column=schema.name
        )
        return hypoeropt_config

In [6]:
# components
from hyperopt import STATUS_OK, hp, fmin, tpe, Trials
import dagshub
from functools import partial 
import mlflow
import pandas as pd 
import os 
from SurgeSense import logger
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
import joblib
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
import pandas as pd 
import numpy as np

class hyperOptTraining:
    def __init__(self,config=HyperOptParamsRandomForest):
          self.config=config

    def create_pipeline(self):
        
        categorical_columns=['cab_type','destination','source','name']
        numerical_columns=['distance','surge_multiplier','temp','clouds','pressure','rain','humidity','wind','day','hour','month']

        numerical_preprocessor=Pipeline(
            steps=[
                ('imputation_menu',SimpleImputer(missing_values=np.nan,strategy='median')),
                ('scalar',StandardScaler())
            ]
        )

        categorical_preprocessor=Pipeline(
            steps=[
                ('imputation_constant',SimpleImputer(strategy='most_frequent')),
                ('encode',OneHotEncoder(handle_unknown='ignore'))
            ]
        )

        preprocessor=ColumnTransformer(
            transformers=[
                ('categorical_columns',categorical_preprocessor,categorical_columns),
                ('numerical_columns',numerical_preprocessor,numerical_columns)
            ]
        )

        
        pipe=Pipeline(
            steps=[
                ('preprocessor',preprocessor),
                ('model', RandomForestRegressor())
            ]
        )
        

        return pipe
    def evaluation_metrics(self,actual,pred):
        rmse=np.sqrt(mean_squared_error(actual,pred))
        mae=mean_absolute_error(actual,pred)
        r2=r2_score(actual,pred)
        return rmse, mae, r2
    
    def objective(self,params,xtrain,ytrain,xtest,ytest):
        with mlflow.start_run():
            mlflow.set_tag('model','RandomForestRegressor')

            pipe=self.create_pipeline()
            model=pipe.set_params(**params)
            model.fit(xtrain,ytrain)
            ypred=model.predict(xtest)
            mlflow.log_params(model.get_params())
            rmse,mae,r2=self.evaluation_metrics(ytest,ypred)
            mlflow.log_metrics({'rmse':rmse,'mse': mae, 'r2':r2})
        return {'loss':rmse, 'status':STATUS_OK,'model':model}

    def train(self):
        train_data=pd.read_csv(self.config.train_data_path)
        test_data=pd.read_csv(self.config.test_data_path)

        xtrain=train_data.drop([self.config.target_column],axis=1)
        xtest=test_data.drop([self.config.target_column],axis=1)
        ytrain=train_data[[self.config.target_column]]
        ytest=test_data[[self.config.target_column]]

        search_space={
            'model__n_estimators':hp.uniformint('n_estimators',self.config.n_estimators[0],self.config.n_estimators[1]),
            'model__max_depth':hp.uniformint('max_depth',self.config.max_depth[0],self.config.max_depth[1]),
            'model__min_samples_split':hp.uniformint('min_samples_split',self.config.min_samples_split[0],self.config.min_samples_split[1])
        }
        dagshub.init(repo_owner='Immortal-Pi',repo_name='SurgeSense',mlflow=True)
        experiment_name='hyperopt_test_random_forest'
        existing_experiment=mlflow.get_experiment_by_name(experiment_name)

        if existing_experiment is None:
                experiment_id = mlflow.create_experiment(name=experiment_name,artifact_location='hyperopt-test')
        else:
            experiment_id = existing_experiment.experiment_id
        mlflow.set_experiment(experiment_id=experiment_id) 

        trials=Trials()
        best_results=fmin(
            fn=partial(
                self.objective,
                xtrain=xtrain[:1000],
                ytrain=ytrain[:1000],
                xtest=xtest[:1000],
                ytest=ytest[:1000]
            ),
            space=search_space,
            algo=tpe.suggest,
            max_evals=10, # change to config
            trials=trials
        )
        return best_results,trials

    def register_best_model(self,best_results,trials):
        best_index=np.argmin([trial['result']['loss'] for trial in trials.trials])
        best_model=trials.trials[best_index]['result']['model']

        with mlflow.start_run() as run:
            mlflow.sklearn.log_model(best_model,artifact_path='model')
            mlflow.log_params(trials.trials[best_index]['misc']['vals'])
            model_uri=f'runs:/{run.info.run_id}/best_model'
            mlflow.register_model(model_uri=model_uri,name='best_model')




In [7]:
# pipeline 
try: 
    config=HyperOptParamsConfigManagerRandomForest()
    hyperopt_config=config.get_hyperopt_config()
    hyperopt_config_training=hyperOptTraining(config=hyperopt_config)
    best_results,trails=hyperopt_config_training.train()
    hyperopt_config_training.register_best_model(best_results,trails)
except Exception as e:
    raise e 

[2025-03-24 12:03:32,647: INFO :common : yaml file: config\config.yaml loaded successfully]
[2025-03-24 12:03:32,654: INFO :common : yaml file: params.yaml loaded successfully]
[2025-03-24 12:03:32,660: INFO :common : yaml file: schema.yaml loaded successfully]
[2025-03-24 12:03:35,316: INFO :_client : HTTP Request: GET https://dagshub.com/api/v1/user "HTTP/1.1 200 OK"]


[2025-03-24 12:03:35,327: INFO :helpers : Accessing as Immortal-Pi]
[2025-03-24 12:03:35,514: INFO :_client : HTTP Request: GET https://dagshub.com/api/v1/repos/Immortal-Pi/SurgeSense "HTTP/1.1 200 OK"]
[2025-03-24 12:03:35,691: INFO :_client : HTTP Request: GET https://dagshub.com/api/v1/user "HTTP/1.1 200 OK"]


[2025-03-24 12:03:35,697: INFO :helpers : Initialized MLflow to track repo "Immortal-Pi/SurgeSense"]


[2025-03-24 12:03:35,699: INFO :helpers : Repository Immortal-Pi/SurgeSense initialized!]
  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?][2025-03-24 12:03:36,184: INFO :tpe : build_posterior_wrapper took 0.001355 seconds]
[2025-03-24 12:03:36,186: INFO :tpe : TPE using 0 trials]


  return fit_method(estimator, *args, **kwargs)



🏃 View run gifted-hog-537 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5/runs/138a84739e1f45588a82c991c1811bd2

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5

 10%|█         | 1/10 [00:19<02:59, 19.95s/trial, best loss: 2.9330167371480855][2025-03-24 12:03:56,143: INFO :tpe : build_posterior_wrapper took 0.004055 seconds]
[2025-03-24 12:03:56,143: INFO :tpe : TPE using 1/1 trials with best loss 2.933017]


  return fit_method(estimator, *args, **kwargs)



🏃 View run merciful-doe-240 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5/runs/917c9b2d4a204965b1db48d4ea41cc8c

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5

 20%|██        | 2/10 [00:38<02:35, 19.42s/trial, best loss: 2.9330167371480855][2025-03-24 12:04:15,177: INFO :tpe : build_posterior_wrapper took 0.001000 seconds]
[2025-03-24 12:04:15,178: INFO :tpe : TPE using 2/2 trials with best loss 2.933017]


  return fit_method(estimator, *args, **kwargs)



🏃 View run tasteful-penguin-784 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5/runs/9eae66fb91bd486baaa881d44644a05f

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5

 30%|███       | 3/10 [00:55<02:08, 18.30s/trial, best loss: 2.9330167371480855][2025-03-24 12:04:32,147: INFO :tpe : build_posterior_wrapper took 0.000635 seconds]
[2025-03-24 12:04:32,147: INFO :tpe : TPE using 3/3 trials with best loss 2.933017]


  return fit_method(estimator, *args, **kwargs)



🏃 View run sneaky-snake-490 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5/runs/561d1da7b7034c49abf65c8409024c29

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5

 40%|████      | 4/10 [01:15<01:53, 18.97s/trial, best loss: 2.921669329640159] [2025-03-24 12:04:52,149: INFO :tpe : build_posterior_wrapper took 0.001000 seconds]
[2025-03-24 12:04:52,151: INFO :tpe : TPE using 4/4 trials with best loss 2.921669]


  return fit_method(estimator, *args, **kwargs)



🏃 View run unique-worm-996 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5/runs/d683e303fb9544e890fdcabcf1613ca7

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5

 50%|█████     | 5/10 [01:33<01:33, 18.61s/trial, best loss: 2.896092302381945][2025-03-24 12:05:10,122: INFO :tpe : build_posterior_wrapper took 0.001000 seconds]
[2025-03-24 12:05:10,123: INFO :tpe : TPE using 5/5 trials with best loss 2.896092]


  return fit_method(estimator, *args, **kwargs)



🏃 View run defiant-fowl-843 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5/runs/ee421921b82b4873939b379a4a52663c

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5

 60%|██████    | 6/10 [01:50<01:12, 18.06s/trial, best loss: 2.896092302381945][2025-03-24 12:05:27,126: INFO :tpe : build_posterior_wrapper took 0.001024 seconds]
[2025-03-24 12:05:27,127: INFO :tpe : TPE using 6/6 trials with best loss 2.896092]


  return fit_method(estimator, *args, **kwargs)



🏃 View run unique-skink-398 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5/runs/36893c160ad24c03832913a128e7fa68

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5

 70%|███████   | 7/10 [02:05<00:51, 17.07s/trial, best loss: 2.896092302381945][2025-03-24 12:05:42,150: INFO :tpe : build_posterior_wrapper took 0.001021 seconds]
[2025-03-24 12:05:42,152: INFO :tpe : TPE using 7/7 trials with best loss 2.896092]


  return fit_method(estimator, *args, **kwargs)



🏃 View run flawless-snake-509 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5/runs/c4b31f90b6384d25bdd1483468402b1b

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5

 80%|████████  | 8/10 [02:22<00:34, 17.04s/trial, best loss: 2.896092302381945][2025-03-24 12:05:59,130: INFO :tpe : build_posterior_wrapper took 0.001918 seconds]
[2025-03-24 12:05:59,132: INFO :tpe : TPE using 8/8 trials with best loss 2.896092]


  return fit_method(estimator, *args, **kwargs)



🏃 View run adaptable-lark-388 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5/runs/3cbfa80d241d4b65a90400a6fbe17bb6

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5

 90%|█████████ | 9/10 [02:36<00:16, 16.10s/trial, best loss: 2.896092302381945][2025-03-24 12:06:13,163: INFO :tpe : build_posterior_wrapper took 0.000000 seconds]
[2025-03-24 12:06:13,164: INFO :tpe : TPE using 9/9 trials with best loss 2.896092]


  return fit_method(estimator, *args, **kwargs)



🏃 View run popular-gnat-64 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5/runs/9760c7f5d19848da91faf666e978231c

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5

100%|██████████| 10/10 [02:44<00:00, 16.49s/trial, best loss: 2.896092302381945]


Registered model 'best_model' already exists. Creating a new version of this model...
2025/03/24 12:06:34 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: best_model, version 10
Created version '10' of model 'best_model'.


🏃 View run flawless-elk-326 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5/runs/a96db22954b346edbe95bcc832c843cd
🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5
