# testing the best model

In [1]:
import os 
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_absolute_error,r2_score,mean_squared_error
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from urllib.parse import urlparse
import mlflow 
import mlflow.sklearn
import numpy as np
from SurgeSense.constants import * 
from SurgeSense.utils.common import read_yaml, create_directories

In [2]:
%pwd 

'd:\\pythonProjects\\SurgeSense\\research'

In [3]:
import os 
os.chdir('../')
%pwd

'd:\\pythonProjects\\SurgeSense'

In [4]:
def evaluation_metrics(actual,pred):
    rmse=np.sqrt(mean_squared_error(actual,pred))
    mae=mean_absolute_error(actual,pred)
    r2=r2_score(actual,pred)
    return rmse, mae, r2 

In [5]:
config=read_yaml(CONFIG_FILE_PATH)
params=read_yaml(PARAMS_FILE_PATH)


[2025-03-23 22:17:02,028: INFO :common : yaml file: config\config.yaml loaded successfully]
[2025-03-23 22:17:02,040: INFO :common : yaml file: params.yaml loaded successfully]


In [6]:
from dataclasses import dataclass

@dataclass(frozen=True)
class HyperOptParams:
    root_dir: Path
    train_data_path: Path 
    test_data_path: Path
    model_name: str
    n_estimators: list 
    max_depth: list 
    min_samples_split: int 
    target_column: str

In [7]:
# configuration 
class HyperOptParamsConfigManager:
    def __init__(
            self,
            config_file_path=CONFIG_FILE_PATH,
            params_file_path=PARAMS_FILE_PATH,
            schema_file_path=SCHEMA_FILE_PATH,
            ):
        self.config=read_yaml(config_file_path)
        self.params=read_yaml(params_file_path)
        self.schema=read_yaml(schema_file_path)
    
    def get_hyperopt_config(self)->HyperOptParams:
        config=self.config.model_trainer 
        params=self.params.Hyperopt_params.RANDOM_FOREST
        schema=self.schema.TARGET_COLUMN

        hypoeropt_config=HyperOptParams(
            root_dir=config.root_dir,
            train_data_path=config.train_data_path,
            test_data_path=config.test_data_path,
            model_name=config.model_name,
            n_estimators=params.n_estimators,
            max_depth=params.max_depth,
            min_samples_split=params.min_samples_split,
            target_column=schema.name
        )
        return hypoeropt_config

In [12]:
# components
from hyperopt import STATUS_OK, hp, fmin, tpe, Trials
import dagshub
from functools import partial 
import mlflow
import pandas as pd 
import os 
from SurgeSense import logger
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
import joblib
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
import pandas as pd 
import numpy as np

class hyperOptTraining:
    def __init__(self,config=HyperOptParams):
          self.config=config

    def create_pipeline(self):
        
        categorical_columns=['cab_type','destination','source','name']
        numerical_columns=['distance','surge_multiplier','temp','clouds','pressure','rain','humidity','wind','day','hour','month']

        numerical_preprocessor=Pipeline(
            steps=[
                ('imputation_menu',SimpleImputer(missing_values=np.nan,strategy='median')),
                ('scalar',StandardScaler())
            ]
        )

        categorical_preprocessor=Pipeline(
            steps=[
                ('imputation_constant',SimpleImputer(strategy='most_frequent')),
                ('encode',OneHotEncoder(handle_unknown='ignore'))
            ]
        )

        preprocessor=ColumnTransformer(
            transformers=[
                ('categorical_columns',categorical_preprocessor,categorical_columns),
                ('numerical_columns',numerical_preprocessor,numerical_columns)
            ]
        )

        
        pipe=Pipeline(
            steps=[
                ('preprocessor',preprocessor),
                ('model', RandomForestRegressor())
            ]
        )
        

        return pipe

    def objective(self,params,xtrain,ytrain,xtest,ytest):
        with mlflow.start_run():
            mlflow.set_tag('model','RandomForestRegressor')

            pipe=self.create_pipeline()
            model=pipe.set_params(**params)
            model.fit(xtrain,ytrain)
            ypred=model.predict(xtest)
            mlflow.log_params(model.get_params())
            rmse,mae,r2=evaluation_metrics(ytest,ypred)
            mlflow.log_metrics({'rmse':rmse,'mse': mae, 'r2':r2})
        return {'loss':rmse, 'status':STATUS_OK}

    def train(self):
        train_data=pd.read_csv(self.config.train_data_path)
        test_data=pd.read_csv(self.config.test_data_path)

        xtrain=train_data.drop([self.config.target_column],axis=1)
        xtest=test_data.drop([self.config.target_column],axis=1)
        ytrain=train_data[[self.config.target_column]]
        ytest=test_data[[self.config.target_column]]

        search_space={
            'model__n_estimators':hp.uniformint('n_estimators',self.config.n_estimators[0],self.config.n_estimators[1]),
            'model__max_depth':hp.uniformint('max_depth',self.config.max_depth[0],self.config.max_depth[1]),
            'model__min_samples_split':hp.uniformint('min_samples_split',self.config.min_samples_split[0],self.config.min_samples_split[1])
        }
        dagshub.init(repo_owner='Immortal-Pi',repo_name='SurgeSense',mlflow=True)
        experiment_name='hyperopt_test_random_forest'
        existing_experiment=mlflow.get_experiment_by_name(experiment_name)

        if existing_experiment is None:
                experiment_id = mlflow.create_experiment(name=experiment_name,artifact_location='hyperopt-test')
        else:
            experiment_id = existing_experiment.experiment_id
        mlflow.set_experiment(experiment_id=experiment_id) 

        trials=Trials()
        best_results=fmin(
            fn=partial(
                self.objective,
                xtrain=xtrain[:5000],
                ytrain=ytrain[:5000],
                xtest=xtest[:5000],
                ytest=ytest[:5000]
            ),
            space=search_space,
            algo=tpe.suggest,
            max_evals=10, # change to config
            trials=trials
        )
        return best_results,trials

    def register_best_model(self,best_results,trials):
        best_index=np.argmin([trial['result']['loss'] for trial in trials.trials])
        best_model=trials.trials[best_index]['result']['loss']

        with mlflow.start_run() as run:
            mlflow.sklearn.log_model(sk_model=best_model,artifact_path='best_model')
            mlflow.log_params(trials.trials[best_index]['mics']['vals'])
            model_uri=f'runs:{run.info.run_id}/best_model'
            mlflow.register_model(model_uri=model_uri,name='best_model')




In [13]:
# pipeline 
try: 
    config=HyperOptParamsConfigManager()
    hyperopt_config=config.get_hyperopt_config()
    hyperopt_config_training=hyperOptTraining(config=hyperopt_config)
    best_results,trails=hyperopt_config_training.train()
    #hyperopt_config_training.register_best_model(best_results,trails)
except Exception as e:
    raise e 

[2025-03-23 22:17:55,504: INFO :common : yaml file: config\config.yaml loaded successfully]
[2025-03-23 22:17:55,508: INFO :common : yaml file: params.yaml loaded successfully]
[2025-03-23 22:17:55,513: INFO :common : yaml file: schema.yaml loaded successfully]
[2025-03-23 22:17:58,117: INFO :_client : HTTP Request: GET https://dagshub.com/api/v1/repos/Immortal-Pi/SurgeSense "HTTP/1.1 200 OK"]


[2025-03-23 22:17:58,122: INFO :helpers : Initialized MLflow to track repo "Immortal-Pi/SurgeSense"]


[2025-03-23 22:17:58,125: INFO :helpers : Repository Immortal-Pi/SurgeSense initialized!]
  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?][2025-03-23 22:17:58,492: INFO :tpe : build_posterior_wrapper took 0.001077 seconds]
[2025-03-23 22:17:58,493: INFO :tpe : TPE using 0 trials]


  return fit_method(estimator, *args, **kwargs)



🏃 View run redolent-mole-354 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5/runs/ff83142c7b52438c90e84a75b543c6fe

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5

 10%|█         | 1/10 [00:10<01:33, 10.39s/trial, best loss: 2.4645627119971594][2025-03-23 22:18:08,880: INFO :tpe : build_posterior_wrapper took 0.001998 seconds]
[2025-03-23 22:18:08,882: INFO :tpe : TPE using 1/1 trials with best loss 2.464563]


  return fit_method(estimator, *args, **kwargs)



🏃 View run traveling-ray-429 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5/runs/012a52ac3da94d1691a88cd1554c68a9

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5

 20%|██        | 2/10 [00:17<01:06,  8.37s/trial, best loss: 2.4645627119971594][2025-03-23 22:18:15,829: INFO :tpe : build_posterior_wrapper took 0.000999 seconds]
[2025-03-23 22:18:15,830: INFO :tpe : TPE using 2/2 trials with best loss 2.464563]


  return fit_method(estimator, *args, **kwargs)



🏃 View run learned-tern-299 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5/runs/df073c9656214a76af876521b6e475f5

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5

 30%|███       | 3/10 [00:31<01:17, 11.03s/trial, best loss: 2.1272060699458435][2025-03-23 22:18:30,038: INFO :tpe : build_posterior_wrapper took 0.001003 seconds]
[2025-03-23 22:18:30,039: INFO :tpe : TPE using 3/3 trials with best loss 2.127206]


  return fit_method(estimator, *args, **kwargs)



🏃 View run tasteful-quail-116 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5/runs/69c5469a06234fea8e8e1a702f99e91b

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5

 40%|████      | 4/10 [00:39<00:59,  9.84s/trial, best loss: 2.1272060699458435][2025-03-23 22:18:38,046: INFO :tpe : build_posterior_wrapper took 0.001007 seconds]
[2025-03-23 22:18:38,047: INFO :tpe : TPE using 4/4 trials with best loss 2.127206]


  return fit_method(estimator, *args, **kwargs)



🏃 View run salty-zebra-403 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5/runs/d08e5e9d28cf45a4a29ee9578b0bce00

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5

 50%|█████     | 5/10 [00:43<00:37,  7.60s/trial, best loss: 2.1272060699458435][2025-03-23 22:18:41,663: INFO :tpe : build_posterior_wrapper took 0.000999 seconds]
[2025-03-23 22:18:41,664: INFO :tpe : TPE using 5/5 trials with best loss 2.127206]


  return fit_method(estimator, *args, **kwargs)



🏃 View run languid-colt-365 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5/runs/69d7633d84a547e3af9a26e5d4e254da

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5

 60%|██████    | 6/10 [00:52<00:32,  8.14s/trial, best loss: 2.1272060699458435][2025-03-23 22:18:50,875: INFO :tpe : build_posterior_wrapper took 0.001227 seconds]
[2025-03-23 22:18:50,876: INFO :tpe : TPE using 6/6 trials with best loss 2.127206]


  return fit_method(estimator, *args, **kwargs)



🏃 View run marvelous-fish-701 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5/runs/7ee8da3b42c44917a2da71098c0c31b0

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5

 70%|███████   | 7/10 [00:59<00:23,  7.68s/trial, best loss: 2.1272060699458435][2025-03-23 22:18:57,587: INFO :tpe : build_posterior_wrapper took 0.000906 seconds]
[2025-03-23 22:18:57,587: INFO :tpe : TPE using 7/7 trials with best loss 2.127206]


  return fit_method(estimator, *args, **kwargs)



🏃 View run overjoyed-fish-903 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5/runs/08a31fa7491b4cbcac084553e36a5d9d

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5

 80%|████████  | 8/10 [01:06<00:15,  7.52s/trial, best loss: 2.1272060699458435][2025-03-23 22:19:04,764: INFO :tpe : build_posterior_wrapper took 0.000000 seconds]
[2025-03-23 22:19:04,765: INFO :tpe : TPE using 8/8 trials with best loss 2.127206]


  return fit_method(estimator, *args, **kwargs)



🏃 View run amazing-penguin-340 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5/runs/e767edea042748d8aebd7d36e33ac8be

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5

 90%|█████████ | 9/10 [01:10<00:06,  6.61s/trial, best loss: 2.1272060699458435][2025-03-23 22:19:09,388: INFO :tpe : build_posterior_wrapper took 0.000000 seconds]
[2025-03-23 22:19:09,390: INFO :tpe : TPE using 9/9 trials with best loss 2.127206]


  return fit_method(estimator, *args, **kwargs)



🏃 View run fun-robin-730 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5/runs/52b9482854d844d3a3fb49441672d3e0

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/5

100%|██████████| 10/10 [01:14<00:00,  7.50s/trial, best loss: 2.1272060699458435]


In [None]:
data=read_yaml(PARAMS_FILE_PATH)
data.Hyperopt_params.XGBoostRegressor.n_estimators[0]

In [None]:
hyperopt_config_training.register_best_model(best_results,trails)