# testing the best model

In [2]:
import os 
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_absolute_error,r2_score,mean_squared_error
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from urllib.parse import urlparse
import mlflow 
import mlflow.sklearn
import numpy as np
from SurgeSense.constants import * 
from SurgeSense.utils.common import read_yaml, create_directories

In [3]:
%pwd 

'd:\\pythonProjects\\SurgeSense\\research'

In [4]:
import os 
os.chdir('../')
%pwd

'd:\\pythonProjects\\SurgeSense'

In [5]:
def evaluation_metrics(actual,pred):
    rmse=np.sqrt(mean_squared_error(actual,pred))
    mae=mean_absolute_error(actual,pred)
    r2=r2_score(actual,pred)
    return rmse, mae, r2 

In [6]:
config=read_yaml(CONFIG_FILE_PATH)
params=read_yaml(PARAMS_FILE_PATH)


[2025-03-23 21:51:30,273: INFO :common : yaml file: config\config.yaml loaded successfully]
[2025-03-23 21:51:30,278: INFO :common : yaml file: params.yaml loaded successfully]


In [7]:
from dataclasses import dataclass

@dataclass(frozen=True)
class HyperOptParams:
    root_dir: Path
    train_data_path: Path 
    test_data_path: Path
    model_name: str
    n_estimators: list 
    max_depth: list 
    learning_rate: str 
    target_column: str

In [8]:
# configuration 
class HyperOptParamsConfigManager:
    def __init__(
            self,
            config_file_path=CONFIG_FILE_PATH,
            params_file_path=PARAMS_FILE_PATH,
            schema_file_path=SCHEMA_FILE_PATH,
            ):
        self.config=read_yaml(config_file_path)
        self.params=read_yaml(params_file_path)
        self.schema=read_yaml(schema_file_path)
    
    def get_hyperopt_config(self)->HyperOptParams:
        config=self.config.model_trainer 
        params=self.params.Hyperopt_params.GRADIENT_BOOSTING
        schema=self.schema.TARGET_COLUMN

        hypoeropt_config=HyperOptParams(
            root_dir=config.root_dir,
            train_data_path=config.train_data_path,
            test_data_path=config.test_data_path,
            model_name=config.model_name,
            n_estimators=params.n_estimators,
            max_depth=params.max_depth,
            learning_rate=params.learning_rate,
            target_column=schema.name
        )
        return hypoeropt_config

In [13]:
# components
from hyperopt import STATUS_OK, hp, fmin, tpe, Trials
import dagshub
from functools import partial 
import mlflow
import pandas as pd 
import os 
from SurgeSense import logger
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
import joblib
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
import pandas as pd 
import numpy as np

class hyperOptTraining:
    def __init__(self,config=HyperOptParams):
          self.config=config

    def create_pipeline(self):
        
        categorical_columns=['cab_type','destination','source','name']
        numerical_columns=['distance','surge_multiplier','temp','clouds','pressure','rain','humidity','wind','day','hour','month']

        numerical_preprocessor=Pipeline(
            steps=[
                ('imputation_menu',SimpleImputer(missing_values=np.nan,strategy='median')),
                ('scalar',StandardScaler())
            ]
        )

        categorical_preprocessor=Pipeline(
            steps=[
                ('imputation_constant',SimpleImputer(strategy='most_frequent')),
                ('encode',OneHotEncoder(handle_unknown='ignore'))
            ]
        )

        preprocessor=ColumnTransformer(
            transformers=[
                ('categorical_columns',categorical_preprocessor,categorical_columns),
                ('numerical_columns',numerical_preprocessor,numerical_columns)
            ]
        )

        
        pipe=Pipeline(
            steps=[
                ('preprocessor',preprocessor),
                ('model', GradientBoostingRegressor())
            ]
        )
        

        return pipe

    def objective(self,params,xtrain,ytrain,xtest,ytest):
        with mlflow.start_run():
            mlflow.set_tag('model','XGBoostRegressor')

            pipe=self.create_pipeline()
            model=pipe.set_params(**params)
            model.fit(xtrain,ytrain)
            ypred=model.predict(xtest)
            mlflow.log_params(model.get_params())
            rmse,mae,r2=evaluation_metrics(ytest,ypred)
            mlflow.log_metrics({'rmse':rmse,'mse': mae, 'r2':r2})
        return {'loss':rmse, 'status':STATUS_OK}

    def train(self):
        train_data=pd.read_csv(self.config.train_data_path)
        test_data=pd.read_csv(self.config.test_data_path)

        xtrain=train_data.drop([self.config.target_column],axis=1)
        xtest=test_data.drop([self.config.target_column],axis=1)
        ytrain=train_data[[self.config.target_column]]
        ytest=test_data[[self.config.target_column]]

        search_space={
            'model__n_estimators':hp.uniformint('n_estimators',self.config.n_estimators[0],self.config.n_estimators[1]),
            'model__max_depth':hp.uniformint('max_depth',self.config.max_depth[0],self.config.max_depth[1]),
            'model__learning_rate':hp.uniform('learning_rate',self.config.learning_rate[0],self.config.learning_rate[1])
        }
        dagshub.init(repo_owner='Immortal-Pi',repo_name='SurgeSense',mlflow=True)
        experiment_name='hyperopt_test_gradient_boosting'
        existing_experiment=mlflow.get_experiment_by_name(experiment_name)

        if existing_experiment is None:
                experiment_id = mlflow.create_experiment(name=experiment_name,artifact_location='hyperopt-test')
        else:
            experiment_id = existing_experiment.experiment_id
        mlflow.set_experiment(experiment_id=experiment_id) 

        trials=Trials()
        best_results=fmin(
            fn=partial(
                self.objective,
                xtrain=xtrain[:5000],
                ytrain=ytrain[:5000],
                xtest=xtest[:5000],
                ytest=ytest[:5000]
            ),
            space=search_space,
            algo=tpe.suggest,
            max_evals=10, # change to config
            trials=trials
        )
        return best_results,trials

    def register_best_model(self,best_results,trials):
        best_index=np.argmin([trial['result']['loss'] for trial in trials.trials])
        best_model=trials.trials[best_index]['result']['loss']

        with mlflow.start_run() as run:
            mlflow.sklearn.log_model(sk_model=best_model,artifact_path='best_model')
            mlflow.log_params(trials.trials[best_index]['mics']['vals'])
            model_uri=f'runs:{run.info.run_id}/best_model'
            mlflow.register_model(model_uri=model_uri,name='best_model')




In [14]:
# pipeline 
try: 
    config=HyperOptParamsConfigManager()
    hyperopt_config=config.get_hyperopt_config()
    hyperopt_config_training=hyperOptTraining(config=hyperopt_config)
    best_results,trails=hyperopt_config_training.train()
    #hyperopt_config_training.register_best_model(best_results,trails)
except Exception as e:
    raise e 

[2025-03-23 22:09:06,411: INFO :common : yaml file: config\config.yaml loaded successfully]
[2025-03-23 22:09:06,416: INFO :common : yaml file: params.yaml loaded successfully]
[2025-03-23 22:09:06,420: INFO :common : yaml file: schema.yaml loaded successfully]
[2025-03-23 22:09:09,297: INFO :_client : HTTP Request: GET https://dagshub.com/api/v1/repos/Immortal-Pi/SurgeSense "HTTP/1.1 200 OK"]


[2025-03-23 22:09:09,301: INFO :helpers : Initialized MLflow to track repo "Immortal-Pi/SurgeSense"]


[2025-03-23 22:09:09,305: INFO :helpers : Repository Immortal-Pi/SurgeSense initialized!]
  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?][2025-03-23 22:09:09,883: INFO :tpe : build_posterior_wrapper took 0.002952 seconds]
[2025-03-23 22:09:09,884: INFO :tpe : TPE using 0 trials]


  y = column_or_1d(y, warn=True)  # TODO: Is this still required?



🏃 View run bold-ox-780 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4/runs/f12651ebb0ff4353b55a2c42f2bd4f23

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4

 10%|█         | 1/10 [00:06<00:55,  6.14s/trial, best loss: 2.0659955427422556][2025-03-23 22:09:16,023: INFO :tpe : build_posterior_wrapper took 0.002006 seconds]
[2025-03-23 22:09:16,025: INFO :tpe : TPE using 1/1 trials with best loss 2.065996]


  y = column_or_1d(y, warn=True)  # TODO: Is this still required?



🏃 View run calm-shrew-435 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4/runs/96068d0f03814f489aff02ab4e5ff119

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4

 20%|██        | 2/10 [00:12<00:47,  5.98s/trial, best loss: 2.0659955427422556][2025-03-23 22:09:21,882: INFO :tpe : build_posterior_wrapper took 0.000000 seconds]
[2025-03-23 22:09:21,883: INFO :tpe : TPE using 2/2 trials with best loss 2.065996]


  y = column_or_1d(y, warn=True)  # TODO: Is this still required?



🏃 View run bright-dove-944 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4/runs/bff61461c2ae4931af4476bda9dd0b4d

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4

 30%|███       | 3/10 [00:28<01:14, 10.62s/trial, best loss: 2.0659955427422556][2025-03-23 22:09:38,035: INFO :tpe : build_posterior_wrapper took 0.001283 seconds]
[2025-03-23 22:09:38,036: INFO :tpe : TPE using 3/3 trials with best loss 2.065996]


  y = column_or_1d(y, warn=True)  # TODO: Is this still required?



🏃 View run redolent-whale-11 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4/runs/cbd1c5a0816842b99a65ec02c72dc39f

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4

 40%|████      | 4/10 [00:43<01:15, 12.64s/trial, best loss: 2.0659955427422556][2025-03-23 22:09:53,755: INFO :tpe : build_posterior_wrapper took 0.000998 seconds]
[2025-03-23 22:09:53,756: INFO :tpe : TPE using 4/4 trials with best loss 2.065996]


  y = column_or_1d(y, warn=True)  # TODO: Is this still required?



🏃 View run receptive-shrew-262 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4/runs/05fb52c73dd144a4bf20c4fa563c89ab

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4

 50%|█████     | 5/10 [01:07<01:22, 16.42s/trial, best loss: 2.0659955427422556][2025-03-23 22:10:16,894: INFO :tpe : build_posterior_wrapper took 0.003995 seconds]
[2025-03-23 22:10:16,895: INFO :tpe : TPE using 5/5 trials with best loss 2.065996]


  y = column_or_1d(y, warn=True)  # TODO: Is this still required?



🏃 View run carefree-doe-927 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4/runs/4eacd71f43394c57aa2b3dfd351fdd52

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4

 60%|██████    | 6/10 [01:13<00:51, 13.00s/trial, best loss: 2.0659955427422556][2025-03-23 22:10:23,248: INFO :tpe : build_posterior_wrapper took 0.000993 seconds]
[2025-03-23 22:10:23,250: INFO :tpe : TPE using 6/6 trials with best loss 2.065996]


  y = column_or_1d(y, warn=True)  # TODO: Is this still required?



🏃 View run debonair-flea-388 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4/runs/ccc87e71e75c4a4f9b5d928708a8bb4d

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4

 70%|███████   | 7/10 [01:17<00:29,  9.97s/trial, best loss: 2.0659955427422556][2025-03-23 22:10:26,977: INFO :tpe : build_posterior_wrapper took 0.000930 seconds]
[2025-03-23 22:10:26,979: INFO :tpe : TPE using 7/7 trials with best loss 2.065996]


  y = column_or_1d(y, warn=True)  # TODO: Is this still required?



🏃 View run blushing-ape-138 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4/runs/1bbae7ad89f04469bf0688ddcef981a4

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4

 80%|████████  | 8/10 [01:26<00:19,  9.82s/trial, best loss: 2.0659955427422556][2025-03-23 22:10:36,472: INFO :tpe : build_posterior_wrapper took 0.001014 seconds]
[2025-03-23 22:10:36,473: INFO :tpe : TPE using 8/8 trials with best loss 2.065996]


  y = column_or_1d(y, warn=True)  # TODO: Is this still required?



🏃 View run beautiful-toad-512 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4/runs/af20f2246b4a452c9292ad16ae990c24

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4

 90%|█████████ | 9/10 [01:36<00:09,  9.78s/trial, best loss: 2.0659955427422556][2025-03-23 22:10:46,155: INFO :tpe : build_posterior_wrapper took 0.001996 seconds]
[2025-03-23 22:10:46,159: INFO :tpe : TPE using 9/9 trials with best loss 2.065996]


  y = column_or_1d(y, warn=True)  # TODO: Is this still required?



🏃 View run fearless-trout-172 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4/runs/d5ecd833ec834c32bb52021ac09e704b

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4

100%|██████████| 10/10 [01:51<00:00, 11.17s/trial, best loss: 2.0659955427422556]


In [None]:
data=read_yaml(PARAMS_FILE_PATH)
data.Hyperopt_params.XGBoostRegressor.n_estimators[0]

[2025-03-23 18:15:35,418: INFO :common : yaml file: params.yaml loaded successfully]


10

In [None]:
hyperopt_config_training.register_best_model(best_results,trails)

NameError: name 'hyperopt_config_training' is not defined