# testing the best model

In [1]:
import os 
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_absolute_error,r2_score,mean_squared_error
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from urllib.parse import urlparse
import mlflow 
import mlflow.sklearn
import numpy as np
from SurgeSense.constants import * 
from SurgeSense.utils.common import read_yaml, create_directories

In [2]:
%pwd 

'd:\\pythonProjects\\SurgeSense\\research'

In [3]:
import os 
os.chdir('../')
%pwd

'd:\\pythonProjects\\SurgeSense'

In [4]:
from dataclasses import dataclass

@dataclass(frozen=True)
class HyperOptParamsGradientBoosting:
    root_dir: Path
    train_data_path: Path 
    test_data_path: Path
    model_name: str
    n_estimators: list 
    max_depth: list 
    learning_rate: str 
    target_column: str

In [5]:
# configuration 
class HyperOptParamsConfigManagerGradientBoosting:
    def __init__(
            self,
            config_file_path=CONFIG_FILE_PATH,
            params_file_path=PARAMS_FILE_PATH,
            schema_file_path=SCHEMA_FILE_PATH,
            ):
        self.config=read_yaml(config_file_path)
        self.params=read_yaml(params_file_path)
        self.schema=read_yaml(schema_file_path)
    
    def get_hyperopt_config(self)->HyperOptParamsGradientBoosting:
        config=self.config.model_trainer 
        params=self.params.Hyperopt_params.GRADIENT_BOOSTING
        schema=self.schema.TARGET_COLUMN

        hypoeropt_config=HyperOptParamsGradientBoosting(
            root_dir=config.root_dir,
            train_data_path=config.train_data_path,
            test_data_path=config.test_data_path,
            model_name=config.model_name,
            n_estimators=params.n_estimators,
            max_depth=params.max_depth,
            learning_rate=params.learning_rate,
            target_column=schema.name
        )
        return hypoeropt_config

In [6]:
# components
from hyperopt import STATUS_OK, hp, fmin, tpe, Trials
import dagshub
from functools import partial 
import mlflow
import pandas as pd 
import os 
from SurgeSense import logger
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
import joblib
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
import pandas as pd 
import numpy as np

class hyperOptTraining:
    def __init__(self,config=HyperOptParamsGradientBoosting):
          self.config=config


    def evaluation_metrics(actual,pred):
        rmse=np.sqrt(mean_squared_error(actual,pred))
        mae=mean_absolute_error(actual,pred)
        r2=r2_score(actual,pred)
        return rmse, mae, r2 


    def create_pipeline(self):
        
        categorical_columns=['cab_type','destination','source','name']
        numerical_columns=['distance','surge_multiplier','temp','clouds','pressure','rain','humidity','wind','day','hour','month']

        numerical_preprocessor=Pipeline(
            steps=[
                ('imputation_menu',SimpleImputer(missing_values=np.nan,strategy='median')),
                ('scalar',StandardScaler())
            ]
        )

        categorical_preprocessor=Pipeline(
            steps=[
                ('imputation_constant',SimpleImputer(strategy='most_frequent')),
                ('encode',OneHotEncoder(handle_unknown='ignore'))
            ]
        )

        preprocessor=ColumnTransformer(
            transformers=[
                ('categorical_columns',categorical_preprocessor,categorical_columns),
                ('numerical_columns',numerical_preprocessor,numerical_columns)
            ]
        )

        
        pipe=Pipeline(
            steps=[
                ('preprocessor',preprocessor),
                ('model', GradientBoostingRegressor())
            ]
        )
        

        return pipe

    def evaluation_metrics(self,actual,pred):
        rmse=np.sqrt(mean_squared_error(actual,pred))
        mae=mean_absolute_error(actual,pred)
        r2=r2_score(actual,pred)
        return rmse, mae, r2

    def objective(self,params,xtrain,ytrain,xtest,ytest):
        with mlflow.start_run():
            mlflow.set_tag('model','XGBoostRegressor')

            pipe=self.create_pipeline()
            model=pipe.set_params(**params)
            model.fit(xtrain,ytrain)
            ypred=model.predict(xtest)
            mlflow.log_params(model.get_params())
            rmse,mae,r2=self.evaluation_metrics(ytest,ypred)
            mlflow.log_metrics({'rmse':rmse,'mse': mae, 'r2':r2})
        return {'loss':rmse, 'status':STATUS_OK, 'model':model}


    def train(self):
        train_data=pd.read_csv(self.config.train_data_path)
        test_data=pd.read_csv(self.config.test_data_path)

        xtrain=train_data.drop([self.config.target_column],axis=1)
        xtest=test_data.drop([self.config.target_column],axis=1)
        ytrain=train_data[[self.config.target_column]]
        ytest=test_data[[self.config.target_column]]

        search_space={
            'model__n_estimators':hp.uniformint('n_estimators',self.config.n_estimators[0],self.config.n_estimators[1]),
            'model__max_depth':hp.uniformint('max_depth',self.config.max_depth[0],self.config.max_depth[1]),
            'model__learning_rate':hp.uniform('learning_rate',self.config.learning_rate[0],self.config.learning_rate[1])
        }
        dagshub.init(repo_owner='Immortal-Pi',repo_name='SurgeSense',mlflow=True)
        experiment_name='hyperopt_test_gradient_boosting'
        existing_experiment=mlflow.get_experiment_by_name(experiment_name)

        if existing_experiment is None:
                experiment_id = mlflow.create_experiment(name=experiment_name,artifact_location='hyperopt-test')
        else:
            experiment_id = existing_experiment.experiment_id
        mlflow.set_experiment(experiment_id=experiment_id) 

        trials=Trials()
        best_results=fmin(
            fn=partial(
                self.objective,
                xtrain=xtrain[:1000],
                ytrain=ytrain[:1000],
                xtest=xtest[:1000],
                ytest=ytest[:1000]
            ),
            space=search_space,
            algo=tpe.suggest,
            max_evals=10, # change to config
            trials=trials
        )
        return best_results,trials

    def register_best_model(self,best_results,trials):
        best_index=np.argmin([trial['result']['loss'] for trial in trials.trials])
        best_model=trials.trials[best_index]['result']['model']

        with mlflow.start_run() as run:
            mlflow.sklearn.log_model(best_model,artifact_path='model')
            mlflow.log_params(trials.trials[best_index]['misc']['vals'])
            model_uri=f'runs:/{run.info.run_id}/best_model'
            mlflow.register_model(model_uri=model_uri,name='best_model')




In [7]:
# pipeline 
try: 
    config=HyperOptParamsConfigManagerGradientBoosting()
    hyperopt_config=config.get_hyperopt_config()
    hyperopt_config_training=hyperOptTraining(config=hyperopt_config)
    best_results,trails=hyperopt_config_training.train()
    hyperopt_config_training.register_best_model(best_results,trails)
except Exception as e:
    raise e 

[2025-03-24 12:03:21,168: INFO :common : yaml file: config\config.yaml loaded successfully]
[2025-03-24 12:03:21,173: INFO :common : yaml file: params.yaml loaded successfully]
[2025-03-24 12:03:21,178: INFO :common : yaml file: schema.yaml loaded successfully]
[2025-03-24 12:03:23,897: INFO :_client : HTTP Request: GET https://dagshub.com/api/v1/user "HTTP/1.1 200 OK"]


[2025-03-24 12:03:23,907: INFO :helpers : Accessing as Immortal-Pi]
[2025-03-24 12:03:24,109: INFO :_client : HTTP Request: GET https://dagshub.com/api/v1/repos/Immortal-Pi/SurgeSense "HTTP/1.1 200 OK"]
[2025-03-24 12:03:24,227: INFO :_client : HTTP Request: GET https://dagshub.com/api/v1/user "HTTP/1.1 200 OK"]


[2025-03-24 12:03:24,232: INFO :helpers : Initialized MLflow to track repo "Immortal-Pi/SurgeSense"]


[2025-03-24 12:03:24,235: INFO :helpers : Repository Immortal-Pi/SurgeSense initialized!]
  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?][2025-03-24 12:03:24,585: INFO :tpe : build_posterior_wrapper took 0.001616 seconds]
[2025-03-24 12:03:24,587: INFO :tpe : TPE using 0 trials]


  y = column_or_1d(y, warn=True)  # TODO: Is this still required?



🏃 View run melodic-donkey-778 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4/runs/243488ca18884cd4a37e4fbef09cf4fb

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4

 10%|█         | 1/10 [00:11<01:43, 11.54s/trial, best loss: 2.9903814644606768][2025-03-24 12:03:36,127: INFO :tpe : build_posterior_wrapper took 0.001000 seconds]
[2025-03-24 12:03:36,127: INFO :tpe : TPE using 1/1 trials with best loss 2.990381]


  y = column_or_1d(y, warn=True)  # TODO: Is this still required?



🏃 View run brawny-gnu-531 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4/runs/fdcdd2a6f193402f890771d7745fc7ad

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4

 20%|██        | 2/10 [00:27<01:53, 14.19s/trial, best loss: 2.965866192689635] [2025-03-24 12:03:52,176: INFO :tpe : build_posterior_wrapper took 0.005964 seconds]
[2025-03-24 12:03:52,179: INFO :tpe : TPE using 2/2 trials with best loss 2.965866]


  y = column_or_1d(y, warn=True)  # TODO: Is this still required?



🏃 View run treasured-carp-15 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4/runs/37b23673363649958779d4ce211c0caf

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4

 30%|███       | 3/10 [00:43<01:44, 14.99s/trial, best loss: 2.965866192689635][2025-03-24 12:04:08,122: INFO :tpe : build_posterior_wrapper took 0.000982 seconds]
[2025-03-24 12:04:08,124: INFO :tpe : TPE using 3/3 trials with best loss 2.965866]


  y = column_or_1d(y, warn=True)  # TODO: Is this still required?



🏃 View run industrious-bass-95 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4/runs/2cd12d5bc20c437881ee5431fd893fb6

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4

 40%|████      | 4/10 [01:02<01:39, 16.58s/trial, best loss: 2.965866192689635][2025-03-24 12:04:27,130: INFO :tpe : build_posterior_wrapper took 0.001009 seconds]
[2025-03-24 12:04:27,131: INFO :tpe : TPE using 4/4 trials with best loss 2.965866]


  y = column_or_1d(y, warn=True)  # TODO: Is this still required?



🏃 View run incongruous-bear-930 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4/runs/4c58913ccd9b4c01bb66a0fc4f815887

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4

 50%|█████     | 5/10 [01:19<01:23, 16.74s/trial, best loss: 2.965866192689635][2025-03-24 12:04:44,159: INFO :tpe : build_posterior_wrapper took 0.000994 seconds]
[2025-03-24 12:04:44,160: INFO :tpe : TPE using 5/5 trials with best loss 2.965866]


  y = column_or_1d(y, warn=True)  # TODO: Is this still required?



🏃 View run dapper-gnat-390 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4/runs/5ae8c6d2a56949a1bc46880bb2610222

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4

 60%|██████    | 6/10 [01:37<01:08, 17.16s/trial, best loss: 2.965866192689635][2025-03-24 12:05:02,122: INFO :tpe : build_posterior_wrapper took 0.000908 seconds]
[2025-03-24 12:05:02,122: INFO :tpe : TPE using 6/6 trials with best loss 2.965866]


  y = column_or_1d(y, warn=True)  # TODO: Is this still required?



🏃 View run salty-squirrel-744 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4/runs/87d5f70ea07443bfa0dfee4c318f7b1a

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4

 70%|███████   | 7/10 [01:57<00:54, 18.10s/trial, best loss: 2.965866192689635][2025-03-24 12:05:22,154: INFO :tpe : build_posterior_wrapper took 0.001140 seconds]
[2025-03-24 12:05:22,155: INFO :tpe : TPE using 7/7 trials with best loss 2.965866]


  y = column_or_1d(y, warn=True)  # TODO: Is this still required?



🏃 View run kindly-koi-864 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4/runs/2ec8ca60b36f4f719f9de35609557889

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4

 80%|████████  | 8/10 [02:13<00:34, 17.42s/trial, best loss: 2.965866192689635][2025-03-24 12:05:38,125: INFO :tpe : build_posterior_wrapper took 0.000999 seconds]
[2025-03-24 12:05:38,126: INFO :tpe : TPE using 8/8 trials with best loss 2.965866]


  y = column_or_1d(y, warn=True)  # TODO: Is this still required?



🏃 View run bald-squid-533 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4/runs/2aa19a2278c44ac291b9f1d9a1f2d890

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4

 90%|█████████ | 9/10 [02:32<00:17, 17.92s/trial, best loss: 2.965866192689635][2025-03-24 12:05:57,156: INFO :tpe : build_posterior_wrapper took 0.002056 seconds]
[2025-03-24 12:05:57,158: INFO :tpe : TPE using 9/9 trials with best loss 2.965866]


  y = column_or_1d(y, warn=True)  # TODO: Is this still required?



🏃 View run classy-shrimp-548 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4/runs/f4828a616eb5464da1fad1911818ee18

🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4

100%|██████████| 10/10 [02:44<00:00, 16.46s/trial, best loss: 2.965866192689635]


Registered model 'best_model' already exists. Creating a new version of this model...
2025/03/24 12:06:26 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: best_model, version 9
Created version '9' of model 'best_model'.


🏃 View run dapper-auk-698 at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4/runs/65dcdc352c3d46b5870474b62a312c92
🧪 View experiment at: https://dagshub.com/Immortal-Pi/SurgeSense.mlflow/#/experiments/4
