In [2]:
import os

In [3]:
%pwd

'f:\\Files\\DS&ML\\Flight-Fare-Price-Prediction\\Exp'

In [4]:
os.chdir('../')
%pwd

'f:\\Files\\DS&ML\\Flight-Fare-Price-Prediction'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable settings consumed by the model-training stage.

    Instances are built by ``ConfigurationManager.get_model_training_config``
    and read by ``ModelTrainer``. ``frozen=True`` means fields cannot be
    reassigned after construction.

    NOTE(review): values are passed through from the parsed YAML without
    conversion, so the ``Path``-annotated fields may be plain strings at
    runtime -- confirm against ``read_yaml``.
    """
    # Directory for this stage's outputs; created before training and used as
    # the parent folder of the saved model file.
    root_dir: Path
    # Array file loaded with np.load; the last column is used as the target.
    train_data_path: Path
    # Array file loaded with np.load; must exist before training starts.
    test_data_path: Path
    # File name (joined onto root_dir) under which the fitted search is dumped.
    model_name: str
    # Target column name taken from the schema file.
    target_column: str
    # Passed as ``param_distributions`` to RandomizedSearchCV.
    random_search_params: dict
    # Number of parameter settings sampled by RandomizedSearchCV.
    n_iter: int
    # Passed as ``cv`` to RandomizedSearchCV.
    cv_folds: int
    # Passed as ``scoring`` to RandomizedSearchCV.
    scoring: str
    # Passed as ``n_jobs`` to RandomizedSearchCV.
    n_jobs: int

In [6]:
from mlproject.constants import *
from mlproject.utils.common import read_yaml,create_directories

In [7]:
class ConfigurationManager:
    """Reads the project's YAML files and hands out per-stage config objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        """Load config, params and schema, then create the artifacts root.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the hyper-parameter YAML.
            schema_filepath: Location of the data-schema YAML.
        """
        # Files are read in the order config -> params -> schema.
        self.config, self.params, self.schema = (
            read_yaml(yaml_path)
            for yaml_path in (config_filepath, params_filepath, schema_filepath)
        )

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])


    def get_model_training_config(self) -> ModelTrainerConfig:
        """Build the ``ModelTrainerConfig`` for the training stage.

        Reads the ``model_trainer`` section of the config, the
        ``GradientBoostingRegressor`` section of the params (its
        ``random_search`` and ``cross_validation`` sub-sections) and the
        schema's target column name. The stage's ``root_dir`` is created
        before the config object is assembled.

        Returns:
            ModelTrainerConfig: populated, frozen configuration object.
        """
        trainer_section = self.config.model_trainer
        gbr_section = self.params.GradientBoostingRegressor
        schema_section = self.schema
        search_space = gbr_section.random_search
        cv_section = gbr_section.cross_validation

        create_directories([trainer_section.root_dir])

        return ModelTrainerConfig(
            root_dir=trainer_section.root_dir,
            train_data_path=trainer_section.train_data_path,
            test_data_path=trainer_section.test_data_path,
            model_name=trainer_section.model_name,
            target_column=schema_section.target_column.name,
            random_search_params=search_space,
            cv_folds=cv_section.cv_folds,
            scoring=cv_section.scoring,
            n_jobs=cv_section.n_jobs,
            n_iter=cv_section.n_iter,
        )

In [8]:
from sklearn.ensemble import GradientBoostingRegressor
from mlproject import logger
import pandas as pd
import joblib
import numpy as np
import mlflow
import mlflow.sklearn
import dagshub
from sklearn.model_selection import RandomizedSearchCV

In [None]:
class ModelTrainer:
    """Tune a GradientBoostingRegressor with RandomizedSearchCV and track it in MLflow.

    Constructing an instance points MLflow at the project's DagsHub tracking
    server; ``train`` runs the search, logs every sampled candidate as a nested
    run, registers the best estimator and saves the fitted search locally.
    """

    def __init__(self, config: "ModelTrainerConfig"):
        """Store the config and initialise DagsHub / MLflow tracking.

        Args:
            config: Training-stage settings (paths, search space, CV options).
        """
        self.config = config
        dagshub.init(repo_owner="JavithNaseem-J", repo_name="Flight-Fare-Price-Prediction")
        mlflow.set_tracking_uri("https://dagshub.com/JavithNaseem-J/Flight-Fare-Price-Prediction.mlflow")
        mlflow.set_experiment("Flight-Fare-Price-Prediction")

    @staticmethod
    def _require_file(path, label):
        """Log an error and raise FileNotFoundError when ``path`` does not exist."""
        if not os.path.exists(path):
            logger.error(f"{label} preprocessed file not found at: {path}")
            raise FileNotFoundError(f"{label} preprocessed file not found")

    def train(self, random_state=None):
        """Run the randomized hyper-parameter search and persist the result.

        Args:
            random_state: Optional seed forwarded to both the estimator and the
                search so a run can be reproduced. ``None`` (the default) keeps
                the previous unseeded behaviour.

        Raises:
            FileNotFoundError: If the train or test array file is missing.
        """
        cfg = self.config

        # Validate both inputs up front so a missing file fails before any
        # data is loaded or any MLflow run is opened.
        self._require_file(cfg.train_data_path, "Train")
        self._require_file(cfg.test_data_path, "Test")

        # NOTE(security): allow_pickle=True executes arbitrary code when the
        # file is malicious; only load arrays produced by this project.
        train_data = np.load(cfg.train_data_path, allow_pickle=True)
        test_data = np.load(cfg.test_data_path, allow_pickle=True)

        logger.info('Loaded train and test data')
        logger.info(f'Train data shape: {train_data.shape}')
        logger.info(f'Test data shape: {test_data.shape}')

        # The last column is the target; everything before it is a feature.
        # The test array is only checked and reported here, not used for fitting.
        train_x = train_data[:, :-1]
        train_y = train_data[:, -1]

        mlflow.sklearn.autolog()
        with mlflow.start_run(run_name="RandomizedSearchCV_Tuning"):
            mlflow.set_tag("run_type", "hyperparameter_tuning")
            mlflow.set_tag("model", "GradientBoostingRegressor")

            logger.info('Initializing Randomized Search')

            gradient_model = GradientBoostingRegressor(random_state=random_state)

            logger.info('>>>>>>>>>> ......Performing Randomized Search - this may take some time...... <<<<<<<<<')

            random_search = RandomizedSearchCV(
                estimator=gradient_model,
                param_distributions=cfg.random_search_params,
                n_iter=cfg.n_iter,
                cv=cfg.cv_folds,
                scoring=cfg.scoring,
                verbose=1,
                n_jobs=cfg.n_jobs,
                return_train_score=True,
                random_state=random_state,
            )
            random_search.fit(train_x, train_y)

            results = random_search.cv_results_
            trials = zip(
                results["params"],
                results["mean_test_score"],
                results["std_test_score"],
            )
            # One nested run per sampled candidate. The metric keys say
            # "accuracy" for continuity with existing runs, but the values are
            # whatever metric `scoring` is configured to.
            for trial_number, (params, mean_score, std_score) in enumerate(trials, start=1):
                with mlflow.start_run(nested=True, run_name=f"Trial_{trial_number}"):
                    mlflow.set_tag("trial_number", trial_number)
                    mlflow.log_params(params)
                    mlflow.log_metric("mean_accuracy", mean_score)
                    mlflow.log_metric("std_accuracy", std_score)
                    logger.info(f"Trial {trial_number}: params={params}, mean_accuracy={mean_score:.4f}, std_accuracy={std_score:.4f}")

            best_model = random_search.best_estimator_
            # The model argument of mlflow.sklearn.log_model is `sk_model`;
            # the previous `gradient_model=` keyword raised TypeError.
            mlflow.sklearn.log_model(
                sk_model=best_model,
                artifact_path="gradient_model",
                registered_model_name="Flight Fare Prediction"
            )
            logger.info("Best model logged to MLflow")

            # The whole fitted search object is saved (not only the best
            # estimator), unchanged from before so existing loaders keep working.
            model_path = os.path.join(cfg.root_dir, cfg.model_name)
            joblib.dump(random_search, model_path)
            logger.info(f'Model saved locally at {model_path}')

In [10]:
# Drive the training stage: load configuration, build the trainer, run it.
# Failures are logged with their full traceback (logger.exception) so the
# cause is visible in the cell output instead of only a one-line message.
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_training_config()
    # Keep the trainer under its own name so `model_trainer_config` still
    # refers to the configuration object after this cell has run.
    model_trainer = ModelTrainer(config=model_trainer_config)
    model_trainer.train()

except FileNotFoundError as e:
    logger.exception(f"File not found: {e}")
except KeyError as e:
    logger.exception(f"Missing key in configuration: {e}")
except Exception as e:
    logger.exception(f"Unexpected error: {e}")

[2025-04-24 20:16:46,341: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-04-24 20:16:46,341: INFO: common: yaml file: params.yaml loaded successfully]
[2025-04-24 20:16:46,356: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-04-24 20:16:46,358: INFO: common: created directory at: artifacts]
[2025-04-24 20:16:46,358: INFO: common: created directory at: artifacts/model_trainer]
[2025-04-24 20:16:46,736: INFO: _client: HTTP Request: GET https://dagshub.com/api/v1/user "HTTP/1.1 200 OK"]


[2025-04-24 20:16:46,744: INFO: helpers: Accessing as JavithNaseem-J]
[2025-04-24 20:16:47,118: INFO: _client: HTTP Request: GET https://dagshub.com/api/v1/repos/JavithNaseem-J/Flight-Fare-Price-Prediction "HTTP/1.1 200 OK"]
[2025-04-24 20:16:47,452: INFO: _client: HTTP Request: GET https://dagshub.com/api/v1/user "HTTP/1.1 200 OK"]


[2025-04-24 20:16:47,452: INFO: helpers: Initialized MLflow to track repo "JavithNaseem-J/Flight-Fare-Price-Prediction"]


[2025-04-24 20:16:47,452: INFO: helpers: Repository JavithNaseem-J/Flight-Fare-Price-Prediction initialized!]
[2025-04-24 20:16:47,920: INFO: 1598331663: Loaded train and test data]
[2025-04-24 20:16:47,921: INFO: 1598331663: Train data shape: (42750, 10)]
[2025-04-24 20:16:47,922: INFO: 1598331663: Test data shape: (14250, 10)]
[2025-04-24 20:16:49,768: INFO: 1598331663: Initializing Randomized Search]
[2025-04-24 20:16:49,769: INFO: 1598331663: >>>>>>>>>> ......Performing Randomized Search - this may take some time...... <<<<<<<<<]
Fitting 5 folds for each of 10 candidates, totalling 50 fits


20 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
14 fits failed with the following error:
Traceback (most recent call last):
  File "f:\ProgramFiles\anaconda3\envs\flight-fare\lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "f:\ProgramFiles\anaconda3\envs\flight-fare\lib\site-packages\sklearn\base.py", line 1382, in wrapper
    estimator._validate_params()
  File "f:\ProgramFiles\anaconda3\envs\flight-fare\lib\site-packages\sklearn\base.py", line 436, in _validate_params
    validate_parameter_constraints(
  File "f:\ProgramFiles\anaconda3\envs\flight-fare\lib\site-packages\sklearn\utils\_param_validation.py

🏃 View run clean-ray-155 at: https://dagshub.com/JavithNaseem-J/Flight-Fare-Price-Prediction.mlflow/#/experiments/0/runs/624659eae8d1435fa27363ab8072d02b
🧪 View experiment at: https://dagshub.com/JavithNaseem-J/Flight-Fare-Price-Prediction.mlflow/#/experiments/0
🏃 View run respected-koi-57 at: https://dagshub.com/JavithNaseem-J/Flight-Fare-Price-Prediction.mlflow/#/experiments/0/runs/09b57cc5de1d45109fe36f8217ba6473
🧪 View experiment at: https://dagshub.com/JavithNaseem-J/Flight-Fare-Price-Prediction.mlflow/#/experiments/0
🏃 View run powerful-bee-841 at: https://dagshub.com/JavithNaseem-J/Flight-Fare-Price-Prediction.mlflow/#/experiments/0/runs/edcbf0cbabe44f9f9ad20068324df96b
🧪 View experiment at: https://dagshub.com/JavithNaseem-J/Flight-Fare-Price-Prediction.mlflow/#/experiments/0
🏃 View run placid-snipe-801 at: https://dagshub.com/JavithNaseem-J/Flight-Fare-Price-Prediction.mlflow/#/experiments/0/runs/8020473755184160987a4255ef52ef2a
🧪 View experiment at: https://dagshub.com/Javith