In [1]:
%pwd

'c:\\Users\\Kshitij\\Downloads\\F1_CICD\\F1_ML_Ops_CI-CD\\research'

In [2]:
import os

In [3]:
# Move from the notebook folder to the project root so that the relative
# config/params/artifact paths used by later cells resolve.
# Guarded on the folder name so that re-running this cell does not climb a
# second level (a bare relative chdir is not idempotent).
# NOTE(review): assumes the notebook lives in a folder named "research", as
# shown by the %pwd output above - adjust if the folder is renamed.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'c:\\Users\\Kshitij\\Downloads\\F1_CICD\\F1_ML_Ops_CI-CD'

In [29]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of settings consumed by the model-training stage.

    The dataclass is frozen, so fields cannot be reassigned after creation.
    Annotations are not enforced at runtime; values are stored exactly as
    they are passed in.

    Attributes:
        root_dir: Directory where trained model files are written.
        train_data_path: Directory holding the train/test CSV splits.
        n_estimators: Number of boosting rounds (a whole number).
        early_stopping_rounds: Number of rounds without improvement after
            which training stops (a whole number).
        learning_rate: Boosting learning rate.
    """

    # NOTE(review): callers in this notebook pass the two path values straight
    # from the YAML config, so at runtime they may be plain strings - confirm.
    root_dir: Path
    train_data_path: Path
    # Round counts are integers; they were previously annotated as float.
    n_estimators: int
    early_stopping_rounds: int
    learning_rate: float
    

In [30]:
from F1_Stint_Prediction.constants import *
from F1_Stint_Prediction.utils.common import read_yaml, create_directories

In [31]:
class ConfigurationManager:
    """Reads the project's YAML files and builds per-stage config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Load config, params and schema, then create the artifacts root.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the hyper-parameter YAML.
            schema_filepath: Location of the schema YAML.
        """
        # Files are read in the same order as the parameters are declared.
        self.config, self.params, self.schema = map(
            read_yaml, (config_filepath, params_filepath, schema_filepath)
        )

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Build the ModelTrainerConfig for the training stage.

        Reads the ``model_trainer`` section of the config and the ``Xgboost``
        section of the params, creates the stage's root directory, and
        returns the values wrapped in a ModelTrainerConfig.
        """
        trainer_section = self.config.model_trainer
        xgb_params = self.params.Xgboost

        # The output directory is created before the config is handed out.
        create_directories([trainer_section.root_dir])

        return ModelTrainerConfig(
            root_dir=trainer_section.root_dir,
            train_data_path=trainer_section.train_data_path,
            n_estimators=xgb_params.n_estimators,
            early_stopping_rounds=xgb_params.early_stopping_rounds,
            learning_rate=xgb_params.learning_rate,
        )

In [32]:
import pandas as pd
import os
from F1_Stint_Prediction import logger
import xgboost as xgb
import joblib

In [35]:
import pandas as pd
import xgboost as xgb
import joblib
import os

class ModelTrainer:
    """Trains and saves the XGBoost models of the stint-prediction pipeline.

    Nine models are produced: one stint-count regressor, then one compound
    classifier and one stint-length regressor per stint index. Each fitted
    model is written to ``<config.root_dir>/<name>.joblib``.
    """

    # Stint indices that have their own compound / stint-length datasets.
    STINT_INDICES = range(1, 5)
    # Fixed seed passed to every estimator.
    RANDOM_STATE = 42
    # File-name prefixes of the four CSVs that make up one dataset, in the
    # order expected by the train_* methods.
    _SPLIT_PREFIXES = ("X_train", "y_train", "X_test", "y_test")

    def __init__(self, config: "ModelTrainerConfig"):
        """Store the training configuration.

        Args:
            config: Object exposing ``root_dir``, ``train_data_path``,
                ``n_estimators``, ``early_stopping_rounds`` and
                ``learning_rate``.
        """
        self.config = config

    def _load_split(self, suffix):
        """Read the four CSVs of one dataset.

        Args:
            suffix: Dataset suffix, e.g. ``"stint_count_0"`` or
                ``"compound_2"``; the files read are
                ``<prefix>_<suffix>.csv`` inside ``config.train_data_path``.

        Returns:
            Tuple ``(X_train, y_train, X_test, y_test)`` of DataFrames.
        """
        data_dir = self.config.train_data_path
        # os.path.join accepts both str and Path, unlike ``data_dir + "/..."``.
        return tuple(
            pd.read_csv(os.path.join(data_dir, f"{prefix}_{suffix}.csv"))
            for prefix in self._SPLIT_PREFIXES
        )

    def get_data(self):
        """Load every dataset and train the matching model.

        Despite the name, this method drives the whole training run: it
        loads each dataset and immediately calls the appropriate train_*
        method, which fits the model and saves it to disk.
        """
        # Stint count (regression)
        self.train_Regressor(*self._load_split("stint_count_0"), "stint_count")

        # Compound classification (per stint)
        for i in self.STINT_INDICES:
            self.train_Classifier(
                *self._load_split(f"compound_{i}"), f"Compound_Stint_{i}"
            )

        # Stint length regression (per stint)
        for i in self.STINT_INDICES:
            self.train_Regressor(
                *self._load_split(f"stint_len_{i}"), f"Stint_len_{i}"
            )

    def _fit_and_save(self, estimator_cls, X_train, y_train, X_test, y_test, name):
        """Fit one estimator with the configured hyper-parameters and save it.

        Args:
            estimator_cls: Estimator class to instantiate
                (``xgb.XGBClassifier`` or ``xgb.XGBRegressor``).
            X_train, y_train: Training features and target.
            X_test, y_test: Held-out features and target, used as the second
                evaluation set.
            name: Base name of the output file (``<name>.joblib``).
        """
        model = estimator_cls(
            n_estimators=self.config.n_estimators,
            early_stopping_rounds=self.config.early_stopping_rounds,
            learning_rate=self.config.learning_rate,
            random_state=self.RANDOM_STATE,
        )
        # Both splits are evaluated every round. Per the XGBoost docs the last
        # eval_set entry is the one that drives early stopping.
        # NOTE(review): that entry is the test split, so it doubles as the
        # validation set - scores on it will be optimistic.
        model.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)])

        joblib.dump(model, os.path.join(self.config.root_dir, f"{name}.joblib"))

    def train_Classifier(self, X_train, y_train, X_test, y_test, name):
        """Fit an XGBClassifier and save it as ``<name>.joblib`` in root_dir."""
        self._fit_and_save(xgb.XGBClassifier, X_train, y_train, X_test, y_test, name)

    def train_Regressor(self, X_train, y_train, X_test, y_test, name):
        """Fit an XGBRegressor and save it as ``<name>.joblib`` in root_dir."""
        self._fit_and_save(xgb.XGBRegressor, X_train, y_train, X_test, y_test, name)

In [36]:
# Run the training stage end to end: load the configuration, build the
# trainer, then load every dataset and fit/save the models.
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    # The trainer gets its own name instead of overwriting the config variable.
    model_trainer = ModelTrainer(config=model_trainer_config)
    model_trainer.get_data()
except Exception as e:
    # Record the failure through the project logger, then re-raise with the
    # original traceback intact.
    logger.exception(e)
    raise

[2025-03-24 23:13:49,248: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-03-24 23:13:49,256: INFO: common: yaml file: params.yaml loaded successfully]
[2025-03-24 23:13:49,268: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-03-24 23:13:49,272: INFO: common: created directory at: artifacts]
[2025-03-24 23:13:49,272: INFO: common: created directory at: artifacts/model_trainer]
[0]	validation_0-rmse:0.73375	validation_1-rmse:0.73504
[1]	validation_0-rmse:0.72727	validation_1-rmse:0.73098
[2]	validation_0-rmse:0.72098	validation_1-rmse:0.72708
[3]	validation_0-rmse:0.71488	validation_1-rmse:0.72331
[4]	validation_0-rmse:0.70895	validation_1-rmse:0.71939
[5]	validation_0-rmse:0.70320	validation_1-rmse:0.71591
[6]	validation_0-rmse:0.69762	validation_1-rmse:0.71226
[7]	validation_0-rmse:0.69222	validation_1-rmse:0.70875
[8]	validation_0-rmse:0.68696	validation_1-rmse:0.70564
[9]	validation_0-rmse:0.68187	validation_1-rmse:0.70255
[10]	validation_0-rms