In [1]:
import os
# Step one directory up so the relative "artifacts/..." paths used by later cells resolve.
# NOTE(review): presumably this notebook lives in a sub-folder of the project root — confirm.
# The move is relative to the current directory, so running this cell a second time
# climbs one more level; restart the kernel before re-running.
os.chdir('../')
# Show the resulting working directory as the cell output.
%pwd

'c:\\Users\\EI13136\\Documents\\mlops'

In [2]:
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost.sklearn import XGBClassifier
from sklearn.naive_bayes import GaussianNB
import lightgbm as lgb
from sklearn.ensemble import AdaBoostClassifier

In [3]:
# Read the transformed train and test splits (train first, then test).
# NOTE(review): test_df is not referenced by any cell visible in this notebook.
train_df, test_df = map(
    pd.read_csv,
    (
        "artifacts/data_transformation/train.csv",
        "artifacts/data_transformation/test.csv",
    ),
)

In [4]:
# Target labels are the 'Satisfaction' column; every other column is a feature.
y_train = train_df['Satisfaction']
x_train = train_df.drop('Satisfaction', axis=1)

In [5]:
import joblib

# Candidate classifiers, all with library-default hyperparameters.
# The dictionary key doubles as the file name of the saved model.
models = {
    "Logistic Regression": LogisticRegression(),
    "KNeighbors Classifier": KNeighborsClassifier(),
    "Decision Tree Classifier": DecisionTreeClassifier(),
    "Naive Bayes": GaussianNB(),
    "Support Vector Machine": SVC(),
    "Gradient Descent": SGDClassifier(),
    "Random Forest Classifier": RandomForestClassifier(),
    "Xgboost Classifier": XGBClassifier(),
    "Adaboost Classifier": AdaBoostClassifier(),
    "Gradient Boosting Classifier": GradientBoostingClassifier(),
    "Lightgbm": lgb.LGBMClassifier()
}

# joblib.dump does not create missing parent directories, so make sure the
# output folder exists before the first model is written.
os.makedirs('artifacts/models', exist_ok=True)

# Fit each model on the full training split and persist it right away, so a
# failure in a later model does not lose the ones already trained.
for model_name, model_instance in models.items():
    model_instance.fit(x_train, y_train)

    model_path = f'artifacts/models/{model_name}.joblib'
    joblib.dump(model_instance, model_path)
    # Report the path the file was actually written to.
    print(f"Model '{model_name}' saved to '{model_path}'.")

Model 'Logistic Regression' saved to 'models/Logistic Regression.joblib'.
Model 'KNeighbors Classifier' saved to 'models/KNeighbors Classifier.joblib'.
Model 'Decision Tree Classifier' saved to 'models/Decision Tree Classifier.joblib'.
Model 'Naive Bayes' saved to 'models/Naive Bayes.joblib'.
Model 'Support Vector Machine' saved to 'models/Support Vector Machine.joblib'.
Model 'Gradient Descent' saved to 'models/Gradient Descent.joblib'.
Model 'Random Forest Classifier' saved to 'models/Random Forest Classifier.joblib'.
Model 'Xgboost Classifier' saved to 'models/Xgboost Classifier.joblib'.




Model 'Adaboost Classifier' saved to 'models/Adaboost Classifier.joblib'.
Model 'Gradient Boosting Classifier' saved to 'models/Gradient Boosting Classifier.joblib'.
[LightGBM] [Info] Number of positive: 42406, number of negative: 55004
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002206 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 953
[LightGBM] [Info] Number of data points in the train set: 97410, number of used features: 27
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.435335 -> initscore=-0.260116
[LightGBM] [Info] Start training from score -0.260116
Model 'Lightgbm' saved to 'models/Lightgbm.joblib'.


In [55]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Settings consumed by the model-training stage.

    The dataclass is frozen, so fields cannot be reassigned after construction.
    The annotations are not enforced at runtime.
    NOTE(review): the values appear to be passed straight from the parsed YAML
    config, so they are presumably plain strings rather than Path objects — confirm.
    """
    # Directory the trained model files (*.joblib) are written into.
    root_dir: Path
    # CSV file read as the training set; it must contain the target column.
    train_data_path: Path
    # Name of the label column inside the training CSV.
    target_column: str

In [56]:
from airline_passenger_satisfaction.constants import *
from airline_passenger_satisfaction.utils.common import read_yaml, create_directories

In [57]:
class ConfigurationManager:
    """Loads the project's YAML settings and builds per-stage config objects."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Read the config, params and schema YAML files and create the artifacts root.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
            schema_filepath: Path of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        # Every stage writes below this folder, so create it up front.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Build the model-trainer settings and create its output directory.

        Returns:
            ModelTrainerConfig populated from the ``model_trainer`` section of
            the config file and the ``TARGET_COLUMN`` entry of the schema file.
        """
        trainer_section = self.config.model_trainer
        target_section = self.schema.TARGET_COLUMN

        # The trainer saves its models here; make sure the folder exists.
        create_directories([trainer_section.root_dir])

        return ModelTrainerConfig(
            root_dir=trainer_section.root_dir,
            train_data_path=trainer_section.train_data_path,
            target_column=target_section.name,
        )

In [58]:
import pandas as pd
import os
import joblib
from airline_passenger_satisfaction.logger import logger
from airline_passenger_satisfaction.exception import CustomException

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost.sklearn import XGBClassifier
from sklearn.naive_bayes import GaussianNB
import lightgbm as lgb
from sklearn.ensemble import AdaBoostClassifier

In [59]:
class ModelTrainer:
    """Fits a fixed set of default-parameter classifiers and saves each with joblib."""

    def __init__(self, config: "ModelTrainerConfig") -> None:
        """Store the training settings.

        Args:
            config: Settings object providing ``train_data_path``,
                ``target_column`` and ``root_dir``. It is required: the
                previous signature used ``=`` instead of ``:``, which made the
                config *class* the default value and deferred the failure to
                the first attribute access in ``models_trainer``.
        """
        self.config = config

    def models_trainer(self):
        """Train every candidate model on the training CSV and persist it.

        Reads ``config.train_data_path``, separates ``config.target_column``
        from the remaining columns, fits each classifier and writes it to
        ``<config.root_dir>/<model name>.joblib``. Each save is reported via
        both ``print`` and the project logger.
        """
        train_df = pd.read_csv(self.config.train_data_path)

        target_column = self.config.target_column
        x_train = train_df.drop(columns=[target_column])
        y_train = train_df[target_column]

        # The dictionary key doubles as the file name of the saved model.
        models = {
            "Logistic Regression": LogisticRegression(),
            "KNeighbors Classifier": KNeighborsClassifier(),
            "Decision Tree Classifier": DecisionTreeClassifier(),
            "Naive Bayes": GaussianNB(),
            "Support Vector Machine": SVC(),
            "Gradient Descent": SGDClassifier(),
            "Random Forest Classifier": RandomForestClassifier(),
            "Xgboost Classifier": XGBClassifier(),
            "Adaboost Classifier": AdaBoostClassifier(),
            "Gradient Boosting Classifier": GradientBoostingClassifier(),
            "Lightgbm": lgb.LGBMClassifier()
            }

        for model_name, model_instance in models.items():
            model_instance.fit(x_train, y_train)

            # Build the destination once so the file written and the path
            # reported can never drift apart.
            model_path = f'{self.config.root_dir}/{model_name}.joblib'
            joblib.dump(model_instance, model_path)

            message = f"Model '{model_name}' saved to '{model_path}'."
            print(message)
            logger.info(message)

In [60]:
import sys

# Run the model-training stage end to end: load the settings, build the
# trainer config, then fit and save every model.
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    model_trainer = ModelTrainer(config=model_trainer_config)
    model_trainer.models_trainer()
except Exception as e:
    # Chain explicitly so the traceback reports the original error as the
    # direct cause of the project-specific exception.
    raise CustomException(e, sys) from e

[2024-02-12 15:29:55,798] [INFO] [Airline Passenger Reviews Logger] [common] : yaml file config\config.yaml loaded successfully
[2024-02-12 15:29:55,799] [INFO] [Airline Passenger Reviews Logger] [common] : yaml file params.yaml loaded successfully
[2024-02-12 15:29:55,801] [INFO] [Airline Passenger Reviews Logger] [common] : yaml file schema.yaml loaded successfully
[2024-02-12 15:29:55,803] [INFO] [Airline Passenger Reviews Logger] [common] : Created directory at : artifacts
[2024-02-12 15:29:55,804] [INFO] [Airline Passenger Reviews Logger] [common] : Created directory at : artifacts/model_trainer
Model 'Logistic Regression' saved to 'artifacts/model_trainer/Logistic Regression.joblib'.
[2024-02-12 15:29:56,190] [INFO] [Airline Passenger Reviews Logger] [1177539164] : Model 'Logistic Regression' saved to 'artifacts/model_trainer/Logistic Regression.joblib'.
Model 'KNeighbors Classifier' saved to 'artifacts/model_trainer/KNeighbors Classifier.joblib'.
[2024-02-12 15:29:56,226] [INFO]



Model 'Adaboost Classifier' saved to 'artifacts/model_trainer/Adaboost Classifier.joblib'.
[2024-02-12 15:31:50,870] [INFO] [Airline Passenger Reviews Logger] [1177539164] : Model 'Adaboost Classifier' saved to 'artifacts/model_trainer/Adaboost Classifier.joblib'.
Model 'Gradient Boosting Classifier' saved to 'artifacts/model_trainer/Gradient Boosting Classifier.joblib'.
[2024-02-12 15:32:01,798] [INFO] [Airline Passenger Reviews Logger] [1177539164] : Model 'Gradient Boosting Classifier' saved to 'artifacts/model_trainer/Gradient Boosting Classifier.joblib'.
[LightGBM] [Info] Number of positive: 42406, number of negative: 55004
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002302 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 953
[LightGBM] [Info] Number of data points in the train set: 97410, number of used features: 27
[LightGBM] 