In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import VotingClassifier
import joblib

In [6]:
data_path = "Real_Time_Features_Fleet_Maintenance_Dataset.csv"
data = pd.read_csv(data_path)

In [7]:

#Preprocessing
X = data.drop(columns=["Vehicle_ID", "Maintenance_Status"])

In [8]:
label_encoder = LabelEncoder()
data["Maintenance_Status"] = label_encoder.fit_transform(data["Maintenance_Status"])
y = data["Maintenance_Status"]

In [9]:
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

In [11]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
lgbm = LGBMClassifier(random_state=42)

define_grid = {
    'xgb': {
        'n_estimators': [100, 200],
        'learning_rate': [0.01, 0.1],
        'max_depth': [3, 5, 7],
        'subsample': [0.8, 1.0]
    },
    'lgbm': {
        'n_estimators': [100, 200],
        'learning_rate': [0.01, 0.1],
        'max_depth': [3, 5, 7]
    }
}

In [13]:
xgb_grid_search = GridSearchCV(estimator=xgb, param_grid=define_grid['xgb'], scoring='accuracy', cv=3, verbose=2)
xgb_grid_search.fit(X_train, y_train)
best_xgb = xgb_grid_search.best_estimator_

lgbm_grid_search = GridSearchCV(estimator=lgbm, param_grid=define_grid['lgbm'], scoring='accuracy', cv=3, verbose=2)
lgbm_grid_search.fit(X_train, y_train)
best_lgbm = lgbm_grid_search.best_estimator_

Fitting 3 folds for each of 24 candidates, totalling 72 fits


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8; total time=   0.2s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8; total time=   0.2s
[CV] END learning_rate=0.01, max_depth=3, n_estimators=100, subsample=0.8; total time=   0.0s


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0; total time=   0.1s
[CV] END learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0; total time=   0.1s


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=3, n_estimators=100, subsample=1.0; total time=   0.1s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8; total time=   0.3s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8; total time=   0.2s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=3, n_estimators=200, subsample=0.8; total time=   0.2s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1.0; total time=   0.2s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1.0; total time=   0.2s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=3, n_estimators=200, subsample=1.0; total time=   0.2s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8; total time=   0.3s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8; total time=   0.2s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=5, n_estimators=100, subsample=0.8; total time=   0.3s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1.0; total time=   0.3s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1.0; total time=   0.2s
[CV] END learning_rate=0.01, max_depth=5, n_estimators=100, subsample=1.0; total time=   0.1s


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8; total time=   0.4s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8; total time=   0.4s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=5, n_estimators=200, subsample=0.8; total time=   0.5s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1.0; total time=   0.5s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1.0; total time=   0.4s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=5, n_estimators=200, subsample=1.0; total time=   0.3s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=7, n_estimators=100, subsample=0.8; total time=   0.3s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=7, n_estimators=100, subsample=0.8; total time=   0.3s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=7, n_estimators=100, subsample=0.8; total time=   0.3s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=7, n_estimators=100, subsample=1.0; total time=   0.3s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=7, n_estimators=100, subsample=1.0; total time=   0.3s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=7, n_estimators=100, subsample=1.0; total time=   0.3s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=7, n_estimators=200, subsample=0.8; total time=   0.7s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=7, n_estimators=200, subsample=0.8; total time=   0.5s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=7, n_estimators=200, subsample=0.8; total time=   0.7s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=7, n_estimators=200, subsample=1.0; total time=   0.6s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=7, n_estimators=200, subsample=1.0; total time=   0.6s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.01, max_depth=7, n_estimators=200, subsample=1.0; total time=   0.7s
[CV] END learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8; total time=   0.1s


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8; total time=   0.1s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=3, n_estimators=100, subsample=0.8; total time=   0.2s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time=   0.2s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time=   0.2s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=3, n_estimators=100, subsample=1.0; total time=   0.2s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8; total time=   0.5s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8; total time=   0.4s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=3, n_estimators=200, subsample=0.8; total time=   0.4s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1.0; total time=   0.4s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1.0; total time=   0.4s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=3, n_estimators=200, subsample=1.0; total time=   0.4s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8; total time=   0.3s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8; total time=   0.3s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=5, n_estimators=100, subsample=0.8; total time=   0.3s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0; total time=   0.4s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0; total time=   0.3s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=5, n_estimators=100, subsample=1.0; total time=   0.2s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8; total time=   0.5s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8; total time=   0.4s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8; total time=   0.4s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0; total time=   0.6s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0; total time=   0.3s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0; total time=   0.3s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=7, n_estimators=100, subsample=0.8; total time=   0.2s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=7, n_estimators=100, subsample=0.8; total time=   0.2s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=7, n_estimators=100, subsample=0.8; total time=   0.2s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=7, n_estimators=100, subsample=1.0; total time=   0.2s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=7, n_estimators=100, subsample=1.0; total time=   0.2s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=7, n_estimators=100, subsample=1.0; total time=   0.2s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=7, n_estimators=200, subsample=0.8; total time=   0.5s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=7, n_estimators=200, subsample=0.8; total time=   0.5s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=7, n_estimators=200, subsample=0.8; total time=   0.5s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=7, n_estimators=200, subsample=1.0; total time=   0.5s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=7, n_estimators=200, subsample=1.0; total time=   0.5s


Parameters: { "use_label_encoder" } are not used.



[CV] END learning_rate=0.1, max_depth=7, n_estimators=200, subsample=1.0; total time=   0.4s


Parameters: { "use_label_encoder" } are not used.



Fitting 3 folds for each of 12 candidates, totalling 36 fits
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000197 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1785
[LightGBM] [Info] Number of data points in the train set: 2138, number of used features: 7
[LightGBM] [Info] Start training from score -1.096743
[LightGBM] [Info] Start training from score -1.102361
[LightGBM] [Info] Start training from score -1.096743
[CV] END ..learning_rate=0.01, max_depth=3, n_estimators=100; total time=   0.0s
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000179 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1785
[LightGBM] [Info] Number of data points in the train set: 2139, number of used features: 7
[LightGBM] [Info] Start training from score -1.095811
[LightGBM] [Info] Start training from score -1.104238
[LightGBM] [In

In [14]:
voting_clf = VotingClassifier(estimators=[
    ('XGBoost', best_xgb),
    ('LightGBM', best_lgbm)
], voting='soft')

voting_clf.fit(X_train, y_train)

Parameters: { "use_label_encoder" } are not used.



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000177 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1785
[LightGBM] [Info] Number of data points in the train set: 3208, number of used features: 7
[LightGBM] [Info] Start training from score -1.096122
[LightGBM] [Info] Start training from score -1.103612
[LightGBM] [Info] Start training from score -1.096122


In [15]:
y_pred = voting_clf.predict(X_test)

conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)
print("Accuracy Score:", accuracy)

Confusion Matrix:
 [[262   3   0]
 [ 17 187  69]
 [  0  89 176]]
Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.99      0.96       265
           1       0.67      0.68      0.68       273
           2       0.72      0.66      0.69       265

    accuracy                           0.78       803
   macro avg       0.78      0.78      0.78       803
weighted avg       0.77      0.78      0.78       803

Accuracy Score: 0.7783312577833126


In [16]:
model_path = "fleet_maintenance_voting_model.pkl"
joblib.dump(voting_clf, model_path)

print(f"saved to {model_path}")


saved to fleet_maintenance_voting_model.pkl
