In [None]:
# Basic Decision Tree for Jet Engine Fault Detection
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import sklearn
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

# Notebook configuration: CSV file to read (relative path) and the name of
# the column that holds the label to predict.
DATA_PATH = "engine_fault_detection_dataset.csv"
TARGET_COL = "Engine_Condition"


In [None]:
# Read the CSV at DATA_PATH into a DataFrame, report its (rows, columns)
# shape, and preview the first five rows as the cell output.
engine_df = pd.read_csv(filepath_or_buffer=DATA_PATH)
print('Loaded:', engine_df.shape)
engine_df.head(5)


In [None]:
# Split and preprocessing
# Separate the label column from the feature columns.
X = engine_df.drop(columns=[TARGET_COL])
y = engine_df[TARGET_COL]

# Hold out 20% of the rows for testing; stratify on the label so both splits
# keep the same class proportions, and fix the seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Only numeric columns are fed to the model.
numeric_features = X_train.select_dtypes(include=np.number).columns.tolist()
if not numeric_features:
    # Fail here with a clear message instead of an opaque error inside fit().
    raise ValueError('No numeric feature columns found; nothing to train on.')

# remainder='drop' below discards every other column without any message,
# so report them explicitly to make the omission visible to the reader.
dropped_features = [col for col in X_train.columns if col not in numeric_features]
if dropped_features:
    print('Ignoring non-numeric columns:', dropped_features)

# Standardise the numeric columns; all remaining columns are dropped.
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numeric_features),
], remainder='drop')

print('Train:', X_train.shape, 'Test:', X_test.shape)


In [None]:
# Basic Decision Tree model
# The classifier is left untuned apart from a fixed seed, and is chained
# after the preprocessing step so both are fitted and applied together.
basic_dt = DecisionTreeClassifier(random_state=42)
pipeline = Pipeline(steps=[
    ('preprocess', preprocessor),
    ('model', basic_dt),
])
pipeline.fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred = pipeline.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred)
conf_mat = confusion_matrix(y_test, y_pred)

print('Accuracy:', test_accuracy)
print('Classification Report:\n', report_text)
print('Confusion Matrix:\n', conf_mat)


In [None]:
# Save model and metadata for frontend use
# NOTE: joblib files are pickles. Consumers should only load them from a
# trusted location, ideally with the scikit-learn version recorded below.
artifacts_dir = Path('artifacts')
artifacts_dir.mkdir(parents=True, exist_ok=True)

# Persist the whole fitted pipeline (preprocessing + classifier).
model_path = artifacts_dir / 'jet_fault_model_decision_tree.pkl'
joblib.dump(pipeline, model_path)

# Read the label order from the fitted classifier rather than recomputing it
# from the raw target column: this is the order the model itself uses, and
# .tolist() converts numpy scalars into plain Python values.
fitted_classes = pipeline.named_steps['model'].classes_.tolist()

feature_metadata = {
    'numeric_features': numeric_features,
    'target': TARGET_COL,
    'classes_': fitted_classes,
    # Recorded so a consumer can detect a scikit-learn version mismatch
    # before unpickling the model.
    'sklearn_version': sklearn.__version__,
}
metadata_path = artifacts_dir / 'feature_metadata.pkl'
joblib.dump(feature_metadata, metadata_path)

print(f"Saved model to: {model_path}")
print(f"Saved metadata to: {metadata_path}")
