In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import make_scorer, accuracy_score, f1_score, recall_score, precision_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

# Veriyi yükle
df = pd.read_csv("brake_health_realistic.csv")

# X ve y ayrımı
X = df.drop(columns=["brake_health"])
y = df["brake_health"]

# One-Hot Encoding yapılacak kategorik sütun olmadığından, pipeline sadece passthrough üzerinden tanımlanacak.
# Ancak genel yapı OneHotEncoder ile de çalışacak şekilde bırakıldı.
categorical_features = []  # şu anlık kategorik sütun yok
numerical_features = X.columns.tolist()

# Preprocessor
preprocessor = ColumnTransformer(
    transformers=[("num", "passthrough", numerical_features)]
)

# Modeller
models = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Naive Bayes": GaussianNB()
}

# Değerlendirme metrikleri
scoring = {
    'accuracy': 'accuracy',
    'f1_macro': 'f1_macro',
    'recall_macro': 'recall_macro',
    'precision_macro': 'precision_macro'
}

# Her model için pipeline kurup cross-validate et
cv_results = {}

for name, model in models.items():
    pipeline = Pipeline(steps=[
        ("preprocessor", preprocessor),
        ("classifier", model)
    ])
    scores = cross_validate(pipeline, X, y, cv=5, scoring=scoring)
    cv_results[name] = {
        "accuracy": round(scores["test_accuracy"].mean(), 4),
        "f1_macro": round(scores["test_f1_macro"].mean(), 4),
        "recall_macro": round(scores["test_recall_macro"].mean(), 4),
        "precision_macro": round(scores["test_precision_macro"].mean(), 4),
    }

cv_results


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the 

{'Random Forest': {'accuracy': np.float64(0.957),
  'f1_macro': np.float64(0.8523),
  'recall_macro': np.float64(0.8071),
  'precision_macro': np.float64(0.9678)},
 'Decision Tree': {'accuracy': np.float64(0.965),
  'f1_macro': np.float64(0.9034),
  'recall_macro': np.float64(0.8985),
  'precision_macro': np.float64(0.9162)},
 'Gradient Boosting': {'accuracy': np.float64(0.972),
  'f1_macro': np.float64(0.895),
  'recall_macro': np.float64(0.8661),
  'precision_macro': np.float64(0.9563)},
 'Logistic Regression': {'accuracy': np.float64(0.798),
  'f1_macro': np.float64(0.5239),
  'recall_macro': np.float64(0.5342),
  'precision_macro': np.float64(0.5144)},
 'Naive Bayes': {'accuracy': np.float64(0.85),
  'f1_macro': np.float64(0.6813),
  'recall_macro': np.float64(0.6487),
  'precision_macro': np.float64(0.8193)}}

In [3]:
# En iyi model: Gradient Boosting
best_model = GradientBoostingClassifier()
best_model.fit(X, y)

# Örnek kullanıcıdan parametre alma simülasyonu
def predict_brake_health(total_km, harsh_braking, avg_speed, ignition_duration, engine_rpm, brake_temp):
    input_data = pd.DataFrame([{
        "total_km": total_km,
        "harsh_braking": harsh_braking,
        "avg_speed": avg_speed,
        "ignition_duration": ignition_duration,
        "engine_rpm": engine_rpm,
        "brake_temp": brake_temp
    }])
    prediction = best_model.predict(input_data)[0]
    return prediction

# Örnek kullanıcı girdisi
sample_prediction = predict_brake_health(
    total_km=420000,
    harsh_braking=85,
    avg_speed=135,
    ignition_duration=3.5,
    engine_rpm=3400,
    brake_temp=250
)

sample_prediction


'normal'