In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score
import numpy as np
import scipy.special

Accuracy: 0.755
Precision (macro): 0.6923873228221054
Precision (weighted): 0.7375505653766523
Recall (macro): 0.6498015873015872
Recall (weighted): 0.755
F1 Score (macro): 0.6617773943054357
F1 Score (weighted): 0.7399068162208801
Confusion Matrix:
[[128  16]
 [ 33  23]]
ROC AUC: 0.8690476190476191
Accuracy: 0.755
Precision (macro): 0.6929990539262063
Precision (weighted): 0.7464900662251657
Recall (macro): 0.6770833333333333
Recall (weighted): 0.755
F1 Score (macro): 0.6836158192090396
F1 Score (weighted): 0.7497401129943502
Confusion Matrix:
[[123  21]
 [ 28  28]]
ROC AUC: 0.8516865079365079
Accuracy: 0.72
Precision (macro): 0.36
Precision (weighted): 0.5184
Recall (macro): 0.5
Recall (weighted): 0.72
F1 Score (macro): 0.4186046511627907
F1 Score (weighted): 0.6027906976744186
Confusion Matrix:
[[144   0]
 [ 56   0]]
ROC AUC: 0.8410218253968254
Accuracy: 0.79
Precision (macro): 0.7416713721061547
Precision (weighted): 0.7810502540937324
Recall (macro): 0.7123015873015872
Recall (wei

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [4]:
data = pd.read_csv("hyphomz_indian_cleaning_data.csv")

data.columns = data.columns.str.strip()

le_cleaning = LabelEncoder()
le_city = LabelEncoder()

data["cleaning_type_enc"] = le_cleaning.fit_transform(data["cleaning_type"])
data["city_tier_enc"] = le_city.fit_transform(data["city_tier"])

# Features and target
X = data[["cleaning_type_enc", "bhk", "area_sqft", "has_heavy_furniture", "customer_rating", "city_tier_enc", "last_cleaning_days_ago"]]
y = data["customer_returned"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

rf = RandomForestClassifier(random_state=42)
lr = LogisticRegression(max_iter=500)
svm = SVC(probability=True)
knn = KNeighborsClassifier()

models = [("Random Forest", rf), ("Logistic Regression", lr), ("SVM", svm), ("KNN", knn)]

for name, model in models:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    
    if hasattr(model, "predict_proba"):
        y_prob = model.predict_proba(X_test)
    elif hasattr(model, "decision_function"):
        scores = model.decision_function(X_test)
        probs = scipy.special.expit(scores)
        y_prob = np.vstack([1 - probs, probs]).T
    else:
        y_prob = None
    
    print(f"======= {name} =======")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Precision (macro):", precision_score(y_test, y_pred, average="macro"))
    print("Precision (weighted):", precision_score(y_test, y_pred, average="weighted"))
    print("Recall (macro):", recall_score(y_test, y_pred, average="macro"))
    print("Recall (weighted):", recall_score(y_test, y_pred, average="weighted"))
    print("F1 Score (macro):", f1_score(y_test, y_pred, average="macro"))
    print("F1 Score (weighted):", f1_score(y_test, y_pred, average="weighted"))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    
    if y_prob is not None:
        try:
            auc = roc_auc_score(y_test, y_prob[:, 1])
            print("ROC AUC:", auc)
        except:
            print("ROC AUC: Not available")
    else:
        print("ROC AUC: Not available")

Accuracy: 0.755
Precision (macro): 0.6923873228221054
Precision (weighted): 0.7375505653766523
Recall (macro): 0.6498015873015872
Recall (weighted): 0.755
F1 Score (macro): 0.6617773943054357
F1 Score (weighted): 0.7399068162208801
Confusion Matrix:
[[128  16]
 [ 33  23]]
ROC AUC: 0.8690476190476191
Accuracy: 0.755
Precision (macro): 0.6929990539262063
Precision (weighted): 0.7464900662251657
Recall (macro): 0.6770833333333333
Recall (weighted): 0.755
F1 Score (macro): 0.6836158192090396
F1 Score (weighted): 0.7497401129943502
Confusion Matrix:
[[123  21]
 [ 28  28]]
ROC AUC: 0.8516865079365079
Accuracy: 0.72
Precision (macro): 0.36
Precision (weighted): 0.5184
Recall (macro): 0.5
Recall (weighted): 0.72
F1 Score (macro): 0.4186046511627907
F1 Score (weighted): 0.6027906976744186
Confusion Matrix:
[[144   0]
 [ 56   0]]
ROC AUC: 0.8410218253968254
Accuracy: 0.79
Precision (macro): 0.7416713721061547
Precision (weighted): 0.7810502540937324
Recall (macro): 0.7123015873015872
Recall (wei

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
