In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
from sklearn.naive_bayes import CategoricalNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Simulated dataset with better features
# The survey is written one respondent per row so each record can be read
# across; it is transposed into per-column lists before building the frame.
survey_columns = (
    'Service_Quality',
    'Response_Time',
    'Product_Quality',
    'Issue_Resolved',
    'Satisfaction_Level',
)
survey_rows = [
    ('Poor', 'Slow', 'Poor', 'Yes', 'Low'),
    ('Average', 'Medium', 'Average', 'No', 'Medium'),
    ('Good', 'Fast', 'Good', 'Yes', 'High'),
    ('Excellent', 'Fast', 'Excellent', 'Yes', 'High'),
    ('Good', 'Medium', 'Good', 'No', 'Medium'),
    ('Poor', 'Slow', 'Poor', 'No', 'Low'),
    ('Excellent', 'Fast', 'Excellent', 'Yes', 'High'),
    ('Average', 'Medium', 'Average', 'No', 'Medium'),
    ('Good', 'Fast', 'Good', 'Yes', 'High'),
    ('Poor', 'Slow', 'Poor', 'No', 'Low'),
    ('Average', 'Medium', 'Average', 'No', 'Medium'),
    ('Excellent', 'Fast', 'Excellent', 'Yes', 'High'),
    ('Good', 'Fast', 'Good', 'Yes', 'High'),
    ('Average', 'Medium', 'Average', 'No', 'Medium'),
    ('Poor', 'Slow', 'Poor', 'No', 'Low'),
]

# zip(*rows) turns the row tuples into one tuple per column.
data = {
    column: list(values)
    for column, values in zip(survey_columns, zip(*survey_rows))
}

df = pd.DataFrame(data)

# Step 2: Encode features

# Ordinal features with their levels listed from worst to best, so the encoded
# values keep that ranking. The dict order fixes both the feature order and the
# order of the per-feature category lists handed to the encoder.
ordinal_levels = {
    'Service_Quality': ['Poor', 'Average', 'Good', 'Excellent'],
    'Response_Time': ['Slow', 'Medium', 'Fast'],
    'Product_Quality': ['Poor', 'Average', 'Good', 'Excellent'],
}
ordinal_features = list(ordinal_levels)
ordinal_mapping = list(ordinal_levels.values())
ordinal_encoder = OrdinalEncoder(categories=ordinal_mapping)
df[ordinal_features] = ordinal_encoder.fit_transform(df[ordinal_features])

# One fitted LabelEncoder per remaining column (the yes/no feature and the
# target), kept by column name so predictions can be decoded later.
label_encoders = {
    column: LabelEncoder()
    for column in ('Issue_Resolved', 'Satisfaction_Level')
}
for column, encoder in label_encoders.items():
    df[column] = encoder.fit_transform(df[column])

# Step 3: Split features and labels
target_column = 'Satisfaction_Level'
X = df.drop(columns=target_column).to_numpy()
y = df[target_column].to_numpy()

# Step 4: Train-test split
# 30% of the rows are held out; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Step 5: Train and evaluate models
# Every candidate is fitted on the training split and scored on the held-out
# split; the first model with the highest accuracy is kept for predictions.

# Number of possible categories per feature column, in column order: the three
# ordinal features followed by Issue_Resolved. Without this, CategoricalNB
# sizes its tables from the training split alone and raises IndexError when it
# later sees a category code that happened to be absent from that split.
category_counts = [len(levels) for levels in ordinal_mapping]
category_counts.append(len(label_encoders['Issue_Resolved'].classes_))

# Passing the full label set alongside target_names keeps the report aligned
# even when a class is missing from the small test split.
target_encoder = label_encoders['Satisfaction_Level']
class_ids = np.arange(len(target_encoder.classes_))

classifiers = {
    "Naive Bayes": CategoricalNB(min_categories=category_counts),
    # Seeded so that repeated runs pick the same best model.
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
    "k-NN": KNeighborsClassifier(n_neighbors=3)
}

best_model = None
best_model_name = None
# Start below any achievable accuracy so a model is always selected, even if
# every candidate scores 0.
best_accuracy = -1.0

print("\n=== Model Comparison ===")
for name, clf in classifiers.items():
    print(f"\n--- {name} ---")
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy:.2f}")
    print(classification_report(
        y_test,
        y_pred,
        labels=class_ids,
        target_names=target_encoder.classes_,
        # Report 0 for classes that were never predicted instead of emitting
        # an undefined-metric warning for each one.
        zero_division=0,
    ))

    # Strict '>' means ties keep the earlier model in the dict.
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = clf
        best_model_name = name

print(f"\n✅ Best model selected: {best_model_name} (Accuracy: {best_accuracy:.2f})")

# Step 6: Interactive user predictor
def predict_new_customer():
    """Prompt for one customer's survey answers and print the predicted level.

    Reads four answers from standard input, encodes them with the same
    category order / label encoder used for training, runs ``best_model`` on
    the resulting single-row array and prints the decoded satisfaction label.

    Answers are matched after trimming surrounding whitespace and applying
    title case, so ``" good "`` is accepted as ``"Good"``. An answer outside
    the listed categories prints an error message and returns without
    predicting. Returns ``None`` in every case.
    """
    print("\nEnter new customer survey details below:")

    # Inputs for ordinal features
    service_quality_input = input("Service Quality (Poor/Average/Good/Excellent): ").strip().title()
    response_time_input = input("Response Time (Slow/Medium/Fast): ").strip().title()
    product_quality_input = input("Product Quality (Poor/Average/Good/Excellent): ").strip().title()

    # Input for categorical feature
    issue_resolved_input = input("Issue Resolved (Yes/No): ").strip().title()

    # Encoding inputs. Both list.index and LabelEncoder.transform raise
    # ValueError for a value they do not know, which is the only failure that
    # really means "bad user input".
    try:
        service_quality = ordinal_mapping[0].index(service_quality_input)
        response_time = ordinal_mapping[1].index(response_time_input)
        product_quality = ordinal_mapping[2].index(product_quality_input)
        issue_resolved = label_encoders['Issue_Resolved'].transform([issue_resolved_input])[0]
    except ValueError as err:
        print("\n❌ Invalid input. Please follow the expected categories carefully.")
        print(f"   ({err})")
        return

    input_data = np.array([[service_quality, response_time, product_quality, issue_resolved]])
    try:
        prediction = best_model.predict(input_data)[0]
    except IndexError:
        # CategoricalNB indexes per-feature tables by category code and fails
        # this way for a code beyond what it was fitted with. The input was
        # valid, so say so rather than blaming the user.
        print("\n❌ The selected model cannot score this combination: "
              "one of the categories was not present in its training data.")
        return

    result_label = label_encoders['Satisfaction_Level'].inverse_transform([prediction])[0]

    print(f"\n🔎 Predicted Customer Satisfaction Level: {result_label}")

# Step 7: Multiple prediction loop
# Keep offering predictions until the user declines. Answers are compared
# after trimming whitespace and lower-casing.
while True:
    try:
        user_choice = input("\nDo you want to predict satisfaction for a new customer? (yes/no): ").strip().lower()
    except EOFError:
        # No more input is available (e.g. stdin closed or the script is run
        # non-interactively); leave cleanly instead of ending in a traceback.
        print("Exiting prediction tool.")
        break

    if user_choice in ['no', 'n', 'exit']:
        print("Exiting prediction tool.")
        break
    elif user_choice in ['yes', 'y']:
        predict_new_customer()
    else:
        print("Please type 'yes' or 'no'.")



=== Model Comparison ===

--- Naive Bayes ---
Accuracy: 1.00
              precision    recall  f1-score   support

        High       1.00      1.00      1.00         1
         Low       1.00      1.00      1.00         3
      Medium       1.00      1.00      1.00         1

    accuracy                           1.00         5
   macro avg       1.00      1.00      1.00         5
weighted avg       1.00      1.00      1.00         5


--- Decision Tree ---
Accuracy: 0.80
              precision    recall  f1-score   support

        High       0.50      1.00      0.67         1
         Low       1.00      0.67      0.80         3
      Medium       1.00      1.00      1.00         1

    accuracy                           0.80         5
   macro avg       0.83      0.89      0.82         5
weighted avg       0.90      0.80      0.81         5


--- Random Forest ---


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy: 1.00
              precision    recall  f1-score   support

        High       1.00      1.00      1.00         1
         Low       1.00      1.00      1.00         3
      Medium       1.00      1.00      1.00         1

    accuracy                           1.00         5
   macro avg       1.00      1.00      1.00         5
weighted avg       1.00      1.00      1.00         5


--- k-NN ---
Accuracy: 0.40
              precision    recall  f1-score   support

        High       1.00      1.00      1.00         1
         Low       0.00      0.00      0.00         3
      Medium       0.25      1.00      0.40         1

    accuracy                           0.40         5
   macro avg       0.42      0.67      0.47         5
weighted avg       0.25      0.40      0.28         5


✅ Best model selected: Naive Bayes (Accuracy: 1.00)

Do you want to predict satisfaction for a new customer? (yes/no): yes

Enter new customer survey details below:
Service Quality (Poor/Averag