In [2]:

# Customer Satisfaction Prediction 

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from xgboost import XGBClassifier
from imblearn.over_sampling import SMOTE


# For Loading Dataset

DATA_PATH = "../data/customer_support_tickets.csv"

if not os.path.exists(DATA_PATH):
    raise FileNotFoundError(f"Dataset not found at {DATA_PATH}. Please check path.")

df = pd.read_csv(DATA_PATH)

target_col = "Customer Satisfaction Rating"

if target_col not in df.columns:
    raise KeyError(f"Target column '{target_col}' not found in dataset. Available: {df.columns.tolist()}")


# For Preparing Data

y = df[target_col]
X = df.drop(columns=[target_col])


le = LabelEncoder()
y = le.fit_transform(y)


cat_cols = X.select_dtypes(include=["object"]).columns.tolist()
num_cols = X.select_dtypes(include=["int64", "float64"]).columns.tolist()


preprocessor = ColumnTransformer([
    ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
    ("num", StandardScaler(), num_cols)
])


X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(" Class balance before SMOTE:", np.bincount(y_train))


# For Balanceing Data with SMOTE

smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(preprocessor.fit_transform(X_train), y_train)

print(" Class balance after SMOTE:", np.bincount(y_train_res))


# For Training Model

xgb = XGBClassifier(
    n_estimators=100,
    max_depth=4,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    eval_metric="mlogloss"
)
xgb.fit(X_train_res, y_train_res)


X_test_trans = preprocessor.transform(X_test)
y_pred = xgb.predict(X_test_trans)


# For  Model Evaluation

print("\n📊 Model Performance")
print("Accuracy:", round(accuracy_score(y_test, y_pred), 3))
print("\nDetailed Report:\n", classification_report(y_test, y_pred, target_names=le.classes_.astype(str)))


# For Save Charts for in Output Folder

os.makedirs("outputs", exist_ok=True)

# Confusion Matrix Chart
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(6, 4))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=le.classes_, yticklabels=le.classes_)
plt.title("Confusion Matrix")
plt.savefig("outputs/confusion_matrix.png", dpi=120, bbox_inches="tight")
plt.close()

# Classification Report Heatmap Chart
report = classification_report(y_test, y_pred, target_names=le.classes_.astype(str), output_dict=True)
df_report = pd.DataFrame(report).transpose()

plt.figure(figsize=(8, 4))
sns.heatmap(df_report.iloc[:-1, :-1], annot=True, cmap="YlGnBu", fmt=".2f")
plt.title("Classification Report (Heatmap)")
plt.savefig("outputs/classification_report.png", dpi=120, bbox_inches="tight")
plt.close()

# Satisfaction Distribution Chart 
plt.figure(figsize=(6, 4))
sns.countplot(x=target_col, data=df, palette="viridis")
plt.title("Customer Satisfaction Distribution")
plt.ylabel("Number of Customers")
plt.savefig("outputs/satisfaction_distribution.png", dpi=120, bbox_inches="tight")
plt.close()

# Satisfaction Pie Chart
df[target_col].value_counts().plot(
    kind="pie", autopct="%1.1f%%", figsize=(6, 6), 
    colors=sns.color_palette("Set2"), startangle=90
)
plt.title("Customer Satisfaction Split")
plt.ylabel("")
plt.savefig("outputs/satisfaction_pie.png", dpi=120, bbox_inches="tight")
plt.close()

# Avg Satisfaction  Chart 
for col in cat_cols[:2]:
    plt.figure(figsize=(8, 4))
    sns.barplot(x=col, y=target_col, data=df, estimator=np.mean, ci=None, palette="coolwarm")
    plt.xticks(rotation=30)
    plt.title(f"Avg Satisfaction by {col}")
    plt.savefig(f"outputs/satisfaction_by_{col}.png", dpi=120, bbox_inches="tight")
    plt.close()

# Boxplots Chart 
for col in num_cols[:2]:
    plt.figure(figsize=(8, 4))
    sns.boxplot(x=target_col, y=col, data=df, palette="Set3")
    plt.title(f"{col} vs Satisfaction")
    plt.savefig(f"outputs/{col}_vs_satisfaction.png", dpi=120, bbox_inches="tight")
    plt.close()

print("\n All charts saved inside 'outputs/' folder.")


 Class balance before SMOTE: [ 442  439  464  435  435 4560]
 Class balance after SMOTE: [4560 4560 4560 4560 4560 4560]

📊 Model Performance
Accuracy: 0.729

Detailed Report:
               precision    recall  f1-score   support

         1.0       0.23      0.32      0.26       111
         2.0       0.14      0.15      0.14       110
         3.0       0.20      0.24      0.22       116
         4.0       0.10      0.09      0.10       108
         5.0       0.12      0.06      0.08       109
         nan       1.00      1.00      1.00      1140

    accuracy                           0.73      1694
   macro avg       0.30      0.31      0.30      1694
weighted avg       0.72      0.73      0.73      1694




Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.countplot(x=target_col, data=df, palette="viridis")

The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.barplot(x=col, y=target_col, data=df, estimator=np.mean, ci=None, palette="coolwarm")

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=col, y=target_col, data=df, estimator=np.mean, ci=None, palette="coolwarm")

The `ci` parameter is deprecated. Use `errorbar=None` for the same effect.

  sns.barplot(x=col, y=target_col, data=df, estimator=np.mean, ci=None, palette="coolwarm")

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barpl


 All charts saved inside 'outputs/' folder.
