In [19]:
import lime
import lime.lime_tabular
import matplotlib.pyplot as plt
import pandas as pd
import xgboost as xgb
import numpy as np

# Load churn data
file_path = "ChurnData.csv"
data = pd.read_csv(file_path)

# Print column names to verify target column name
print("Columns in dataset:", data.columns)

# Identify the correct target column
correct_target = [col for col in data.columns if 'churn' in col.lower()]
if correct_target:
    target_col = correct_target[0]
else:
    raise KeyError("Target column (Churn) not found in dataset.")

X = data.drop(columns=[target_col])
y = data[target_col]

# Train XGBoost model
best_model = xgb.XGBClassifier()
best_model.fit(X, y)

# Create LIME explainer
explainer = lime.lime_tabular.LimeTabularExplainer(
    X.values, feature_names=X.columns, class_names=['Not Churn', 'Churn'], mode='classification'
)

# Explain a single instance (first sample)
idx = 0  # Change index for different samples
exp = explainer.explain_instance(X.iloc[idx].values, best_model.predict_proba)

# Save LIME explanation to an HTML file
exp.save_to_file("lime_explanation.html")
print("LIME explanation saved to lime_explanation.html")

# Document key findings
print("LIME Explanation for Sample Index:", idx)
print(exp.as_list())

Columns in dataset: Index(['tenure', 'age', 'address', 'income', 'ed', 'employ', 'equip',
       'callcard', 'wireless', 'longmon', 'tollmon', 'equipmon', 'cardmon',
       'wiremon', 'longten', 'tollten', 'cardten', 'voice', 'pager',
       'internet', 'callwait', 'confer', 'ebill', 'loglong', 'logtoll',
       'lninc', 'custcat', 'churn'],
      dtype='object')
LIME explanation saved to lime_explanation.html
LIME Explanation for Sample Index: 0
[('tenure <= 16.75', 0.20635791045269358), ('wiremon > 23.46', 0.12332716332780945), ('3.00 < employ <= 7.50', 0.09122123761353092), ('12.50 < cardmon <= 20.75', -0.08375704583739708), ('income > 80.00', -0.05551137832074229), ('longten <= 79.34', 0.05451528858719407), ('ebill <= 0.00', -0.05097587083845761), ('ed > 4.00', 0.049701935861180324), ('equipmon <= 0.00', -0.04618058983852433), ('31.00 < age <= 40.00', 0.04606795696314223)]
