In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [None]:
# Read the preprocessed churn table and one-hot encode it in one step.
# drop_first=True is forwarded to get_dummies, which drops the first level
# of every column it encodes.
df = pd.get_dummies(
    pd.read_csv("Churn_Prediction_Preprocessed.csv"),
    drop_first=True,
)

In [None]:
# Preview the first three rows of the encoded frame.
df.head(3)

In [None]:
from sklearn.feature_selection import SequentialFeatureSelector

def backward_select_features(X, y, estimator, k_features, cv=5):
    """Return ``X`` restricted to the columns kept by backward selection.

    Args:
        X: Feature table; must expose ``.columns`` and column-label indexing
            (it is used as a pandas DataFrame here).
        y: Target values passed to the selector's ``fit``.
        estimator: Estimator handed to ``SequentialFeatureSelector``.
        k_features: Forwarded as ``n_features_to_select``.
        cv: Forwarded as the selector's ``cv`` argument (default 5).

    Returns:
        ``X`` indexed by the column labels the selector marked as supported.
    """
    selector = SequentialFeatureSelector(
        estimator,
        n_features_to_select=k_features,
        direction='backward',
        cv=cv,
        n_jobs=-1,  # n_jobs=-1 is passed through unchanged to the selector
    )
    selector.fit(X, y)
    # get_support() yields a boolean mask aligned with X.columns.
    kept_columns = X.columns[selector.get_support()]
    return X[kept_columns]

In [None]:
# The churn_flag column is the target; every remaining column is a feature.
y = df["churn_flag"]
X = df.drop(columns=["churn_flag"])

In [None]:
# Use Logistic Regression as base estimator for feature selection.
# The selector is fitted on the training rows only: choosing features while the
# future test rows are visible leaks test information into the accuracies
# reported later. test_size/random_state deliberately mirror split_scalar so
# both calls produce the same train/test partition of the rows.
X_fit, _X_holdout, y_fit, _y_holdout = train_test_split(
    X, y, test_size=0.25, random_state=0
)
selected_columns = backward_select_features(
    X_fit, y_fit, LogisticRegression(max_iter=1000), k_features=5, cv=3
).columns
# Keep every row (train and test) so the split/scale step still receives the
# full dataset, now restricted to the selected columns.
X_selected = X[selected_columns]

In [None]:
# Split and scale
def split_scalar(indep_X, dep_Y):
    """Split into train/test partitions and standardize the features.

    The split uses ``test_size=0.25`` and ``random_state=0``. The scaler is
    fitted on the training features only and then applied to both partitions.

    Args:
        indep_X: Feature data passed to ``train_test_split``.
        dep_Y: Target data passed to ``train_test_split``.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` where the two feature
        entries are the scaler's outputs and the two target entries are
        returned as produced by the split.
    """
    features_train, features_test, target_train, target_test = train_test_split(
        indep_X, dep_Y, test_size=0.25, random_state=0
    )
    scaler = StandardScaler()
    # Fit on the training partition only, then reuse those statistics for test.
    scaled_train = scaler.fit_transform(features_train)
    scaled_test = scaler.transform(features_test)
    return scaled_train, scaled_test, target_train, target_test

# Build the scaled train/test feature arrays and matching targets from the selected columns.
X_train, X_test, y_train, y_test = split_scalar(X_selected, y)

In [None]:
def evaluate_model(classifier, X_train, y_train, X_test, y_test):
    """Fit ``classifier`` on the training data and score it on the test data.

    Args:
        classifier: Object exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: Training features.
        y_train: Training targets.
        X_test: Test features used for prediction.
        y_test: Test targets the predictions are compared against.

    Returns:
        Tuple ``(classifier, accuracy, report, confusion)``: the fitted
        classifier followed by the outputs of ``accuracy_score``,
        ``classification_report`` and ``confusion_matrix`` on the test set.
    """
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    return (
        classifier,
        accuracy_score(y_test, predictions),
        classification_report(y_test, predictions),
        confusion_matrix(y_test, predictions),
    )

In [None]:
# Store accuracy for each model
accuracies = {}

# The tree-based estimators are randomized; random_state is pinned so the
# accuracy table is identical on every Restart & Run All.
models = {
    "Logistic": LogisticRegression(max_iter=1000),
    "SVM_Linear": SVC(kernel='linear'),
    "SVM_RBF": SVC(kernel='rbf'),
    "KNN": KNeighborsClassifier(),
    "NaiveBayes": GaussianNB(),
    "DecisionTree": DecisionTreeClassifier(random_state=0),
    "RandomForest": RandomForestClassifier(random_state=0)
}

for name, model in models.items():
    # evaluate_model fits the model and returns (model, accuracy, report,
    # confusion matrix); only the accuracy is tabulated here.
    clf, acc, report, cm = evaluate_model(model, X_train, y_train, X_test, y_test)
    accuracies[name] = acc
    print(f"{name} Accuracy: {acc:.4f}")
    # print(report)  # Optional: per-class report for this model

# Convert to a one-row DataFrame: one column per model, row labelled by the
# feature-selection method.
result_df = pd.DataFrame([accuracies], index=["BackwardSelection"])
print("\nModel Accuracy Comparison:\n")
print(result_df)


In [None]:
# Bar chart with one bar per model; transposing puts the model names on the x axis.
ax = result_df.transpose().plot.bar(legend=False)
ax.set_title("Model Accuracy after Backward Selection")
ax.set_ylabel("Accuracy")
# Tilt the model names so they do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
ax.figure.tight_layout()
plt.show()

In [None]:
# List the column names of the encoded frame.
df.columns

In [None]:
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
# Repeat backward selection on standardized features with a saga-based
# logistic regression, then report which columns survive.
scaler = StandardScaler()
# Keep X's index as well as its column labels so X_scaled stays row-aligned with X.
X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns, index=X.index)
# The saga solver shuffles the data internally; pinning random_state makes the
# fitted coefficients, and therefore the selected features, reproducible.
estimator = LogisticRegression(max_iter=5000, solver='saga', random_state=0)
sbs = SequentialFeatureSelector(estimator, n_features_to_select=5, direction='backward')
sbs.fit(X_scaled, y)

# get_support() is a boolean mask over the columns the selector was fitted on.
selected_feature_names = X.columns[sbs.get_support()]
print("Selected features:", list(selected_feature_names))