In [None]:
# Importing Necessary Libraries
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.cluster import KMeans
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, StackingClassifier
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC

In [None]:
# Load the CSV into a DataFrame, then show the dtype pandas inferred for each
# column (the trailing expression is what the cell renders).
raw_data = pd.read_csv(filepath_or_buffer='dataset.csv')
raw_data.dtypes

In [None]:
# Preview the raw data. A bare trailing expression uses the notebook's rich
# table rendering, and head() limits the output to the first five rows instead
# of printing the whole frame as plain text.
raw_data.head()

In [None]:
# Split the columns by dtype: remember the names of the object-dtype columns
# and build a frame that holds every other column.
non_numeric_cols = list(raw_data.select_dtypes(include=['object']).columns)
raw_data_numeric = raw_data.drop(columns=non_numeric_cols)

In [None]:
# Replace missing values (NaN) in the non-object columns with the column mean.
# fit_transform() fits the imputer itself, so no separate fit() call is needed.
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
raw_data_numeric_imputed = pd.DataFrame(
    imp.fit_transform(raw_data_numeric),
    columns=raw_data_numeric.columns,
    # Keep the original row labels: the imputer returns a bare array, and the
    # column-wise concat in the next cell aligns rows by index label.
    index=raw_data_numeric.index,
)

In [None]:
# Put the untouched object-dtype columns back next to the imputed columns
# (column-wise; rows are aligned on the index).
raw_data_imputed = pd.concat(
    [raw_data[non_numeric_cols], raw_data_numeric_imputed],
    axis="columns",
)
# Only the last expression of a cell is rendered, so the column list has to be
# printed explicitly to be visible alongside the dtype of the target column.
print(raw_data_imputed.columns.tolist())
raw_data_imputed['Churn'].dtype

In [None]:
# Encode the 'Churn' target as integer class labels (overwrites the column).
# The values are cast to str first, so each distinct string form becomes its
# own class.
# NOTE(review): a missing value would presumably turn into the literal string
# 'nan' and be encoded as an extra class — confirm against the data.
le = LabelEncoder()
churn_text = raw_data_imputed['Churn'].astype(str)
raw_data_imputed['Churn'] = le.fit(churn_text).transform(churn_text)

In [None]:
# Work on an independent copy of the imputed data.
# pd.DataFrame(existing_frame) does not copy by default and, depending on the
# pandas version / copy-on-write mode, may share its underlying data with the
# source frame — the in-place column conversion in the next cell could then
# alter `raw_data_imputed` as well. An explicit copy() rules that out.
df = raw_data_imputed.copy()

In [None]:
# Replace every remaining object-dtype column with its category codes stored
# as floats. The codes are integer category positions (not necessarily binary);
# pandas uses -1 as the code for a missing value.
for col in df.select_dtypes(include="object").columns:
    category_codes = df[col].astype("category").cat.codes
    df[col] = category_codes.astype("float")


In [None]:
# Preview the fully numeric dataset. A bare trailing expression uses the
# notebook's rich table rendering, and head() limits the output to the first
# five rows instead of printing the whole frame as plain text.
df.head()

In [None]:
# Standardise every column of df as input for clustering; the fitted scaler
# stays available as `scaler`.
# Note: df still contains the encoded 'Churn' column at this point.
scaler = StandardScaler().fit(df)
X_scaled = scaler.transform(df)

In [None]:
# Fit a 3-cluster k-means model (fixed seed) on the standardised data and read
# the per-row cluster assignment from the fitted model.
kmeans = KMeans(n_clusters=3, random_state=0).fit(X_scaled)
clusters = kmeans.labels_

In [None]:
# Build a new frame from df with the k-means assignment as a 'cluster' column;
# df itself is left unchanged.
processed_data = df.assign(cluster=clusters)
# Rebinds non_numeric_cols to the object-dtype column labels of the raw data
# (now a pandas Index instead of the list created earlier). Not read again in
# the cells visible here.
non_numeric_cols = raw_data.select_dtypes(include=['object']).columns

In [None]:
# Features are every column except the target and the cluster label; the
# target is the encoded 'Churn' column.
X = processed_data.drop(columns=['Churn', 'cluster'])
y = processed_data.loc[:, 'Churn']

# Hold out 20% of the rows for testing, with a fixed seed.
split_parts = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Standardise the features for the classifiers.
# The scaler is fitted on the training split only; the test split is then
# transformed with those same training statistics. Re-fitting on the test set
# would scale it differently from the data the models are trained on and would
# use test-set information during preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Candidate classifiers. All seeds are fixed so runs are repeatable.
lr = LogisticRegression(max_iter=1000)
svm = SVC(kernel='linear', probability=True, random_state=0)
rf = RandomForestClassifier(n_estimators=100, random_state=0)

# Bagging ensemble of 10 random forests.
bagging = BaggingClassifier(
    RandomForestClassifier(n_estimators=100, random_state=0),
    n_estimators=10,
    random_state=0,
)

# Stacking ensemble: the three base learner types above feed a logistic
# regression meta-learner. Previously this name was bound to a plain random
# forest, which just repeated the `rf` model. Fresh base estimator instances
# are used so the stand-alone models stay independent of the stack.
stacking = StackingClassifier(
    estimators=[
        ('lr', LogisticRegression(max_iter=1000)),
        ('svm', SVC(kernel='linear', probability=True, random_state=0)),
        ('rf', RandomForestClassifier(n_estimators=100, random_state=0)),
    ],
    final_estimator=LogisticRegression(max_iter=1000),
    n_jobs=-1,  # fit the base estimators in parallel
)

In [None]:
# Oversample the scaled training data with SMOTE (fixed seed). Only the
# training split is resampled; the test split is not touched here.
sm = SMOTE(random_state=0)
resampled = sm.fit_resample(X_train_scaled, y_train)
X_train_smote, y_train_smote = resampled

In [None]:
# Pair each display name with its estimator, in evaluation order.
# `models` is a list of (name, estimator) tuples.
model_names = [
    'Logistic Regression',
    'Support Vector Machine',
    'Random Forest',
    'Bagging Classifier',
    'Stacking Classifier',
]
model_objects = [lr, svm, rf, bagging, stacking]
models = list(zip(model_names, model_objects))


In [None]:
# Fit every model on the SMOTE-resampled training data, predict the scaled
# test split, and collect one metrics dict per model in `results`.
metric_functions = {
    "Accuracy": accuracy_score,
    "Recall": recall_score,
    "Precision": precision_score,
    "F1-Score": f1_score,
}

results = []
for name, model in models:
    model.fit(X_train_smote, y_train_smote)
    y_pred = model.predict(X_test_scaled)

    # Key order is Model, Accuracy, Recall, Precision, F1-Score.
    result = {"Model": name}
    result.update(
        {label: score(y_test, y_pred) for label, score in metric_functions.items()}
    )

    results.append(result)
    print(result)

In [None]:
# Pick the entry with the highest F1-Score (the first one wins on a tie).
# Note: `best_model` is that model's metrics dict, not the fitted estimator.
f1_scores = [entry["F1-Score"] for entry in results]
best_model = results[f1_scores.index(max(f1_scores))]
best_model_name = best_model["Model"]


In [None]:
# Class counts taken from the encoded 'Churn' column of the full dataset
# (these are the labels in the data, not model predictions).
total = len(processed_data)
churn = int((processed_data['Churn'] == 1).sum())
non_churn = int((processed_data['Churn'] == 0).sum())

print(f'Total customers: {total}')
print(f'Churn Customers: {churn}')
print(f'Non-Churn Customers: {non_churn}')