In [None]:
import pandas as pd
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

from sklearn.model_selection import train_test_split, cross_validate
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Activation
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
import statsmodels.api as sm
import matplotlib.pyplot as plt
from collections import Counter
from scipy.stats.mstats import winsorize
import seaborn as sns
import re
import gc
import warnings
warnings.filterwarnings('ignore')
warnings.warn("this will not show")

%matplotlib inline

pd.options.display.max_rows = 1000
pd.options.display.max_columns = 1000
pd.options.display.max_colwidth = 1000

In [None]:
# Load the churn dataset from the Kaggle input directory into a DataFrame.
TELCO_CSV_PATH = '/kaggle/input/telecon-datasetcleaned-data/rfecv.csv'
telco = pd.read_csv(TELCO_CSV_PATH)

In [None]:

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Separate the target column ('churn') from the predictor columns.
Y = telco['churn']
X = telco.drop(columns=['churn'])

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

# Standardise the features. The scaler statistics are learned from the
# training rows only and then applied unchanged to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
import matplotlib.pyplot as plt
# 5. Light Gradient Boosting (individual prediction)
# Fit a LightGBM classifier with default hyper-parameters on the scaled
# training split.
LGBM = LGBMClassifier()
LGBM.fit(X_train_scaled, Y_train)

# Feature importances: the model was fitted on a NumPy array, so the column
# names are taken from the unscaled training frame (same column order).
feature_imp = pd.Series(LGBM.feature_importances_, index=X_train.columns).sort_values(ascending=False)

# Plot the (up to) 50 most important features.
top_features = feature_imp[:50]
plt.figure(figsize=(12, 10))
sns.barplot(x=top_features, y=top_features.index)
plt.title("Feature Importance")
plt.xlabel("Importance")
plt.ylabel("Features")
plt.show()

# Hard class predictions of the default model on the held-out test split.
LGBM_predictions = LGBM.predict(X_test_scaled)

# LGBM (cross-validation)
# NOTE(review): per the LightGBM parameter docs, `subsample` (bagging_fraction)
# is only applied when `subsample_freq` (bagging_freq) is > 0; with the default
# of 0 this setting likely has no effect -- confirm the intent.
LGBM_cv = LGBMClassifier(learning_rate=0.05,
                         max_depth=6,
                         n_estimators=1000,
                         subsample=0.1)
scores = cross_validate(LGBM_cv, X_train_scaled, Y_train, cv=5, scoring=['accuracy', 'precision', 'recall', 'f1', 'roc_auc'])
accuracy_mean = np.mean(scores['test_accuracy'])
precision_mean = np.mean(scores['test_precision'])
recall_mean = np.mean(scores['test_recall'])
f1_mean = np.mean(scores['test_f1'])
auc_mean = np.mean(scores['test_roc_auc'])

# These are LightGBM cross-validation results; the previous header
# ("TCN Model Evaluation:") mislabelled them. The labels now follow the
# "<Model> (CV) <Metric>:" convention used by the other model cells.
print("LGBM (CV) Accuracy:", accuracy_mean)
print("LGBM (CV) Precision:", precision_mean)
print("LGBM (CV) Recall:", recall_mean)
print("LGBM (CV) F1:", f1_mean)
print("LGBM (CV) AUC:", auc_mean)

In [None]:
from tcn import TCN
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam


# Reshape the scaled 2-D feature matrices to 3-D (samples, features, 1) so that
# every feature column is presented to the TCN as one step of a single-channel
# sequence. The test split needs the same treatment as the training split.
X_train_reshaped = X_train_scaled.reshape(X_train_scaled.shape[0], X_train_scaled.shape[1], 1)
X_test_reshaped = X_test_scaled.reshape(X_test_scaled.shape[0], X_test_scaled.shape[1], 1)

# Define the input layer
input_layer = Input(shape=(X_train_reshaped.shape[1], X_train_reshaped.shape[2]))


# Define the TCN architecture
tcn_output = TCN(nb_filters=256, kernel_size=3, nb_stacks=2, dilations=[1, 2, 4, 8, 16, 32])(input_layer)

# Output layer: a single sigmoid unit for the binary churn target
output_layer = Dense(1, activation='sigmoid')(tcn_output)

# Build the model
tcn_model = Model(inputs=input_layer, outputs=output_layer)
tcn_model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train and predict with the explicitly reshaped 3-D arrays so the data rank
# matches the declared Input shape instead of depending on any implicit
# conversion of the 2-D arrays.
tcn_model.fit(X_train_reshaped, Y_train, epochs=10, batch_size=32)
tcn_predictions = tcn_model.predict(X_test_reshaped)

# The model emits one probability per row with shape (n, 1); flatten it so the
# metric functions receive plain 1-D vectors.
tcn_probabilities = tcn_predictions.ravel()
tcn_predictions_classes = (tcn_probabilities > 0.5).astype(int)

# Evaluation metrics (ROC AUC uses the probabilities, the rest the 0/1 labels)
accuracy = accuracy_score(Y_test, tcn_predictions_classes)
f1 = f1_score(Y_test, tcn_predictions_classes)
recall = recall_score(Y_test, tcn_predictions_classes)
precision = precision_score(Y_test, tcn_predictions_classes)
roc_auc = roc_auc_score(Y_test, tcn_probabilities)

print("TCN Model Evaluation:")
print("Accuracy:", accuracy)
print("F1 Score:", f1)
print("Recall:", recall)
print("Precision:", precision)
print("ROC AUC:", roc_auc)

In [None]:
# TCN + LightGBM ensemble.
#
# The earlier version of this cell could not run: it contained an incomplete
# assignment (`lgbm = `), called an un-imported `mode`, and tried to vote with
# `tcn_output`, which is a symbolic Keras tensor rather than predictions.
# This version trains both models and combines their predicted probabilities.

# Reshape input data to match the TCN input shape: (samples, features, 1)
X_train_reshaped = X_train_scaled.reshape(X_train_scaled.shape[0], X_train_scaled.shape[1], 1)
X_test_reshaped = X_test_scaled.reshape(X_test_scaled.shape[0], X_test_scaled.shape[1], 1)

# Define the input layer for TCN
input_layer = Input(shape=(X_train_reshaped.shape[1], X_train_reshaped.shape[2]))

# Define the TCN architecture
tcn_output = TCN(nb_filters=128, kernel_size=3, nb_stacks=2, padding='causal', dilations=[1, 2, 4, 8, 16, 32])(input_layer)

# Complete the network with a sigmoid head and train it. The optimiser, loss,
# epochs and batch size mirror the stand-alone TCN cell.
ensemble_tcn_head = Dense(1, activation='sigmoid')(tcn_output)
ensemble_tcn_model = Model(inputs=input_layer, outputs=ensemble_tcn_head)
ensemble_tcn_model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
ensemble_tcn_model.fit(X_train_reshaped, Y_train, epochs=10, batch_size=32)
tcn_probabilities = ensemble_tcn_model.predict(X_test_reshaped).ravel()

# Define and fit the LGBM model
lgbm = LGBMClassifier()
lgbm.fit(X_train_scaled, Y_train)
lgbm_output = lgbm.predict(X_test_scaled)
# Probability of the second class in lgbm.classes_.
# Assumes churn is encoded 0/1 so that column 1 is the positive class -- TODO confirm.
lgbm_probabilities = lgbm.predict_proba(X_test_scaled)[:, 1]

# Soft voting: with only two voters a hard majority vote is undefined whenever
# they disagree, so average the two probabilities and threshold at 0.5.
ensemble_probabilities = (tcn_probabilities + lgbm_probabilities) / 2
ensemble_predictions = (ensemble_probabilities > 0.5).astype(int)

# Calculate ensemble metrics (ROC AUC on probabilities, the rest on 0/1 labels)
ensemble_accuracy = accuracy_score(Y_test, ensemble_predictions)
ensemble_f1 = f1_score(Y_test, ensemble_predictions)
ensemble_recall = recall_score(Y_test, ensemble_predictions)
ensemble_precision = precision_score(Y_test, ensemble_predictions)
ensemble_roc_auc = roc_auc_score(Y_test, ensemble_probabilities)

print("Ensemble Model Evaluation:")
print("Accuracy:", ensemble_accuracy)
print("F1 Score:", ensemble_f1)
print("Recall:", ensemble_recall)
print("Precision:", ensemble_precision)
print("ROC AUC:", ensemble_roc_auc)


In [None]:
from sklearn.model_selection import cross_validate

# 1. Logistic Regression -- fit on the scaled training split and predict the
#    class of every row in the scaled test split.
logreg = LogisticRegression(max_iter=1000)
logreg.fit(X_train_scaled, Y_train)
logreg_predictions = logreg.predict(X_test_scaled)

# Logistic Regression -- 5-fold cross-validation on the training split with a
# fresh, unfitted estimator.
logreg_cv = LogisticRegression(max_iter=1000)
scores = cross_validate(
    logreg_cv,
    X_train_scaled,
    Y_train,
    cv=5,
    scoring=['accuracy', 'precision', 'recall', 'f1', 'roc_auc'],
)

# Average each metric over the folds.
accuracy_mean = scores['test_accuracy'].mean()
precision_mean = scores['test_precision'].mean()
recall_mean = scores['test_recall'].mean()
f1_mean = scores['test_f1'].mean()
auc_mean = scores['test_roc_auc'].mean()

for metric_label, metric_value in (
    ("Accuracy", accuracy_mean),
    ("Precision", precision_mean),
    ("Recall", recall_mean),
    ("F1", f1_mean),
    ("AUC", auc_mean),
):
    print(f"Logistic Regression (CV) {metric_label}:", metric_value)


# 2. Naive Bayes -- individual prediction on the held-out test split.
naive_bayes = GaussianNB()
naive_bayes.fit(X_train_scaled, Y_train)
naive_bayes_predictions = naive_bayes.predict(X_test_scaled)

# Naive Bayes -- 5-fold cross-validation on the training split.
naive_bayes_cv = GaussianNB()
scores = cross_validate(
    naive_bayes_cv, X_train_scaled, Y_train,
    cv=5,
    scoring=['accuracy', 'precision', 'recall', 'f1', 'roc_auc'],
)

# Fold-averaged value of every requested metric.
accuracy_mean, precision_mean, recall_mean, f1_mean, auc_mean = (
    np.mean(scores[result_key])
    for result_key in ('test_accuracy', 'test_precision', 'test_recall', 'test_f1', 'test_roc_auc')
)

print("Naive Bayes (CV) Accuracy:", accuracy_mean)
print("Naive Bayes (CV) Precision:", precision_mean)
print("Naive Bayes (CV) Recall:", recall_mean)
print("Naive Bayes (CV) F1:", f1_mean)
print("Naive Bayes (CV) AUC:", auc_mean)


In [None]:
# Support Vector Machine -- individual prediction with default settings.
svm = SVC()
svm.fit(X_train_scaled, Y_train)
svm_predictions = svm.predict(X_test_scaled)

# Support Vector Machine -- 5-fold cross-validation on the training split.
svm_cv = SVC()
svm_scoring = ['accuracy', 'precision', 'recall', 'f1', 'roc_auc']
scores = cross_validate(svm_cv, X_train_scaled, Y_train, cv=5, scoring=svm_scoring)

# Mean of each metric across the folds.
accuracy_mean = scores['test_accuracy'].mean()
precision_mean = scores['test_precision'].mean()
recall_mean = scores['test_recall'].mean()
f1_mean = scores['test_f1'].mean()
auc_mean = scores['test_roc_auc'].mean()

svm_report = [
    ("SVM (CV) Accuracy:", accuracy_mean),
    ("SVM (CV) Precision:", precision_mean),
    ("SVM (CV) Recall:", recall_mean),
    ("SVM (CV) F1:", f1_mean),
    ("SVM (CV) AUC:", auc_mean),
]
for report_label, report_value in svm_report:
    print(report_label, report_value)


In [None]:
# Random Forest (Individual Prediction)
# A random forest is stochastic; the seed is fixed so results are repeatable
# across kernel restarts, matching the random_state=42 already used for the
# train/test split and the AdaBoost models.
RandomForest = RandomForestClassifier(random_state=42)
RandomForest.fit(X_train_scaled, Y_train)
RandomForest_predictions = RandomForest.predict(X_test_scaled)

# RandomForest (Cross-validation): 5 folds on the training split, evaluated on
# five metrics and averaged over the folds.
RandomForest_cv = RandomForestClassifier(random_state=42)
scores = cross_validate(RandomForest_cv, X_train_scaled, Y_train, cv=5, scoring=['accuracy', 'precision', 'recall', 'f1', 'roc_auc'])
accuracy_mean = np.mean(scores['test_accuracy'])
precision_mean = np.mean(scores['test_precision'])
recall_mean = np.mean(scores['test_recall'])
f1_mean = np.mean(scores['test_f1'])
auc_mean = np.mean(scores['test_roc_auc'])

print("RandomForest (CV) Accuracy:", accuracy_mean)
print("RandomForest (CV) Precision:", precision_mean)
print("RandomForest (CV) Recall:", recall_mean)
print("RandomForest (CV) F1:", f1_mean)
print("RandomForest (CV) AUC:", auc_mean)

In [None]:
# 6. Extreme Gradient Boosting -- individual prediction with default settings.
XGB = XGBClassifier()
XGB.fit(X_train_scaled, Y_train)
XGB_predictions = XGB.predict(X_test_scaled)

# XGB -- 5-fold cross-validation on the training split.
XGB_cv = XGBClassifier()
scores = cross_validate(
    XGB_cv,
    X_train_scaled,
    Y_train,
    cv=5,
    scoring=['accuracy', 'precision', 'recall', 'f1', 'roc_auc'],
)

# Collapse the per-fold scores to one mean per metric.
accuracy_mean = np.mean(scores['test_accuracy'])
precision_mean = np.mean(scores['test_precision'])
recall_mean = np.mean(scores['test_recall'])
f1_mean = np.mean(scores['test_f1'])
auc_mean = np.mean(scores['test_roc_auc'])

for xgb_metric_name, xgb_metric_value in zip(
    ("Accuracy", "Precision", "Recall", "F1", "AUC"),
    (accuracy_mean, precision_mean, recall_mean, f1_mean, auc_mean),
):
    print(f"XGB (CV) {xgb_metric_name}:", xgb_metric_value)

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost -- individual prediction; the seed is fixed at 42.
adaboost = AdaBoostClassifier(random_state=42)
adaboost.fit(X_train_scaled, Y_train)
adaboost_predictions = adaboost.predict(X_test_scaled)

# AdaBoost -- 5-fold cross-validation on the training split using a separate,
# identically configured estimator.
adaboost_cv = AdaBoostClassifier(random_state=42)
adaboost_scoring = ['accuracy', 'precision', 'recall', 'f1', 'roc_auc']
scores = cross_validate(adaboost_cv, X_train_scaled, Y_train, cv=5, scoring=adaboost_scoring)

# One fold-averaged number per metric.
accuracy_mean = scores['test_accuracy'].mean()
precision_mean = scores['test_precision'].mean()
recall_mean = scores['test_recall'].mean()
f1_mean = scores['test_f1'].mean()
auc_mean = scores['test_roc_auc'].mean()

print("AdaBoost (CV) Accuracy:", accuracy_mean)
print("AdaBoost (CV) Precision:", precision_mean)
print("AdaBoost (CV) Recall:", recall_mean)
print("AdaBoost (CV) F1:", f1_mean)
print("AdaBoost (CV) AUC:", auc_mean)


In [None]:
from sklearn.neighbors import KNeighborsClassifier

# K-Nearest Neighbours -- individual prediction with default settings.
knn = KNeighborsClassifier()
knn.fit(X_train_scaled, Y_train)
knn_predictions = knn.predict(X_test_scaled)

# K-Nearest Neighbours -- 5-fold cross-validation on the training split.
knn_cv = KNeighborsClassifier()
scores = cross_validate(
    knn_cv, X_train_scaled, Y_train,
    cv=5,
    scoring=['accuracy', 'precision', 'recall', 'f1', 'roc_auc'],
)

# Average every metric across the folds.
accuracy_mean = np.mean(scores['test_accuracy'])
precision_mean = np.mean(scores['test_precision'])
recall_mean = np.mean(scores['test_recall'])
f1_mean = np.mean(scores['test_f1'])
auc_mean = np.mean(scores['test_roc_auc'])

knn_summary = {
    "Accuracy": accuracy_mean,
    "Precision": precision_mean,
    "Recall": recall_mean,
    "F1": f1_mean,
    "AUC": auc_mean,
}
for knn_metric, knn_value in knn_summary.items():
    print(f"KNN (CV) {knn_metric}:", knn_value)
