In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the Telco customer-churn CSV from the Kaggle input directory into a DataFrame.
telco_csv_path = "/kaggle/input/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv"
data = pd.read_csv(telco_csv_path)

In [None]:
# Show the freshly loaded DataFrame as the cell output.
data

In [None]:
# Remove the customer identifier column.
# Rebinding `data` (instead of inplace=True) together with errors="ignore" keeps
# this cell safe to re-run: a second execution no longer raises KeyError because
# the column is already gone.
data = data.drop(columns="customerID", errors="ignore")

In [None]:
# Per-column summary statistics, computed separately so they can be combined
# into a single overview table afterwards.
n_rows = data.shape[0]
n_uniques = data.nunique()   # number of distinct values in each column
types = data.dtypes          # dtype of each column
Null = data.isnull().sum()   # count of missing values in each column
ratio = Null / n_rows        # fraction of rows that are missing, per column

In [None]:
# Combine the per-column statistics into one overview table (one row per column of `data`).
summary_parts = {
    "types": types,
    "Null": Null,
    "Ratio": ratio,
    "n_uniques": n_uniques,
}
info = pd.DataFrame(summary_parts)

In [None]:
# Show the overview table (dtype, null count, null ratio, distinct values per column).
info

In [None]:
# Plot how the values of the "Churn" column are distributed.
churn_values = data["Churn"]
sns.histplot(churn_values)

In [None]:
# Names of all columns whose dtype is numeric.
numerical_columns = data.select_dtypes(include="number").columns

In [None]:
# Treat a numeric column as continuous when it has more than 10 distinct values.
con_columns = [
    name for name in numerical_columns
    if data[name].nunique() > 10
]

In [None]:
# Show which numeric columns were classified as continuous.
con_columns

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Names of all columns stored with the generic `object` dtype (typically text).
cat_columns = data.select_dtypes(include="object").columns

In [None]:
# Show the object-typed column names that will be encoded.
cat_columns

In [None]:
# Convert every object-typed column to numbers.
#
# A column that is numeric but was read as text (for example because a few cells
# are blank) must not be label-encoded: that would replace real magnitudes with
# arbitrary category codes. Such columns are parsed as numbers instead; only
# genuinely categorical columns go through LabelEncoder.
# NOTE(review): in the public Telco churn CSV this is expected to apply to
# "TotalCharges" — confirm against the `info` table above.
NUMERIC_SHARE_THRESHOLD = 0.95  # share of values that must parse for a column to count as numeric

for col in cat_columns:
    parsed = pd.to_numeric(data[col].astype(str).str.strip(), errors="coerce")
    if parsed.notna().mean() >= NUMERIC_SHARE_THRESHOLD:
        # Numeric column stored as text: keep the values, fill unparseable cells
        # with the column median so no NaN reaches the scaler or the network.
        data[col] = parsed.fillna(parsed.median())
    else:
        encoder = LabelEncoder()
        data[col] = encoder.fit_transform(data[col])

In [None]:
# Preview the first five rows after encoding (five is the default for head()).
data.head()

In [None]:
# Draw one boxplot per continuous column, side by side.
# The grid width follows len(con_columns); the previous hard-coded 1x2 grid
# raised ValueError as soon as a third continuous column was present.
n_plots = len(con_columns)
if n_plots:
    # squeeze=False keeps `axes` two-dimensional even when there is a single column.
    fig, axes = plt.subplots(1, n_plots, figsize=(12, 12), squeeze=False)
    for ax, col in zip(axes[0], con_columns):
        ax.boxplot(data[col])
        ax.set_title(f"{col} boxplot")
    plt.show()

In [None]:
# Count rows that are exact duplicates of an earlier row.
data.duplicated().sum()

In [None]:
# Keep only the first occurrence of each fully identical row.
data = data.drop_duplicates()

In [None]:
# Re-count duplicates to confirm none remain after the drop.
data.duplicated().sum()

In [None]:
# Separate the target column ("Churn") from the feature columns.
Y = data["Churn"]
X = data.drop(columns="Churn")

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Min-max scale every feature and rebuild a DataFrame that keeps the original
# column names and row index.
# NOTE(review): the scaler is fitted on every row of X, including rows that a
# later cell holds out for testing.
scaler = MinMaxScaler()
orig_columns, orig_index = X.columns, X.index
x_scaled = scaler.fit_transform(X)
x_scaled_df = pd.DataFrame(data=x_scaled, index=orig_index, columns=orig_columns)

In [None]:
# Show the scaled feature table.
x_scaled_df

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Carve out a stratified 10% hold-out set (X_test_inter / Y_test_inter); the
# remaining 90% stays in X / Y for model development.
#
# The target is re-read from `data` rather than from `Y`: this cell rebinds X
# and Y, so splitting from `Y` made a second execution fail with mismatched
# lengths. Reading from `data` / `x_scaled_df` keeps the cell re-runnable.
full_target = data.loc[x_scaled_df.index, "Churn"]
X, X_test_inter, Y, Y_test_inter = train_test_split(
    x_scaled_df,
    full_target,
    test_size=0.1,
    random_state=44,
    stratify=full_target,
)

# x_scaled_df was scaled with min/max taken over all rows, hold-out included.
# Re-fitting a min-max scaler on the training rows alone and applying it to both
# parts yields the same values as scaling the raw features with training-only
# statistics (min-max scaling is affine), so no hold-out information remains.
train_scaler = MinMaxScaler().fit(X)
X = pd.DataFrame(train_scaler.transform(X), columns=X.columns, index=X.index)
X_test_inter = pd.DataFrame(
    train_scaler.transform(X_test_inter),
    columns=X_test_inter.columns,
    index=X_test_inter.index,
)


In [None]:
# Class counts in the hold-out target.
Y_test_inter.value_counts()

In [None]:
# Class counts in the remaining (non-hold-out) target.
Y.value_counts()

In [None]:
pip install -U scikit-learn imbalanced-learn

In [None]:
from imblearn.over_sampling import RandomOverSampler

In [None]:
# Random over-sampler used to balance the classes.
# A fixed random_state (the same seed the splits use) makes the resampled rows
# reproducible from run to run.
sampler = RandomOverSampler(random_state=44)

In [None]:
# Over-sample the development data so both classes have equal counts.
resampled = sampler.fit_resample(X, Y)
X_resampler, Y_resampler = resampled

In [None]:
# Class counts after over-sampling.
Y_resampler.value_counts()

In [None]:
# Split BEFORE over-sampling. Splitting the already over-sampled data put copies
# of the same minority rows into both the training and the test part, which
# inflates every test metric. Here the test rows are set aside first and only
# the training portion is over-sampled.
X_train_raw, X_test, Y_train_raw, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=44, stratify=Y
)
X_train, Y_train = sampler.fit_resample(X_train_raw, Y_train_raw)

# Shuffle the over-sampled training rows: the resampler may append the duplicated
# rows at the end, and Keras' `validation_split` takes the last fraction of the
# data as given, so without shuffling the validation part could be dominated by
# duplicated minority rows.
shuffle_order = np.random.default_rng(44).permutation(len(X_train))
X_train = X_train.iloc[shuffle_order]
Y_train = Y_train.iloc[shuffle_order]

In [None]:
import tensorflow as tf
import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,BatchNormalization,Input
from tensorflow.keras.models import Model

In [None]:
# Fix the random seeds before any layer is created so weight initialisation and
# dropout are reproducible across runs (the splits already use seed 44).
keras.utils.set_random_seed(44)

# Fully connected binary classifier built with the functional API:
# input -> Dense(128) -> Dense(64) -> Dense(32) -> BatchNorm -> Dropout(0.2) -> sigmoid.
input_layer = Input(shape = (X.shape[1] , ))  # one input unit per feature column
h_layer1 = Dense(128 , activation = 'relu')(input_layer)
h_layer2 = Dense(64 , activation = 'relu')(h_layer1)
h_layer3 = Dense(32 , activation = 'relu')(h_layer2)
norm = BatchNormalization()(h_layer3)
drop = Dropout(0.2)(norm)
# A single sigmoid unit outputs a value in (0, 1) for the binary target.
output_layer = Dense(1 , activation = 'sigmoid')(drop)

In [None]:
# Wrap the layer graph (input tensor -> output tensor) in a trainable Model.
model = Model(input_layer, output_layer)

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the tracked metric.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Stop training once validation accuracy has not improved for 10 epochs and
# roll the model back to the best weights seen.
early_stopping = EarlyStopping(
    monitor="val_accuracy",
    patience=10,
    restore_best_weights=True,
)

In [None]:
# Train for up to 100 epochs in batches of 32, reserving 20% of the training
# data for validation; `history` records the per-epoch metrics.
history = model.fit(
    X_train,
    Y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
)

In [None]:
# Evaluate on the test split. The model was compiled with metrics=["accuracy"],
# so the second value returned is accuracy; the message previously mislabelled
# it as "r2_score".
loss, accuracy = model.evaluate(X_test, Y_test)
print(f"loss for test data is {loss}")
print(f"accuracy for test data is {accuracy}")

In [None]:
# Accuracy per epoch: training curve first, validation curve second
# (the legend labels below follow that order).
for curve_key in ("accuracy", "val_accuracy"):
    plt.plot(history.history[curve_key])
plt.title("accuracy for training and validation")
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.legend(["training", "validation"], loc="upper left")
plt.grid()
plt.show()

In [None]:
# Loss per epoch: training curve first, validation curve second
# (the legend labels below follow that order).
for curve_key in ("loss", "val_loss"):
    plt.plot(history.history[curve_key])
plt.title("loss for training and validation")
plt.xlabel("epochs")
plt.ylabel("loss")
plt.legend(["training", "validation"], loc="upper left")
plt.grid(True)
plt.show()

In [None]:
# Model outputs (sigmoid activations) for the test split.
y_pred = model.predict(x=X_test)

In [None]:
# Convert the predictions to a plain (nested) Python list.
# np.asarray makes the cell re-runnable: calling .tolist() directly raised
# AttributeError once y_pred had already become a list.
y_pred = np.asarray(y_pred).tolist()

In [None]:
# Round each predicted value to the nearest integer (0 decimals) to get class labels.
y_pred = np.round(y_pred, decimals=0)

In [None]:
# Convert the rounded predictions back to a plain (nested) Python list.
# np.asarray makes the cell re-runnable: calling .tolist() directly raised
# AttributeError once y_pred had already become a list.
y_pred = np.asarray(y_pred).tolist()

In [None]:
# Preview the first 20 predictions only; a plain list is printed in full, so
# displaying all of y_pred floods the output with one entry per test row.
y_pred[:20]

In [None]:
# Side-by-side view of the first 20 true labels and predictions for the test split.
# y_pred holds one-element lists (one per row); np.ravel flattens them so the
# "y_pred" column contains scalars, matching how the hold-out comparison further
# down is built. .iloc makes the positional slice of Y_test explicit.
df = pd.DataFrame({
    "Y_test": Y_test.iloc[:20],
    "y_pred": np.ravel(y_pred[:20]),
})

In [None]:
# Show the true-vs-predicted comparison for the test split.
df

In [None]:
# Model outputs (sigmoid activations) for the 10% hold-out split.
y_pred_inter = model.predict(x=X_test_inter)

In [None]:
# Round each hold-out prediction to the nearest integer (0 decimals) to get class labels.
y_pred_inter = np.round(y_pred_inter, decimals=0)

In [None]:
# Side-by-side view of the first 20 true labels and predictions for the hold-out split.
# Note: this rebinds `df`, replacing the test-split comparison built earlier.
holdout_truth = Y_test_inter[0:20]
holdout_pred = y_pred_inter[0:20].flatten()  # collapse the (n, 1) predictions to 1-D
df = pd.DataFrame({"Y_test": holdout_truth, "y_pred": holdout_pred})

In [None]:
# Show the true-vs-predicted comparison for the hold-out split.
df

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Per-class precision / recall / F1 on the test split.
test_report = classification_report(Y_test, y_pred)
print(test_report)

In [None]:
# Per-class precision / recall / F1 on the 10% hold-out split.
holdout_report = classification_report(Y_test_inter, y_pred_inter)
print(holdout_report)