<a href="https://colab.research.google.com/github/RickyDoan/DL-Tensor-Flow-Churn-Prediction/blob/main/ANN_Churn_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [None]:
# List the GPU devices visible to TensorFlow (an empty list means none were found).
tf.config.list_physical_devices(device_type='GPU')

In [None]:
import kagglehub

# Download the latest version of the Telco customer-churn dataset;
# `path` is the value returned by kagglehub for the downloaded files.
path = kagglehub.dataset_download(handle="blastchar/telco-customer-churn")

print("Path to dataset files:", path)

In [None]:
# Display the dataset location returned by the download step.
path

In [None]:
import os

# Locate the CSV inside the downloaded dataset folder and load it into `df`.
filepath = os.path.join(path, "WA_Fn-UseC_-Telco-Customer-Churn.csv")
df = pd.read_csv(filepath_or_buffer=filepath)

# Report (rows, columns), then preview the first rows.
print(df.shape)
df.head()

# Cleaning Data

In [None]:
# Drop the customer identifier: it is a row label, not a predictive feature.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps
# this cell safe to re-run: a second execution no longer raises KeyError
# because the column is already gone.
df = df.drop(columns='customerID', errors='ignore')

In [None]:
# Count missing values per column (`isna` is the same method as `isnull`).
df.isna().sum()

In [None]:
# Count fully duplicated rows; every occurrence after the first is flagged.
df.duplicated(keep='first').sum()

In [None]:
# Keep the first occurrence of each duplicated row and discard the repeats.
df = df.drop_duplicates(keep='first')

In [None]:
# Re-count duplicated rows to verify the de-duplication.
df.duplicated().sum()

In [None]:
# Preview the first rows of the frame at this stage.
df.head()

In [None]:
# Print column dtypes and non-null counts to spot columns stored with the wrong type.
df.info()

In [None]:
# Show the rows whose TotalCharges is a single blank space rather than a number.
df.loc[df['TotalCharges'].eq(" ")]

In [None]:
# (rows, columns) of the subset whose TotalCharges is a blank space.
df.loc[df['TotalCharges'].eq(" ")].shape

In [None]:
# Keep only rows with a real TotalCharges value. The explicit .copy() makes
# `df` an independent frame, so the column assignments that follow operate on
# it directly instead of on a slice (which triggers SettingWithCopyWarning).
df = df[df['TotalCharges'] != " "].copy()

In [None]:
# Parse TotalCharges from text into numbers; errors='raise' (the default)
# makes any remaining unparseable value fail loudly.
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='raise')


In [None]:
# Confirm the column dtype after the numeric conversion.
df.dtypes['TotalCharges']

In [None]:
# (rows, columns) of the frame at this point in the notebook.
df.shape

In [None]:
# Bar-style histogram of the target classes, coloured by class, to show the class balance.
sns.histplot(data=df, x='Churn', hue='Churn')

In [None]:
# Compare the tenure distribution of customers who churned with those who stayed.
tenure_churn_yes = df.loc[df['Churn'] == 'Yes', 'tenure']
tenure_churn_no = df.loc[df['Churn'] == 'No', 'tenure']

plt.hist(
    [tenure_churn_yes, tenure_churn_no],
    color=['red', 'green'],
    label=['Yes', 'No'],
)
plt.xlabel('Tenure')
plt.ylabel('Count')
plt.title('Churn vs Tenure')
plt.legend()
plt.show()


In [None]:
# Compare the MonthlyCharges distribution of churned vs. retained customers.
churn_yes_MonthlyCharges = df.loc[df['Churn'] == 'Yes', 'MonthlyCharges']
churn_no_MonthlyCharges = df.loc[df['Churn'] == 'No', 'MonthlyCharges']

plt.hist(
    [churn_yes_MonthlyCharges, churn_no_MonthlyCharges],
    color=['red', 'green'],
    label=['Yes', 'No'],
)
plt.xlabel('MonthlyCharges')
plt.ylabel('Count')
plt.title('Churn vs MonthlyCharges')
plt.legend()
plt.show()

# Check category values (spelling and inconsistent labels)

In [None]:
# Print the distinct values of every text column to spot inconsistent labels.
object_cols = df.select_dtypes(include='object').columns
for col in object_cols:
    print(f'{col}: {df[col].unique()}')

In [None]:
# Collapse the "no service" variants into plain 'No' so these columns become
# binary Yes/No.
#
# The result is assigned back to the frame instead of calling
# `df[col].replace(..., inplace=True)`: that chained in-place form mutates a
# temporary Series, is deprecated in recent pandas, and has no effect on `df`
# when Copy-on-Write is enabled.
df['MultipleLines'] = df['MultipleLines'].replace('No phone service', 'No')

internet_addon_cols = [
    'OnlineSecurity',
    'OnlineBackup',
    'DeviceProtection',
    'TechSupport',
    'StreamingTV',
    'StreamingMovies',
]
df[internet_addon_cols] = df[internet_addon_cols].replace('No internet service', 'No')


In [None]:
# Print the distinct values of every object-typed column again, to verify the
# label clean-up.
for col in df.select_dtypes(include=('object')):
    print(f'{col}: {df[col].unique()}')

In [None]:
# Number of distinct values in `gender` (missing values counted, as with unique()).
df['gender'].nunique(dropna=False)

In [None]:
# Collect the object-typed columns that hold exactly two distinct values.
holder_column = [
    name
    for name in df.select_dtypes(include='object').columns
    if df[name].nunique(dropna=False) == 2
]

holder_column


In [None]:
# Columns that are mapped from 'Yes'/'No' to 1/0 in the encoding step.
list_encoder = [
    'Partner', 'Dependents', 'PhoneService', 'MultipleLines',
    'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
    'StreamingTV', 'StreamingMovies', 'PaperlessBilling', 'Churn',
]

In [None]:
# Map 'Yes' -> 1 and 'No' -> 0 in every listed column.
# Any other value becomes NaN because Series.map leaves unmapped keys empty.
yes_no_codes = {'Yes': 1, 'No': 0}
for col in list_encoder:
    encoded = df[col].map(yes_no_codes)
    df[col] = encoded

In [None]:
# Print the distinct values of the columns that are still object-typed,
# i.e. the ones not yet converted to numbers.
for col in df.select_dtypes(include=('object')):
    print(f'{col}: {df[col].unique()}')

In [None]:
# One-hot encode the multi-class categorical columns as 0/1 integers.
# drop_first=True drops one level per column so the dummies are not perfectly collinear.
list_categories = ['gender', 'InternetService', 'Contract', 'PaymentMethod']

df = pd.get_dummies(
    data=df,
    columns=list_categories,
    dtype=int,
    drop_first=True,
)

In [None]:
# Preview the frame after one-hot encoding.
df.head()

In [None]:
# (rows, columns) after one-hot encoding.
df.shape

In [None]:
# Print the distinct values of every column (iterating a DataFrame yields its column labels).
for col in df:
    print(f'{col}: {df[col].unique()}')

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale the continuous columns with min-max scaling so they are on a
# comparable scale with the 0/1 encoded features.
# NOTE(review): the scaler is fitted on the full frame before the train/test
# split, so test-set minima/maxima influence the preprocessing — consider
# fitting on the training split only.
list_numerical = ['tenure', 'MonthlyCharges', 'TotalCharges']
scaler = MinMaxScaler().fit(df[list_numerical])
df[list_numerical] = scaler.transform(df[list_numerical])

In [None]:
# Print the distinct values of every column again, to inspect the scaled values.
for col in df.columns:
    print(f'{col}: {df[col].unique()}')

In [None]:
# Check multicollinearity with the Variance Inflation Factor (VIF)

In [None]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

def check_vif(data):
    """Return a table with the variance inflation factor of each column.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature columns only; every column is passed to
        ``variance_inflation_factor`` as part of one numeric matrix.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``features`` (the column names of ``data``) and ``VIF``
        (the factor computed for the column at the same position).

    Notes
    -----
    NOTE(review): no intercept column is added before computing the factors;
    the matrix is used exactly as given — confirm this is intended.
    """
    design_matrix = data.values
    vif_scores = [
        variance_inflation_factor(design_matrix, position)
        for position in range(data.shape[1])
    ]
    return pd.DataFrame({"features": data.columns, "VIF": vif_scores})

# VIF of every feature; the target column is excluded.
df_vif = check_vif(df.drop(columns='Churn'))
df_vif

In [None]:
# Names of the features with VIF above 20, highest first.
df_vif.loc[df_vif['VIF'] > 20].sort_values('VIF', ascending=False)['features'].tolist()

In [None]:
# Remove every feature whose VIF exceeded 20 in the current `df_vif` table.
# NOTE(review): all flagged features are dropped at once; removing one can
# lower the VIF of the others, so an iterative drop-and-recompute may keep
# more columns — confirm this is intended.
features_to_drop = df_vif.loc[df_vif['VIF'] > 20, 'features'].tolist()
df = df.drop(columns=features_to_drop)


In [None]:
# Recompute the VIF table on the remaining features (target excluded).
df_vif = check_vif(df.drop(columns='Churn'))
df_vif

In [None]:
# Names of the features with VIF above 10, highest first.
df_vif.loc[df_vif['VIF'] > 10].sort_values('VIF', ascending=False)['features'].tolist()

In [None]:
# Remove every feature whose VIF exceeded 10 in the current `df_vif` table.
features_to_drop = df_vif.loc[df_vif['VIF'] > 10, 'features'].tolist()
df = df.drop(columns=features_to_drop)

In [None]:
# Final VIF table for the features that survived both pruning passes.
df_vif = check_vif(df.drop(columns='Churn'))
df_vif

In [None]:
# Report (rows, columns) of the modelling frame, then preview its first rows.
print(df.shape)
df.head()

# Training the model

In [None]:
# Separate the target column (y) from the feature columns (X).
y = df['Churn']
X = df.drop(columns='Churn')

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. The split is stratified on the target
# so that the train and test sets keep the same churn ratio; with an imbalanced
# target, a plain random split can shift that ratio between the two sets.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# Seed TensorFlow so weight initialisation is repeatable between runs.
tf.random.set_seed(42)

# Take the input width from the training data instead of hard-coding 17:
# the number of columns depends on the VIF pruning, and a stale constant
# makes `fit` fail with a shape mismatch.
n_features = X_train.shape[1]

# Fully connected binary classifier: three ReLU hidden layers and one sigmoid
# output unit.
model = keras.Sequential([
    keras.Input(shape=(n_features,)),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
# Train on the first GPU when one is visible, otherwise on the CPU.
# Pinning '/GPU:0' unconditionally depends on a GPU being present (or on
# TensorFlow's soft device placement) for the cell to run.
device_name = '/GPU:0' if tf.config.list_physical_devices('GPU') else '/CPU:0'

with tf.device(device_name):
    # Keep the History object so loss/accuracy curves can be inspected later.
    history = model.fit(X_train, y_train, epochs=100)

In [None]:
# Evaluate on the held-out test split; returns the loss followed by the compiled metrics.
model.evaluate(x=X_test, y=y_test)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Model outputs for the test split (sigmoid activations); peek at the first five.
y_pred = model.predict(x=X_test)
y_pred[:5]

In [None]:
# Turn the model outputs into hard 0/1 labels with a 0.5 cut-off.
# Vectorised replacement for the element-by-element Python loop; `ravel()`
# flattens the prediction array to 1-D and `tolist()` keeps `y_predict` a
# plain list of ints, as before.
y_predict = (y_pred.ravel() >= 0.5).astype(int).tolist()

In [None]:
# First ten predicted labels.
y_predict[:10]

In [None]:
# First ten true labels, as a NumPy array for side-by-side comparison with the predictions.
y_test.iloc[:10].to_numpy()

In [None]:
# Per-class precision, recall and F1 on the test split.
report = classification_report(y_true=y_test, y_pred=y_predict)
print(report)

In [None]:
# Confusion matrix on the test split. scikit-learn puts the true labels on the
# rows and the predicted labels on the columns; the axes are labelled so the
# heatmap can be read without knowing that convention.
cm = confusion_matrix(y_test, y_predict)
ax = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
ax.set(xlabel='Predicted label', ylabel='True label', title='Confusion matrix')
plt.show()