In [None]:
import numpy as np
import tensorflow as tf
import pandas as pd
import seaborn as sn
from tensorflow import keras
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
import warnings 
# Ignore every warning category for the rest of the session.
# NOTE(review): this also hides any pandas / TensorFlow warnings raised by later cells.
warnings.filterwarnings(action='ignore')

# Step-1: Data Gathering

In [None]:
# Load the churn CSV (path is relative to the working directory) and preview the first three rows.
df=pd.read_csv('Telco-Customer-Churn.csv')
df.head(3)

In [None]:
# (rows, columns) of the frame as loaded.
df.shape

In [None]:
# Three randomly chosen rows; no random_state is passed, so the selection differs between runs.
df.sample(3)

# Step-2: Data Preprocessing

In [None]:
# Drop the customerID column (presumably an identifier rather than a predictive feature).
# Reassigning instead of using inplace=True, together with errors='ignore',
# keeps this cell re-runnable: a second execution is a no-op, not a KeyError.
df = df.drop(columns='customerID', errors='ignore')

In [None]:
# Spot-check three random rows after the customerID column has been dropped.
df.sample(3)

In [None]:
# Labels of the remaining columns.
df.columns

In [None]:
df.dtypes # name and dtype of every column

In [None]:
# Per-column dtype and non-null count, plus overall memory usage.
df.info()

In [None]:
# Two equivalent spellings of the same column access. Only the last expression
# of a cell is displayed, so the result of the first line is discarded.
df.TotalCharges.values # or
df['TotalCharges'].values

In [None]:
# Preview only: the converted Series is displayed but not assigned, so df is unchanged.
pd.to_numeric(df['TotalCharges'], errors='coerce') # values that cannot be parsed become NaN

In [None]:
# dtype of the column as currently stored in df.
df['TotalCharges'].dtype

In [None]:
# (rows, columns) after customerID was dropped.
df.shape

In [None]:
# TotalCharges value of the row labelled 753, fetched with a single .loc lookup.
df.loc[753, 'TotalCharges']

In [None]:
# Rows whose TotalCharges cannot be parsed as a number (coerced to NaN, then selected with isnull()).
df[pd.to_numeric(df['TotalCharges'], errors='coerce').isnull()]

In [None]:
# iloc selects by integer position: this returns the row at position 488 as a Series.
df.iloc[488]

In [None]:
# TotalCharges of the row at integer position 488.
df['TotalCharges'].iloc[488]

In [None]:
# Keep only the rows whose TotalCharges is not the single-space placeholder.
# .copy() makes df1 an independent frame, so the column assignments made in
# later cells modify df1 itself rather than a temporary slice of df
# (avoids SettingWithCopyWarning and writes that silently go nowhere).
df1 = df[df['TotalCharges'] != ' '].copy()
df1.shape

In [None]:
# Parse TotalCharges into a numeric column (strict: any unparseable value raises).
df1['TotalCharges'] = pd.to_numeric(df1['TotalCharges'])

In [None]:
# Confirm the dtype of TotalCharges after the numeric conversion.
df1['TotalCharges'].dtype

In [None]:
# dtype of every column in the cleaned frame.
df1.dtypes

In [None]:
# Customers who did not leave (Churn == 'No').
df1.loc[df1['Churn'] == 'No']

In [None]:
# Customers who left (Churn == 'Yes').
df1.loc[df1['Churn'] == 'Yes']

# Step-3: Data Visualization

| Histogram on Tenure

In [None]:
# Tenure values of the customers who did not leave.
tenure_churn_no = df1.loc[df1['Churn'] == 'No', 'tenure']

In [None]:
# Tenure values of the customers who left.
tenure_churn_yes = df1.loc[df1['Churn'] == 'Yes', 'tenure']


In [None]:
# Side-by-side tenure histograms for customers who churned vs. those who stayed.
plt.figure(figsize=(10,6))
sn.set(style='white')
plt.grid()
plt.hist(
    [tenure_churn_yes, tenure_churn_no],
    color=['green', 'red'],
    label=['Churn: Yes', 'Churn: No'],  # legend text was misspelled 'Chrun'
)
plt.legend()
plt.title('Customer Churn prediction visualization')  # was 'visualizaton'
plt.xlabel('Tenure')
plt.ylabel('Number of customers')
plt.show()

| Histogram on MonthlyCharges

In [None]:
# MonthlyCharges split by churn outcome.
mc_churn_no = df1.loc[df1['Churn'] == 'No', 'MonthlyCharges']
mc_churn_yes = df1.loc[df1['Churn'] == 'Yes', 'MonthlyCharges']

In [None]:
# Side-by-side MonthlyCharges histograms for customers who churned vs. those who stayed.
plt.figure(figsize=(10,6))
sn.set(style='white')
plt.grid()
plt.hist(
    [mc_churn_yes, mc_churn_no],
    color=['Yellow', 'Blue'],
    label=['Churn: Yes', 'Churn: No'],  # legend text was misspelled 'Chrun'
)
plt.title('Customer Churn prediction visualization')  # was 'visualizaton'
plt.xlabel('MonthlyCharges')
plt.ylabel('Number of customers')
plt.legend()
plt.show()

# Step-4: Data Preprocessing

In [None]:
# Print each column label of df on its own line.
for name in df.columns:
    print(name)

In [None]:
# Print every column label followed by the distinct values found in that column.
for column, values in df.items():
    print(f'{column} : {values.unique()}')

In [None]:
def unique_column_values(df):
    """Print the distinct values of every object-dtype column of ``df``.

    One line is written per matching column, in column order, formatted as
    ``<column> : <array of unique values>``. Columns of any other dtype are
    skipped. Nothing is returned.
    """
    for name, series in df.items():
        if series.dtype != 'object':
            continue
        print(f'{name} : {series.unique()}')

In [None]:
# List the distinct values of the object-dtype columns of df (the frame before blank-row removal).
unique_column_values(df)

In [None]:
# Collapse the two "service not available" answers into a plain 'No', so the
# affected columns only contain 'Yes' / 'No'.
# The result is assigned back instead of using inplace=True: in-place edits on a
# frame derived from another frame can warn or be lost, and reassignment makes
# the data flow explicit.
df1 = df1.replace(['No internet service', 'No phone service'], 'No')


In [None]:
def unique_column_values(df1):
    """Print the distinct values of every object-dtype column of ``df1``.

    Output format is ``<column> : <array of unique values>``, one line per
    matching column; nothing is returned.

    NOTE(review): a function with this name and the same behaviour is already
    defined in an earlier cell; this definition replaces it and differs only
    in the parameter name.
    """
    object_columns = [name for name in df1 if df1[name].dtypes == 'object']
    for name in object_columns:
        print(f'{name} : {df1[name].unique()}')

In [None]:
# Re-check the object-dtype columns of df1 after the 'No ... service' values were replaced.
unique_column_values(df1)

In [None]:
# Columns that a later cell encodes as Yes -> 1 / No -> 0.
yes_no_columns = [
    'Partner',
    'Dependents',
    'PhoneService',
    'MultipleLines',
    'OnlineSecurity',
    'OnlineBackup',
    'DeviceProtection',
    'TechSupport',
    'StreamingTV',
    'StreamingMovies',
    'PaperlessBilling',
    'Churn',
]


In [None]:
# Encode the Yes/No columns as 1/0.
# The original called .replace(..., inplace=True) on df1[col]; that mutates a
# temporary Series and, under pandas Copy-on-Write, never reaches df1. Assigning
# the mapped column back always updates the frame.
# dict.get(value, value) leaves any other value untouched, so re-running the
# cell on already-encoded data is a no-op.
yes_no_map = {'Yes': 1, 'No': 0}
for col in yes_no_columns:
    df1[col] = df1[col].map(lambda value: yes_no_map.get(value, value))

In [None]:
# Encode gender as Female -> 1, Male -> 0.
# The mapped column is assigned back because an in-place replace on df1['gender']
# acts on a temporary Series and is not written to df1 under pandas Copy-on-Write.
# Values outside the mapping pass through unchanged, so the cell can be re-run safely.
gender_map = {'Female': 1, 'Male': 0}
df1['gender'] = df1['gender'].map(lambda value: gender_map.get(value, value))

| One Hot Encoding

In [None]:
# One-hot encode the multi-valued categorical columns.
# dtype=int forces 0/1 integer indicators: recent pandas versions emit bool
# indicator columns by default, and a frame mixing bool with numeric columns
# converts to an object array that Keras cannot turn into a tensor.
df2 = pd.get_dummies(
    data=df1,
    columns=['InternetService', 'Contract', 'PaymentMethod'],
    dtype=int,
)
df2.head()

In [None]:
# dtype of every column after one-hot encoding.
df2.dtypes

In [None]:
# Column labels after one-hot encoding, including the generated indicator columns.
df2.columns

# Step-5: Scaling

In [None]:
# Columns handed to the scaler in the following cells.
columns_to_scales = [
    'tenure',
    'MonthlyCharges',
    'TotalCharges',
]

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Scaler with default settings; it is fitted in the next cell.
scaler=MinMaxScaler()

In [None]:
# Fit the scaler on the selected columns and overwrite them in df2 with the scaled values.
# NOTE(review): the scaler is fitted on all rows before the train/test split below,
# so statistics from the test rows influence the training features (data leakage).
df2[columns_to_scales]=scaler.fit_transform(df2[columns_to_scales])
df2

# Step-6: Train and test split

In [None]:
# Feature matrix: every column of df2 except the Churn target.
x = df2.drop(columns='Churn')
x.head()

In [None]:
# Target vector: the Churn column.
y=df2['Churn']
y


In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; random_state=42 fixes the split between runs.
xtrain, xtest, ytrain,ytest=train_test_split(x,y,test_size=0.2,random_state=42)

In [None]:
# (rows, features) of the training split.
xtrain.shape

In [None]:
# (rows, features) of the test split.
xtest.shape

# Step-7: Building keras model 

In [None]:
# Small fully connected binary classifier: one hidden ReLU layer of 20 units
# followed by a single sigmoid output unit.
# The input width is read from the training matrix instead of being hard-coded
# (it was 26), so the model still builds if the set of feature columns changes.
model = keras.Sequential([
    keras.layers.Dense(20, input_shape=(xtrain.shape[1],), activation='relu'),
    keras.layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Configure training: Adam optimiser, binary cross-entropy loss, accuracy as the reported metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train on the training split for 10 epochs.
model.fit(xtrain, ytrain, epochs=10)

In [None]:
# Loss and accuracy (the metric set in compile) on the held-out test split.
model.evaluate(xtest,ytest)

In [None]:
yp=model.predict(xtest) # model outputs (sigmoid activations) for the test features
yp[:10]

In [None]:
ytest[:10] # first ten true labels, to compare against the predictions in yp

In [None]:
# Turn the sigmoid outputs into hard labels: strictly greater than 0.5 -> 1, otherwise 0.
# Vectorised replacement for the element-by-element loop; the result is still a
# plain list of Python ints, one per test row.
y_pred = (yp.ravel() > 0.5).astype(int).tolist()

In [None]:
# NOTE(review): confusion_matrix is imported here but not used in the visible cells.
from sklearn.metrics import confusion_matrix, classification_report
print(classification_report(ytest,y_pred)) # text report comparing true and predicted labels

    Note: 
classification_report prints a statistical report of precision and recall | precision and recall describe the performance of the overall model

In [None]:
# First ten thresholded predictions.
y_pred[:10]

    | Confusion matrix

In [None]:
# Confusion matrix of true labels vs. thresholded predictions, returned as a TensorFlow tensor.
cm=tf.math.confusion_matrix(labels=ytest, predictions=y_pred)
cm

In [None]:

# Annotated heat-map of the confusion matrix (y axis = true class, x axis = predicted class).
plt.figure(figsize=(10,7))
sn.set(style='white')
sn.heatmap(cm, annot=True, fmt='d')
plt.title('Confusion matrix')
plt.xlabel('Predicted')
plt.ylabel('Truth')
plt.show()  # render the figure instead of echoing the Text(...) repr of the last call

In [None]:
# Show the confusion matrix again as a reference for the metric calculations below.
cm

In [None]:
# Accuracy (%) = correct predictions / all predictions.
# The counts are read from cm instead of being typed in by hand, so the figure
# stays correct when the model is retrained. Rows of cm are the true class and
# columns the predicted class: [[TN, FP], [FN, TP]].
# Scaling before rounding avoids float artefacts such as 56.99999999999999.
(tn, fp), (fn, tp) = cm.numpy().tolist()
round(100 * (tn + tp) / (tn + fp + fn + tp), 2)

In [None]:
# Precision for class 0 (%): of all customers predicted as "did not churn",
# the share that really did not churn = TN / (TN + FN).
# Counts come from cm ([[TN, FP], [FN, TP]], rows = truth, columns = predicted)
# rather than hand-copied numbers; NaN is shown if class 0 was never predicted.
(tn, fp), (fn, tp) = cm.numpy().tolist()
round(100 * tn / (tn + fn), 2) if (tn + fn) else float('nan')

In [None]:
# Precision for class 1 (%): of all customers predicted as "churned",
# the share that really churned = TP / (TP + FP).
# Counts come from cm ([[TN, FP], [FN, TP]], rows = truth, columns = predicted)
# rather than hand-copied numbers; NaN is shown if class 1 was never predicted.
(tn, fp), (fn, tp) = cm.numpy().tolist()
round(100 * tp / (tp + fp), 2) if (tp + fp) else float('nan')

In [None]:
# Show the confusion matrix again as a reference for the recall calculations below.
cm

In [None]:
# Recall for class 0 (%): of all customers who truly did not churn,
# the share the model predicted as "did not churn" = TN / (TN + FP).
# Counts come from cm ([[TN, FP], [FN, TP]], rows = truth, columns = predicted)
# rather than hand-copied numbers; NaN is shown if the test set has no class-0 rows.
(tn, fp), (fn, tp) = cm.numpy().tolist()
round(100 * tn / (tn + fp), 2) if (tn + fp) else float('nan')

In [None]:
# Recall for class 1 (%): of all customers who truly churned,
# the share the model predicted as "churned" = TP / (TP + FN).
# Counts come from cm ([[TN, FP], [FN, TP]], rows = truth, columns = predicted)
# rather than hand-copied numbers; NaN is shown if the test set has no class-1 rows.
(tn, fp), (fn, tp) = cm.numpy().tolist()
round(100 * tp / (tp + fn), 2) if (tp + fn) else float('nan')

In [None]:
# Three random rows of the encoded and scaled frame.
df2.sample(3)

In [None]:
# Final dtypes of the modelling frame.
df2.dtypes