In [None]:
import pandas as pd
import numpy as np
import missingno as msno
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')


In [None]:
# %%
# Load the raw Telco customer-churn CSV.
# The path uses a forward slash: in a normal string literal the "\W" of a
# backslash-separated path is an invalid escape sequence, and a backslash is
# not a directory separator outside Windows. A forward slash works on both.
data = pd.read_csv("Data/WA_Fn-UseC_-Telco-Customer-Churn.csv")
data.head()

# %%
# (rows, columns) of the raw frame
data.shape

# %%
# Missing values per column, as pandas detects them
data.isnull().sum()

# %%
# Column dtypes and non-null counts
data.info()

# %%
# Visual overview of missingness across the frame
msno.matrix(data)


In [None]:
# Working copy of the raw frame without the customerID column.
df = data.drop(columns=['customerID'])


In [None]:
# Convert TotalCharges to a numeric dtype; entries that cannot be parsed are
# coerced to NaN (errors='coerce') and therefore show up as missing below.
total_charges_numeric = pd.to_numeric(df['TotalCharges'], errors='coerce')
df['TotalCharges'] = total_charges_numeric
df.isna().sum()


In [None]:
# Rows whose TotalCharges is NaN after the numeric conversion.
df.loc[df['TotalCharges'].isna()]


In [None]:

# Index labels of the rows with zero tenure.
zero_tenure_rows = df[df['tenure'] == 0].index
# Remove those rows from df in place.
df.drop(labels=zero_tenure_rows, axis=0, inplace=True)
# Sanity check: this index should now be empty.
df[df['tenure'] == 0].index



In [None]:
# Impute any remaining missing TotalCharges with the column mean.
# fillna returns a new object, so the result is assigned back to the column;
# restricting it to TotalCharges keeps that mean out of every other column.
df['TotalCharges'] = df['TotalCharges'].fillna(df['TotalCharges'].mean())
# Confirm the per-column missing counts after imputation.
df.isnull().sum()


In [None]:
# Numeric columns to summarise.
numerical_cols = [
    'tenure',
    'MonthlyCharges',
    'TotalCharges',
]
# Count / mean / std / quartiles for each of them.
df.loc[:, numerical_cols].describe()


In [None]:
# Open a square figure for the churn / gender pie chart.
plt.figure(figsize=(6, 6))

# Churn split (passed to the radius=10 pie in the plotting cell).
# NOTE(review): these counts, and sizes_gender below, are hard-coded rather
# than derived from df -- confirm they still match the data after cleaning.
labels = ["Churn: Yes", "Churn:No"]
values = [1869, 5163]
colors = ['#ff6666', '#66b3ff']
explode = (0.3, 0.3)

# Gender split (passed to the radius=7 pie); presumably F/M within
# "Yes" followed by F/M within "No", since each pair sums to the value above.
labels_gender = ["F", "M"] * 2
sizes_gender = [939, 930, 2544, 2619]
colors_gender = ['#c2c2f0', '#ffb3e6'] * 2
explode_gender = (0.1,) * 4

# Shared text styling for both pies.
textprops = dict(fontsize=15)

In [None]:
#Plot
# Size the current figure here, in the cell that actually draws. Under
# Jupyter's inline backend a figure opened in an earlier cell is rendered and
# closed with that cell, so a figsize set there does not carry over.
fig = plt.gcf()
fig.set_size_inches(6, 6)

# Larger pie (radius 10): overall churn split with percentage labels.
plt.pie(
    values,
    labels=labels,
    autopct='%1.1f%%',
    pctdistance=1.08,
    labeldistance=0.8,
    colors=colors,
    startangle=90,
    frame=True,
    explode=explode,
    radius=10,
    textprops=textprops,
    counterclock=True,
)
# Smaller pie (radius 7) drawn on the same axes: gender split.
plt.pie(
    sizes_gender,
    labels=labels_gender,
    colors=colors_gender,
    startangle=90,
    explode=explode_gender,
    radius=7,
    textprops=textprops,
    counterclock=True,
)
#Draw circle
# White disc of radius 5 at the centre turns the nested pies into a donut.
centre_circle = plt.Circle((0, 0), 5, color='black', fc='white', linewidth=0)
fig.gca().add_artist(centre_circle)

plt.title('Churn Distribution w.r.t Gender: Male(M), Female(F)', fontsize=15, y=1.1)

# show plot

plt.axis('equal')
plt.tight_layout()
plt.show()


In [None]:
sns.set_context("paper", font_scale=1.1)

# Filled KDE of MonthlyCharges for each churn outcome, drawn on shared axes.
# `fill=True` is the current spelling of the deprecated `shade=True`; the
# deprecation warning was invisible here because warnings are globally ignored.
ax = sns.kdeplot(df.MonthlyCharges[df["Churn"] == 'No'],
                 color="Red", fill=True)
ax = sns.kdeplot(df.MonthlyCharges[df["Churn"] == 'Yes'],
                 ax=ax, color="Blue", fill=True)

# Legend entries follow drawing order: 'No' first, then 'Yes'.
ax.legend(["Not Churn", "Churn"], loc='upper right')
ax.set_ylabel('Density')
ax.set_xlabel('Monthly Charges')
ax.set_title('Distribution of monthly charges by churn');



In [None]:
# Tenure values split by churn outcome.
churned = df.loc[df["Churn"] == 'Yes', "tenure"]
not_churned = df.loc[df["Churn"] == 'No', "tenure"]

# Overlay the two semi-transparent histograms on one figure
# (non-churned first, churned drawn on top).
plt.figure(figsize=(10, 6))
for tenure_values, legend_label, bar_color in (
    (not_churned, 'Not Churned', 'blue'),
    (churned, 'Churned', 'red'),
):
    plt.hist(tenure_values, bins=30, alpha=0.5, label=legend_label, color=bar_color)

plt.title('Distribution of Tenure by Churn Status')
plt.xlabel('Tenure (months)')
plt.ylabel('Frequency')
plt.legend(loc='upper right')
plt.show()

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np

# Install keras-tuner
%pip install keras-tuner

from keras_tuner.tuners import RandomSearch

# Define the model with hyperparameters
def build_model(hp):
    """Build and compile a small binary classifier for the tuner.

    Args:
        hp: hyperparameter container exposing ``Int`` and ``Choice``; it is
            asked for ``units1``, ``units2`` (10-50 in steps of 10) and
            ``learning_rate`` (1e-2, 1e-3 or 1e-4).

    Returns:
        A compiled ``keras.Sequential`` model: two ReLU dense layers followed
        by a single sigmoid unit, expecting 19 input features.
    """
    # Register the tunable values first, in the same order as before.
    first_layer_units = hp.Int('units1', min_value=10, max_value=50, step=10)
    second_layer_units = hp.Int('units2', min_value=10, max_value=50, step=10)
    # Tune the learning rate for the optimizer
    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    network = keras.Sequential([
        layers.Dense(units=first_layer_units, input_shape=(19,), activation='relu'),
        layers.Dense(units=second_layer_units, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ])

    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    network.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network

# Initialize the tuner
# A fixed seed makes the random search repeatable, matching the
# random_state=42 convention used elsewhere in this notebook.
# NOTE(review): trials already stored under my_dir/tuning_example are
# presumably reused on re-run; pass overwrite=True for a fresh search -- confirm.
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=3,
    seed=42,
    directory='my_dir',
    project_name='tuning_example',
)

# Run the hyperparameter search
# Validate on a slice held out from the training data rather than on
# (X_test, y_test). Selecting hyperparameters on the test set and then
# reporting metrics on that same set gives optimistically biased results.
tuner.search(X_train, y_train, epochs=10, validation_split=0.2)

# Retrieve the best model
best_model = tuner.get_best_models(num_models=1)[0]

# Print the evaluation results for the best model
def print_evaluated_results(model, X_train, y_train, X_test, y_test):
    """Evaluate ``model`` on two datasets and print accuracy and loss for each.

    Args:
        model: object whose ``evaluate(X, y)`` returns an indexable result
            with the loss at position 0 and the accuracy at position 1.
        X_train, y_train: inputs and targets reported as the "Training set".
        X_test, y_test: inputs and targets reported as the "Validation set".

    Returns:
        None. The report is written to stdout.
    """
    # Run both evaluations before printing anything.
    train_metrics = model.evaluate(X_train, y_train)
    held_out_metrics = model.evaluate(X_test, y_test)

    train_loss, train_accuracy = train_metrics[0], train_metrics[1]
    held_out_loss, held_out_accuracy = held_out_metrics[0], held_out_metrics[1]

    print(f"Model performance for Training set\n- Accuracy: {train_accuracy}\n- Loss: {train_loss}")
    print("----------------------------------")
    print(f"Model performance for Validation set\n- Accuracy: {held_out_accuracy}\n- Loss: {held_out_loss}")

# Accuracy / loss of the tuned model on both splits.
print_evaluated_results(best_model, X_train, y_train, X_test, y_test)

# Predict on the test set
# Rounding the sigmoid outputs to the nearest integer gives 0/1 labels.
y_pred = best_model.predict(X_test)
y_pred_classes = np.rint(y_pred).astype(int)

# Generate and print the confusion matrix
cm = confusion_matrix(y_test, y_pred_classes)
print("Confusion Matrix:\n", cm)

# Print classification report
report = classification_report(y_test, y_pred_classes)
print("Classification Report:\n", report)

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np

# Predict on the test set
# Same steps as the end of the tuning cell: outputs rounded to 0/1 labels.
y_pred = best_model.predict(X_test)
y_pred_classes = np.round(y_pred, 0).astype(int)

# Generate and print the confusion matrix
cm = confusion_matrix(y_test, y_pred_classes)
print("Confusion Matrix:\n", cm)

# Print classification report
report_text = classification_report(y_test, y_pred_classes)
print("Classification Report:\n", report_text)

In [None]:
from imblearn.under_sampling import RandomUnderSampler
# Randomly under-sample the training split with a fixed seed.
# NOTE(review): nothing visible in this notebook consumes X_resampled /
# y_resampled yet -- confirm where they are meant to be used.
rus = RandomUnderSampler(random_state=42)
resampled_split = rus.fit_resample(X_train, y_train)
X_resampled, y_resampled = resampled_split