In [1]:
import pandas as pd

df = pd.read_csv('customer_churn.csv')
display(df.head())

Unnamed: 0,tenure,PhoneService,InternetService,MonthlyCharges,TotalCharges,Churn
0,1,No,DSL,29.85,29.85,Stayed
1,34,Yes,DSL,56.95,1889.5,Stayed
2,2,Yes,DSL,53.85,108.15,Churned
3,45,No,DSL,42.3,1840.75,Stayed
4,2,Yes,Fiber optic,70.7,151.65,Churned


In [2]:
# Check for missing values
print("Missing values before handling:")
print(df.isnull().sum())

# Convert 'TotalCharges' to numeric, coercing errors to NaN
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

# Handle missing values in 'TotalCharges' by filling with the mean
df['TotalCharges'].fillna(df['TotalCharges'].mean(), inplace=True)

print("\nMissing values after handling:")
print(df.isnull().sum())

Missing values before handling:
tenure             0
PhoneService       0
InternetService    0
MonthlyCharges     0
TotalCharges       0
Churn              0
dtype: int64

Missing values after handling:
tenure             0
PhoneService       0
InternetService    0
MonthlyCharges     0
TotalCharges       0
Churn              0
dtype: int64


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['TotalCharges'].fillna(df['TotalCharges'].mean(), inplace=True)


In [3]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Encode categorical features
for column in ['PhoneService', 'InternetService', 'Churn']:
    if column in df.columns:
        le = LabelEncoder()
        df[column] = le.fit_transform(df[column])

display(df.head())

Unnamed: 0,tenure,PhoneService,InternetService,MonthlyCharges,TotalCharges,Churn
0,1,0,0,29.85,29.85,1
1,34,1,0,56.95,1889.5,1
2,2,1,0,53.85,108.15,0
3,45,0,0,42.3,1840.75,1
4,2,1,1,70.7,151.65,0


In [4]:
from sklearn.model_selection import train_test_split

# Define features (X) and target (y)
X = df.drop('Churn', axis=1)
y = df['Churn']

# Scale numerical features
numerical_features = X.select_dtypes(include=['int64', 'float64']).columns
scaler = StandardScaler()
X[numerical_features] = scaler.fit_transform(X[numerical_features])

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

X_train shape: (5634, 5)
X_test shape: (1409, 5)
y_train shape: (5634,)
y_test shape: (1409,)


In [5]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Build the ANN model
model = Sequential([
    Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [6]:
# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

Epoch 1/50
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.5972 - loss: 0.6422 - val_accuracy: 0.7906 - val_loss: 0.4591
Epoch 2/50
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7844 - loss: 0.4697 - val_accuracy: 0.7870 - val_loss: 0.4391
Epoch 3/50
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7806 - loss: 0.4571 - val_accuracy: 0.7897 - val_loss: 0.4326
Epoch 4/50
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7887 - loss: 0.4476 - val_accuracy: 0.7906 - val_loss: 0.4298
Epoch 5/50
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8031 - loss: 0.4359 - val_accuracy: 0.7941 - val_loss: 0.4263
Epoch 6/50
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7852 - loss: 0.4482 - val_accuracy: 0.7924 - val_loss: 0.4259
Epoch 7/50
[1m141/141[0m 

In [7]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)

print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7992 - loss: 0.4158
Test Loss: 0.4146
Test Accuracy: 0.8041


In [8]:
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np

# Predict on the test data
y_pred = model.predict(X_test)
y_pred = (y_pred > 0.5).astype(int) # Convert probabilities to binary predictions

# Calculate and display the confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Display classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Confusion Matrix:
[[179 194]
 [ 82 954]]

Classification Report:
              precision    recall  f1-score   support

           0       0.69      0.48      0.56       373
           1       0.83      0.92      0.87      1036

    accuracy                           0.80      1409
   macro avg       0.76      0.70      0.72      1409
weighted avg       0.79      0.80      0.79      1409



**Reasoning**:
The previous command failed because the `keras_tuner` library is not installed. Install `keras_tuner` using pip.

