<a href="https://colab.research.google.com/github/S-Devisri01/Python-colab/blob/main/Customer_Churn_Prediction_(ANN).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# =========================================================
# Customer Churn Prediction (ANN) - Complete Single File
# =========================================================

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import set_random_seed

# -------------------------------
# Step 0: Generate Synthetic Dataset
# -------------------------------
# Seed NumPy's global RNG so the same dataset is generated on every run.
# The data depends on the ORDER of the np.random calls below, so keep it.
np.random.seed(42)
n_samples = 5000

# Numerical features
tenure = np.random.randint(0, 72, n_samples)           # months, 0..71 (upper bound is exclusive)
monthly_charges = np.random.uniform(20, 120, n_samples)
# Total billed = tenure x monthly charge plus Gaussian noise (std 20).
# For tenure 0 or 1 the noise alone can push the value below zero, which is
# impossible for a billed amount, so clip the result at 0.
total_charges = np.clip(
    tenure * monthly_charges + np.random.normal(0, 20, n_samples),
    0,
    None,
)

# Categorical features (each level drawn uniformly at random)
gender = np.random.choice(['Male', 'Female'], n_samples)
partner = np.random.choice(['Yes', 'No'], n_samples)
dependents = np.random.choice(['Yes', 'No'], n_samples)
phone_service = np.random.choice(['Yes', 'No'], n_samples)
internet_service = np.random.choice(['DSL', 'Fiber optic', 'No'], n_samples)
contract = np.random.choice(['Month-to-month', 'One year', 'Two year'], n_samples)
payment_method = np.random.choice(['Electronic check', 'Mailed check', 'Bank transfer', 'Credit card'], n_samples)

# Target: Churn (binary)
# `churn_prob` is an additive risk score rather than a calibrated probability:
# +0.3 for a month-to-month contract, +0.2 for monthly charges above 80,
# +0.3 for tenure under 12 months, plus a little Gaussian noise (std 0.05).
churn_prob = (
    (contract == 'Month-to-month') * 0.3 +
    (monthly_charges > 80) * 0.2 +
    (tenure < 12) * 0.3 +
    np.random.normal(0, 0.05, n_samples)
)
# A customer is labelled as churned when the score exceeds 0.5, which needs
# at least two of the three risk factors (the noise decides borderline cases).
churn = (churn_prob > 0.5).astype(int)

# Create DataFrame: 10 feature columns plus the 'Churn' target
df = pd.DataFrame({
    'tenure': tenure,
    'MonthlyCharges': monthly_charges,
    'TotalCharges': total_charges,
    'gender': gender,
    'Partner': partner,
    'Dependents': dependents,
    'PhoneService': phone_service,
    'InternetService': internet_service,
    'Contract': contract,
    'PaymentMethod': payment_method,
    'Churn': churn
})

print("Synthetic dataset created ✅")
print(df.head())

# -------------------------------
# Step 1: Encode Categorical Columns
# -------------------------------
# Treat every non-numeric column as categorical. Selecting by "not a number"
# instead of by `object` dtype keeps this working whether pandas stores the
# text columns as `object` or as a dedicated string dtype.
# NOTE: this would also pick up bool/datetime columns; `df` has none here.
cat_cols = df.select_dtypes(exclude=['number']).columns.tolist()

# Keep one fitted encoder per column. Re-fitting a single shared encoder
# discards every mapping except the last, so encoded values could not be
# decoded or applied consistently to new data.
encoders = {}
for col in cat_cols:
    le = LabelEncoder()
    # Replace each category string with its integer code, in place.
    df[col] = le.fit_transform(df[col].astype(str))
    encoders[col] = le

# -------------------------------
# Step 2: Train-Test Split & Scaling
# -------------------------------
# Separate the binary target from the feature columns.
y = df["Churn"]
X = df.drop(columns=["Churn"])

# Hold out 20% of the rows for testing; stratify on the target so both
# partitions keep the same churn ratio, and fix the seed for repeatability.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both partitions (no test-set leakage).
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

print("\nTraining samples:", X_train.shape)
print("Testing samples:", X_test.shape)

# -------------------------------
# Step 3: Build ANN Model
# -------------------------------
# Seed Python's `random`, NumPy and TensorFlow together before any weights
# are created. Seeding NumPy alone (Step 0) does not fix TensorFlow's weight
# initialisation, dropout masks or batch shuffling, so results would differ
# on every run.
# NOTE(review): bit-exact repeatability on GPU may additionally require
# TensorFlow's op-determinism setting - confirm if that matters here.
set_random_seed(42)

# Feed-forward binary classifier: two ReLU hidden layers with dropout for
# regularisation, and a single sigmoid unit producing the churn probability.
model = Sequential([
    Input(shape=(X_train.shape[1],)),   # one input per scaled feature column
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

# Binary cross-entropy matches the sigmoid output and the 0/1 target.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# -------------------------------
# Step 4: Train the Model
# -------------------------------
# Stop once validation loss has gone 5 epochs without improving, and roll
# the model back to the weights from its best epoch.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Train for at most 20 epochs in mini-batches of 32, setting aside 10% of
# the training data as the validation set that early stopping monitors.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=20,
    verbose=1,
    callbacks=[early_stop],
    validation_split=0.1,
)

# -------------------------------
# Step 5: Evaluate Model
# -------------------------------
# Predicted churn probabilities for the held-out rows, turned into hard
# 0/1 labels with a 0.5 decision threshold.
decision_threshold = 0.5
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > decision_threshold).astype(int)

# Per-class precision / recall / F1 on the hard labels.
print("\nClassification Report:\n")
report = classification_report(y_test, y_pred)
print(report)

# Rows are true classes, columns are predicted classes.
print("Confusion Matrix:\n")
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

# ROC-AUC is threshold-independent, so it is scored on the raw probabilities.
roc_auc = roc_auc_score(y_test, y_pred_prob)
print("ROC-AUC Score:", roc_auc)

# -------------------------------
# Step 6: Save Model (Optional)
# -------------------------------
# Persist the trained network to a single file in the working directory.
# NOTE(review): the .h5 extension selects the HDF5 format, which recent
# Keras releases reportedly treat as legacy - confirm before relying on it.
model_path = 'synthetic_churn_ann_model.h5'
model.save(model_path)
print(f"\nModel saved as '{model_path}' ✅")


Synthetic dataset created ✅
   tenure  MonthlyCharges  TotalCharges  gender Partner Dependents  \
0      51      118.950533   6059.946483  Female     Yes         No   
1      14       88.431425   1214.621451    Male     Yes         No   
2      71      114.898067   8161.810403  Female     Yes         No   
3      60       34.255656   2024.200308  Female     Yes        Yes   
4      20       58.213947   1165.073226  Female      No        Yes   

  PhoneService InternetService        Contract PaymentMethod  Churn  
0           No              No        One year   Credit card      0  
1           No              No        One year  Mailed check      0  
2           No     Fiber optic        Two year  Mailed check      0  
3          Yes             DSL  Month-to-month  Mailed check      0  
4          Yes             DSL  Month-to-month   Credit card      0  

Training samples: (4000, 10)
Testing samples: (1000, 10)


Epoch 1/20
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.8125 - loss: 0.4430 - val_accuracy: 0.9125 - val_loss: 0.2246
Epoch 2/20
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.8864 - loss: 0.2572 - val_accuracy: 0.9025 - val_loss: 0.2005
Epoch 3/20
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.8974 - loss: 0.2302 - val_accuracy: 0.9000 - val_loss: 0.1925
Epoch 4/20
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9092 - loss: 0.2053 - val_accuracy: 0.9050 - val_loss: 0.1886
Epoch 5/20
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9060 - loss: 0.2043 - val_accuracy: 0.9025 - val_loss: 0.1858
Epoch 6/20
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9027 - loss: 0.1999 - val_accuracy: 0.9025 - val_loss: 0.1830
Epoch 7/20
[1m113/113[0m 




Classification Report:

              precision    recall  f1-score   support

           0       0.95      0.96      0.96       869
           1       0.74      0.69      0.71       131

    accuracy                           0.93      1000
   macro avg       0.85      0.83      0.83      1000
weighted avg       0.93      0.93      0.93      1000

Confusion Matrix:

[[837  32]
 [ 41  90]]
ROC-AUC Score: 0.9660924639183408

Model saved as 'synthetic_churn_ann_model.h5' ✅
