In [None]:
import json
import pickle

import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

# Load data
df = pd.read_csv('Telco-Customer-Churn.csv')

# Data preprocessing
# Convert TotalCharges to numeric; errors='coerce' turns any value that cannot
# be parsed as a number into NaN instead of raising
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

# Fill missing values with the column median.
# The result is assigned back to the column rather than calling
# fillna(..., inplace=True) on df['TotalCharges']: that form operates on an
# intermediate object, raises a FutureWarning on current pandas and stops
# modifying df from pandas 3.0 onwards.
df['TotalCharges'] = df['TotalCharges'].fillna(df['TotalCharges'].median())

# Encode the two-valued columns as 0/1 integers.
# Series.map leaves NaN for any value that is not a key of the mapping.
yes_no_codes = {'No': 0, 'Yes': 1}
for binary_col in ('Partner', 'Dependents', 'PhoneService', 'PaperlessBilling', 'Churn'):
    df[binary_col] = df[binary_col].map(yes_no_codes)

# gender uses its own pair of labels
df['gender'] = df['gender'].map({'Female': 0, 'Male': 1})

# Contract and PaymentMethod are intentionally excluded from the model.
# They must be dropped explicitly: get_dummies(columns=...) only encodes the
# listed columns and passes every other column through unchanged, so merely
# omitting them from categorical_columns would leave them as raw string
# columns in the feature matrix, which cannot be scaled.
# errors='ignore' keeps this a no-op when the CSV does not contain them.
excluded_columns = ['Contract', 'PaymentMethod']

# One-hot encoding for the remaining multi-valued categorical variables
categorical_columns = ['InternetService', 'MultipleLines', 'OnlineSecurity',
                      'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies']
df_encoded = pd.get_dummies(df.drop(columns=excluded_columns, errors='ignore'),
                            columns=categorical_columns)

# Select features for the model: every remaining column except the target
# (Churn) and the row identifier (customerID)
feature_columns = [col for col in df_encoded.columns if col != 'Churn' and col != 'customerID']

# Save the feature column names (in order) for later use
with open('feature_columns.json', 'w') as f:
    json.dump(feature_columns, f)

# Prepare features and target
X = df_encoded[feature_columns]
y = df_encoded['Churn']

# Split the data: 20% of the rows are held out for testing; random_state pins
# the shuffle so the same rows land in each partition on every run
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features. The scaler is fitted on the training rows only and then
# applied unchanged to the test rows
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Save the fitted scaler for later use.
# NOTE: pickle files can execute code when loaded; only unpickle scaler.pkl
# from a location you trust.
with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)

# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so weight initialisation, dropout masks and batch shuffling repeat
# from run to run (the train/test split alone was already pinned).
# NOTE(review): bit-exact repeatability on GPU may additionally need
# deterministic ops to be enabled - confirm if that matters here.
set_random_seed(42)

# Build the model: a feed-forward binary classifier.
# The input width is declared with an explicit Input layer instead of passing
# input_shape= to the first Dense layer, which Keras warns against for
# Sequential models; the resulting architecture is the same.
model = Sequential([
    Input(shape=(len(feature_columns),)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')  # single unit with sigmoid output
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model; validation_split=0.2 sets aside a fifth of the training
# rows for the per-epoch val_loss / val_accuracy figures
history = model.fit(X_train_scaled, y_train,
                    epochs=100,
                    batch_size=32,
                    validation_split=0.2,
                    verbose=1)

# Evaluate the model on the held-out test rows.
# Loss and accuracy as computed by Keras (verbose=0 keeps this call silent)
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test, verbose=0)

# Predicted probabilities, then hard 0/1 labels using a 0.5 cut-off
y_pred_proba = model.predict(X_test_scaled)
y_pred = np.greater(y_pred_proba, 0.5).astype(int)

# ROC-AUC is computed from the probabilities, not the thresholded labels
roc_auc = roc_auc_score(y_test, y_pred_proba)

print(f'\nTest Accuracy: {test_accuracy:.4f}')
print(f'ROC-AUC:       {roc_auc:.4f}\n')

# Per-class precision / recall / F1
print('Classification report:')
report_text = classification_report(y_test, y_pred)
print(report_text)

# Confusion matrix (rows: true class, columns: predicted class)
matrix = confusion_matrix(y_test, y_pred)
print('Confusion matrix:\n', matrix)

# Save the trained model next to the scaler and feature list written earlier.
# NOTE(review): the .h5 name is kept because other code may load this exact
# file - confirm before switching to another format.
model.save('churn_model.h5')

print("\n✅ Saved files: churn_model.h5, scaler.pkl, feature_columns.json")

Epoch 1/100


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["TotalCharges"].fillna(df["TotalCharges"].median(), inplace=True)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7617 - loss: 0.4877 - val_accuracy: 0.7799 - val_loss: 0.4444 - learning_rate: 0.0010
Epoch 2/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7897 - loss: 0.4387 - val_accuracy: 0.7879 - val_loss: 0.4399 - learning_rate: 0.0010
Epoch 3/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7988 - loss: 0.4260 - val_accuracy: 0.7862 - val_loss: 0.4384 - learning_rate: 0.0010
Epoch 4/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8028 - loss: 0.4235 - val_accuracy: 0.7924 - val_loss: 0.4366 - learning_rate: 0.0010
Epoch 5/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8012 - loss: 0.4213 - val_accuracy: 0.7862 - val_loss: 0.4410 - learning_rate: 0.0010
Epoch 6/100
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m




Test Accuracy: 0.7899
ROC-AUC:       0.8348

Classification report:
               precision    recall  f1-score   support

           0       0.84      0.88      0.86      1035
           1       0.62      0.53      0.57       374

    accuracy                           0.79      1409
   macro avg       0.73      0.71      0.72      1409
weighted avg       0.78      0.79      0.78      1409


Confusion matrix:
 [[914 121]
 [175 199]]

✅ Saved files: churn_model.h5, scaler.pkl, feature_columns.json
