In [2]:
# Install libraries
!pip install pandas scikit-learn imbalanced-learn tensorflow

# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Load dataset
df = pd.read_csv('customer_data.csv')

# Optional: add a fake Churn column for now (0/1 randomly assigned for testing)
import numpy as np
df['Churn'] = np.random.randint(0, 2, df.shape[0])

# Drop customerID
df.drop(columns=['customerID'], inplace=True)

# Convert TotalCharges to numeric
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
df.dropna(inplace=True)  # Drop rows with missing values

# Encode categorical columns
df = pd.get_dummies(df, drop_first=True)

# Define features and target
X = df.drop('Churn', axis=1)
y = df['Churn']

# Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Balance the data
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

# 1. Logistic Regression
print("\n--- Logistic Regression ---")
lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(X_train_res, y_train_res)
y_pred_lr = lr_model.predict(X_test)
print(confusion_matrix(y_test, y_pred_lr))
print(classification_report(y_test, y_pred_lr))

# 2. Decision Tree
print("\n--- Decision Tree ---")
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train_res, y_train_res)
y_pred_dt = dt_model.predict(X_test)
print(confusion_matrix(y_test, y_pred_dt))
print(classification_report(y_test, y_pred_dt))

# 3. Neural Network
print("\n--- Neural Network ---")
scaler = StandardScaler()
X_train_res_scaled = scaler.fit_transform(X_train_res)
X_test_scaled = scaler.transform(X_test)

nn_model = Sequential([
    Dense(32, activation='relu', input_shape=(X_train_res_scaled.shape[1],)),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])
nn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
nn_model.fit(X_train_res_scaled, y_train_res, epochs=50, batch_size=10, verbose=0)

# Evaluate NN
loss, accuracy = nn_model.evaluate(X_test_scaled, y_test, verbose=0)
print(f"Neural Network Test Accuracy: {accuracy:.4f}")
y_pred_nn = (nn_model.predict(X_test_scaled) > 0.5).astype(int)
print(confusion_matrix(y_test, y_pred_nn))
print(classification_report(y_test, y_pred_nn))


Collecting tensorflow
  Downloading tensorflow-2.19.0-cp312-cp312-win_amd64.whl.metadata (4.1 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloading absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-win_amd64.whl.metadata (5.3 kB)
Collecting opt-einsum>=2.3.2 (from tensorflow)
  Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)
Collecting termcolor>=1.1.0 (from tensorflow)
  Downloading termcolor-3.1

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[[420 434]
 [438 466]]
              precision    recall  f1-score   support

           0       0.49      0.49      0.49       854
           1       0.52      0.52      0.52       904

    accuracy                           0.50      1758
   macro avg       0.50      0.50      0.50      1758
weighted avg       0.50      0.50      0.50      1758


--- Decision Tree ---
[[422 432]
 [451 453]]
              precision    recall  f1-score   support

           0       0.48      0.49      0.49       854
           1       0.51      0.50      0.51       904

    accuracy                           0.50      1758
   macro avg       0.50      0.50      0.50      1758
weighted avg       0.50      0.50      0.50      1758


--- Neural Network ---


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Neural Network Test Accuracy: 0.5108
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[[482 372]
 [488 416]]
              precision    recall  f1-score   support

           0       0.50      0.56      0.53       854
           1       0.53      0.46      0.49       904

    accuracy                           0.51      1758
   macro avg       0.51      0.51      0.51      1758
weighted avg       0.51      0.51      0.51      1758



In [16]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Load dataset
df = pd.read_csv('customer_data.csv')
df
# Load dataset
# df = pd.read_csv('/content/sample_data/customer_data.csv')

# TEMPORARY: Add fake churn column if not already there
import numpy as np
if 'Churn' not in df.columns:
    df['Churn'] = np.random.randint(0, 2, df.shape[0])

# Drop customerID
df.drop(columns=['customerID'], inplace=True)

# Convert TotalCharges to numeric
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
df.dropna(inplace=True)

# Separate features and target BEFORE one-hot encoding
y = df['Churn']  # Save target column
X = df.drop('Churn', axis=1)  # Drop target from features

# Encode categorical features
X = pd.get_dummies(X, drop_first=True)

# Train-test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# SMOTE
from imblearn.over_sampling import SMOTE
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)
