In [62]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# Load and prepare all data.
# The raw file is kept in `df`; `df_clean` is the modelling view with four columns removed.
# NOTE(review): the reason for dropping 'Complain' and 'Satisfaction Score' is not stated
# here (presumably to avoid leakage) -- confirm and record the rationale.
df = pd.read_csv('Customer-Churn-Records.csv')
df_clean = df.drop(['RowNumber', 'Surname', 'Complain', 'Satisfaction Score'], axis=1)

# Build the full feature matrix (X_full) and target (y_full) for every row in the file.
# 'CustomerId' is excluded from the features; get_dummies one-hot encodes the
# remaining non-numeric columns.
X_full = pd.get_dummies(df_clean.drop(['Exited', 'CustomerId'], axis=1))
y_full = df_clean['Exited']

# Hold out 20% of the rows as a test set; random_state pins the split so re-runs match.
X_train, X_test, y_train, y_test = train_test_split(X_full, y_full, test_size=0.2, random_state=24)

# train_test_split hands back row selections of X_full. Take explicit copies before
# writing scaled values into them, so the column assignments below never act on a
# slice of another frame (avoids pandas' SettingWithCopyWarning) and X_full keeps
# its original, unscaled values.
X_train = X_train.copy()
X_test = X_test.copy()

# Scale the numeric features.
numerical_cols = ['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'EstimatedSalary', 'Point Earned']
scaler = StandardScaler()
# Learn the scaling parameters from the training rows only...
X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
# ...and apply those same parameters to the test rows (no re-fitting on test data).
X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

# Train the model. max_iter is raised to 1000 -- presumably to let the solver converge.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Report accuracy on the held-out rows.
# NOTE(review): accuracy alone can look strong when the target classes are unbalanced;
# consider comparing against a majority-class baseline before calling the model reliable.
print(f"Model Accuracy on Unseen Test Data: {model.score(X_test, y_test):.4f}\n")

Model Accuracy on Unseen Test Data: 0.8105

Top 100 Highest-Risk Customers who actually churned 
      RowNumber  CustomerId     Surname  CreditScore Geography  Gender  Age  \
3531       3532    15653251      Hickey          408    France  Female   84   
9555       9556    15655360     Chikelu          782   Germany  Female   72   
7499       7500    15790113   Schofield          609   Germany  Female   71   
8488       8489    15794360         Hao          592   Germany  Female   70   
7629       7630    15591107    Flemming          723   Germany  Female   68   
...         ...         ...         ...          ...       ...     ...  ...   
8923       8924    15570002  Burlingame          625   Germany  Female   55   
5789       5790    15726103        Tsou          689   Germany  Female   55   
7317       7318    15734008    Bartlett          727   Germany    Male   59   
3898       3899    15750156          Yu          662   Germany    Male   59   
6911       6912    15710087    Nic