In [1]:
# Import basic libraries
import os

import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier


In [2]:
# Loading data
# The CSV location is machine-specific, so it can be overridden by setting the
# CHURN_CSV environment variable before starting the kernel. When the variable
# is not set, the original hard-coded location is used unchanged.
DATA_PATH = os.environ.get(
    "CHURN_CSV",
    r"C:\Users\shrey\OneDrive\Desktop\Projects\churn\churn.csv",
)
data = pd.read_csv(DATA_PATH)

In [3]:
# Simple feature engineering
# Balance relative to salary; adding 1 to the salary keeps the denominator
# non-zero when the estimated salary is 0.
salary_plus_one = data['EstimatedSalary'].add(1)
data['BalanceToSalary'] = data['Balance'].div(salary_plus_one)

# Flag rows with a credit score under 600 AND more than one product.
low_credit_score = data['CreditScore'].lt(600)
multiple_products = data['NumOfProducts'].gt(1)
data['IsHighRisk'] = low_credit_score & multiple_products

In [4]:
# Convert text columns to numbers
GEOGRAPHY_CODES = {'France': 0, 'Germany': 1, 'Spain': 2}
GENDER_CODES = {'Male': 0, 'Female': 1}


def encode_labels(column, codes):
    """Map text labels in ``column`` to their numeric codes, safely on re-run.

    A plain ``column.map(codes)`` is not idempotent: when the cell runs a
    second time the column already holds codes instead of labels, none of
    them match a dict key, and the whole column becomes NaN. This helper
    passes values that already are one of the codes through unchanged.

    Parameters
    ----------
    column : pandas.Series
        Column holding text labels (first run) or codes (later runs).
    codes : dict
        Mapping from text label to numeric code.

    Returns
    -------
    pandas.Series
        Encoded column. Values that are neither a known label nor a known
        code become NaN, matching what ``Series.map`` with a dict yields.
    """
    known_codes = set(codes.values())

    def encode(value):
        if value in codes:
            return codes[value]
        # Already encoded by an earlier run of this cell: keep as is.
        if value in known_codes:
            return value
        return float('nan')

    return column.map(encode)


data['Geography'] = encode_labels(data['Geography'], GEOGRAPHY_CODES)
data['Gender'] = encode_labels(data['Gender'], GENDER_CODES)

In [7]:
# Select important features
features = [
    'CreditScore',
    'Age',
    'Balance',
    'NumOfProducts',
    'IsActiveMember',
    'BalanceToSalary',
    'IsHighRisk',
    'Geography',
]

# Model inputs are the selected columns, in the order listed above.
X = data.loc[:, features]
# Target column: 1 if customer left, 0 otherwise.
y = data.loc[:, 'Exited']


In [8]:
# Split data
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Train model with simple settings
xgb_settings = {
    'n_estimators': 300,
    'max_depth': 4,
    'learning_rate': 0.05,
    'random_state': 42,
}
model = XGBClassifier(**xgb_settings)
model.fit(X_train, y_train)

In [9]:
# Check accuracy
# Score the fitted model on the held-out rows and report it as a whole percent.
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Model Accuracy: {accuracy:.0%}")

# Show which features matter most
# Importances are paired with feature names by position, so `features` must be
# in the same order as the columns the model was trained on.
print("\nFeature Importance:")
importance_lines = [
    f"{feature}: {importance:.1%}"
    for feature, importance in zip(features, model.feature_importances_)
]
for line in importance_lines:
    print(line)

Model Accuracy: 87%

Feature Importance:
CreditScore: 2.5%
Age: 19.4%
Balance: 5.2%
NumOfProducts: 30.7%
IsActiveMember: 26.6%
BalanceToSalary: 4.1%
IsHighRisk: 2.5%
Geography: 9.2%
