In [None]:
pip uninstall numpy pandas -y

In [None]:
pip install numpy pandas --upgrade

In [None]:
pip install numpy==1.26.4 pandas==2.2.2

In [None]:
# Step 1: Import libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Step 2: Generate synthetic customer data
# Seed NumPy's legacy global RNG so every run draws the same dataset.
np.random.seed(42)
n_samples = 1000

# The columns are drawn in the order listed below; under the fixed seed that
# order determines which random numbers each column receives, so keep it.
synthetic_columns = dict(
    # Integer tenure; randint's upper bound is exclusive, so values are 1..59.
    tenure_months=np.random.randint(1, 60, size=n_samples),
    # Continuous spend drawn uniformly between 20 and 200.
    monthly_spend=np.random.uniform(20, 200, size=n_samples),
    # Call counts from a Poisson distribution with mean 2.
    support_calls=np.random.poisson(2, size=n_samples),
    # Binary flags with fixed class probabilities.
    is_active_user=np.random.choice([0, 1], size=n_samples, p=[0.3, 0.7]),
    used_discount=np.random.choice([0, 1], size=n_samples, p=[0.6, 0.4]),
    # Contract type picked uniformly from the three options.
    contract_type=np.random.choice(
        ['month-to-month', 'one-year', 'two-year'], size=n_samples
    ),
    # Target label: drawn independently of every feature column above
    # (about 25% ones), so the features carry no real signal about it.
    churned=np.random.choice([0, 1], size=n_samples, p=[0.75, 0.25]),
)
data = pd.DataFrame(synthetic_columns)

In [None]:
# Step 3: Encode categorical variables
# One-hot encode `contract_type`; drop_first=True omits the indicator for the
# first category so the remaining dummy columns are not redundant.
# The guard keeps this cell idempotent: `data` is rebound here, so on a second
# run `contract_type` no longer exists and an unguarded get_dummies call
# would raise KeyError.
if 'contract_type' in data.columns:
    data = pd.get_dummies(data, columns=['contract_type'], drop_first=True)


In [None]:
# Step 4: Define features and target
# Every column except the label is used as a model input.
X = data.drop(columns='churned')
y = data['churned']

# Step 5: Split data into train and test sets
# Hold out 30% for testing. stratify=y keeps the churn rate the same in both
# partitions; the label is imbalanced (drawn with p=[0.75, 0.25]), so an
# unstratified split can skew the class mix of the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Step 6: Train a Random Forest model
# random_state is fixed so the fitted forest is reproducible across runs.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Step 7: Predict and evaluate
# Hard class predictions for the held-out rows; scored in the next cell.
y_pred = model.predict(X_test)

In [None]:
print("Accuracy:", accuracy_score(y_test, y_pred))
# Print the heading and the report separately: passing both to a single
# print() inserts a separator space in front of the report, which shifts its
# header row out of line with the columns beneath it.
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Confusion Matrix
# Pin the label order to [0, 1] so the matrix is always 2x2 and its rows and
# columns line up with the hard-coded tick labels, even if one class happens
# to be absent from both y_test and y_pred.
class_names = ['Stayed', 'Churned']
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

# Step 8: Feature Importance Plot
# Pair each importance score with its feature name, then sort ascending so the
# most important feature ends up at the top of the horizontal bar chart.
importances = pd.Series(model.feature_importances_, index=X.columns)
importances.sort_values().plot(kind='barh', title='Feature Importance')
plt.tight_layout()
plt.show()