In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.mixture import GaussianMixture
from sklearn.cluster import DBSCAN
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.metrics import classification_report, accuracy_score
from sklearn.tree import plot_tree


In [None]:
# Step 1: Customer Segmentation
# Read the full customer dataset from disk.
large_data_path = '/Users/keshavsaraogi/data/e-commerce/ecommerce_customer_data_large.csv'
ecommerce_data_large = pd.read_csv(large_data_path)

# Keep only the three columns used for clustering, then replace missing
# values in each column with that column's median.
segmentation_columns = ['Total Purchase Amount', 'Quantity', 'Customer Age']
raw_segmentation_features = ecommerce_data_large[segmentation_columns]
column_medians = raw_segmentation_features.median()
segmentation_features = raw_segmentation_features.fillna(column_medians)

In [None]:
# Standardize the clustering features: learn the scaling parameters from
# segmentation_features, then apply them to that same data.
scaler = StandardScaler()
fitted_scaler = scaler.fit(segmentation_features)
segmentation_features_scaled = fitted_scaler.transform(segmentation_features)

In [None]:
# Fit a three-component Gaussian mixture on the scaled features (seeded for
# repeatable results) and store each row's component index on the dataframe.
gmm_params = {'n_components': 3, 'random_state': 42}
gmm = GaussianMixture(**gmm_params)
gmm_labels = gmm.fit_predict(segmentation_features_scaled)
ecommerce_data_large['GMM_Cluster'] = gmm_labels

In [None]:
# Density-based clustering on the same scaled features. The labels are stored
# next to the GMM labels; DBSCAN gives the label -1 to points it treats as noise.
dbscan_params = {'eps': 0.5, 'min_samples': 5}
dbscan = DBSCAN(**dbscan_params)
dbscan_labels = dbscan.fit_predict(segmentation_features_scaled)
ecommerce_data_large['DBSCAN_Cluster'] = dbscan_labels

In [None]:
# Write the dataframe (now carrying both cluster label columns) to CSV,
# leaving out the row index.
clustered_output_path = '/Users/keshavsaraogi/data/e-commerce/ecommerce_customer_data_large_with_clusters.csv'
ecommerce_data_large.to_csv(clustered_output_path, index=False)

In [None]:

def plot_cluster_scatter(data, cluster_column, palette, title):
    """Scatter 'Total Purchase Amount' against 'Customer Age', colored by cluster.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the 'Total Purchase Amount' and 'Customer Age' columns
        plus the column named by ``cluster_column``.
    cluster_column : str
        Name of the column whose values select each point's color.
    palette : str
        Seaborn palette name used for the hue mapping.
    title : str
        Title drawn above the plot.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.scatterplot(
        data=data,
        x='Total Purchase Amount',
        y='Customer Age',
        hue=cluster_column,
        palette=palette,
        s=100,
        edgecolor='black',
        ax=ax,
    )
    ax.set_title(title)
    ax.set_xlabel('Total Purchase Amount')
    ax.set_ylabel('Customer Age')
    ax.legend(title='Cluster')
    plt.show()


# Same view of the data for both clustering methods; only the label column,
# palette and title differ.
plot_cluster_scatter(ecommerce_data_large, 'GMM_Cluster', 'Set1', 'GMM Clustering - Customer Segmentation')
plot_cluster_scatter(ecommerce_data_large, 'DBSCAN_Cluster', 'viridis', 'DBSCAN Clustering - Customer Segmentation')

In [None]:
# Step 2: Churn Prediction
# Read the dataset used for churn modelling into its own dataframe, separate
# from the one used for segmentation.
custom_ratios_path = '/Users/keshavsaraogi/data/e-commerce/ecommerce_customer_data_custom_ratios.csv'
ecommerce_data_custom = pd.read_csv(filepath_or_buffer=custom_ratios_path)

In [None]:
# Columns that are label-encoded before modelling.
categorical_features = ['Product Category', 'Payment Method', 'Gender']

# Replace missing churn values with 0; other columns are left untouched.
churn_fill_values = {'Churn': 0}
ecommerce_data_custom = ecommerce_data_custom.fillna(value=churn_fill_values)

In [None]:
# Replace each categorical column with integer codes. Values are cast to str
# first, so every entry (including any missing one) is encoded by its string form.
# The fitted encoder for each column is kept in label_encoders so the codes can
# be mapped back to category names (inverse_transform) and the same mapping can
# be applied to new data fed to the trained model.
label_encoders = {}
for col in categorical_features:
    le = LabelEncoder()
    ecommerce_data_custom[col] = le.fit_transform(ecommerce_data_custom[col].astype(str))
    label_encoders[col] = le

In [None]:
# Model inputs: three numeric columns plus the three encoded categorical columns.
feature_columns = [
    'Total Purchase Amount',
    'Quantity',
    'Customer Age',
    'Product Category',
    'Payment Method',
    'Gender',
]
X = ecommerce_data_custom[feature_columns]
y = ecommerce_data_custom['Churn']

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
split_options = {'test_size': 0.2, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [None]:
# Train a seeded XGBoost classifier on the training split.
xgb_params = {
    'random_state': 42,
    'use_label_encoder': False,
    'eval_metric': 'logloss',
}
xgb_model = xgb.XGBClassifier(**xgb_params)
xgb_model.fit(X_train, y_train)

# Score the held-out split: text classification report plus overall accuracy.
y_pred = xgb_model.predict(X_test)
xgb_accuracy = accuracy_score(y_test, y_pred)
xgb_results = classification_report(y_test, y_pred)

# Both metrics gathered in one dict.
evaluation_results = dict(classification_report=xgb_results, accuracy=xgb_accuracy)

In [None]:
# Save the classification report and the accuracy (four decimals) to a text
# file, overwriting any previous contents.
results_path = '/Users/keshavsaraogi/data/e-commerce/churn_model_results.txt'
results_text = f"Classification Report:\n{xgb_results}\n\nAccuracy: {xgb_accuracy:.4f}"
with open(results_path, 'w') as results_file:
    results_file.write(results_text)

In [None]:
# Plot the top 10 features ranked by 'weight' importance.
# plot_importance creates its own figure when no Axes is given, which would
# leave a separately created figure blank and its figsize unused; passing ax
# draws the bars on the 10x6 figure created here.
fig, ax = plt.subplots(figsize=(10, 6))
xgb.plot_importance(xgb_model, ax=ax, importance_type='weight', max_num_features=10, height=0.5)
ax.set_title('Feature Importance - Churn Prediction')
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix

# Tick labels in class order. Assumes Churn is coded 0 = no churn, 1 = churn
# (consistent with missing values being filled with 0) - confirm against the data.
class_names = ['No Churn', 'Churn']

# Pin the label set so the matrix is always 2x2 and lines up with class_names,
# even if one class happens to be absent from the test split or predictions.
conf_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Rows are true classes, columns are predicted classes; cells show raw counts.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title('Confusion Matrix - Churn Prediction')
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
plt.show()

In [None]:
# Persist the trained classifier to disk.
# NOTE(review): XGBoost appears to choose the serialization format from the file
# extension; '.xgb' is neither '.json' nor '.ubj' - confirm the resulting format
# is the intended one for the installed XGBoost version.
churn_model_path = '/Users/keshavsaraogi/data/e-commerce/churn_prediction_model.xgb'
xgb_model.save_model(churn_model_path)