In [None]:
import kagglehub
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
from google.colab import files

In [None]:
# Fetch the e-commerce customer dataset through kagglehub; the call returns the
# local directory the dataset files were downloaded to, which is kept in `path`
# for the loading cell.
DATASET_HANDLE = "shriyashjagtap/e-commerce-customer-for-behavior-analysis"
path = kagglehub.dataset_download(DATASET_HANDLE)
print("Path to dataset files:", path)

Path to dataset files: /root/.cache/kagglehub/datasets/shriyashjagtap/e-commerce-customer-for-behavior-analysis/versions/4


In [None]:
# Read the customer CSV out of the downloaded dataset directory and print the
# first rows as a quick sanity check of the columns.
csv_name = "/ecommerce_customer_data_custom_ratios.csv"
df = pd.read_csv(path + csv_name)
preview = df.head()
print(preview)

   Customer ID        Purchase Date Product Category  Product Price  Quantity  \
0        46251  2020-09-08 09:38:32      Electronics             12         3   
1        46251  2022-03-05 12:56:35             Home            468         4   
2        46251  2022-05-23 18:18:01             Home            288         2   
3        46251  2020-11-12 13:13:29         Clothing            196         1   
4        13593  2020-11-27 17:55:11             Home            449         1   

   Total Purchase Amount Payment Method  Customer Age  Returns  \
0                    740    Credit Card            37      0.0   
1                   2739         PayPal            37      0.0   
2                   3196         PayPal            37      0.0   
3                   3509         PayPal            37      0.0   
4                   3452    Credit Card            49      0.0   

         Customer Name  Age  Gender  Churn  
0  Christine Hernandez   37    Male      0  
1  Christine Hernandez   3

In [None]:
# Columns excluded from the model inputs (identifiers, timestamp, price/amount
# fields, customer name and the "Age" column).
features_to_remove = [
    "Customer ID",
    "Purchase Date",
    "Product Price",
    "Total Purchase Amount",
    "Customer Name",
    "Age",
]
# errors="ignore" skips any listed column that is not present instead of raising.
df = df.drop(labels=features_to_remove, axis="columns", errors="ignore")


In [None]:
# Encode categorical features.
# "Gender" is mapped to integer codes with a LabelEncoder; "Product Category"
# and "Payment Method" are one-hot encoded into one indicator column per value.
le = LabelEncoder()
df["Gender"] = le.fit_transform(df["Gender"])

oheProduct = OneHotEncoder(sparse_output=False)
ohePayment = OneHotEncoder(sparse_output=False)

productCategoryEncoded = oheProduct.fit_transform(df[["Product Category"]])
paymentEncoded = ohePayment.fit_transform(df[["Payment Method"]])

# Build the indicator frames on df's own index. pd.concat(axis=1) aligns rows
# by index label, so frames carrying a fresh 0..n-1 RangeIndex would be
# misaligned (and introduce NaN-filled rows) whenever df's index is anything
# else, e.g. after rows have been filtered out upstream.
productCatdf = pd.DataFrame(
    productCategoryEncoded,
    columns=oheProduct.get_feature_names_out(["Product Category"]),
    index=df.index,
)
paymentdf = pd.DataFrame(
    paymentEncoded,
    columns=ohePayment.get_feature_names_out(["Payment Method"]),
    index=df.index,
)

# Replace the raw categorical columns with their one-hot expansions.
df = df.drop(columns=["Product Category", "Payment Method"])
df = pd.concat([df, productCatdf, paymentdf], axis=1)

In [None]:
# Missing entries in "Returns" are replaced with 0 before modelling.
returns_filled = df["Returns"].fillna(value=0)
df["Returns"] = returns_filled

In [None]:
# Separate the prediction target ("Churn") from the remaining columns, which
# become the model inputs.
y = df["Churn"]
X = df.drop("Churn", axis=1)

In [None]:
# Split data into training & testing sets (80-20 split).
# The churn target is imbalanced (roughly 80% class 0 / 20% class 1 in the
# evaluation output), so stratify on y to keep the same class ratio in both
# partitions instead of leaving it to chance.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# Neural networks are sensitive to feature scale, so standardize the inputs.
# The scaler learns its statistics from the training split only and is then
# applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Baseline network: two hidden layers (64 then 32 units), ReLU activations,
# Adam optimizer, at most 500 training iterations, fixed seed.
baseline_params = {
    "hidden_layer_sizes": (64, 32),
    "activation": 'relu',
    "solver": 'adam',
    "max_iter": 500,
    "random_state": 42,
}
mlp = MLPClassifier(**baseline_params)
mlp.fit(X_train, y_train)

In [None]:
# Make predictions
# Predict class labels for the scaled held-out test features with the baseline
# network; the evaluation cell compares y_pred against y_test.
y_pred = mlp.predict(X_test)

In [None]:
# Model Evaluation
# Overall accuracy plus per-class precision / recall / F1 on the test split.
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.4f}")
# zero_division=0: when a class is never predicted its precision is undefined;
# report it as 0 explicitly instead of emitting UndefinedMetricWarning for
# every averaged metric.
baseline_report = classification_report(y_test, y_pred, zero_division=0)
print("Classification Report:\n", baseline_report)

Model Accuracy: 0.8003
Classification Report:
               precision    recall  f1-score   support

           0       0.80      1.00      0.89     40016
           1       0.00      0.00      0.00      9984

    accuracy                           0.80     50000
   macro avg       0.40      0.50      0.44     50000
weighted avg       0.64      0.80      0.71     50000



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Hyperparameter tuning using RandomizedSearchCV
# Candidate values sampled by the search (10 random combinations, 5-fold CV).
param_grid = {
    'hidden_layer_sizes': [(32, 16), (64, 32), (128, 64)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam', 'sgd'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate': ['constant', 'adaptive']
}
# Select on ROC AUC rather than accuracy: with an ~80/20 class split, accuracy
# is maximised by always predicting the majority class, so it cannot tell a
# useful candidate from a degenerate one. ROC AUC ranks candidates by how well
# their predicted probabilities separate the two churn classes.
# NOTE(review): X_train was standardized with statistics from the whole
# training split, so the CV validation folds share those statistics.
random_search = RandomizedSearchCV(
    MLPClassifier(max_iter=500, random_state=42),
    param_grid,
    n_iter=10,
    cv=5,
    scoring='roc_auc',
    n_jobs=-1,
    random_state=42,
    verbose=1,
)
random_search.fit(X_train, y_train)

# Keep the refitted best candidate and overwrite y_pred with its test-set
# predictions for the evaluation cell that follows.
best_mlp = random_search.best_estimator_
y_pred = best_mlp.predict(X_test)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


In [None]:
# Evaluate tuned model
# Same metrics as the baseline evaluation, followed by the winning
# hyperparameter combination from the randomized search.
accuracy = accuracy_score(y_test, y_pred)
print(f"Tuned Model Accuracy: {accuracy:.4f}")
# zero_division=0: a class that is never predicted has undefined precision;
# report it as 0 explicitly instead of emitting UndefinedMetricWarning.
tuned_report = classification_report(y_test, y_pred, zero_division=0)
print("Classification Report:\n", tuned_report)
print("Best Parameters:", random_search.best_params_)


Tuned Model Accuracy: 0.8003
Classification Report:
               precision    recall  f1-score   support

           0       0.80      1.00      0.89     40016
           1       0.00      0.00      0.00      9984

    accuracy                           0.80     50000
   macro avg       0.40      0.50      0.44     50000
weighted avg       0.64      0.80      0.71     50000

Best Parameters: {'solver': 'adam', 'learning_rate': 'constant', 'hidden_layer_sizes': (64, 32), 'alpha': 0.0001, 'activation': 'relu'}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Persist the tuned network and the fitted scaler to the working directory.
# NOTE(review): the LabelEncoder / OneHotEncoder objects fitted in the encoding
# cell are not written out here, so anything loading these files must rebuild
# the same categorical encoding itself.
joblib.dump(value=best_mlp, filename="neural_network_model.pkl")
joblib.dump(value=scaler, filename="scaler.pkl")

['scaler.pkl']

In [None]:
# Hand both saved artifacts to Colab's files.download, model first, then scaler.
for artifact_name in ("neural_network_model.pkl", "scaler.pkl"):
    files.download(artifact_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>