In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Read the cleaned e-commerce CSV (path is relative to the notebook's
# working directory) into the DataFrame every later cell works from.
data = pd.read_csv(filepath_or_buffer='cleaned_ecommerce_data.csv')


In [2]:
# When a purchase timestamp is present, replace it with three numeric
# calendar columns (year, month, day-of-month) appended after the
# existing columns.
if 'Purchase Date' in data.columns:
    purchase_ts = pd.to_datetime(data['Purchase Date'])
    calendar_parts = {
        'Purchase Year': purchase_ts.dt.year,
        'Purchase Month': purchase_ts.dt.month,
        'Purchase Day': purchase_ts.dt.day,
    }
    # The raw timestamp column is removed once its parts are extracted.
    data = data.drop(columns=['Purchase Date']).assign(**calendar_parts)

# Keep only float/int columns; columns of any other dtype
# (e.g. object/string) are discarded and never reach the models.
numeric_kinds = [float, int]
data = data.select_dtypes(include=numeric_kinds)

In [3]:
# 'Churn' is the label; every remaining column is used as a feature.
y = data['Churn']
X = data.drop(columns=['Churn'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both partitions. From here on X_train / X_test
# hold the scaled values rather than the original DataFrames.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Build a 100-tree random forest with a fixed seed and fit it on the
# scaled training data in one step (fit returns the estimator itself).
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predict labels for the held-out rows.
y_pred_rf = rf_model.predict(X_test)

# Report the fraction of held-out rows classified correctly.
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print(f"Random Forest Accuracy: {accuracy_rf:.2f}")

Random Forest Accuracy: 0.80


In [5]:
from sklearn.tree import DecisionTreeClassifier

# Build a single decision tree with a fixed seed and fit it on the
# scaled training data in one step (fit returns the estimator itself).
dt_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict labels for the held-out rows.
y_pred_dt = dt_model.predict(X_test)

# Report the fraction of held-out rows classified correctly.
accuracy_dt = accuracy_score(y_test, y_pred_dt)
print(f"Decision Tree Accuracy: {accuracy_dt:.2f}")

Decision Tree Accuracy: 0.83


In [6]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling are repeatable (up to any
# nondeterministic device ops). This mirrors the fixed random_state=42
# used for the train/test split and the scikit-learn models.
tf.keras.utils.set_random_seed(42)

# Define the model: two hidden ReLU layers feeding one sigmoid output unit.
# The input width is declared with an explicit Input object; passing
# `input_shape` to the first Dense layer of a Sequential model makes
# Keras emit a UserWarning.
ann_model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')  # use 'softmax' and adjust output units if multiclass
])

# Compile the model for binary classification, tracking accuracy.
ann_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train for 10 epochs in mini-batches of 32; 20% of the training rows are
# held out to report val_loss / val_accuracy after every epoch.
ann_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate on the untouched test partition; evaluate() returns the loss
# followed by each compiled metric (here: accuracy).
loss, accuracy_ann = ann_model.evaluate(X_test, y_test)
print(f"Artificial Neural Network Accuracy: {accuracy_ann:.2f}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
4965/4965 ━━━━━━━━━━━━━━━━━━━━ 22s 4ms/step - accuracy: 0.7921 - loss: 0.5112 - val_accuracy: 0.8033 - val_loss: 0.4978
Epoch 2/10
4965/4965 ━━━━━━━━━━━━━━━━━━━━ 19s 4ms/step - accuracy: 0.7992 - loss: 0.5027 - val_accuracy: 0.8033 - val_loss: 0.4970
Epoch 3/10
4965/4965 ━━━━━━━━━━━━━━━━━━━━ 19s 4ms/step - accuracy: 0.7983 - loss: 0.5035 - val_accuracy: 0.8033 - val_loss: 0.4979
Epoch 4/10
4965/4965 ━━━━━━━━━━━━━━━━━━━━ 19s 4ms/step - accuracy: 0.7982 - loss: 0.5033 - val_accuracy: 0.8033 - val_loss: 0.4964
Epoch 5/10
4965/4965 ━━━━━━━━━━━━━━━━━━━━ 20s 3ms/step - accuracy: 0.8005 - loss: 0.5000 - val_accuracy: 0.8033 - val_loss: 0.4962
Epoch 6/10
4965/4965 ━━━━━━━━━━━━━━━━━━━━ 21s 4ms/step - accuracy: 0.8006 - loss: 0.4998 - val_accuracy: 0.8033 - val_loss: 0.4960
Epoch 7/10