In [1]:
import numpy as np
import pandas as pd

In [2]:
import tensorflow as tf
import keras

In [3]:
df = pd.read_excel("Ecommerce cleaned.xlsx")

In [4]:
X = df.drop('Revenue', axis = 1)
y = df['Revenue']

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

In [6]:
# Count of each class
print(df['Revenue'].value_counts())

# Percentage distribution
print(df['Revenue'].value_counts(normalize=True) * 100)

Revenue
False    8946
True     1425
Name: count, dtype: int64
Revenue
False    86.259763
True     13.740237
Name: proportion, dtype: float64


In [8]:
from imblearn.over_sampling import SMOTE

smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

print("Before SMOTE:", y_train.value_counts())
print("After SMOTE:", y_train_res.value_counts())

Before SMOTE: Revenue
False    7137
True     1159
Name: count, dtype: int64
After SMOTE: Revenue
False    7137
True     7137
Name: count, dtype: int64


In [9]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train_scal = sc.fit_transform(X_train_res)
X_test_scal = sc.transform(X_test)

# Modelling

**Predictions & label shape**

In [16]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.metrics import f1_score, classification_report

# Build ANN Model
ann = Sequential([
    Dense(128, activation='relu', input_shape=(X_train_scal.shape[1],)),  # Input layer
    Dropout(0.3),  
    Dense(64, activation='relu'),  
    Dropout(0.2),
    Dense(1, activation='sigmoid')  # Output layer (binary classification)
])

# Compile model
ann.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = ann.fit(
    X_train_scal, y_train_res,
    validation_data=(X_test_scal, y_test),
    epochs=50,
    batch_size=32,
    verbose=1
)

# Predictions
y_pred_train = (ann.predict(X_train_scal) > 0.5).astype("int32").ravel()
y_pred_test = (ann.predict(X_test_scal) > 0.5).astype("int32").ravel()

# Evaluation
print("Train F1-score:", f1_score(y_train_res, y_pred_train, average='weighted'))
print("Test F1-score:", f1_score(y_test, y_pred_test, average='weighted'))

print("\nClassification Report (Test Data):\n", classification_report(y_test, y_pred_test))


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m447/447[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - accuracy: 0.8857 - loss: 0.2723 - val_accuracy: 0.8983 - val_loss: 0.2497
Epoch 2/50
[1m447/447[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9278 - loss: 0.1799 - val_accuracy: 0.9142 - val_loss: 0.2277
Epoch 3/50
[1m447/447[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9350 - loss: 0.1616 - val_accuracy: 0.9108 - val_loss: 0.2235
Epoch 4/50
[1m447/447[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9391 - loss: 0.1519 - val_accuracy: 0.9094 - val_loss: 0.2211
Epoch 5/50
[1m447/447[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9415 - loss: 0.1436 - val_accuracy: 0.9070 - val_loss: 0.2278
Epoch 6/50
[1m447/447[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9436 - loss: 0.1367 - val_accuracy: 0.9108 - val_loss: 0.2227
Epoch 7/50
[1m447/447[0m [32m━━━━━━━

In [17]:
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import cross_val_score

def create_model():
    model = Sequential([
        Dense(128, activation='relu', input_shape=(X_train_scal.shape[1],)),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Wrap model
clf = KerasClassifier(model=create_model, epochs=50, batch_size=32, verbose=0)

# Cross-validation
scores = cross_val_score(clf, X_train_scal, y_train_res, cv=5, scoring='f1_weighted')
print("Cross-validation F1-score:", scores.mean())


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Cross-validation F1-score: 0.9451123762365837
