In [42]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.preprocessing import OrdinalEncoder
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:
d = pd.read_csv('ECB.csv')
d.head()

Unnamed: 0,Customer ID,Gender,Age,City,Membership Type,Total Spend,Items Purchased,Average Rating,Discount Applied,Days Since Last Purchase,Satisfaction Level
0,101,Female,29,New York,Gold,1120.2,14,4.6,True,25,Satisfied
1,102,Male,34,Los Angeles,Silver,780.5,11,4.1,False,18,Neutral
2,103,Female,43,Chicago,Bronze,510.75,9,3.4,True,42,Unsatisfied
3,104,Male,30,San Francisco,Gold,1480.3,19,4.7,False,12,Satisfied
4,105,Male,27,Miami,Silver,720.4,13,4.0,True,55,Unsatisfied


## Preprocess the Data

In [3]:
d.isnull().sum()

Customer ID                 0
Gender                      0
Age                         0
City                        0
Membership Type             0
Total Spend                 0
Items Purchased             0
Average Rating              0
Discount Applied            0
Days Since Last Purchase    0
Satisfaction Level          2
dtype: int64

In [4]:
d = d.dropna(subset=['Satisfaction Level'])

In [5]:
d.isnull().sum()

Customer ID                 0
Gender                      0
Age                         0
City                        0
Membership Type             0
Total Spend                 0
Items Purchased             0
Average Rating              0
Discount Applied            0
Days Since Last Purchase    0
Satisfaction Level          0
dtype: int64

## Seeing from last 30 days data whether a customer is likely tro make a purchase or not

In [6]:
days_threshold = 30 
total_spend_threshold = d['Total Spend'].median()  
items_purchased_threshold = d['Items Purchased'].median()  

1    229
0    119
Name: Likely to Purchase, dtype: int64

## Creating a target column for further analysis

In [None]:
d['Likely to Purchase'] = np.where(
    ((d['Days Since Last Purchase'] < days_threshold) |
     ((d['Total Spend'] >= total_spend_threshold) & 
      (d['Items Purchased'] >= items_purchased_threshold))) &
    (d['Satisfaction Level'] != 'Unsatisfied'), 
    1,  
    0  
)

In [None]:
d['Likely to Purchase'].value_counts()

In [47]:
d.sample(10)

Unnamed: 0,Age,Membership Type,Total Spend,Items Purchased,Average Rating,Discount Applied,Days Since Last Purchase,Satisfaction Level,Likely to Purchase
88,32,1,670.3,10,3.8,1,33,0,0
131,37,0,420.8,7,3.1,0,21,1,1
108,30,2,1180.8,16,4.7,1,19,2,1
183,30,2,1470.5,20,4.8,0,13,2,1
9,28,2,1520.1,21,4.8,0,9,2,1
257,31,2,1160.6,15,4.5,1,29,2,1
214,37,0,420.8,7,3.4,0,24,1,1
85,35,1,810.9,12,4.3,0,13,1,1
46,32,1,680.3,10,3.8,1,32,0,0
342,35,1,800.9,12,4.1,0,20,1,1


## Dropping irrelvant column which reduce the accuracy of the model

In [8]:
d = d.drop(['Customer ID', 'City', 'Gender'], axis=1)

In [9]:
d.head()

Unnamed: 0,Age,Membership Type,Total Spend,Items Purchased,Average Rating,Discount Applied,Days Since Last Purchase,Satisfaction Level,Likely to Purchase
0,29,Gold,1120.2,14,4.6,True,25,Satisfied,1
1,34,Silver,780.5,11,4.1,False,18,Neutral,1
2,43,Bronze,510.75,9,3.4,True,42,Unsatisfied,0
3,30,Gold,1480.3,19,4.7,False,12,Satisfied,1
4,27,Silver,720.4,13,4.0,True,55,Unsatisfied,0


## Manually inputting values

In [10]:
ms_mapping = {'Bronze': 0, 'Silver': 1, 'Gold': 2}
s_mapping = {'Unsatisfied': 0, 'Neutral': 1, 'Satisfied': 2}

In [11]:
d['Membership Type'] = d['Membership Type'].map(ms_mapping)
d['Satisfaction Level'] = d['Satisfaction Level'].map(s_mapping)

In [12]:
d.sample(10)

Unnamed: 0,Age,Membership Type,Total Spend,Items Purchased,Average Rating,Discount Applied,Days Since Last Purchase,Satisfaction Level,Likely to Purchase
197,38,0,440.9,8,3.2,False,24,1,1
320,28,2,1480.1,21,4.9,False,12,2,1
306,35,1,820.9,12,4.3,False,16,1,1
275,30,2,1190.8,16,4.5,True,23,2,1
19,34,1,790.2,11,4.0,False,16,1,1
266,30,2,1460.5,20,4.8,False,13,2,1
130,27,1,700.4,13,4.0,True,53,0,0
298,36,0,470.5,8,3.0,False,22,1,1
295,41,0,475.25,9,3.6,True,44,0,0
93,29,2,1370.2,18,4.7,False,10,2,1


In [24]:
d['Discount Applied'] = d['Discount Applied'].apply(lambda x: 1 if x == True else 0)
print(d['Discount Applied'].head())

d['Discount Applied'] = d['Discount Applied'].astype(int)

0    1
1    0
2    1
3    0
4    1
Name: Discount Applied, dtype: int64


In [25]:
d.sample(5)

Unnamed: 0,Age,Membership Type,Total Spend,Items Purchased,Average Rating,Discount Applied,Days Since Last Purchase,Satisfaction Level,Likely to Purchase
225,27,1,710.4,13,4.1,1,54,0,0
278,30,2,1450.5,19,4.6,0,12,2,1
113,37,0,430.8,7,3.4,0,23,1,1
139,34,1,790.2,11,4.0,0,15,1,1
313,43,0,505.75,10,3.3,1,46,0,0


## split the data

In [26]:
X = d.drop('Likely to Purchase', axis=1)  
y = d['Likely to Purchase']

In [36]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

## Scaling

In [28]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [29]:
X_train

array([[ 0.46834438, -1.21272529, -1.03036257, ..., -1.05169416,
        -0.68920095, -0.00856832],
       [ 0.06462426,  0.01322973, -0.13206086, ..., -1.05169416,
        -0.9137297 , -0.00856832],
       [-0.1372358 ,  0.01322973, -0.01812237, ..., -1.05169416,
        -0.98857262,  1.18242769],
       ...,
       [ 0.46834438, -1.21272529, -1.03036257, ..., -1.05169416,
        -0.68920095, -0.00856832],
       [-1.14653609,  1.23918475,  1.80643883, ..., -1.05169416,
        -1.21310137,  1.18242769],
       [ 0.26648432,  0.01322973, -0.04579917, ..., -1.05169416,
        -1.13825845, -0.00856832]])

## Build the model

In [30]:
model = Sequential()

In [31]:
model = Sequential()
model.add(Dense(32, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.3))  
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [32]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [37]:
history = model.fit(X_train, y_train, epochs=30, batch_size=32, validation_split=0.1)

Epoch 1/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.6686 - loss: 207.6028 - val_accuracy: 0.6250 - val_loss: 156.8158
Epoch 2/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6253 - loss: 135.0162 - val_accuracy: 0.6250 - val_loss: 56.4548
Epoch 3/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6157 - loss: 45.3087 - val_accuracy: 0.7812 - val_loss: 2.1099
Epoch 4/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6380 - loss: 34.3135 - val_accuracy: 0.6875 - val_loss: 8.5299
Epoch 5/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6521 - loss: 24.8241 - val_accuracy: 0.6250 - val_loss: 11.9480
Epoch 6/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6711 - loss: 13.4779 - val_accuracy: 0.8125 - val_loss: 1.5987
Epoch 7/30
[1m9/9[0m [32m━━━━━━━━━━━

In [38]:
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy:.2f}")

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8232 - loss: 0.4412 
Test Accuracy: 0.83


In [39]:
y_pred = (model.predict(X_test) > 0.5).astype(int)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step


In [40]:
print(f"Predicted values:\n{y_pred[:5].flatten()}")
print(f"Actual values:\n{y_test[:5].values}")

Predicted values:
[1 1 0 1 1]
Actual values:
[1 1 0 1 1]


In [43]:
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)

Confusion Matrix:
 [[10  0]
 [ 6 19]]


In [44]:
print("Classification Report:\n", classification_report(y_test, y_pred))

Classification Report:
               precision    recall  f1-score   support

           0       0.62      1.00      0.77        10
           1       1.00      0.76      0.86        25

    accuracy                           0.83        35
   macro avg       0.81      0.88      0.82        35
weighted avg       0.89      0.83      0.84        35



In [45]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Final Accuracy Score: {accuracy:.2f}")

Final Accuracy Score: 0.83


In [46]:
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int)
print("Sample Predictions (Predicted vs Actual):")
for i in range(10):
    print(f"Predicted: {y_pred[i][0]}, Actual: {y_test.iloc[i]}")

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Sample Predictions (Predicted vs Actual):
Predicted: 1, Actual: 1
Predicted: 1, Actual: 1
Predicted: 0, Actual: 0
Predicted: 1, Actual: 1
Predicted: 1, Actual: 1
Predicted: 0, Actual: 1
Predicted: 0, Actual: 0
Predicted: 0, Actual: 1
Predicted: 0, Actual: 1
Predicted: 0, Actual: 1
