In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import joblib
from google.colab import drive

# Mount Google Drive so the dataset stored there is reachable from this Colab runtime
drive.mount('/content/drive', force_remount=True)

# Load the per-customer aggregate table
customer_agg = pd.read_csv('/content/drive/MyDrive/Practicum/datasets after process/customer_agg.csv')

# Display the columns of the dataset
print("Customer Aggregation Columns:", customer_agg.columns.tolist())

# Define features and target.
# The target column must not appear among the features: giving the model
# 'purchase_frequency' as an input leaks the answer, so the validation metrics
# would say nothing about real predictive power.
# NOTE(review): 'clv' may itself be derived from purchase_frequency - confirm how
# it was computed upstream before trusting it as an independent feature.
features = ['customer_lifetime', 'recency', 'total_amount', 'clv']
X = customer_agg[features]
y = customer_agg['purchase_frequency']

Mounted at /content/drive
Customer Aggregation Columns: ['customer_id', 'purchase_frequency', 'total_amount', 'recency', 'customer_lifetime', 'clv']


In [None]:
# Count how many customers share each purchase_frequency value (most common first)
target_counts = y.value_counts()
print(target_counts)

purchase_frequency
3     13910
2     13382
4     10271
1      7762
5      4313
      ...  
82        1
79        1
60        1
84        1
64        1
Name: count, Length: 77, dtype: int64


In [None]:
# Drop target values that occur fewer than `min_class_size` times; the stratified
# split below needs at least two rows per target value.
min_class_size = 2  # smallest class size that is kept
class_counts = y.value_counts()
valid_classes = class_counts[class_counts >= min_class_size].index
keep_rows = y.isin(valid_classes)
X_valid = X[keep_rows]
y_valid = y[keep_rows]

# Hold out 10% of the rows for validation, stratified on the target so each
# target value keeps roughly the same share in both splits.
X_train, X_val, y_train, y_val = train_test_split(
    X_valid, y_valid, test_size=0.1, stratify=y_valid, random_state=42
)

# Confirm the split sizes
print("Training set shape:", X_train.shape)
print("Validation set shape:", X_val.shape)

# Show the target distribution of each split
print("Training set target distribution:", y_train.value_counts())
print("Validation set target distribution:", y_val.value_counts())

Training set shape: (54471, 5)
Validation set shape: (6053, 5)
Training set target distribution: purchase_frequency
3     12519
2     12044
4      9244
1      6986
5      3882
      ...  
61        3
76        3
95        2
71        2
89        2
Name: count, Length: 66, dtype: int64
Validation set target distribution: purchase_frequency
3     1391
2     1338
4     1027
1      776
5      431
6      130
7       38
27      31
24      31
29      31
26      30
22      30
28      30
25      29
8       29
23      28
9       27
30      27
19      27
17      26
10      25
18      25
32      24
21      24
20      24
14      24
16      24
15      24
13      22
31      21
11      21
12      20
34      20
33      19
35      19
36      18
38      17
41      16
40      16
37      16
42      14
39      13
44      12
43      12
45      11
46       9
48       8
47       8
50       7
49       6
52       5
53       4
54       4
51       4
55       3
57       2
56       2
58       1
62       1
59       1

In [None]:
# Impute missing values for numerical features.
# The imputer is fitted on the training split only and then applied to the
# validation split, so no validation statistics are used during fitting.
# The results are bound to fresh DataFrames (same index, same column names)
# instead of being assigned into the frames returned by train_test_split:
# those frames are selections of X, and writing into them triggers pandas'
# SettingWithCopyWarning. X_train / X_val hold exactly the `features` columns,
# so rebuilding them from the transformed arrays loses nothing.
num_imputer = SimpleImputer(strategy='mean')
X_train = pd.DataFrame(
    num_imputer.fit_transform(X_train[features]),
    columns=features,
    index=X_train.index,
)
X_val = pd.DataFrame(
    num_imputer.transform(X_val[features]),
    columns=features,
    index=X_val.index,
)

# Standardize numerical features (scaler likewise fitted on the training split only)
scaler = StandardScaler()
X_train = pd.DataFrame(
    scaler.fit_transform(X_train[features]),
    columns=features,
    index=X_train.index,
)
X_val = pd.DataFrame(
    scaler.transform(X_val[features]),
    columns=features,
    index=X_val.index,
)

In [None]:
# Baseline regression network: three hidden ReLU layers (64 -> 32 -> 16 units),
# dropout of 0.2 after the first two, and a single linear output unit.
n_features = X_train.shape[1]
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(n_features,)))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='linear'))  # purchase_frequency treated as a regression target

# Adam optimizer, mean-squared-error loss, MAE reported as an extra metric
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Fit for 10 epochs, scoring the validation split after every epoch
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_val, y_val),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


The current training output indicates that the model's validation loss and mean absolute error increase as training progresses. This suggests that the model might be overfitting, as it performs well on the training data but poorly on the validation data.

In [None]:
# Same 64 -> 32 -> 16 -> 1 network as the 10-epoch run, rebuilt from scratch so
# training starts from freshly initialised weights.
hidden_stack = [
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(16, activation='relu'),
]
output_layer = Dense(1, activation='linear')  # purchase_frequency treated as a regression target
model = Sequential(hidden_stack + [output_layer])

# Adam optimizer, mean-squared-error loss, MAE reported as an extra metric
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Stop once val_loss has not improved for 10 consecutive epochs and roll the
# model back to the weights of its best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Train for up to 100 epochs; early stopping decides the actual length
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100


  saving_api.save_model(


In [None]:
# Heavier regularization: batch normalization plus dropout of 0.3 after every
# hidden layer of the 64 -> 32 -> 16 -> 1 regression network.
from tensorflow.keras.layers import BatchNormalization

model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model.add(BatchNormalization())
model.add(Dropout(0.3))
model.add(Dense(32, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.3))
model.add(Dense(16, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.3))
model.add(Dense(1, activation='linear'))  # purchase_frequency treated as a regression target

# Adam optimizer, mean-squared-error loss, MAE reported as an extra metric
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Early stopping: give up after 10 epochs without val_loss improvement and
# restore the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Halve the learning rate after 5 epochs without val_loss improvement,
# never going below 1e-6.
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-6,
)

# Train for up to 100 epochs with both callbacks active
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping, lr_scheduler],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100


Because of the features and target chosen, the model is overfitting, so from here on I'll use `customer_lifetime`, `recency`, and `total_amount` as features and `purchase_frequency` as the target.

In [None]:
# Reduced feature set (three columns); purchase_frequency stays the target
features = ['customer_lifetime', 'recency', 'total_amount']
X = customer_agg[features]
y = customer_agg['purchase_frequency']

# Drop target values that occur fewer than `min_class_size` times; the stratified
# split below needs at least two rows per target value.
min_class_size = 2  # smallest class size that is kept
class_counts = y.value_counts()
valid_classes = class_counts[class_counts >= min_class_size].index
keep_rows = y.isin(valid_classes)
X_valid = X[keep_rows]
y_valid = y[keep_rows]

# Hold out 10% of the rows for validation, stratified on the target so each
# target value keeps roughly the same share in both splits.
X_train, X_val, y_train, y_val = train_test_split(
    X_valid, y_valid, test_size=0.1, stratify=y_valid, random_state=42
)

# Confirm the split sizes
print("Training set shape:", X_train.shape)
print("Validation set shape:", X_val.shape)

# Show the target distribution of each split
print("Training set target distribution:", y_train.value_counts())
print("Validation set target distribution:", y_val.value_counts())

Training set shape: (54471, 3)
Validation set shape: (6053, 3)
Training set target distribution: purchase_frequency
3     12519
2     12044
4      9244
1      6986
5      3882
      ...  
61        3
76        3
95        2
71        2
89        2
Name: count, Length: 66, dtype: int64
Validation set target distribution: purchase_frequency
3     1391
2     1338
4     1027
1      776
5      431
6      130
7       38
27      31
24      31
29      31
26      30
22      30
28      30
25      29
8       29
23      28
9       27
30      27
19      27
17      26
10      25
18      25
32      24
21      24
20      24
14      24
16      24
15      24
13      22
31      21
11      21
12      20
34      20
33      19
35      19
36      18
38      17
41      16
40      16
37      16
42      14
39      13
44      12
43      12
45      11
46       9
48       8
47       8
50       7
49       6
52       5
53       4
54       4
51       4
55       3
57       2
56       2
58       1
62       1
59       1

In [None]:
# Impute missing values for numerical features.
# The imputer is fitted on the training split only and then applied to the
# validation split, so no validation statistics are used during fitting.
# The results are bound to fresh DataFrames (same index, same column names)
# instead of being assigned into the frames returned by train_test_split:
# those frames are selections of X, and writing into them triggers pandas'
# SettingWithCopyWarning. X_train / X_val hold exactly the `features` columns,
# so rebuilding them from the transformed arrays loses nothing.
num_imputer = SimpleImputer(strategy='mean')
X_train = pd.DataFrame(
    num_imputer.fit_transform(X_train[features]),
    columns=features,
    index=X_train.index,
)
X_val = pd.DataFrame(
    num_imputer.transform(X_val[features]),
    columns=features,
    index=X_val.index,
)

# Standardize numerical features (scaler likewise fitted on the training split only)
scaler = StandardScaler()
X_train = pd.DataFrame(
    scaler.fit_transform(X_train[features]),
    columns=features,
    index=X_train.index,
)
X_val = pd.DataFrame(
    scaler.transform(X_val[features]),
    columns=features,
    index=X_val.index,
)

In [None]:
# Smaller regression network: 32 -> 16 hidden ReLU units with dropout of 0.3
# after each hidden layer, and a single linear output unit.
model = Sequential()
for layer in (
    Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.3),
    Dense(16, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='linear'),  # purchase_frequency treated as a regression target
):
    model.add(layer)

# Adam optimizer, mean-squared-error loss, MAE reported as an extra metric
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Early stopping: give up after 10 epochs without val_loss improvement and
# restore the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Halve the learning rate after only 3 stagnant epochs, never going below 1e-6
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
)

# Train for up to 100 epochs with both callbacks active
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping, lr_scheduler],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100


In [None]:
# Rebuild the small 32 -> 16 -> 1 regression network (dropout 0.3 after each
# hidden layer) and train it for a fixed 10 epochs without any callbacks.
input_dim = X_train.shape[1]
network_layers = [
    Dense(32, activation='relu', input_shape=(input_dim,)),
    Dropout(0.3),
    Dense(16, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='linear'),  # purchase_frequency treated as a regression target
]
model = Sequential(network_layers)

# Adam optimizer, mean-squared-error loss, MAE reported as an extra metric
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Fit for 10 epochs, scoring the validation split after every epoch
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_val, y_val),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Evaluate the model on the validation set (loss is MSE, metric is MAE, as compiled)
val_loss, val_mae = model.evaluate(X_val, y_val)
print(f"Validation Loss: {val_loss:.4f}")
print(f"Validation MAE: {val_mae:.4f}")

# Predict on the validation set
y_pred = model.predict(X_val)

# Convert predictions and true values to bins:
#   low: <= 10, medium: (10, 20], high: > 20
# The lowest edge is -inf rather than -1: the output layer is linear, so a
# prediction at or below -1 is possible, and pd.cut would turn it into NaN,
# which the classification metrics below cannot handle.
bins = [float('-inf'), 10, 20, float('inf')]
labels = ['low', 'medium', 'high']
y_pred_binned = pd.cut(y_pred.flatten(), bins=bins, labels=labels)
y_val_binned = pd.cut(y_val, bins=bins, labels=labels)

# Evaluate the model's performance using classification metrics.
# labels= pins the row/column order to low, medium, high; without it
# scikit-learn sorts the string labels alphabetically (high, low, medium),
# which makes the printed matrix easy to misread.
accuracy = accuracy_score(y_val_binned, y_pred_binned)
conf_matrix = confusion_matrix(y_val_binned, y_pred_binned, labels=labels)
class_report = classification_report(y_val_binned, y_pred_binned, labels=labels, zero_division=0)

print(f"Accuracy: {accuracy:.4f}")
print("\nConfusion Matrix:")
print("Rows = true class, columns = predicted class, order:", labels)
print(conf_matrix)
print("\nClassification Report:")
print(class_report)

Validation Loss: 25.1566
Validation MAE: 2.4871
Accuracy: 0.9181

Confusion Matrix:
[[ 439   52  113]
 [  40 5052  120]
 [  67  104   66]]

Classification Report:
              precision    recall  f1-score   support

        high       0.80      0.73      0.76       604
         low       0.97      0.97      0.97      5212
      medium       0.22      0.28      0.25       237

    accuracy                           0.92      6053
   macro avg       0.66      0.66      0.66      6053
weighted avg       0.92      0.92      0.92      6053



In [None]:
# Re-mount Google Drive and reload the per-customer aggregate table so the
# following steps start again from the raw, unscaled data.
drive.mount('/content/drive', force_remount=True)
customer_agg = pd.read_csv('/content/drive/MyDrive/Practicum/datasets after process/customer_agg.csv')

# Display the columns of the dataset
print("Customer Aggregation Columns:", customer_agg.columns.tolist())

# Three input features; purchase_frequency is the target
features = ['customer_lifetime', 'recency', 'total_amount']
X = customer_agg[features]
y = customer_agg['purchase_frequency']

# Drop target values that occur fewer than `min_class_size` times; the stratified
# split below needs at least two rows per target value.
min_class_size = 2  # smallest class size that is kept
class_counts = y.value_counts()
valid_classes = class_counts[class_counts >= min_class_size].index
keep_rows = y.isin(valid_classes)
X_valid = X[keep_rows]
y_valid = y[keep_rows]

# Hold out 10% of the rows for validation, stratified on the target so each
# target value keeps roughly the same share in both splits.
X_train, X_val, y_train, y_val = train_test_split(
    X_valid, y_valid, test_size=0.1, stratify=y_valid, random_state=42
)

# Confirm the split sizes
print("Training set shape:", X_train.shape)
print("Validation set shape:", X_val.shape)

# Show the target distribution of each split
print("Training set target distribution:", y_train.value_counts())
print("Validation set target distribution:", y_val.value_counts())

# Impute missing values for numerical features.
# The imputer is fitted on the training split only and then applied to the
# validation split, so no validation statistics are used during fitting.
# The results are bound to fresh DataFrames (same index, same column names)
# instead of being assigned into the frames returned by train_test_split:
# those frames are selections of X, and writing into them triggers pandas'
# SettingWithCopyWarning. X_train / X_val hold exactly the `features` columns,
# so rebuilding them from the transformed arrays loses nothing.
num_imputer = SimpleImputer(strategy='mean')
X_train = pd.DataFrame(
    num_imputer.fit_transform(X_train[features]),
    columns=features,
    index=X_train.index,
)
X_val = pd.DataFrame(
    num_imputer.transform(X_val[features]),
    columns=features,
    index=X_val.index,
)

# Standardize numerical features (scaler likewise fitted on the training split only)
scaler = StandardScaler()
X_train = pd.DataFrame(
    scaler.fit_transform(X_train[features]),
    columns=features,
    index=X_train.index,
)
X_val = pd.DataFrame(
    scaler.transform(X_val[features]),
    columns=features,
    index=X_val.index,
)

Mounted at /content/drive
Customer Aggregation Columns: ['customer_id', 'purchase_frequency', 'total_amount', 'recency', 'customer_lifetime', 'clv']
Training set shape: (54471, 3)
Validation set shape: (6053, 3)
Training set target distribution: purchase_frequency
3     12519
2     12044
4      9244
1      6986
5      3882
      ...  
61        3
76        3
95        2
71        2
89        2
Name: count, Length: 66, dtype: int64
Validation set target distribution: purchase_frequency
3     1391
2     1338
4     1027
1      776
5      431
6      130
7       38
27      31
24      31
29      31
26      30
22      30
28      30
25      29
8       29
23      28
9       27
30      27
19      27
17      26
10      25
18      25
32      24
21      24
20      24
14      24
16      24
15      24
13      22
31      21
11      21
12      20
34      20
33      19
35      19
36      18
38      17
41      16
40      16
37      16
42      14
39      13
44      12
43      12
45      11
46       9
48  

In [None]:
import pandas as pd
import numpy as np
from imblearn.over_sampling import SMOTE
# Apply SMOTE to the training data with adjusted k_neighbors parameter.
# Only the training split is resampled; X_val / y_val are left untouched.
# NOTE(review): k_neighbors=1 is presumably needed because the rarest target
# values have only 2 training rows - confirm. Also note that every distinct
# purchase_frequency value is treated as its own class here, even though the
# models in this notebook are trained as regressors.
smote = SMOTE(random_state=42, k_neighbors=1)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# Check the new distribution of the target variable after applying SMOTE
# (each target value should now have as many rows as the most frequent one)
print("Training set target distribution after SMOTE:", y_train_smote.value_counts())

Training set target distribution after SMOTE: purchase_frequency
3     12519
58    12519
40    12519
62    12519
11    12519
      ...  
15    12519
36    12519
34    12519
19    12519
89    12519
Name: count, Length: 66, dtype: int64


In [None]:
# Compact regression network trained on the SMOTE-resampled data:
# 16 -> 8 hidden ReLU units, dropout of 0.2 after each, one linear output unit.
model = Sequential()
model.add(Dense(16, activation='relu', input_shape=(X_train_smote.shape[1],)))
model.add(Dropout(0.2))
model.add(Dense(8, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='linear'))  # purchase_frequency treated as a regression target

# Adam with an explicit learning rate of 0.001, MSE loss, MAE as an extra metric
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

# Fit for 10 epochs on the resampled training data; validation uses the
# original (non-resampled) validation split.
history = model.fit(
    X_train_smote,
    y_train_smote,
    epochs=10,
    batch_size=64,
    validation_data=(X_val, y_val),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Re-mount Google Drive and reload the per-customer aggregate table so the
# following steps start again from the raw, unscaled data.
drive.mount('/content/drive', force_remount=True)
customer_agg = pd.read_csv('/content/drive/MyDrive/Practicum/datasets after process/customer_agg.csv')

# Display the columns of the dataset
print("Customer Aggregation Columns:", customer_agg.columns.tolist())

# Three input features; purchase_frequency is the target
features = ['customer_lifetime', 'recency', 'total_amount']
X = customer_agg[features]
y = customer_agg['purchase_frequency']

# Drop target values that occur fewer than `min_class_size` times; the stratified
# split below needs at least two rows per target value.
min_class_size = 2  # smallest class size that is kept
class_counts = y.value_counts()
valid_classes = class_counts[class_counts >= min_class_size].index
keep_rows = y.isin(valid_classes)
X_valid = X[keep_rows]
y_valid = y[keep_rows]

# Hold out 10% of the rows for validation, stratified on the target so each
# target value keeps roughly the same share in both splits.
X_train, X_val, y_train, y_val = train_test_split(
    X_valid, y_valid, test_size=0.1, stratify=y_valid, random_state=42
)

# Confirm the split sizes
print("Training set shape:", X_train.shape)
print("Validation set shape:", X_val.shape)

# Show the target distribution of each split
print("Training set target distribution:", y_train.value_counts())
print("Validation set target distribution:", y_val.value_counts())

# Impute missing values for numerical features.
# The imputer is fitted on the training split only and then applied to the
# validation split, so no validation statistics are used during fitting.
# The results are bound to fresh DataFrames (same index, same column names)
# instead of being assigned into the frames returned by train_test_split:
# those frames are selections of X, and writing into them triggers pandas'
# SettingWithCopyWarning. X_train / X_val hold exactly the `features` columns,
# so rebuilding them from the transformed arrays loses nothing.
num_imputer = SimpleImputer(strategy='mean')
X_train = pd.DataFrame(
    num_imputer.fit_transform(X_train[features]),
    columns=features,
    index=X_train.index,
)
X_val = pd.DataFrame(
    num_imputer.transform(X_val[features]),
    columns=features,
    index=X_val.index,
)

# Standardize numerical features (scaler likewise fitted on the training split only)
scaler = StandardScaler()
X_train = pd.DataFrame(
    scaler.fit_transform(X_train[features]),
    columns=features,
    index=X_train.index,
)
X_val = pd.DataFrame(
    scaler.transform(X_val[features]),
    columns=features,
    index=X_val.index,
)

# Oversample the training split with SMOTE (one nearest neighbour per synthetic
# sample); the validation split is left as it is.
smote = SMOTE(k_neighbors=1, random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# Check the new distribution of the target variable after applying SMOTE
print("Training set target distribution after SMOTE:", y_train_smote.value_counts())

# Very small regression network (8 -> 4 hidden ReLU units) with L2 weight
# penalties of 0.01 on both hidden layers and dropout of 0.3 after each.
model = Sequential()
model.add(
    Dense(
        8,
        activation='relu',
        input_shape=(X_train_smote.shape[1],),
        kernel_regularizer=tf.keras.regularizers.l2(0.01),
    )
)
model.add(Dropout(0.3))
model.add(Dense(4, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)))
model.add(Dropout(0.3))
model.add(Dense(1, activation='linear'))  # purchase_frequency treated as a regression target

# Adam with a learning rate of 0.0005, MSE loss, MAE as an extra metric
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

# Train for at most 10 epochs; stop earlier if val_loss has not improved for
# 5 consecutive epochs, restoring the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
history = model.fit(
    X_train_smote,
    y_train_smote,
    epochs=10,
    batch_size=64,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
)

Mounted at /content/drive
Customer Aggregation Columns: ['customer_id', 'purchase_frequency', 'total_amount', 'recency', 'customer_lifetime', 'clv']
Training set shape: (54471, 3)
Validation set shape: (6053, 3)
Training set target distribution: purchase_frequency
3     12519
2     12044
4      9244
1      6986
5      3882
      ...  
61        3
76        3
95        2
71        2
89        2
Name: count, Length: 66, dtype: int64
Validation set target distribution: purchase_frequency
3     1391
2     1338
4     1027
1      776
5      431
6      130
7       38
27      31
24      31
29      31
26      30
22      30
28      30
25      29
8       29
23      28
9       27
30      27
19      27
17      26
10      25
18      25
32      24
21      24
20      24
14      24
16      24
15      24
13      22
31      21
11      21
12      20
34      20
33      19
35      19
36      18
38      17
41      16
40      16
37      16
42      14
39      13
44      12
43      12
45      11
46       9
48  

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Evaluate the model on the validation set (loss is MSE, metric is MAE, as compiled)
val_loss, val_mae = model.evaluate(X_val, y_val)
print(f"Validation Loss: {val_loss:.4f}")
print(f"Validation MAE: {val_mae:.4f}")

# Predict on the validation set
y_pred = model.predict(X_val)

# Convert predictions and true values to bins:
#   low: <= 10, medium: (10, 20], high: > 20
# The lowest edge is -inf rather than -1: the output layer is linear, so a
# prediction at or below -1 is possible, and pd.cut would turn it into NaN,
# which the classification metrics below cannot handle.
bins = [float('-inf'), 10, 20, float('inf')]
labels = ['low', 'medium', 'high']
y_pred_binned = pd.cut(y_pred.flatten(), bins=bins, labels=labels)
y_val_binned = pd.cut(y_val, bins=bins, labels=labels)

# Evaluate the model's performance using classification metrics.
# labels= pins the row/column order to low, medium, high; without it
# scikit-learn sorts the string labels alphabetically (high, low, medium),
# which makes the printed matrix easy to misread.
accuracy = accuracy_score(y_val_binned, y_pred_binned)
conf_matrix = confusion_matrix(y_val_binned, y_pred_binned, labels=labels)
class_report = classification_report(y_val_binned, y_pred_binned, labels=labels, zero_division=0)

print(f"Accuracy: {accuracy:.4f}")
print("\nConfusion Matrix:")
print("Rows = true class, columns = predicted class, order:", labels)
print(conf_matrix)
print("\nClassification Report:")
print(class_report)

Validation Loss: 40.9239
Validation MAE: 4.4028
Accuracy: 0.8621

Confusion Matrix:
[[ 545    4   55]
 [ 151 4588  473]
 [ 124   28   85]]

Classification Report:
              precision    recall  f1-score   support

        high       0.66      0.90      0.77       604
         low       0.99      0.88      0.93      5212
      medium       0.14      0.36      0.20       237

    accuracy                           0.86      6053
   macro avg       0.60      0.71      0.63      6053
weighted avg       0.93      0.86      0.89      6053




1. **Class Imbalance**:
- The 'medium' class remains challenging. This is a common issue in imbalanced datasets. Despite applying SMOTE, the inherent difficulty of the medium class is evident.

2. **Model Performance**:
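- Compared with the earlier run without SMOTE, recall for the 'high' class improved (0.73 → 0.90) while its precision dropped (0.80 → 0.66); the 'low' class F1-score fell from 0.97 to 0.93, and overall accuracy fell from 0.92 to 0.86.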
- The 'medium' class continues to have low precision and recall, indicating it is still underrepresented and challenging to predict accurately.

In [None]:
# Three input features; purchase_frequency is the target
features = ['customer_lifetime', 'recency', 'total_amount']
X = customer_agg[features]
y = customer_agg['purchase_frequency']

# Drop target values that occur fewer than `min_class_size` times; the stratified
# split below needs at least two rows per target value.
min_class_size = 2  # smallest class size that is kept
class_counts = y.value_counts()
valid_classes = class_counts[class_counts >= min_class_size].index
keep_rows = y.isin(valid_classes)
X_valid = X[keep_rows]
y_valid = y[keep_rows]

# Hold out 10% of the rows for validation, stratified on the target so each
# target value keeps roughly the same share in both splits.
X_train, X_val, y_train, y_val = train_test_split(
    X_valid, y_valid, test_size=0.1, stratify=y_valid, random_state=42
)

# Confirm the split sizes
print("Training set shape:", X_train.shape)
print("Validation set shape:", X_val.shape)

# Show the target distribution of each split
print("Training set target distribution:", y_train.value_counts())
print("Validation set target distribution:", y_val.value_counts())

# Impute missing values for numerical features.
# The imputer is fitted on the training split only and then applied to the
# validation split, so no validation statistics are used during fitting.
# The results are bound to fresh DataFrames (same index, same column names)
# instead of being assigned into the frames returned by train_test_split:
# those frames are selections of X, and writing into them triggers pandas'
# SettingWithCopyWarning. X_train / X_val hold exactly the `features` columns,
# so rebuilding them from the transformed arrays loses nothing.
num_imputer = SimpleImputer(strategy='mean')
X_train = pd.DataFrame(
    num_imputer.fit_transform(X_train[features]),
    columns=features,
    index=X_train.index,
)
X_val = pd.DataFrame(
    num_imputer.transform(X_val[features]),
    columns=features,
    index=X_val.index,
)

# Standardize numerical features (scaler likewise fitted on the training split only)
scaler = StandardScaler()
X_train = pd.DataFrame(
    scaler.fit_transform(X_train[features]),
    columns=features,
    index=X_train.index,
)
X_val = pd.DataFrame(
    scaler.transform(X_val[features]),
    columns=features,
    index=X_val.index,
)

# Oversample the training split with SMOTE (one nearest neighbour per synthetic
# sample); the validation split is left as it is.
smote = SMOTE(k_neighbors=1, random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# Check the new distribution of the target variable after applying SMOTE
print("Training set target distribution after SMOTE:", y_train_smote.value_counts())

# Very small regression network (8 -> 4 hidden ReLU units) with L2 weight
# penalties of 0.01 on both hidden layers and dropout of 0.3 after each.
model = Sequential()
model.add(
    Dense(
        8,
        activation='relu',
        input_shape=(X_train_smote.shape[1],),
        kernel_regularizer=tf.keras.regularizers.l2(0.01),
    )
)
model.add(Dropout(0.3))
model.add(Dense(4, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)))
model.add(Dropout(0.3))
model.add(Dense(1, activation='linear'))  # purchase_frequency treated as a regression target

# Adam with a learning rate of 0.0001, MSE loss, MAE as an extra metric
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

# Train for at most 10 epochs; stop earlier if val_loss has not improved for
# 5 consecutive epochs, restoring the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
history = model.fit(
    X_train_smote,
    y_train_smote,
    epochs=10,
    batch_size=64,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
)

# Evaluate the model on the validation set (loss is MSE, metric is MAE, as compiled)
val_loss, val_mae = model.evaluate(X_val, y_val)
print(f"Validation Loss: {val_loss:.4f}")
print(f"Validation MAE: {val_mae:.4f}")

# Predict on the validation set
y_pred = model.predict(X_val)

# Convert predictions and true values to bins:
#   low: <= 10, medium: (10, 20], high: > 20
# The lowest edge is -inf rather than -1: the output layer is linear, so a
# prediction at or below -1 is possible, and pd.cut would turn it into NaN,
# which the classification metrics below cannot handle.
bins = [float('-inf'), 10, 20, float('inf')]
labels = ['low', 'medium', 'high']
y_pred_binned = pd.cut(y_pred.flatten(), bins=bins, labels=labels)
y_val_binned = pd.cut(y_val, bins=bins, labels=labels)

# Evaluate the model's performance using classification metrics.
# labels= pins the row/column order to low, medium, high; without it
# scikit-learn sorts the string labels alphabetically (high, low, medium),
# which makes the printed matrix easy to misread.
accuracy = accuracy_score(y_val_binned, y_pred_binned)
conf_matrix = confusion_matrix(y_val_binned, y_pred_binned, labels=labels)
class_report = classification_report(y_val_binned, y_pred_binned, labels=labels, zero_division=0)

print(f"Accuracy: {accuracy:.4f}")
print("\nConfusion Matrix:")
print("Rows = true class, columns = predicted class, order:", labels)
print(conf_matrix)
print("\nClassification Report:")
print(class_report)

Training set shape: (54471, 3)
Validation set shape: (6053, 3)
Training set target distribution: purchase_frequency
3     12519
2     12044
4      9244
1      6986
5      3882
      ...  
61        3
76        3
95        2
71        2
89        2
Name: count, Length: 66, dtype: int64
Validation set target distribution: purchase_frequency
3     1391
2     1338
4     1027
1      776
5      431
6      130
7       38
27      31
24      31
29      31
26      30
22      30
28      30
25      29
8       29
23      28
9       27
30      27
19      27
17      26
10      25
18      25
32      24
21      24
20      24
14      24
16      24
15      24
13      22
31      21
11      21
12      20
34      20
33      19
35      19
36      18
38      17
41      16
40      16
37      16
42      14
39      13
44      12
43      12
45      11
46       9
48       8
47       8
50       7
49       6
52       5
53       4
54       4
51       4
55       3
57       2
56       2
58       1
62       1
59       1

## Conclusion

### Research Question 1: What are the key predictive indicators of a customer's purchasing behavior?

To address this research question, a Neural Network model was trained on customer data to predict the `purchase_frequency` using the following features:
- `customer_lifetime`
- `recency`
- `total_amount`

### Model Performance Summary

The model achieved an overall accuracy of 83.48% on the validation set. Below is a detailed analysis of the model's performance:

- **Accuracy**: 0.8348
- **Validation Loss**: 45.0241
- **Validation MAE**: 4.5508

### Key Predictive Indicators

Based on the model's performance and the features used, the key predictive indicators of a customer's purchasing behavior are as follows:

1. **Customer Lifetime**:
   - This feature indicates the total duration a customer has been active. It is a significant predictor as it provides insights into the customer's longevity with the business.

2. **Recency**:
   - This feature measures the time since the last purchase. It is crucial as it helps in understanding how recently a customer made a purchase, indicating their engagement level.

3. **Total Amount**:
   - The total amount spent by the customer is a direct indicator of their purchasing behavior. It reflects the customer's spending capacity and frequency of purchases.

### Analysis

- **High and Low Classes**:
  - The model performs well in predicting the 'low' class with a precision of 0.99 and a recall of 0.85, resulting in a high f1-score of 0.92.
  - The 'high' class has moderate performance with a precision of 0.62 and a recall of 0.92, indicating that while it is relatively good at predicting 'high' instances, it has a higher tendency to classify some 'low' instances as 'high'.

- **Medium Class**:
  - The 'medium' class prediction is still poor with a precision of 0.10 and a recall of 0.31. This indicates that the model struggles to accurately predict this class, resulting in a very low f1-score of 0.16.