In [1]:
import os
import shutil

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
# Load the raw data from EComm.csv (path is relative to the working directory)
# and preview the first rows.
df = pd.read_csv('EComm.csv')
df.head()

Unnamed: 0,ID,Warehouse_block,Mode_of_Shipment,Customer_care_calls,Customer_rating,Cost_of_the_Product,Prior_purchases,Product_importance,Gender,Discount_offered,Weight_in_gms,Returned
0,1,D,Flight,4,2,177,3,low,F,44,1233,1
1,2,F,Flight,4,5,216,2,low,M,59,3088,1
2,3,A,Flight,2,2,183,4,low,M,48,3374,1
3,4,B,Flight,3,3,176,4,medium,M,10,1177,1
4,5,C,Flight,2,2,184,3,medium,F,46,2484,1


In [3]:
# (n_rows, n_columns) of the loaded frame.
df.shape

(10999, 12)

In [4]:
# Column dtypes and non-null counts: a quick check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10999 entries, 0 to 10998
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   ID                   10999 non-null  int64 
 1   Warehouse_block      10999 non-null  object
 2   Mode_of_Shipment     10999 non-null  object
 3   Customer_care_calls  10999 non-null  int64 
 4   Customer_rating      10999 non-null  int64 
 5   Cost_of_the_Product  10999 non-null  int64 
 6   Prior_purchases      10999 non-null  int64 
 7   Product_importance   10999 non-null  object
 8   Gender               10999 non-null  object
 9   Discount_offered     10999 non-null  int64 
 10  Weight_in_gms        10999 non-null  int64 
 11  Returned             10999 non-null  int64 
dtypes: int64(8), object(4)
memory usage: 1.0+ MB


In [5]:
# Separate the label from the predictors. ID is dropped from the features as
# well (it looks like a plain row counter in the preview above).
target_col = 'Returned'
y = df[target_col]
X = df.drop(columns=['ID', target_col])

In [6]:
# Preview the feature matrix (ID and Returned have been dropped).
X.head()

Unnamed: 0,Warehouse_block,Mode_of_Shipment,Customer_care_calls,Customer_rating,Cost_of_the_Product,Prior_purchases,Product_importance,Gender,Discount_offered,Weight_in_gms
0,D,Flight,4,2,177,3,low,F,44,1233
1,F,Flight,4,5,216,2,low,M,59,3088
2,A,Flight,2,2,183,4,low,M,48,3374
3,B,Flight,3,3,176,4,medium,M,10,1177
4,C,Flight,2,2,184,3,medium,F,46,2484


In [7]:
# Preview the target column.
y.head()

0    1
1    1
2    1
3    1
4    1
Name: Returned, dtype: int64

In [8]:
# Distinct label values present in the target.
y.unique()

array([1, 0], dtype=int64)

In [9]:
from sklearn.model_selection import train_test_split

# Two successive 80/20 splits with the same seed: first carve out the test
# set, then split what remains into train and validation.
split_kwargs = dict(test_size=0.2, random_state=42)

X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, **split_kwargs)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, **split_kwargs)

In [10]:
# Report the (features, target) shapes of every split.
for label, features, target in (
    ('Train : ', X_train, y_train),
    ('Validation:', X_val, y_val),
    ('Test  : ', X_test, y_test),
):
    print(label, features.shape, target.shape)

Train :  (7039, 10) (7039,)
Validation: (1760, 10) (1760,)
Test  :  (2200, 10) (2200,)


In [11]:
from category_encoders import TargetEncoder

In [12]:
# Target-encode the four categorical columns.
# NOTE: this cell rebinds X_train / X_val / X_test, so it is not idempotent;
# re-run from the train/validation/test split cell before executing it again.
encoder = TargetEncoder(cols=['Warehouse_block','Mode_of_Shipment','Product_importance','Gender'])

# The category -> target statistics are learned from the training split only.
X_train = encoder.fit_transform(X_train, y_train)
# Held-out splits are transformed WITHOUT their labels: the category_encoders
# contract is "training data with y, unseen data without y", and keeping the
# validation/test targets out of the encoder rules out label leakage.
X_val = encoder.transform(X_val)
X_test = encoder.transform(X_test)

In [13]:
from sklearn.preprocessing import StandardScaler

In [14]:
# Standardize the features: statistics are estimated on the training split
# only and then applied unchanged to every split.
scaler = StandardScaler().fit(X_train)

X_train, X_val, X_test = [scaler.transform(part) for part in (X_train, X_val, X_test)]

In [15]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

# For Reproducibility
# A single Keras utility call seeds Python's `random`, NumPy and TensorFlow
# together. Seeding only NumPy and TensorFlow left Python's `random` unseeded.
# NOTE(review): recent Keras releases appear to draw default initializer seeds
# from Python's `random`; confirm against the installed version.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

In [18]:
def create_baseline(input_dim=None):
    """Build the unregularized baseline binary classifier.

    Architecture: Dense(256) -> Dense(128) -> Dense(64), all ReLU, followed by
    a single sigmoid output unit.

    Parameters
    ----------
    input_dim : int, optional
        Number of input features. When omitted, the width of the notebook-level
        ``X_train`` matrix is used, which is what the zero-argument call has
        always done.

    Returns
    -------
    tensorflow.keras.Sequential
        The model, not yet compiled.
    """
    if input_dim is None:
        # Backward-compatible default: read the feature count from the global.
        input_dim = X_train.shape[1]

    model = Sequential([
        # Explicit Input object instead of the legacy `input_shape=` kwarg on
        # the first Dense layer. It is named explicitly so that an
        # auto-generated input-layer name cannot collide with the Dense layer
        # that is called 'input_layer' below.
        tf.keras.Input(shape=(input_dim,), name='features'),
        Dense(256, activation="relu", name='input_layer'),
        Dense(128, activation="relu", name='hidden_1'),
        Dense(64, activation="relu", name='hidden_2'),
        Dense(1, activation='sigmoid', name='output_layer'),
    ])
    return model

In [19]:
# Instantiate the baseline network (uncompiled at this point).
model = create_baseline()

In [20]:
# Adam and binary cross-entropy, both with library defaults; accuracy is the
# only tracked metric.
adam_opt = tf.keras.optimizers.Adam()
bce_loss = tf.keras.losses.BinaryCrossentropy()
model.compile(optimizer=adam_opt, loss=bce_loss, metrics=["accuracy"])

In [21]:
# Load the TensorBoard notebook extension.
%load_ext tensorboard
# Directory that the TensorBoard callback below writes its logs to.
log_folder = 'logs'

In [22]:
# Start from an empty TensorBoard log directory.
# shutil.rmtree works on every OS (the former `rmdir /S /Q` exists only in the
# Windows shell), and ignore_errors=True turns the cell into a no-op when the
# folder does not exist yet, e.g. on the very first run.
shutil.rmtree(log_folder, ignore_errors=True)

In [23]:
# For reloading the tensorboard
# %reload_ext tensorboard

In [24]:
from tensorflow.keras.callbacks import TensorBoard
# Callback that writes training logs under log_folder. histogram_freq=1 asks
# for weight histograms every epoch (per the Keras TensorBoard callback docs).
tb_callback = TensorBoard(log_dir=log_folder, histogram_freq=1)

In [25]:
# Train the baseline for 10 epochs in mini-batches of 128, evaluating on the
# validation split after each epoch and logging through tb_callback.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    batch_size=128,
    verbose=1,
    callbacks=[tb_callback],
)

Epoch 1/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.6162 - loss: 0.6043 - val_accuracy: 0.6324 - val_loss: 0.5502
Epoch 2/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.6793 - loss: 0.5152 - val_accuracy: 0.6324 - val_loss: 0.5490
Epoch 3/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6770 - loss: 0.5090 - val_accuracy: 0.6398 - val_loss: 0.5500
Epoch 4/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6845 - loss: 0.5054 - val_accuracy: 0.6455 - val_loss: 0.5496
Epoch 5/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.6870 - loss: 0.5026 - val_accuracy: 0.6426 - val_loss: 0.5486
Epoch 6/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.6920 - loss: 0.4997 - val_accuracy: 0.6426 - val_loss: 0.5480
Epoch 7/10
[1m55/55[0m [32m━━━━━━━━━

In [26]:
print('Training Data:')
# evaluate() returns [loss, accuracy], matching the loss and metric given to compile().
model.evaluate(X_train, y_train)

Training Data:
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7091 - loss: 0.4875


[0.48820701241493225, 0.7069185972213745]

In [27]:
print('Validation Data:')
# Same [loss, accuracy] pair, computed on the validation split.
model.evaluate(X_val, y_val)

Validation Data:
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6586 - loss: 0.5341


[0.5536227226257324, 0.6431818008422852]

In [None]:
# %tensorboard --logdir={log_folder}

## APPLYING REGULARIZATION (L2):-

In [36]:
def create_baseline(l2=1e-6):
    """Build the L2-regularized variant of the 256-128-64 ReLU classifier.

    NOTE(review): this definition reuses the name ``create_baseline`` and so
    replaces the earlier, unregularized builder for every cell that runs after
    it. The name is kept only so that existing call sites keep working.

    Parameters
    ----------
    l2 : float, default 1e-6
        L2 penalty factor applied to the kernel of each hidden Dense layer.
        The sigmoid output layer is not regularized.

    Returns
    -------
    tensorflow.keras.Sequential
        The model, not yet compiled. No input shape is declared here.
    """
    # One regularizer object is shared by the three hidden layers.
    L2Reg = tf.keras.regularizers.L2(l2=l2)
    model = Sequential([
        Dense(256, activation="relu", kernel_regularizer=L2Reg),
        Dense(128, activation="relu", kernel_regularizer=L2Reg),
        Dense(64, activation="relu", kernel_regularizer=L2Reg),
        Dense(1, activation="sigmoid"),
    ])
    return model

In [37]:
# Rebinds `model` to a fresh L2-regularized network; the previously trained
# model is no longer reachable under this name.
model = create_baseline()

In [38]:
# Same training configuration as before: Adam, binary cross-entropy, accuracy.
compile_kwargs = {
    "optimizer": tf.keras.optimizers.Adam(),
    "loss": tf.keras.losses.BinaryCrossentropy(),
    "metrics": ['accuracy'],
}
model.compile(**compile_kwargs)

In [39]:
# Train the L2-regularized model with the same epochs and batch size as the
# earlier run. No callbacks are passed to this fit call.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    batch_size=128,
    verbose=1,
)

Epoch 1/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.6169 - loss: 0.5895 - val_accuracy: 0.6398 - val_loss: 0.5518
Epoch 2/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6691 - loss: 0.5168 - val_accuracy: 0.6375 - val_loss: 0.5493
Epoch 3/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6745 - loss: 0.5106 - val_accuracy: 0.6358 - val_loss: 0.5497
Epoch 4/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6847 - loss: 0.5069 - val_accuracy: 0.6415 - val_loss: 0.5492
Epoch 5/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6911 - loss: 0.5035 - val_accuracy: 0.6392 - val_loss: 0.5489
Epoch 6/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6942 - loss: 0.5004 - val_accuracy: 0.6386 - val_loss: 0.5487
Epoch 7/10
[1m55/55[0m [32m━━━━━━━━━━

In [40]:
# Evaluate on both splits; each result is the [loss, accuracy] list returned
# by evaluate().
train_scores = model.evaluate(X_train, y_train)
print(f'Training Data : {train_scores}')
val_scores = model.evaluate(X_val, y_val)
print(f'Validation : {val_scores}')


[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7054 - loss: 0.4885
Training Data : [0.4885319173336029, 0.7056400179862976]
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6545 - loss: 0.5381
Validation : [0.5525984764099121, 0.6431818008422852]


## APPLYING REGULARIZATION (Dropout):-

In [41]:
from tensorflow.keras.layers import Dropout

In [42]:
def create_Dropout(dropout_rate=0.3, l2=1e-6):
    """Build the 256-128-64 ReLU classifier with dropout and L2 regularization.

    Dropout follows the 256- and 128-unit layers; the 64-unit layer and the
    sigmoid output have no dropout after them.

    Parameters
    ----------
    dropout_rate : float, default 0.3
        Rate passed to both Dropout layers.
    l2 : float, default 1e-6
        L2 penalty factor applied to the kernel of each hidden Dense layer.

    Returns
    -------
    tensorflow.keras.Sequential
        The model, not yet compiled. No input shape is declared here.
    """
    # One regularizer object is shared by the three hidden layers.
    L2Reg = tf.keras.regularizers.L2(l2=l2)

    model = Sequential([
        Dense(256, activation="relu", kernel_regularizer=L2Reg),
        # Randomly zeroes a `dropout_rate` fraction of the previous layer's
        # outputs during training.
        Dropout(dropout_rate),
        Dense(128, activation="relu", kernel_regularizer=L2Reg),
        Dropout(dropout_rate),
        Dense(64, activation="relu", kernel_regularizer=L2Reg),
        Dense(1, activation='sigmoid'),
    ])
    return model

In [43]:
# Rebinds `model` to a fresh dropout network; the previously trained model is
# no longer reachable under this name.
model = create_Dropout()

In [44]:
# Adam and binary cross-entropy with library defaults; accuracy is the only
# tracked metric.
dropout_optimizer = tf.keras.optimizers.Adam()
dropout_loss = tf.keras.losses.BinaryCrossentropy()
model.compile(
    optimizer=dropout_optimizer,
    loss=dropout_loss,
    metrics=["accuracy"],
)

In [47]:
from tensorflow.keras.callbacks import TensorBoard
# Root directory shared by every TensorBoard run in this notebook.
log_folder = "logs"
# Log the dropout model into its own sub-directory. The earlier baseline run
# wrote straight into `log_folder`, so reusing that exact directory would mix
# the two runs' logs together in TensorBoard.
tb_callback = TensorBoard(log_dir=os.path.join(log_folder, "dropout"), histogram_freq=1)