## Importing Necessary Libraries

In [63]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense,Dropout,Conv1D,MaxPool1D,BatchNormalization,Flatten
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping,ReduceLROnPlateau
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold

In [36]:
data = pd.read_csv("C:/Users/Abhinav/Downloads/Santander_customer_satisfaction_kaggle/train.csv")
data.head()

Unnamed: 0,ID,var3,var15,imp_ent_var16_ult1,imp_op_var39_comer_ult1,imp_op_var39_comer_ult3,imp_op_var40_comer_ult1,imp_op_var40_comer_ult3,imp_op_var40_efect_ult1,imp_op_var40_efect_ult3,...,saldo_medio_var33_hace2,saldo_medio_var33_hace3,saldo_medio_var33_ult1,saldo_medio_var33_ult3,saldo_medio_var44_hace2,saldo_medio_var44_hace3,saldo_medio_var44_ult1,saldo_medio_var44_ult3,var38,TARGET
0,1,2,23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,39205.17,0
1,3,2,34,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,49278.03,0
2,4,2,23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67333.77,0
3,8,2,37,0.0,195.0,195.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,64007.97,0
4,10,2,39,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,117310.979016,0


In [37]:
data.shape

(76020, 371)

In [38]:
data.TARGET.unique()

array([0, 1], dtype=int64)

In [39]:
x = data.drop(labels=['ID','TARGET'],axis=1)
y = data['TARGET']

x.shape,y.shape

((76020, 369), (76020,))

In [40]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=0,stratify=y)

x_train.shape,x_test.shape

((60816, 369), (15204, 369))

## Important Feature Selection
### Remove Constants
### Quassi Constant
### Remove Duplicates

In [41]:
filters = VarianceThreshold(0.01)
x_train = filters.fit_transform(x_train)
x_test = filters.transform(x_test)

x_train.shape, x_test.shape

((60816, 273), (15204, 273))

#### Duplicate Features Removal
##### 1. Transpose

In [42]:
x_train_T = x_train.T
x_test_T  = x_test.T

##### 2.Converting into a Dataframe

In [43]:
x_train_T = pd.DataFrame(x_train_T)
x_test_T = pd.DataFrame(x_test_T)

In [44]:
x_train_T.shape

(273, 60816)

In [45]:
x_train_T.duplicated().sum()

17

In [46]:
duplicates = x_train_T.duplicated()
duplicates

0      False
1      False
2      False
3      False
4      False
       ...  
268    False
269    False
270    False
271    False
272    False
Length: 273, dtype: bool

In [47]:
duplicates.unique()

array([False,  True])

In [48]:
feature_to_select = [not index for index in duplicates]
feature_to_select

[True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 True,
 False,
 True,
 True,
 True,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 True,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 True,
 True,
 True,
 True,
 False,
 True,
 True,
 True,
 True,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 Tru

In [49]:
x_train = x_train_T[feature_to_select].T
x_test  = x_test_T[feature_to_select].T

x_train.shape,x_test.shape

((60816, 256), (15204, 256))

In [50]:
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test  = scaler.transform(x_test)

In [51]:
x_train[0:5]

array([[ 0.03804785, -0.55602963, -0.05273314, ..., -0.01870463,
        -0.01977204,  0.00312134],
       [ 0.03804785, -0.7871819 , -0.05273314, ..., -0.01870463,
        -0.01977204, -0.18300606],
       [ 0.03804785, -0.7871819 , -0.05273314, ..., -0.01870463,
        -0.01977204,  1.17499225],
       [ 0.03804785, -0.24782659, -0.05273314, ..., -0.01870463,
        -0.01977204, -0.23715356],
       [ 0.03804785, -0.7871819 , -0.05273314, ..., -0.01870463,
        -0.01977204,  1.08721376]])

In [52]:
x_train.shape,x_test.shape

((60816, 256), (15204, 256))

In [53]:
x_train = x_train.reshape(x_train.shape[0],x_train.shape[1],1)
x_test  = x_test.reshape(x_test.shape[0],x_test.shape[1],1)

x_train.shape, x_test.shape

((60816, 256, 1), (15204, 256, 1))

In [54]:
y_train

65503    0
39455    0
25121    0
73436    0
68392    0
        ..
61660    0
18800    0
33109    0
51120    0
73055    0
Name: TARGET, Length: 60816, dtype: int64

In [55]:
y_train = y_train.to_numpy()
y_test  = y_test.to_numpy()

## Building a Model

In [64]:
model = Sequential()

model.add(Conv1D(32,3,activation="relu",input_shape=(256,1)))
model.add(BatchNormalization())
model.add(MaxPool1D(2))
model.add(Dropout(0.2))

model.add(Conv1D(64,3,activation="relu"))
model.add(BatchNormalization())
model.add(MaxPool1D(2))
model.add(Dropout(0.5))

model.add(Conv1D(128,3,activation="relu"))
model.add(BatchNormalization())
model.add(MaxPool1D(2))
model.add(Dropout(0.5))

model.add(Flatten())
model.add(Dense(256,activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(1,activation="sigmoid"))
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_6 (Conv1D)            (None, 254, 32)           128       
_________________________________________________________________
batch_normalization_6 (Batch (None, 254, 32)           128       
_________________________________________________________________
max_pooling1d_6 (MaxPooling1 (None, 127, 32)           0         
_________________________________________________________________
dropout_8 (Dropout)          (None, 127, 32)           0         
_________________________________________________________________
conv1d_7 (Conv1D)            (None, 125, 64)           6208      
_________________________________________________________________
batch_normalization_7 (Batch (None, 125, 64)           256       
_________________________________________________________________
max_pooling1d_7 (MaxPooling1 (None, 62, 64)           

In [65]:
checkpoint = ModelCheckpoint("C:/Users/Abhinav/Desktop/bank_customer.h5",
                             monitor="val_loss",
                             mode="min",
                             save_best_only = True,
                             verbose=1)

earlystop = EarlyStopping(monitor = 'val_loss',
                          min_delta = 0, 
                          patience = 4,
                          verbose = 1,
                          restore_best_weights = True)

reduce_lr = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.2, patience = 3, verbose = 1, min_delta = 0.00001)

# we put our call backs into a callback list
callbacks = [earlystop, checkpoint, reduce_lr]

model.compile(optimizer=Adam(lr=0.0001),loss="binary_crossentropy",metrics=['accuracy'])

In [66]:
history = model.fit(x_train,y_train,callbacks=callbacks,epochs=10,validation_data=(x_test,y_test),verbose=1)

Train on 60816 samples, validate on 15204 samples
Epoch 1/10
Epoch 00001: val_loss improved from inf to 0.17902, saving model to C:/Users/Abhinav/Desktop/bank_customer.h5
Epoch 2/10
Epoch 00002: val_loss improved from 0.17902 to 0.16283, saving model to C:/Users/Abhinav/Desktop/bank_customer.h5
Epoch 3/10
Epoch 00003: val_loss improved from 0.16283 to 0.16245, saving model to C:/Users/Abhinav/Desktop/bank_customer.h5
Epoch 4/10
Epoch 00004: val_loss improved from 0.16245 to 0.15744, saving model to C:/Users/Abhinav/Desktop/bank_customer.h5
Epoch 5/10
Epoch 00005: val_loss did not improve from 0.15744
Epoch 6/10
Epoch 00006: val_loss improved from 0.15744 to 0.15424, saving model to C:/Users/Abhinav/Desktop/bank_customer.h5
Epoch 7/10
Epoch 00007: val_loss improved from 0.15424 to 0.15309, saving model to C:/Users/Abhinav/Desktop/bank_customer.h5
Epoch 8/10
Epoch 00008: val_loss improved from 0.15309 to 0.15209, saving model to C:/Users/Abhinav/Desktop/bank_customer.h5
Epoch 9/10
Epoch 

In [67]:
from tensorflow.keras.models import load_model

model = load_model("C:/Users/Abhinav/Desktop/bank_customer.h5")

In [75]:
accuracy = model.evaluate(x_test,y_test,verbose=0)
accuracy[1]*100

96.04709148406982