# Step 01 - Import modules

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import keras

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold

# Step 02 - Data preprocessing

In [None]:
# getting data
! mkdir ~/.kaggle
! cp /content/kaggle.json ~/.kaggle
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
! kaggle competitions download -c santander-customer-satisfaction

In [None]:
! unzip /content/santander-customer-satisfaction.zip

In [None]:
# Load train.csv from the current working directory into a DataFrame
df = pd.read_csv('./train.csv')

In [None]:
# (number of rows, number of columns) of the table as loaded
df.shape

In [None]:
# Row count per distinct TARGET value, to see how balanced the label is
df['TARGET'].value_counts()

In [None]:
# True if at least one row is an exact repeat of an earlier row
df.duplicated().any()

In [None]:
df = df.T

In [None]:
df.duplicated().sum()

In [None]:
df.drop_duplicates(inplace=True)

In [None]:
df = df.T

In [None]:
df.shape

In [None]:
df.head()

In [None]:
x = df.drop(columns=['ID','TARGET'], axis=1)
y = df['TARGET'].values

In [None]:
# removing constant and Quasi Constents and dublicate feature
# rm_f = VarianceThreshold(threshold=0.01).fit(x)

In [None]:
# x = rm_f.transform(x)

In [None]:
x.shape

In [None]:
# Standardizing the data
sc = StandardScaler().fit(x)

In [None]:
x = sc.transform(x)

In [None]:
# splitting the data into training and testing 
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

In [None]:
x_train.shape, x_test.shape, y_train.shape, y_test.shape

In [None]:
# reshping the data sets
x_train = x_train.reshape(60816, 307, 1)
x_test = x_test.reshape(15204, 307, 1)

In [None]:
x_train.shape

# Step 03 - Building and evaluating the model

In [None]:
model = keras.models.Sequential()

In [None]:
# adding CNN layers
model.add(keras.layers.Conv1D(filters=32, kernel_size=3, padding='same', activation='relu', input_shape=(307, 1)))

# adding batch Normalization
model.add(keras.layers.BatchNormalization())

# adding maxpool layers
model.add(keras.layers.MaxPool1D(pool_size=2))

# dropout layes
model.add(keras.layers.Dropout(0.2))

In [None]:
#adding CNN layers
model.add(keras.layers.Conv1D(filters=64, kernel_size=3, padding='same', activation='relu'))

#adding 
model.add(keras.layers.BatchNormalization())

#adding maxpool layers
model.add(keras.layers.MaxPool1D(pool_size=2))

#dropout layes
model.add(keras.layers.Dropout(0.3))

In [None]:
#adding CNN layers
model.add(keras.layers.Conv1D(filters=128, kernel_size=3, padding='same', activation='relu'))

#adding 
model.add(keras.layers.BatchNormalization())

#adding maxpool layers
model.add(keras.layers.MaxPool1D(pool_size=2))

#dropout layes
model.add(keras.layers.Dropout(0.4))

In [None]:
#flatting layes
model.add(keras.layers.Flatten())

In [None]:
#fully connected layers
model.add(keras.layers.Dense(units=128, activation='relu'))

#droput layers
model.add(keras.layers.Dense(units=64, activation='relu'))

#droput layers
model.add(keras.layers.Dense(units=1, activation='sigmoid'))


In [None]:
model.summary()

In [None]:
# compile the model

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# training the mode
%%time
history = model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test))

### Evaluation

In [None]:
y_pred = model.predict(x_test)

In [None]:
pred = np.where(y_pred > 0.5, 1, 0)

In [None]:
accuracy_score(y_test, pred)