In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf
from tensorflow.keras import callbacks 

import numpy as np, pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
class DataGenerator(tf.keras.utils.Sequence):
    def __init__(self, df,label, batch_size=128):

       
        self.main_dataset = df.to_numpy()
        self.main_label = label.reshape(-1)
        self.no_of_fetures = len(df.columns)

        self.batch_size = batch_size
        self.training_dataset = None 
        self.training_label = None
        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.training_dataset) / self.batch_size))

    def __getitem__(self, index):

        X = self.training_dataset[index*self.batch_size:(index+1)*self.batch_size]
        y = self.training_label[index*self.batch_size:(index+1)*self.batch_size]
        
        return X, y

    def on_epoch_end(self):
        positive_index = self.main_label == 1
        negative_index = self.main_label == 0
        
        positive_data = self.main_dataset[positive_index]
        negative_data = self.main_dataset[negative_index]
        
        #print('Negative: ' ,len(negative_index),negative_index.shape ,'Positive: ',len(positive_index))
        
        index = np.random.choice(negative_data.shape[0], size=positive_data.shape[0], replace=False)
        
        negative_data = negative_data[index]

        self.training_dataset =  np.concatenate((positive_data, negative_data), axis=0)
        self.training_label = np.array([1]*len(positive_data) + [0]*len(negative_data))
        
        self.training_dataset , self.training_label = shuffle(self.training_dataset, self.training_label)
        

In [3]:
class CFG:
    MODEL_SAVE_DIR = 'logs'
cfg = CFG()

early_stop = callbacks.EarlyStopping(monitor="val_accuracy", mode="max",
                                    restore_best_weights=True, patience=10
                                    )
checkpoint = callbacks.ModelCheckpoint(filepath=os.path.join('logs\saved_model', "{epoch:04d}-{val_accuracy:.3f}.h5"),
                                        save_weights_only=False,
                                        monitor='val_accuracy',
                                        model='max',
                                        save_best_only=False,
                                    )

csv_logs = callbacks.CSVLogger(os.path.join(cfg.MODEL_SAVE_DIR, "logs.csv"))
reduce_lr = callbacks.ReduceLROnPlateau(factor=0.66, monitor="val_accuracy", mode="max", patience=10)

In [4]:
def get_model(no_of_columns,show_summary=False):

    inp = tf.keras.layers.Input(shape=no_of_columns)
    x = tf.keras.layers.Dense(64, activation='relu')(inp)
    #x = tf.keras.layers.BatchNormalization() (x)
    x = tf.keras.layers.Dropout(0.2)(x)

    x = tf.keras.layers.Dense(256, activation='relu')(x)
    #x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Dropout(0.33)(x)

    x = tf.keras.layers.Dense(64, activation='relu')(x)
    #x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Dropout(0.33)(x)

    x = tf.keras.layers.Dense(1, activation='sigmoid')(x)

    model = tf.keras.Model(inputs=inp, outputs=x)
    model.compile(loss = 'binary_crossentropy', optimizer= tf.optimizers.Adam(learning_rate=.007) , metrics=['accuracy'])

    if show_summary: model.summary()

    return model

In [5]:
df = pd.read_excel('processed_df.xlsx')
df.drop(['Unnamed: 0'], axis=1, inplace=True)


In [6]:
model = get_model(len(df.columns)-1, True)

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 46)]              0         
                                                                 
 dense (Dense)               (None, 64)                3008      
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 256)               16640     
                                                                 
 dropout_1 (Dropout)         (None, 256)               0         
                                                                 
 dense_2 (Dense)             (None, 64)                16448     
                                                                 
 dropout_2 (Dropout)         (None, 64)                0     

In [7]:
min_ = df.min()
max_ = df.max()
normalized_df=(df-min_)/(max_-min_)

label = normalized_df['label']
normalized_df = normalized_df.drop(['label'], axis = 1)

X,XTEST,Y, YTEST = train_test_split(normalized_df,label,test_size = .2,random_state = 42)

In [8]:
train_data_gen = DataGenerator(X,Y.to_numpy(), batch_size=128)
valid_data_gen = DataGenerator(XTEST,YTEST.to_numpy(), batch_size=128)

In [None]:
model.fit(train_data_gen,
          epochs=300,
          validation_data=valid_data_gen,
          callbacks=[ checkpoint, csv_logs, reduce_lr])

In [9]:
model = tf.keras.models.load_model(r'logs/0046-0.810.h5')
prediction = np.round(model.predict(XTEST)).reshape(-1)



In [10]:
print(confusion_matrix(YTEST, prediction))
print(classification_report(YTEST, prediction))

[[3789  921]
 [ 207  748]]
              precision    recall  f1-score   support

         0.0       0.95      0.80      0.87      4710
         1.0       0.45      0.78      0.57       955

    accuracy                           0.80      5665
   macro avg       0.70      0.79      0.72      5665
weighted avg       0.86      0.80      0.82      5665

