# Train model 

## Import packages 

In [None]:
import os 
import datetime
import numpy as np 
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras import regularizers

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the TensorBoard notebook extension (provides the `%tensorboard` magic used further down)
%load_ext tensorboard

# Clear any logs from previous runs; this deletes the whole ./logs/ directory
%rm -rf ./logs/

## Import data 

In [None]:
# Decoding function
def parse_record(record):
    """Deserialize one serialized example into a dict of tensors.

    Args:
        record: A single serialized example, as yielded by a TFRecord dataset.

    Returns:
        A dict with a float32 ``'features'`` tensor of length 95 and a
        scalar int64 ``'label'`` tensor.
    """
    feature_spec = dict(
        features=tf.io.FixedLenFeature([95], tf.float32),
        label=tf.io.FixedLenFeature([], tf.int64),
    )
    return tf.io.parse_single_example(serialized=record, features=feature_spec)

def decode_record(record):
    """Turn a parsed example dict into a ``(features, label)`` tuple.

    Args:
        record: Mapping with the keys ``'features'`` and ``'label'``.

    Returns:
        The 2-tuple ``(record['features'], record['label'])``.
    """
    return record['features'], record['label']

def prepData(record):
    """Parse a serialized example and return it as an ``(X, y)`` pair.

    Chains ``parse_record`` and ``decode_record`` so the function can be
    passed directly to ``Dataset.map``.
    """
    return decode_record(parse_record(record))

In [None]:
"""
def get_dataset_partitions_tf(ds, ds_size, train_split=0.8, val_split=0.1, test_split=0.1, shuffle=True, shuffle_size=10000):
    assert (train_split + test_split + val_split) == 1
    
    if shuffle:
        # Specify seed to always have the same split distribution between runs
        ds = ds.shuffle(shuffle_size, seed=12)
    
    train_size = int(train_split * ds_size)
    val_size = int(val_split * ds_size)
    
    train_ds = ds.take(train_size)    
    val_ds = ds.skip(train_size).take(val_size)
    test_ds = ds.skip(train_size).skip(val_size)
    
    return train_ds, val_ds, test_ds
"""

def get_dataset_partitions_tf(ds, ds_size, train_split=0.9, val_split=0.1, shuffle=True, shuffle_size=10000):
    """Split a dataset into a training part and a validation part.

    Args:
        ds: Dataset-like object exposing ``shuffle``, ``take`` and ``skip``.
        ds_size: Number of elements in ``ds`` (examples or batches, depending
            on what ``ds`` yields).
        train_split: Fraction of ``ds_size`` assigned to the training part.
        val_split: Fraction of ``ds_size`` assigned to the validation part.
        shuffle: Whether to shuffle ``ds`` before splitting.
        shuffle_size: Buffer size handed to ``ds.shuffle``.

    Returns:
        A ``(train_ds, val_ds)`` tuple. The first ``int(train_split * ds_size)``
        elements form the training part; the next ``int(val_split * ds_size)``
        elements form the validation part.
    """
    if shuffle:
        # A fixed seed alone is not enough: by default the dataset is
        # reshuffled on every iteration, so `take`/`skip` would pick a
        # different subset each epoch and validation elements would leak into
        # training. Freezing the order keeps the two parts disjoint.
        ds = ds.shuffle(shuffle_size, seed=12, reshuffle_each_iteration=False)

    train_size = int(train_split * ds_size)
    val_size = int(val_split * ds_size)

    train_ds = ds.take(train_size)
    val_ds = ds.skip(train_size).take(val_size)

    return train_ds, val_ds

In [None]:
# Root folder that contains the `tfRecord/` directory read by the cells below.
# Set the BANK_DATA_PATH environment variable to point somewhere else; the
# original location is kept as the default so existing setups keep working.
data_path = os.environ.get(
    "BANK_DATA_PATH",
    "/Users/rick/Desktop/Boulder/Spring2022/data_science/workspace/CUB-Data-Science-Team-2022/ml_dev_tutorial/Data",
)

In [None]:
train_file_name = data_path+"/tfRecord/train.tfrecord"

# Parse the records once and cache the decoded examples; every later pass
# (counting, training, validation) reads from this cache.
examples = tf.data.TFRecordDataset(train_file_name)
examples = examples.map(prepData, num_parallel_calls=tf.data.AUTOTUNE)
examples = examples.cache()

# Count individual examples (not batches) so the split fractions apply to
# examples.
train_size = sum(1 for _ in examples)

# Split train data into train and val.
# The split must be taken from a FIXED order: with the default per-epoch
# reshuffle, `take`/`skip` would select different examples each epoch and
# validation examples would be trained on. So shuffle once with
# `reshuffle_each_iteration=False`, and tell the helper not to shuffle again.
examples = examples.shuffle(10000, seed=12, reshuffle_each_iteration=False)
train_dataset, val_dataset = get_dataset_partitions_tf(examples, train_size, shuffle=False)

# Only the training part is reshuffled every epoch; both parts are batched
# and prefetched after the split.
train_dataset = train_dataset.shuffle(10000).batch(64).prefetch(tf.data.AUTOTUNE)
val_dataset = val_dataset.batch(64).prefetch(tf.data.AUTOTUNE)

In [None]:
# Build the test input pipeline: parse -> cache -> shuffle -> batch(64) -> prefetch
test_file_name = data_path + "/tfRecord/test.tfrecord"
test_dataset = (
    tf.data.TFRecordDataset(test_file_name)
    .map(prepData, num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    .shuffle(10000)
    .batch(64)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Sanity check: pull one batch from the train pipeline, then one from the
# test pipeline, and print the feature and label shapes of each.
for dataset in (train_dataset, test_dataset):
    for X, y in dataset.take(1):
        print(X.shape)
        print(y.shape)

## Declare model 

In [None]:
class L1_ActivityRegularization(keras.layers.Layer):
    """Pass-through layer that registers an L1 penalty on its inputs.

    The layer returns its inputs unchanged and, as a side effect, adds
    ``rate * sum(|inputs|)`` to the layer's losses.
    """

    def __init__(self, rate=1e-2):
        super().__init__()
        # Multiplier applied to the summed absolute activations.
        self.rate = rate

    def call(self, inputs):
        # `add_loss` lets the penalty depend on the activations themselves.
        l1_norm = tf.reduce_sum(tf.math.abs(inputs))
        self.add_loss(self.rate * l1_norm)
        return inputs

class customDenseLayer(keras.layers.Layer):
    """Fully connected layer with ELU activation and an L1 activity penalty.

    Computes ``y = elu(inputs @ w + b)``. The activations are then passed
    through an ``L1_ActivityRegularization`` layer (rate ``1e-4``), which
    returns them unchanged and registers the penalty in the layer's losses.

    Args:
        units: Number of output units.
    """

    def __init__(self,units):
        super(customDenseLayer, self).__init__()
        self.units = units
        self.r = L1_ActivityRegularization(1e-4)

    def build(self, input_shape):
        # Weights are created lazily, once the size of the last input axis is known.
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
            name="w"
        )
        self.b = self.add_weight(
            shape=(self.units,), initializer="random_normal", trainable=True,name="b"
        )

    def call(self, inputs):
        y = tf.nn.elu(tf.matmul(inputs, self.w) + self.b)
        # The regularizer was previously constructed but never called, so its
        # penalty was never registered. It is an identity on `y`.
        return self.r(y)

In [None]:
class BankModel(Model):
    """Binary classifier with hand-written training and evaluation steps.

    Layer stack: ``customDenseLayer(48)`` -> ``Dropout(.3)`` ->
    ``Dense(16, elu)`` with kernel/bias/activity regularizers ->
    ``Dropout(.3)`` -> ``Dense(1, sigmoid)``.

    The optimized loss is binary cross-entropy plus every regularization loss
    registered by the layers (``self.losses``). Training and evaluation report
    the keys ``loss``, ``acc``, ``recall``, ``precision``, ``auc``, ``tp``,
    ``fp``, ``tn`` and ``fn``.
    """

    def __init__(self,):
        super(BankModel, self).__init__(name="bank_model")

        # Declare model layers
        self.layer_1 = customDenseLayer(48)
        self.layer_2 = tf.keras.layers.Dropout(.3)
        self.layer_3 = layers.Dense(
                units=16,
                activation="elu",
                kernel_regularizer=regularizers.l1_l2(l1=1e-3, l2=1e-3),
                bias_regularizer=regularizers.l2(1e-3),
                activity_regularizer=regularizers.l2(1e-3)
            )
        self.layer_4 = tf.keras.layers.Dropout(.3)
        self.layer_5 = layers.Dense(
                units=1,
                activation="sigmoid",
            )

        # Declare loss and metrics
        self.loss_cc = tf.keras.losses.BinaryCrossentropy()
        self.tp = tf.keras.metrics.TruePositives(name='tp')
        self.fp = tf.keras.metrics.FalsePositives(name='fp')
        self.tn = tf.keras.metrics.TrueNegatives(name='tn')
        self.fn = tf.keras.metrics.FalseNegatives(name='fn')
        self.acc = tf.keras.metrics.BinaryAccuracy(name='accuracy')
        self.prec = tf.keras.metrics.Precision(name='precision')
        self.rec = tf.keras.metrics.Recall(name='recall')
        self.auc = tf.keras.metrics.AUC(name='auc')
        self.loss_tracker = keras.metrics.Mean(name="loss")

    @property
    def metrics(self):
        """List of the model's metrics.
        We make sure the loss tracker is listed as part of `model.metrics`
        so that `fit()` and `evaluate()` are able to `reset()` the loss tracker
        at the start of each epoch and at the start of an `evaluate()` call.
        """
        return [self.loss_tracker,self.acc,self.rec,self.prec,self.auc,\
                        self.tp,self.fp,self.tn,self.fn]

    def call(self, inputs, training=None):
        """Run the forward pass.

        Args:
            inputs: Batch of feature vectors.
            training: Forwarded to the Dropout layers so they only drop units
                while training. ``None`` lets Keras decide.

        Returns:
            The sigmoid output of the last layer.
        """
        x = self.layer_1(inputs)
        x = self.layer_2(x, training=training)
        x = self.layer_3(x)
        x = self.layer_4(x, training=training)
        yh = self.layer_5(x)
        return yh

    def _objective(self, y, yh):
        """Return cross-entropy plus the layers' regularization losses."""
        loss = self.loss_cc(y, yh)
        # Kernel/bias/activity regularizers and `add_loss` calls only take
        # effect if their values are added to the loss being optimized.
        reg_losses = self.losses
        if reg_losses:
            loss = loss + tf.add_n(reg_losses)
        return loss

    def _record_metrics(self, loss, y, yh):
        """Update all trackers and return a dict of their current values."""
        self.loss_tracker.update_state(loss)
        self.acc.update_state(y,tf.math.round(yh))
        for metric in (self.rec, self.prec, self.auc,
                       self.tp, self.fp, self.tn, self.fn):
            metric.update_state(y, yh)

        # Return a dict mapping metric names to current value
        return {
            "loss": self.loss_tracker.result(),
            "acc": self.acc.result(),
            "recall": self.rec.result(),
            "precision": self.prec.result(),
            "auc": self.auc.result(),
            "tp": self.tp.result(),
            "fp": self.fp.result(),
            "tn": self.tn.result(),
            "fn": self.fn.result(),
        }

    def train_step(self, data):
        """Run one optimization step on a batch ``(X, y)`` and return the metrics."""
        X, y = data

        with tf.GradientTape() as tape:
            # Call the model itself with training=True: invoking the layers
            # one by one gave Dropout no training flag.
            yh = self(X, training=True)
            loss = self._objective(y, yh)

        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        return self._record_metrics(loss, y, yh)

    def test_step(self, data):
        """Evaluate one batch ``(X, y)`` without updating weights."""
        X, y = data

        yh = self(X, training=False)
        loss = self._objective(y, yh)

        return self._record_metrics(loss, y, yh)

    def build_graph(self, raw_shape):
        """Wrap the forward pass in a functional ``Model`` (e.g. for ``summary()``)."""
        x = tf.keras.layers.Input(shape=raw_shape)
        return Model(inputs=[x], outputs=self.call(x))

In [None]:
# Instantiate the subclassed model and compile it with Adam (lr = 1e-3)
model = BankModel()
model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3))

# Wrap the forward pass in a standard functional model to print a summary
input_shape = (95,)
model.build_graph(input_shape).summary()

## Train Model 

In [None]:
# Each run logs into its own timestamped sub-directory of logs/fit/
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/fit/" + run_stamp
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
)

In [None]:
# Train for 75 epochs, validating after each epoch and logging to TensorBoard
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=75,
    callbacks=[tensorboard_callback],
)

## Evaluate model 

In [None]:
# Evaluate on the held-out test pipeline; the return value is discarded and
# only the progress output (verbose=1) is shown.
_ = model.evaluate(test_dataset,verbose=1)

### View tensorboard

In [None]:
# Open TensorBoard inline, pointed at the directory the training callback logs into
%tensorboard --logdir logs/fit

In [None]:
# Clear logs: deletes the whole ./logs/ directory.
# Remove or skip this cell if you want to keep the TensorBoard logs.
%rm -rf ./logs/

### Reload test data to convert to np array

In [None]:
# Rebuild the test pipeline with a batch size of 3000 so that a single batch
# can be pulled out as arrays in the next cell
test_file_name = data_path + "/tfRecord/test.tfrecord"
test_dataset = (
    tf.data.TFRecordDataset(test_file_name)
    .map(prepData, num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    .shuffle(10000)
    .batch(3000)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Take the first batch of the re-batched test pipeline as (features, labels)
X_test, y_test = iter(test_dataset).get_next()
for tensor in (X_test, y_test):
    print(tensor.shape)

In [None]:
target_names = ['no fraud','fraud']

# Predicted probabilities -> hard 0/1 labels in a flat array
Y_pred = model.predict(X_test, verbose=1)
y_pred = np.round(Y_pred).flatten()

report = classification_report(y_test, y_pred, target_names=target_names)
print(report)

In [None]:
# Confusion matrix of true vs. predicted labels, drawn as an annotated heatmap
cm = pd.DataFrame(
    confusion_matrix(y_test, y_pred),
    index=range(2),
    columns=range(2),
)

plt.figure(figsize=(8, 8))
sns.heatmap(cm, annot=True, annot_kws={"size": 12})  # "size" is the annotation font size
plt.show()

## Save model 

In [None]:
# The model is exported to <MODEL_DIR>/<version>
MODEL_DIR = 'tf_model'
version = "1"
export_path = os.path.join(MODEL_DIR, version)

In [None]:
# Write the trained model to the versioned export directory
model.save(export_path)

## Load model 

In [None]:
# Reset the Keras global state before loading the model back from disk
tf.keras.backend.clear_session()

In [None]:
# Earlier variant that passed the custom layer explicitly, kept for reference:
#saved_model = tf.keras.models.load_model('model', custom_objects={'layer_1': customDenseLayer})
# Load the model back from the directory it was saved to.
saved_model = tf.keras.models.load_model(export_path)

In [None]:
target_names = ['no fraud','fraud']

# Same report as before, this time produced by the reloaded model
Y_pred = saved_model.predict(X_test, verbose=1)
y_pred = np.round(Y_pred).flatten()

report = classification_report(y_test, y_pred, target_names=target_names)
print(report)

## Make tf lite model 

In [None]:
# Convert the reloaded Keras model to a TFLite flatbuffer
converter = tf.lite.TFLiteConverter.from_keras_model(saved_model)
tflite_model = converter.convert()

# Use a context manager so the file is flushed and closed before the
# interpreter below opens it; the bare `open(...).write(...)` left the handle
# open.
with open("tflite_model.tflite", "wb") as tflite_file:
    tflite_file.write(tflite_model)

### Test inference with TF Lite on a single sample

In [None]:
# First test sample, reshaped to a batch of one
x_test = X_test.numpy()[0,:].reshape((1,95))

# Load the TFLite model and allocate its tensors
interpreter = tf.lite.Interpreter(model_path="tflite_model.tflite")
interpreter.allocate_tensors()

# Look up the input and output tensor descriptions
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Copy the sample into the model's input tensor
input_index = input_details[0]["index"]
interpreter.set_tensor(input_index, x_test)

# Run the inference and read the prediction back
interpreter.invoke()
output_index = output_details[0]['index']
output_data = interpreter.get_tensor(output_index)
print(output_data[0])

### Check if performance holds with tf lite 

In [None]:
# Fresh interpreter for the full-test-set comparison below
interpreter = tf.lite.Interpreter(model_path="tflite_model.tflite")
interpreter.allocate_tensors()

# Input and output tensor descriptions
input_details, output_details = (
    interpreter.get_input_details(),
    interpreter.get_output_details(),
)

In [None]:
def testLite(x_test):
    """Run one sample through the module-level TFLite ``interpreter``.

    Args:
        x_test: Input array written into the interpreter's first input tensor.

    Returns:
        Element ``[0][0]`` of the interpreter's first output tensor.
    """
    # Feed the sample into the first input tensor
    in_idx = interpreter.get_input_details()[0]["index"]
    interpreter.set_tensor(in_idx, x_test)

    # Execute the model and fetch the first output tensor
    interpreter.invoke()
    out_idx = output_details[0]['index']
    return interpreter.get_tensor(out_idx)[0][0]

In [None]:
# Run every test sample through the TFLite interpreter, one row at a time.
# Iterating over the rows (instead of a hard-coded sample count) keeps
# `lite_pred` the same length as `y_test` for any test-set size. The tensor
# is converted to NumPy once rather than on every iteration.
test_rows = X_test.numpy()
lite_pred = [testLite(row.reshape((1, -1))) for row in test_rows]

In [None]:
# Round the TFLite outputs to 0/1 labels and report against the true labels
target_names = ['no fraud','fraud']
lite_labels = np.round(lite_pred)
print(classification_report(y_test, lite_labels, target_names=target_names))