# Imports

In [25]:
#Packages
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow import keras
from statistics import mean
from google.colab import drive

from keras import Model
from keras.layers import Input, Dense, Activation, Dropout, BatchNormalization, Add
from keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from sklearn.utils import shuffle

from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
from sklearn.metrics import precision_recall_curve, average_precision_score

# Data loading

## Mount Drive

In [26]:
# Mount Google Drive into the Colab filesystem (force_remount=True is passed on every run).
drive.mount(
    '/content/drive',
    force_remount=True,
)
# Base directory that every dataset, checkpoint and model path below is built from.
root_dir = "/content/drive/My Drive/"

Mounted at /content/drive


## Load Dataset 

In [27]:
# Frequency vectors (model inputs) for the train and test splits.
fv_train, fv_test = (
    np.load(root_dir + feature_file)
    for feature_file in (
        "Data/hotspots/final/fv_train-5k-list-500chunk_with_reversed.npy",
        "Data/hotspots/final/fv_test-5k-list-500chunk_with_reversed.npy",
    )
)

# Labels for the same two splits.
# NOTE(review): the feature files are tagged "5k" while the label files are
# tagged "3k" -- confirm that these files really belong together.
y_train, y_test = (
    np.load(root_dir + label_file)
    for label_file in (
        "Data/hotspots/final/y_train-3k-list-500chunk_with_reversed.npy",
        "Data/hotspots/final/y_test-3k-list-500chunk_with_reversed.npy",
    )
)

# Neural Network

## Hyperparameters

In [31]:
# --- Tunable training settings -------------------------------------------
EPOCHS = 2000
LEARNING_RATE = 0.001
BATCH_SIZE = 128
DROPOUT_RATE = 0.25
RESIDUAL_ACTIVATION_TYPE = 'relu'

# Width of the network input, taken from the first training sample.
freq_vector_size = len(fv_train[0])

# --- Keras callbacks ------------------------------------------------------
# Learning-rate schedule driven by the validation loss.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.7,
    patience=200,
    min_delta=0.01,
    cooldown=100,
    min_lr=0.0001,
)
# Early stopping on the validation loss, restoring the best weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=300,
    restore_best_weights=True,
)
# Keeps only the weights with the highest validation accuracy on disk.
model_checkpoint = ModelCheckpoint(
    filepath=root_dir+"checkpoint",
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=0,
)

## Model Definition

In [32]:
def createOptimizer(model, learning_rate):
  """Compile ``model`` for binary classification and hand it back.

  Args:
    model: Keras model to compile; ``compile`` is called on this object.
    learning_rate: learning rate passed to the SGD optimizer.

  Returns:
    The same ``model`` object, compiled with SGD, binary cross-entropy
    loss and the accuracy metric.
  """
  sgd = keras.optimizers.SGD(learning_rate=learning_rate)
  model.compile(
      loss="binary_crossentropy",
      optimizer=sgd,
      metrics=['accuracy'],
  )
  return model

def create_model(num_residual_blocks=1):
  """Build and compile the residual dense classifier.

  The network is: input -> Dropout -> ``num_residual_blocks`` residual
  blocks -> single sigmoid unit. It is compiled through ``createOptimizer``
  with ``LEARNING_RATE``.

  Args:
    num_residual_blocks: how many residual blocks to stack. The default of
      1 reproduces the original architecture.

  Returns:
    The compiled Keras ``Model``.
  """
  initializer = keras.initializers.GlorotNormal()

  def dense(units):
    # All hidden Dense layers share the same initialisation settings.
    return Dense(units=units, kernel_initializer=initializer,
                 use_bias=True, bias_initializer='zeros')

  # Defined once, outside the stacking loop, instead of being re-created on
  # every iteration.
  def regression_identity_block(block_input, activation):
    """One residual block: Dense 32 -> 16 -> 8 plus a projected shortcut."""
    # BatchNormalization after each Dense stays disabled, matching the
    # commented-out lines of the original implementation.
    main_path = dense(32)(block_input)
    main_path = Activation(activation=activation)(main_path)

    main_path = dense(16)(main_path)
    main_path = Activation(activation=activation)(main_path)

    main_path = dense(8)(main_path)

    # The shortcut goes through its own Dense(8) so that both branches have
    # 8 units and can be summed.
    shortcut = dense(8)(block_input)

    merged = Add()([main_path, shortcut])
    return Activation(activation=activation)(merged)

  # `shape` must be a tuple: `(freq_vector_size)` is just an int, which newer
  # Keras versions reject.
  res_input = Input(shape=(freq_vector_size,))
  res_part = Dropout(DROPOUT_RATE)(res_input)

  for _ in range(num_residual_blocks):
    res_part = regression_identity_block(res_part, RESIDUAL_ACTIVATION_TYPE)

  output = Dense(1, activation='sigmoid')(res_part)

  model = Model(inputs=res_input, outputs=output)
  return createOptimizer(model, LEARNING_RATE)


## Training

In [None]:
# Number of independent shuffle -> train -> evaluate repetitions.
N_RUNS = 1

# Book-keeping for the best run; the Results cells below read these names.
test_acc_max = 0
best_model = None
x_test_max = None
y_true_max = None
best_history = None
scores = []

for _ in range(N_RUNS):
    # Re-shuffle both splits (features and labels together) before each run.
    fv_train, y_train = shuffle(fv_train, y_train)
    fv_test, y_test = shuffle(fv_test, y_test)

    model = create_model()
    # NOTE(review): the test split is used as validation data, for checkpoint
    # selection and for the final evaluation, so the reported accuracy is
    # measured on data that also picked the weights -- consider a separate
    # validation split.
    # NOTE(review): `early_stopping` is configured in the hyperparameter cell
    # but is not passed here; confirm whether that is intentional.
    history = model.fit(
        fv_train,
        y_train,
        validation_data=(fv_test, y_test),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        shuffle=True,
        verbose=2,
        callbacks=[reduce_lr, model_checkpoint],
    )

    # Restore the best weights written by `model_checkpoint`. The checkpoint
    # lives under `root_dir`; the previous `run_dir` was never defined.
    model.load_weights(root_dir+"checkpoint")

    test_loss, test_acc = model.evaluate(fv_test, y_test)
    scores.append(test_acc)

    if test_acc > test_acc_max:
        test_acc_max = test_acc
        best_model = model
        x_test_max = fv_test
        y_true_max = y_test
        best_history = history

print('Max accuracy:', test_acc_max)
print('Mean accuracy:', mean(scores))

Epoch 1/2000
965/965 - 4s - loss: 0.6956 - accuracy: 0.5029 - val_loss: 0.6943 - val_accuracy: 0.5095

Epoch 00001: val_accuracy improved from -inf to 0.50952, saving model to /content/drive/My Drive/checkpoint
Epoch 2/2000
965/965 - 3s - loss: 0.6944 - accuracy: 0.5075 - val_loss: 0.6934 - val_accuracy: 0.5187

Epoch 00002: val_accuracy improved from 0.50952 to 0.51869, saving model to /content/drive/My Drive/checkpoint
Epoch 3/2000
965/965 - 3s - loss: 0.6937 - accuracy: 0.5121 - val_loss: 0.6928 - val_accuracy: 0.5239

Epoch 00003: val_accuracy improved from 0.51869 to 0.52394, saving model to /content/drive/My Drive/checkpoint
Epoch 4/2000
965/965 - 3s - loss: 0.6931 - accuracy: 0.5156 - val_loss: 0.6923 - val_accuracy: 0.5308

Epoch 00004: val_accuracy improved from 0.52394 to 0.53084, saving model to /content/drive/My Drive/checkpoint
Epoch 5/2000
965/965 - 3s - loss: 0.6927 - accuracy: 0.5197 - val_loss: 0.6919 - val_accuracy: 0.5376

Epoch 00005: val_accuracy improved from 0.53

# Results


## Report

In [None]:
# Predicted probabilities of the best model on its test split, flattened to 1-D.
# `y_pred` is reused by the ROC and precision-recall cells below.
y_pred = best_model.predict(x_test_max).ravel()

# Threshold at 0.5 to obtain hard class predictions for the report.
print(classification_report(y_true_max, np.greater(y_pred, 0.5)))

## ROC Curve

In [None]:
# ROC curve computed from the predicted probabilities in `y_pred`.
fpr, tpr, thresholds = roc_curve(y_true_max, y_pred)

# Area under the ROC curve as a single summary number.
roc_auc = auc(fpr, tpr)

# Model curve plus two reference lines: the diagonal and the ideal corner path.
plt.plot(fpr, tpr, linewidth=2, label='ROC AUC: {:.2f}'.format(roc_auc))
plt.plot([0, 1], [0, 1], linestyle='--', color=(0.6, 0.6, 0.6), label='random guessing')
plt.plot([0, 0, 1], [0, 1, 1], linestyle=':', color='black', label='perfect performance')

# Small margin around the unit square so the curves do not sit on the frame.
plt.xlim(-0.05, 1.05)
plt.ylim(-0.05, 1.05)
plt.xlabel('false positive rate')
plt.ylabel('true positive rate')
plt.title('Receiver Operator Characteristic')
plt.legend(loc="lower right")
plt.tight_layout()
plt.show()

## Precision-Recall Curve

In [None]:
# Summary score shown in the legend: the average precision of `y_pred`.
avg_precision = average_precision_score(y_true_max, y_pred)

# Precision/recall pairs over all decision thresholds.
precision, recall, thresholds = precision_recall_curve(y_true_max, y_pred)

label = 'Precision Recall AUC: {:.2f}'.format(avg_precision)
plt.plot(recall, precision, linewidth=2, label=label)
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision Recall Curve')
plt.legend()
plt.tight_layout()
plt.show()

## Confusion Matrix

In [None]:
# Predicted probabilities of the best model on its test split.
y_pred = best_model.predict(x_test_max).ravel()
# Hard predictions get their own name so the probabilities in `y_pred` are not
# overwritten; the ROC and precision-recall cells read `y_pred` and would
# otherwise see booleans when re-run after this cell.
y_pred_labels = y_pred > 0.5

# Row/column i of the confusion matrix corresponds to integer label i.
# NOTE(review): this assumes label 0 means "Hotspot" and label 1 means
# "No Hotspot" -- confirm against the label encoding of the dataset.
class_names = ["Hotspot", "No Hotspot"]
con_mat = tf.math.confusion_matrix(labels=y_true_max, predictions=y_pred_labels).numpy()
# Normalise each row (true class) so its entries sum to 1, rounded to 2 decimals.
con_mat_norm = np.around(con_mat.astype('float') / con_mat.sum(axis=1)[:, np.newaxis], decimals=2)
con_mat_df = pd.DataFrame(con_mat_norm, index = class_names, columns = class_names)

print('Accuracy Y_test: ', accuracy_score(y_true_max, y_pred_labels))
figure = plt.figure(figsize=(8, 8))
sns.heatmap(con_mat_df, annot=True,cmap=plt.cm.Blues)
plt.ylabel('True label')
plt.xlabel('Predicted label')
# Called after the axis labels are set so the layout accounts for them.
plt.tight_layout()
plt.show()

## Accuracy

In [None]:
# Accuracy per epoch of the best run: training first, then validation
# (the legend order below relies on this plotting order).
for series_key in ('accuracy', 'val_accuracy'):
    plt.plot(best_history.history[series_key])

plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

## Loss

In [None]:
# Loss per epoch of the best run: training first, then validation
# (the legend order below relies on this plotting order).
for series_key in ('loss', 'val_loss'):
    plt.plot(best_history.history[series_key])

plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

# Save Data


In [None]:
# Persist the best-scoring model (the one every Results cell reports on) rather
# than `model`, which is simply the last model trained by the loop.
best_model.save(root_dir+'ResNetmodel-2kEpochs.h5')