In [23]:
# Imports
from __future__ import print_function
import tensorflow as tf
import pandas as pd
import numpy as np
import re
import os
import matplotlib.pyplot as plt
import sys
from tensorflow import keras
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn import neural_network
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_score
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.utils import shuffle
import urllib.request
import seaborn as sns
import random
from tensorflow.keras.utils import to_categorical
from statistics import mean

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report

#Configuration
%matplotlib inline

In [24]:
# Detect whether the notebook is running inside Google Colab.
# Only a failed import means "not in Colab"; any other exception (including
# KeyboardInterrupt) is allowed to propagate instead of being swallowed.
try:
  from google.colab import files
  from google.colab import drive
  IN_COLAB = True
except ImportError:
  IN_COLAB = False

In [25]:
# Pick the directory every data/output path is built from.
# Default is the current working directory; inside Colab, Google Drive is
# (re)mounted and its root is used instead.
root_dir = "./"
if IN_COLAB:
  drive.mount('/content/drive', force_remount=True)
  root_dir = "/content/drive/My Drive/"

Mounted at /content/drive


In [26]:
# Load the feature matrix and its labels from the .npy files under root_dir.
hotspots_path = root_dir + "Data/hotspots/fasta/hotspots-5k-1polys.npy"
labels_path = root_dir + "Data/hotspots/fasta/labels_hotspots-5k-1polys.npy"
hotspots = np.load(hotspots_path)
labels = np.load(labels_path)

# "Half features": remove columns 512..1023 (axis 1) from the feature matrix.
dropped_columns = slice(512, 1024)
hotspots = np.delete(hotspots, dropped_columns, axis=1)

In [27]:
# --- Architecture -----------------------------------------------------------
layers = 2                     # number of blocks create_model is asked to stack
hidden_units = 64              # base width from which `uc` is derived
features = len(hotspots[0])    # length of one row of `hotspots`
classes = 2                    # Hotspot or NO hotspot
activation_type = 'relu'
dropout_rate = 0.25            # read as a global inside create_model

# Widths handed to create_model as `units_configuration`.
uc = [hidden_units * factor for factor in (1, 2, 1)]

# --- Optimisation -----------------------------------------------------------
batch_size = 128
epochs = 1000
learning_rate = 0.01           # read as a global inside create_model
regularization_l2_rate = 1e-06 # NOTE(review): not referenced by the visible cells

# --- Callbacks --------------------------------------------------------------
# Scale the learning rate by 0.1 after 50 epochs without improvement of the
# training loss, never going below 0.0005.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='loss',
    factor=0.1,
    patience=50,
    min_lr=0.0005,
)
# Stop after 300 epochs without a lower validation loss and restore the
# weights of the best epoch.
callback_es = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=300,
    restore_best_weights=True,
)

In [28]:
def create_model(features, layers, classes, units_configuration, activation_type):
  """Build and compile a dense classifier made of shortcut-connected blocks.

  Topology: input -> dropout -> `layers` blocks -> softmax over `classes`.
  Each block is three Dense layers whose result is added to a Dense
  projection of the block input, followed by an activation.

  Args:
    features: number of input values per sample.
    layers: number of blocks to stack.
    classes: number of units of the final softmax layer.
    units_configuration: sequence of widths; block `i` uses entry `i` for its
      first two Dense layers and entry `i + 1` for its output and shortcut
      projection, so at least `layers + 1` entries are needed.
    activation_type: activation passed to every Activation layer in the blocks.

  Returns:
    A compiled `tf.keras.Model` (SGD optimizer, categorical cross-entropy loss,
    'accuracy' metric). Targets are expected to be one-hot encoded.

  Raises:
    ValueError: if `units_configuration` has fewer than `layers + 1` entries.

  Note:
    Reads the module-level `dropout_rate` and `learning_rate` variables.
  """
  if len(units_configuration) < layers + 1:
    raise ValueError(
        "units_configuration needs at least layers + 1 entries "
        "(got %d entries for layers=%d)" % (len(units_configuration), layers))

  # Defined before the helpers below so the closure dependency is obvious.
  initializer = tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.05, seed=None)

  def dense(units):
    # Dense layer with the initialisation settings shared by every block layer.
    return tf.keras.layers.Dense(units=units, kernel_initializer=initializer,
                                 use_bias=True, bias_initializer='zeros')

  def regression_identity_block(X, units_start, units_end, activation):
    """Three Dense layers plus a Dense-projected shortcut from the block input."""
    X_shortcut = X

    output = dense(units_start)(X)
    output = tf.keras.layers.Activation(activation=activation)(output)

    output = dense(units_start)(output)
    output = tf.keras.layers.Activation(activation=activation)(output)

    output = dense(units_end)(output)

    # Project the shortcut to `units_end` so both Add() operands have equal width.
    jump = dense(units_end)(X_shortcut)

    output = tf.keras.layers.Add()([output, jump])
    output = tf.keras.layers.Activation(activation=activation)(output)

    return output

  # `shape` must be a tuple; `(features)` is just an int.
  model_input = tf.keras.Input(shape=(features,))
  output = tf.keras.layers.Dropout(rate=dropout_rate)(model_input)

  for i in range(0, layers):
    output = regression_identity_block(output, units_configuration[i], units_configuration[i+1], activation_type)

  output = tf.keras.layers.Dense(units = classes, activation='softmax')(output)

  model = tf.keras.Model(inputs=model_input, outputs=[output])

  optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
  # Softmax output + one-hot targets call for categorical cross-entropy.
  # For classes == 2 it yields the same loss value as the previous
  # binary cross-entropy; for more classes it is the only correct choice.
  model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics = ['accuracy'])

  return model

In [29]:
# Train on repeated random 80/20 splits and keep the run with the highest
# test accuracy. `None` marks "no run recorded yet".
test_acc_max = 0
best_model = None
x_test_max = None
y_true_max = None
best_history = None
scores = []

n_runs = 1  # number of independent split/train/evaluate repetitions

for _ in range(n_runs):

  x_train, x_test, y_train, y_test = train_test_split(hotspots, labels, test_size=0.20, shuffle=True)

  x_train = x_train.astype('float32')
  x_test = x_test.astype('float32')

  # Keep the integer labels for the report / confusion matrix cells.
  y_true = y_test

  y_train = to_categorical(y_train, num_classes=2)
  y_test = to_categorical(y_test, num_classes=2)

  model = create_model(features=features, layers=layers, classes=classes, units_configuration=uc, activation_type=activation_type)

  # NOTE(review): the test split is also passed as validation data, so early
  # stopping and best-weight restoration are chosen on the same samples whose
  # accuracy is reported below; the reported figure is therefore optimistic.
  history = model.fit(x_train, y_train, validation_data=(x_test,y_test), epochs=epochs, batch_size=batch_size, verbose=2, callbacks=[reduce_lr, callback_es])
  test_loss, test_acc = model.evaluate(x_test, y_test)
  scores.append(test_acc)

  # Always record the first run so later cells get a real model even when its
  # accuracy is not strictly greater than the initial 0.
  if best_model is None or test_acc > test_acc_max:
    test_acc_max = test_acc
    best_model = model
    x_test_max = x_test
    y_true_max = y_true
    best_history = history

print('Max accuracy:', test_acc_max)
print('Mean accuracy:', mean(scores))

Epoch 1/1000
483/483 - 3s - loss: 0.6931 - accuracy: 0.5182 - val_loss: 0.6922 - val_accuracy: 0.5505
Epoch 2/1000
483/483 - 3s - loss: 0.6919 - accuracy: 0.5459 - val_loss: 0.6910 - val_accuracy: 0.5688
Epoch 3/1000
483/483 - 3s - loss: 0.6907 - accuracy: 0.5609 - val_loss: 0.6898 - val_accuracy: 0.5738
Epoch 4/1000
483/483 - 3s - loss: 0.6894 - accuracy: 0.5705 - val_loss: 0.6884 - val_accuracy: 0.5788
Epoch 5/1000
483/483 - 5s - loss: 0.6880 - accuracy: 0.5751 - val_loss: 0.6868 - val_accuracy: 0.5812
Epoch 6/1000
483/483 - 3s - loss: 0.6864 - accuracy: 0.5762 - val_loss: 0.6851 - val_accuracy: 0.5862
Epoch 7/1000
483/483 - 3s - loss: 0.6846 - accuracy: 0.5810 - val_loss: 0.6831 - val_accuracy: 0.5873
Epoch 8/1000
483/483 - 3s - loss: 0.6823 - accuracy: 0.5821 - val_loss: 0.6809 - val_accuracy: 0.5903
Epoch 9/1000
483/483 - 3s - loss: 0.6803 - accuracy: 0.5840 - val_loss: 0.6785 - val_accuracy: 0.5919
Epoch 10/1000
483/483 - 3s - loss: 0.6777 - accuracy: 0.5870 - val_loss: 0.6762 - 

KeyboardInterrupt: ignored

In [None]:
# Write a diagram of the network (including tensor shapes) under root_dir.
diagram_path = root_dir + 'multi_input_and_output_model.png'
keras.utils.plot_model(model, to_file=diagram_path, show_shapes=True)

In [None]:
# Predicted class = index of the largest model output along the last axis.
class_scores = best_model.predict(x_test_max)
y_pred = np.argmax(class_scores, axis=-1)

# Per-class precision / recall / F1 of the best run on its own test split.
report = classification_report(y_true_max, y_pred)
print(report)

In [None]:
# Row-normalised confusion matrix of the best run, drawn as a heatmap.
# NOTE(review): index 0 is displayed as "Hotspot" and index 1 as "No Hotspot";
# confirm this matches how the labels file encodes the classes.
class_names = ["Hotspot", "No Hotspot"]

# Fix the matrix size explicitly; otherwise it is inferred from the largest
# label seen and no longer matches `class_names` when a class is missing.
con_mat = tf.math.confusion_matrix(labels=y_true_max, predictions=y_pred, num_classes=len(class_names)).numpy()

# Divide each row by its total; rows without any true sample stay at 0
# instead of producing NaN through a division by zero.
row_totals = con_mat.sum(axis=1)[:, np.newaxis]
con_mat_norm = np.around(
    np.divide(con_mat.astype('float'), row_totals,
              out=np.zeros(con_mat.shape, dtype=float), where=row_totals != 0),
    decimals=2)
con_mat_df = pd.DataFrame(con_mat_norm, index = class_names, columns = class_names)

print('Accuracy Y_test: ', accuracy_score(y_true_max, y_pred))
figure, ax = plt.subplots(figsize=(8, 8))
sns.heatmap(con_mat_df, annot=True, cmap=plt.cm.Blues, ax=ax)
ax.set_ylabel('True label')
ax.set_xlabel('Predicted label')
# Lay out after the labels exist so they are taken into account.
figure.tight_layout()
plt.show()

In [None]:
# Accuracy per epoch of the best run: training curve first, then the curve
# computed on the data passed as validation_data.
fig, ax = plt.subplots()
for curve in ('accuracy', 'val_accuracy'):
  ax.plot(best_history.history[curve])
ax.set_title('model accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(['train', 'test'], loc='upper left')
plt.show()

In [None]:
# Loss per epoch of the best run: training curve first, then the curve
# computed on the data passed as validation_data.
fig, ax = plt.subplots()
for curve in ('loss', 'val_loss'):
  ax.plot(best_history.history[curve])
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'test'], loc='upper left')
plt.show()