In [None]:
# Install a pinned TensorFlow 2.1 GPU build through the shell before anything imports it.
!pip install tensorflow-gpu==2.1

In [None]:
import h5py
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import tables
from sklearn import preprocessing

# Fraction held out at each split in load_and_prep (used for the test split
# and again for the validation split of what remains).
TEST_SIZE = 0.1
# Seed handed to train_test_split so both splits are repeatable.
RANDOM_STATE = 0

def make_data(data_lists, data_type, filepath):
  """Load one dataset per top-level HDF5 group and stack them per split.

  Args:
    data_lists: dict mapping a split name (e.g. 'train') to the list of
      top-level group ids whose arrays belong to that split.
    data_type: name of the node to read inside each group (the callers in
      this notebook pass 'Data_x' or 'Data_y').
    filepath: path of the HDF5 file to read.

  Returns:
    dict mapping each split name to the arrays read from
    '/<part_id>/<data_type>', concatenated along axis 0 in list order.

  Raises:
    ValueError: if a split has no group ids. Previously this either failed
      with an UnboundLocalError or silently reused the previous split's data.
  """
  data_dict = {}
  # Read-only access is all that is needed; the context manager guarantees
  # the handle is released even if a node is missing.
  with tables.open_file(filepath, mode="r") as h5file:
    for group_type in data_lists:
      key_list = data_lists[group_type]
      if len(key_list) == 0:
        raise ValueError("no ids given for split %r" % (group_type,))
      # Gather every part first and concatenate once, instead of re-copying
      # the growing array on each iteration.
      parts = [h5file.get_node('/'+part_id+'/'+data_type)[:]
               for part_id in key_list]
      data_dict[group_type] = np.concatenate(parts)
  return data_dict

def load_and_prep(FILE_PATH):
  """Split the HDF5 file's top-level groups into train/val/test and load them.

  The top-level keys of the file are split first into train/test and the
  train part is split again into train/val, both with TEST_SIZE and
  RANDOM_STATE. 'Data_x' and 'Data_y' are then loaded for each split through
  make_data, and the labels are re-encoded with a LabelEncoder fitted on the
  training labels only.

  Args:
    FILE_PATH: path of the HDF5 file to read.

  Returns:
    Tuple (X_train, X_val, X_test, y_train, y_val, y_test).

  Side effects:
    Prints the shape of each of the six returned arrays.
  """
  # Only the key names are needed from h5py. Close the handle immediately so
  # it is not leaked and is not still open when make_data re-opens the file.
  with h5py.File(FILE_PATH, 'r') as f:
    key_list = list(f.keys())

  train_list, test_list = train_test_split(key_list, test_size=TEST_SIZE,
                                          random_state=RANDOM_STATE)
  train_list, val_list = train_test_split(train_list, test_size=TEST_SIZE,
                                          random_state=RANDOM_STATE)
  data_lists = {'train': train_list,
                'test': test_list,
                'val': val_list}

  data_x_dict = make_data(data_lists, 'Data_x', FILE_PATH)
  X_train = data_x_dict['train']
  X_val = data_x_dict['val']
  X_test = data_x_dict['test']

  data_y_dict = make_data(data_lists, 'Data_y', FILE_PATH)
  y_train = data_y_dict['train']
  y_val = data_y_dict['val']
  y_test = data_y_dict['test']

  # Map the raw label values onto consecutive integers. The encoder is fitted
  # on the training labels, so a label that appears only in val/test will make
  # transform() raise.
  le = preprocessing.LabelEncoder()
  y_train = le.fit_transform(y_train)
  y_val = le.transform(y_val)
  y_test = le.transform(y_test)

  # Same print order as before: the three X shapes, then the three y shapes.
  for split_array in (X_train, X_val, X_test, y_train, y_val, y_test):
    print(split_array.shape)

  return X_train, X_val, X_test, y_train, y_val, y_test

# NOTE(review): FILT_FILE_PATH is not assigned in any cell visible above; it
# must be defined (path to the HDF5 file) before this runs on a fresh kernel.
X_train_filt, X_val_filt, X_test_filt, y_train_filt, y_val_filt, y_test_filt = load_and_prep(FILT_FILE_PATH)

Convolutional layers

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Input
from tensorflow.keras.backend import clear_session

# Start from a clean Keras session so re-running this cell does not pile up
# state from a previously built model.
clear_session()
model = Sequential()
# Declare the per-sample input shape: everything after the leading axis of
# X_train_filt (the first axis is dropped by the [1:] slice).
model.add(Input(shape=X_train_filt.shape[1:]))
# First stage: two identical same-padded ReLU convolutions with 64 filters.
for i in range(2):
  model.add(Conv1D(filters=64,
                  kernel_size=3,
                  padding="same",
                  activation='relu'))

Max pooling

In [None]:
from tensorflow.keras.layers import MaxPooling1D
# Downsample the feature map between the two convolution stages.
model.add(MaxPooling1D(pool_size=3, # number of steps covered by each window
                       strides=2,   # step between windows (the downsampling factor)
                       padding='same'))

In [None]:
# Second stage: two more same-padded ReLU convolutions, now with 128 filters.
for i in range(2):
  model.add(Conv1D(filters=128,
                  kernel_size=3,
                  padding="same",
                  activation='relu'))

Reduce the output of the layers

In [None]:
from tensorflow.keras.layers import Flatten, GlobalAveragePooling1D, GlobalMaxPooling1D
# Collapse the convolutional feature map into one flat vector per sample so
# it can feed the Dense layers. GlobalAveragePooling1D / GlobalMaxPooling1D
# are imported above as alternatives but are not used in this cell.
model.add(Flatten())

In [None]:
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Fully connected hidden layer on top of the flattened features.
model.add(Dense(units=100,
                activation='relu'))

# Single sigmoid unit: the output is one value in (0, 1), matching the
# binary cross-entropy loss below.
model.add(Dense(units=1,
                activation='sigmoid'))

# Adam with learning rate 0.001; track accuracy, AUC, recall and precision.
model.compile(optimizer=Adam(0.001),
              loss='binary_crossentropy',
              metrics=['accuracy', 'AUC', 'Recall', 'Precision'])

model.summary()

Add Dense Layers and compile the model

In [None]:
# Builds and compiles the small 1D-CNN binary classifier
def create_model(input_shape, flatten=False):
  """Assemble and compile the Conv1D classifier in a fresh Keras session.

  Architecture, in order: input, 2 x Conv1D(64), MaxPooling1D, 2 x Conv1D(128),
  Flatten or GlobalAveragePooling1D, Dense(64, relu), Dense(1, sigmoid).

  Args:
    input_shape: shape of the full input array including its leading axis;
      only input_shape[1:] is handed to the Input layer.
    flatten: if truthy, reduce the convolutional output with Flatten();
      otherwise use GlobalAveragePooling1D().

  Returns:
    The compiled Sequential model (Adam(0.001), binary cross-entropy, with
    accuracy / AUC / Recall / Precision metrics).
  """
  clear_session()
  net = Sequential()

  def add_conv_pair(n_filters):
    # Two identical same-padded ReLU convolutions, appended back to back.
    for _ in range(2):
      net.add(Conv1D(filters=n_filters,
                     kernel_size=3,
                     padding="same",
                     activation='relu'))

  # The leading axis is dropped; Keras is given the per-sample shape only.
  net.add(Input(shape=input_shape[1:]))

  add_conv_pair(64)
  net.add(MaxPooling1D(pool_size=3,    # window length
                       strides=2,      # step between windows
                       padding='same'))
  add_conv_pair(128)

  # Reduce the feature map to one vector per sample.
  reducer = Flatten() if flatten else GlobalAveragePooling1D()
  net.add(reducer)

  net.add(Dense(units=64, activation='relu'))
  net.add(Dense(units=1, activation='sigmoid'))

  net.compile(optimizer=Adam(0.001),
              loss='binary_crossentropy',
              metrics=['accuracy', 'AUC', 'Recall', 'Precision'])
  return net

# NOTE: create_model() already calls clear_session() itself, so this extra
# call is redundant (kept as-is; it is harmless).
clear_session()
# Build the default variant (flatten=False, i.e. GlobalAveragePooling1D head)
# from the shape of the training array, replacing the step-by-step `model`.
model = create_model(X_train_filt.shape)
model.summary()

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard, ReduceLROnPlateau

def create_callbacks(best_model_filepath, tensorboard_logs_filepath):
  """Build the list of Keras callbacks used for training.

  Args:
    best_model_filepath: where ModelCheckpoint writes the best weights.
    tensorboard_logs_filepath: log directory handed to TensorBoard.

  Returns:
    A list, in this order: ModelCheckpoint, EarlyStopping, TensorBoard,
    ReduceLROnPlateau. Every callback that monitors a metric watches
    'val_loss'.
  """
  watched = 'val_loss'

  return [
      # Keep only the weights of the best epoch seen so far.
      ModelCheckpoint(filepath=best_model_filepath,
                      monitor=watched,
                      verbose=0,
                      save_weights_only=True,
                      save_best_only=True),
      # Stop once the monitored value has not improved for 10 epochs.
      EarlyStopping(monitor=watched,
                    patience=10,
                    verbose=1),
      # Scalar logs only: no histograms, no graph dump.
      TensorBoard(log_dir=tensorboard_logs_filepath,
                  histogram_freq=0,
                  write_graph=False),
      # patience=0: cut the learning rate by 10x as soon as an epoch fails to
      # improve, but never below 1e-4.
      ReduceLROnPlateau(monitor=watched,
                        factor=0.1,
                        min_lr=1e-4,
                        patience=0,
                        verbose=1),
  ]

Train the model

In [None]:
from sklearn.utils import class_weight

# Training configuration.
EPOCHS = 20
BATCH_SIZE = 64
best_model_filepath = "CNN1D_Model.ckpt"
tensorboard_logs_filepath = "./CNN1D_logs/"

# Balanced class weights, one per entry of np.unique(y_train_filt), in that
# order. Keyword arguments are used because newer scikit-learn releases no
# longer accept these positionally.
class_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                  classes=np.unique(y_train_filt),
                                                  y=y_train_filt)

# Keras' fit() expects class_weight as a {class_index: weight} dict; the bare
# array returned by scikit-learn is not a valid value for that argument.
# `class_weights` is kept as the array in case other cells rely on it.
class_weight_dict = {int(label): float(weight)
                     for label, weight in zip(np.unique(y_train_filt),
                                              class_weights)}

history_1D = model.fit(X_train_filt,
                       y_train_filt,
                       batch_size=BATCH_SIZE,
                       epochs=EPOCHS,
                       validation_data=(X_val_filt, y_val_filt),
                       callbacks=create_callbacks(best_model_filepath,
                                                  tensorboard_logs_filepath),
                       class_weight=class_weight_dict,
                       verbose=1)