In [62]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

import pickle
from tensorflow.keras.optimizers import Adadelta
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, concatenate, Dropout, Input

In [63]:
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Flatten, Dropout, InputLayer, Embedding
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import model_from_json
import numpy as np
import pandas as pd

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler

In [64]:
from tensorflow.math import exp

def scheduler(epoch, lr, hold_epochs=20, decay_rate=0.1):
    """Learning-rate schedule: hold the rate, then decay it exponentially.

    Intended for ``LearningRateScheduler``, which calls it as
    ``scheduler(epoch, lr)`` at the start of every epoch.

    Args:
        epoch: Zero-based index of the epoch about to run.
        lr: Learning rate currently in use.
        hold_epochs: Number of initial epochs during which ``lr`` is returned
            unchanged (default 20).
        decay_rate: Per-epoch exponential decay applied afterwards; the rate
            is multiplied by ``exp(-decay_rate)`` (default 0.1).

    Returns:
        The learning rate for this epoch as a plain Python ``float``.
    """
    if epoch < hold_epochs:
        return float(lr)
    # Return a Python float, not a tensor: newer Keras versions raise
    # ValueError when the schedule function returns anything but a float.
    return float(lr * np.exp(-decay_rate))

In [65]:
from tensorflow.keras.callbacks import Callback

class TerminateOnBaseline(Callback):
    """Stop training once a monitored metric reaches a target value.

    Args:
        monitor: Key looked up in the ``logs`` dict at the end of each epoch
            (for example ``'accuracy'``).
        baseline: Threshold; training is stopped as soon as the monitored
            value is greater than or equal to it.
    """

    def __init__(self, monitor='accuracy', baseline=0.9):
        super().__init__()
        self.baseline = baseline
        self.monitor = monitor

    def on_epoch_end(self, epoch, logs=None):
        """Flag the model to stop if the monitored metric hit the baseline."""
        current = (logs or {}).get(self.monitor)
        # A metric that is absent from `logs` is silently ignored, so a
        # misspelled or uncompiled metric name means this never triggers.
        if current is None or not (current >= self.baseline):
            return
        print('Epoch %d: Reached baseline, terminating training' % (epoch))
        self.model.stop_training = True

In [66]:
# Per-section MLP settings, keyed by paper section.
# NOTE(review): the meaning of the three values is not documented here and no
# visible cell reads this dict -- confirm what each position stands for.
parameters_mlp = dict(
    introduction=[0.2, 100, 8],
    materials=[0.2, 100, 8],
    conclusion=[0.2, 100, 8],
)

# MV

In [67]:
# Paper section this run works on; selects both the pickle entry and the CSV.
section = 'introduction'

# Destination directory used when the trained model is written to disk.
# NOTE(review): absolute, user-specific path -- will not exist on other machines.
folder_to_save = 'models_v1'
path_to_save = "/scratch/cinthiasouza/mv-text-summarizer/notebook/{}".format(folder_to_save)

# --- Feature view ----------------------------------------------------------
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('dataset/dataset_{}.pkl'.format('features'), 'rb') as features_file:
    dataset = pickle.load(features_file)

# Entry 0 of the section is used as the inputs and entry 2 as the labels.
X_features, y_features = dataset[section][0], dataset[section][2]

# --- Embedding view --------------------------------------------------------
# Embedding columns are addressed by their string names "0" .. "382".
# NOTE(review): that is 383 columns; if the embeddings are 384-wide the last
# dimension is being dropped -- confirm against the CSV header.
columns = [str(index) for index in range(383)]

embedd_table = pd.read_csv("dataset/embed_bert_{}_train.csv".format(section))
y_embedd = embedd_table['label']
X_embedd = embedd_table[columns]

In [None]:
# --- Build the two-view MLP --------------------------------------------------
# fit() below feeds two arrays (embeddings + features), so the model needs one
# Input per view, declared in the same order as the `x=[...]` list.
# float32 inputs: an int32 input would truncate real-valued data.
# NOTE(review): assumes X_embedd and X_features are 2-D and row-aligned
# (row i of each describes the same sample) -- confirm against the data prep.
sequence_input = Input(shape=(np.shape(X_embedd)[1],), dtype='float32')
features_input = Input(shape=(np.shape(X_features)[1],), dtype='float32')
merged_views = concatenate([sequence_input, features_input])

perceptron_1 = Dense(256, activation='relu')(merged_views)
dropout1 = Dropout(.2)(perceptron_1)
perceptron_2 = Dense(256, activation='relu')(dropout1)
dropout2 = Dropout(.2)(perceptron_2)
perceptron_3 = Dense(512, activation='relu')(dropout2)
dropout3 = Dropout(.3)(perceptron_3)
perceptron_7 = Dense(512, activation='relu')(dropout3)
dropout7 = Dropout(.3)(perceptron_7)
perceptron_8 = Dense(256, activation='relu')(dropout7)
dropout8 = Dropout(.3)(perceptron_8)
perceptron_9 = Dense(256, activation='relu')(dropout8)
# Must consume perceptron_9: wiring this to perceptron_3 silently cut
# layers 7-9 out of the graph.
dropout9 = Dropout(.2)(perceptron_9)

# Softmax is the activation that matches categorical_crossentropy on one-hot
# labels (class probabilities that sum to 1).
preds = Dense(2, activation='softmax')(dropout9)

model = Model(inputs=[sequence_input, features_input], outputs=preds)

# Only 'accuracy' is compiled, so the epoch logs contain 'accuracy' and
# 'val_accuracy'. Monitoring a metric that is never logged would make the
# early-termination callback a silent no-op.
my_callbacks = [keras.callbacks.LearningRateScheduler(scheduler),
                TerminateOnBaseline(monitor='val_accuracy', baseline=0.9)]

model.compile(loss='categorical_crossentropy',
              optimizer=keras.optimizers.Adam(learning_rate=0.0001),
              metrics=['accuracy'])

# --- Train / validation split ------------------------------------------------
# Both views and both label sets go through ONE train_test_split call so they
# receive the same shuffle; the fixed seed makes the partition reproducible.
one_hot_embedd = to_categorical(y_embedd)
one_hot_label = to_categorical(y_features)
(X_train_embedd, X_valid_embedd,
 X_train_features, X_valid_features,
 y_train_embedd, y_valid_embedd,
 y_train_features, y_valid_features) = train_test_split(
    X_embedd, X_features, one_hot_embedd, one_hot_label,
    stratify=one_hot_label, shuffle=True, test_size=0.1, random_state=42)

history = model.fit(
    x=[X_train_embedd, X_train_features], y=y_train_features,
    epochs=3, validation_data=([X_valid_embedd, X_valid_features], y_valid_features),
    callbacks=my_callbacks, shuffle=True, batch_size=8)

In [None]:
# Split both views in ONE call. Two separate unseeded train_test_split calls
# shuffle each view differently, so row i of the embeddings would no longer
# match row i of the features (or the labels passed to fit).
# The fixed seed makes re-running this cell reproduce the same partition.
# NOTE(review): assumes X_embedd and X_features are row-aligned and of equal
# length; train_test_split raises ValueError if the lengths differ.
one_hot_embedd = to_categorical(y_embedd)
one_hot_label = to_categorical(y_features)
(X_train_embedd, X_valid_embedd,
 X_train_features, X_valid_features,
 y_train_embedd, y_valid_embedd,
 y_train_features, y_valid_features) = train_test_split(
    X_embedd, X_features, one_hot_embedd, one_hot_label,
    stratify=one_hot_label, shuffle=True, test_size=0.1, random_state=42)

# Continue training the existing `model` for 3 more epochs, batch size 8.
history = model.fit(
    x=[X_train_embedd, X_train_features], y=y_train_features,
    epochs=3, validation_data=([X_valid_embedd, X_valid_features], y_valid_features),
    callbacks=my_callbacks, shuffle=True, batch_size=8)

In [None]:
# Split both views in ONE call. Two separate unseeded train_test_split calls
# shuffle each view differently, so row i of the embeddings would no longer
# match row i of the features (or the labels passed to fit).
# The fixed seed makes re-running this cell reproduce the same partition.
# NOTE(review): assumes X_embedd and X_features are row-aligned and of equal
# length; train_test_split raises ValueError if the lengths differ.
one_hot_embedd = to_categorical(y_embedd)
one_hot_label = to_categorical(y_features)
(X_train_embedd, X_valid_embedd,
 X_train_features, X_valid_features,
 y_train_embedd, y_valid_embedd,
 y_train_features, y_valid_features) = train_test_split(
    X_embedd, X_features, one_hot_embedd, one_hot_label,
    stratify=one_hot_label, shuffle=True, test_size=0.1, random_state=42)

# Continue training the existing `model` for 5 more epochs, batch size 4.
history = model.fit(
    x=[X_train_embedd, X_train_features], y=y_train_features,
    epochs=5, validation_data=([X_valid_embedd, X_valid_features], y_valid_features),
    callbacks=my_callbacks, shuffle=True, batch_size=4)

In [80]:
# Persist the model as two files in `path_to_save`: the architecture as JSON
# and the weights as HDF5, both named after the current section.
# NOTE(review): presumably reloaded with model_from_json + load_weights -- confirm.
architecture_path = '{}/mv_mlp_bert_{}.json'.format(path_to_save, section)
weights_path = '{}/mv_mlp_bert_{}.h5'.format(path_to_save, section)

model_json = model.to_json()
with open(architecture_path, "w") as json_file:
    json_file.write(model_json)

model.save_weights(weights_path)
print("Saved model to disk")

Saved model to disk


In [13]:
# Score the model on the *training* split (not held-out data), so the numbers
# below say nothing about generalization. The result is [loss, accuracy],
# matching the loss and the single metric the model was compiled with.
model.evaluate(
    x=[X_train_embedd, X_train_features],
    y=y_train_features,
)



[0.5884652733802795, 0.6430595517158508]