In [None]:
# !pip uninstall tensorflow tensorflow_hub tensorflowjs
# !pip install tensorflow==2.0.0a0 tensorflow_hub==0.5.0 tensorflowjs==1.2.6

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use("ggplot")
from sklearn.model_selection import train_test_split
import tensorflow.compat.v1 as tf
# Run TF 1.x-style graph code under TF 2.x: use the compat.v1 API and disable eager execution.
tf.disable_eager_execution()
import tensorflow_hub as hub
from keras import backend as K
from keras.models import Model, Input
from keras.layers.merge import add
from keras.layers import LSTM, Embedding, Dense, TimeDistributed, Dropout, Bidirectional, Lambda
# from sklearn import cross_validation




In [None]:
class SentenceGetter(object):
    """Group a token-per-row DataFrame into sentences of (word, tag) pairs.

    Rows are grouped on column "0"; within each group column "1" supplies the
    words and column "2" the tags.
    """

    def __init__(self, data):
        """Build the per-sentence groups.

        Args:
            data: DataFrame containing the columns "0", "1" and "2".
        """
        self.n_sent = 1      # 1-based counter consumed by get_next()
        self.data = data
        self.empty = False   # not read anywhere inside this class
        # Only columns "1" and "2" are used by the aggregation, so select them
        # explicitly instead of handing the grouping column to apply().
        self.grouped = (
            self.data.groupby("0")[["1", "2"]].apply(self._pair_words_with_tags)
        )
        # One entry per group, in the order the grouped result iterates.
        self.sentences = list(self.grouped)

    @staticmethod
    def _pair_words_with_tags(group):
        """Return the rows of one group as a list of (word, tag) tuples."""
        return list(zip(group["1"].values.tolist(), group["2"].values.tolist()))

    def get_next(self):
        """Return the sentence keyed "Sentence: <n_sent>" and advance the counter.

        Returns:
            The list of (word, tag) tuples for that key, or None when no group
            has that key (the counter is left unchanged in that case).
        """
        key = "Sentence: {}".format(self.n_sent)
        try:
            sentence = self.grouped[key]
        except KeyError:
            # Only a missing key means "no more sentences"; any other error
            # (including KeyboardInterrupt) must propagate.
            return None
        self.n_sent += 1
        return sentence


# Load the token-level dataset. SentenceGetter groups it on column "0" and
# reads column "1" as the word and column "2" as the tag.
data = pd.read_csv("../input/d/datasets/spashal/nlp-project/10k_dataset.csv", encoding="latin1")
# Forward-fill empty cells (presumably the sentence key is only present on the
# first row of each sentence -- confirm against the CSV).
# DataFrame.ffill() is the non-deprecated spelling of fillna(method="ffill").
data = data.ffill()
print(data.tail(10))

# Vocabulary: only its size is kept, so the set's iteration order is irrelevant.
words = list(set(data["1"].values))
words.append("ENDPAD")
n_words = len(words)
# Sort the tag set so that tag2idx is the same on every kernel run; plain set
# iteration order for strings can change between interpreter sessions.
# key=str keeps the sort from failing should a non-string value be present.
tags = sorted(set(data["2"].values), key=str)
n_tags = len(tags)
print(n_tags)

getter = SentenceGetter(data)
sent = getter.get_next()
print(sent)
sentences = getter.sentences
max_len = 50
tag2idx = {t: i for i, t in enumerate(tags)}

# Tokens: keep the first max_len words of every sentence and right-pad shorter
# sentences with the literal "__PAD__" token.
X = [[w[0] for w in s] for s in sentences]
X = [seq[:max_len] + ["__PAD__"] * (max_len - len(seq)) for seq in X]
print(X[1])

# Labels: same layout as the tokens. truncating="post" is required because
# pad_sequences truncates from the FRONT by default, which would pair the first
# max_len words of a long sentence with its last max_len tags.
y = [[tag2idx[w[1]] for w in s] for s in sentences]
from keras.preprocessing.sequence import pad_sequences
y = pad_sequences(maxlen=max_len, sequences=y, padding="post",
                  truncating="post", value=tag2idx["O"])
print(y[1])

X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.1, random_state=2018)
batch_size = 32

# TF1-style session shared with Keras.
sess = tf.compat.v1.Session()
K.set_session(sess)

# Load the ELMo hub module, then initialise variables and lookup tables in the session.
elmo_model = hub.Module("https://tfhub.dev/google/elmo/2", trainable=True)
sess.run(tf.global_variables_initializer())
sess.run(tf.tables_initializer())

def ElmoEmbedding(x):
    """Embed a batch of tokenised, padded sentences with the ELMo hub module.

    Args:
        x: tensor of tokens; the model feeds it with shape (batch, max_len).

    Returns:
        The "elmo" entry of the module's output dict for the "tokens" signature.
    """
    tokens = tf.cast(x, tf.string)
    # Take the batch size from the tensor itself rather than from the global
    # `batch_size`, so batches of any size (including a final partial batch or
    # a single sentence) can be embedded. The former axis-less tf.squeeze is
    # dropped for the same reason: it would collapse a batch of one.
    n_rows = tf.shape(tokens)[0]
    # NOTE(review): every sentence is still declared to be max_len tokens long,
    # so padding tokens are presented to the module as real tokens.
    return elmo_model(inputs={
                            "tokens": tokens,
                            "sequence_len": tf.fill([n_rows], max_len)
                      },
                      signature="tokens",
                      as_dict=True)["elmo"]

# Model: ELMo embeddings -> two stacked biLSTMs joined by a residual sum ->
# per-token softmax over the tag set.
def _bilstm():
    """Create one bidirectional LSTM layer with the settings shared by both stages."""
    return Bidirectional(
        LSTM(units=512, return_sequences=True, recurrent_dropout=0.2, dropout=0.2)
    )


input_text = Input(shape=(max_len,), dtype=tf.string)
embedding = Lambda(ElmoEmbedding, output_shape=(max_len, 1024))(input_text)
x = _bilstm()(embedding)
x_rnn = _bilstm()(x)
x = add([x, x_rnn])  # residual connection to the first biLSTM
out = TimeDistributed(Dense(n_tags, activation="softmax"))(x)

# Earlier baseline (plain trainable embedding instead of ELMo), kept for reference:
# input = Input(shape=(140,))
# model = Embedding(input_dim=n_words, output_dim=140, input_length=140)(input)
# model = Dropout(0.1)(model)
# model = Bidirectional(LSTM(units=100, return_sequences=True, recurrent_dropout=0.1))(model)
# out = TimeDistributed(Dense(n_tags, activation="softmax"))(model)  # softmax output layer

model = Model(input_text, out)
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
# print(len(X_tr), len(y_tr))

# Carve a small training set and a validation set out of the training split.
n_train = 320
n_val = 32
X_tr, X_val = X_tr[:n_train], X_tr[n_train:n_train + n_val]
y_tr, y_val = y_tr[:n_train], y_tr[n_train:n_train + n_val]
# Add a trailing axis of size 1 to the label arrays: (samples, max_len, 1).
y_tr = np.expand_dims(y_tr, axis=-1)
y_val = np.expand_dims(y_val, axis=-1)

history = model.fit(
    np.array(X_tr),
    y_tr,
    validation_data=(np.array(X_val), y_val),
    batch_size=batch_size,
    epochs=3,
    verbose=1,
)

# bs = cross_validation.Bootstrap(len(X_tr), random_state=0)

In [None]:
import keras

# Persist the trained tagger to disk so it can be reloaded without retraining.
keras.models.save_model(model, filepath='./initial_trained_model.h5')

In [None]:
# Reload the stored model from disk.
# After this import the name `keras` refers to tensorflow.keras.
from tensorflow import keras
saved_model = keras.models.load_model(filepath='./initial_trained_model.h5')

In [None]:
# Sanity check: print the index of every validation sentence whose token list
# is not exactly max_len long. Iterates over X_val itself instead of assuming
# it has 32 entries of length 50.
for i, seq in enumerate(X_val):
    if len(seq) != max_len:
        print(i)

In [None]:
# ** Remember to store all the intermediate models to avoid any extra training **
# Transfer learning: drop the final dense layer of the saved model, attach a
# fresh softmax head, train the head alone, then fine-tune everything.

# Truncate the saved model at its second-to-last layer. `keras.Model` is used
# (rather than the `Model` imported from the standalone keras package) so the
# truncated model, the loaded model and the Sequential wrapper below all come
# from the same `keras` namespace.
model = keras.Model(inputs=saved_model.input, outputs=saved_model.layers[-2].output)

# NOTE(review): the labels in y_tr/y_val are tag2idx indices, so this must be
# at least n_tags -- confirm that 22 matches the dataset.
__num_of_classes = 22

new_model = keras.Sequential()
new_model.add(model)
new_model.add(keras.layers.Dense(__num_of_classes, activation='softmax'))

# Settings shared by both training stages.
compile_kwargs = dict(optimizer="sgd",
                      loss="sparse_categorical_crossentropy",
                      metrics=["accuracy"])
fit_kwargs = dict(x=np.array(X_tr),
                  y=y_tr,
                  batch_size=batch_size,
                  validation_data=(np.array(X_val), y_val),
                  verbose=1)

# Stage 1: freeze the pretrained embedding/LSTM stack and train only the new head.
new_model.layers[0].trainable = False

tf.compat.v1.experimental.output_all_intermediates(True)
new_model.compile(**compile_kwargs)
new_model.summary()
new_model.fit(epochs=20, **fit_kwargs)

# Stage 2: unfreeze everything and fine-tune. The model is recompiled so the
# changed `trainable` flags take effect.
new_model.layers[0].trainable = True
new_model.layers[1].trainable = True

new_model.compile(**compile_kwargs)
new_model.fit(epochs=100, **fit_kwargs)

