In [None]:
# Colab-only setup: mount Google Drive so the project files become visible
# under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Switch the working directory to the shared-drive project folder; the
# relative paths used by later cells (e.g. 'lite/...', 'steps/...') are
# resolved against it.
import os
os.chdir('/content/drive/Shareddrives/Диплом Дерево диалогов/')


In [None]:
import uuid
import gc

import pandas as pd
from experiment_collection import ExperimentCollectionRemote, Experiment
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model

from hse_dialog_tree.utils.cpu import get_processor_info
from hse_dialog_tree.utils.files import load_pickle

# Language code constant. It is not referenced by the cells visible here —
# presumably consumed elsewhere (TODO confirm).
LANG = 'rus'
# Print whatever the project helper reports about the processor of this runtime.
print(get_processor_info())

In [None]:
# Handle to the remote experiment collection that make_experiment() writes to.
# The host and token may be supplied through environment variables so that real
# credentials never have to be typed into (and saved with) the notebook. When
# the variables are unset, the original literal values are used unchanged.
# '03_recurrent3' is presumably the collection/namespace name (TODO confirm).
exps = ExperimentCollectionRemote(
    os.environ.get('EXPERIMENT_COLLECTION_HOST', 'HOST'),
    '03_recurrent3',
    os.environ.get('EXPERIMENT_COLLECTION_TOKEN', 'TOKEN'),
    credentials=True,
)

In [None]:
# Load the train/test inputs and targets used by make_experiment(); the pickle
# is expected to unpack into exactly these four objects.
# NOTE(review): load_pickle is a project helper — presumably it unpickles the
# file, so only load files from a trusted source (unpickling can run code).
X_lstm_train, y_lstm_train, X_lstm_test, y_lstm_test = load_pickle('lite/03_train_recurrent2.pkl.zip')
# Previously saved clustering model; presumably a fitted scikit-learn-style
# KMeans (TODO confirm). Only its .predict() method is used in this notebook.
cluster_model_kmeans = load_pickle('steps/03_kmeans/kmeans_v2_044.pkl.zip')

In [None]:
# Cluster assignment of the ground-truth targets. make_experiment() compares
# these against the cluster assignment of the model predictions to compute the
# 'eq' / 'val_eq' agreement metrics.
y_lstm_train_cluster = cluster_model_kmeans.predict(y_lstm_train)
y_lstm_test_cluster = cluster_model_kmeans.predict(y_lstm_test)

In [None]:
def make_experiment(model_creator, model_descr, exps, epochs=3,
                    losses=('CosineSimilarity',)):
    """Train a model one epoch at a time and log an experiment after each epoch.

    For every loss in ``losses`` a fresh model is built with
    ``model_creator(loss)``. After each training epoch the model predicts on
    the train and test inputs, the predictions are assigned to clusters, and
    the share of samples whose predicted cluster equals the target cluster is
    stored together with the Keras metrics of that epoch.

    The function reads these notebook-level names, which must already exist:
    ``X_lstm_train``, ``y_lstm_train``, ``X_lstm_test``, ``y_lstm_test``,
    ``cluster_model_kmeans``, ``y_lstm_train_cluster`` and
    ``y_lstm_test_cluster``.

    Args:
        model_creator: Callable taking a loss identifier and returning a
            compiled Keras model. The history keys ``cosine_similarity``,
            ``MSE``, ``val_cosine_similarity`` and ``val_MSE`` are read, so the
            model must be compiled with metrics that produce those names.
        model_descr: Text stored as the ``architecture`` parameter of every
            logged experiment.
        exps: Experiment collection; ``exps.add_experiment`` is called once
            per loss and epoch.
        epochs: Number of epochs to train each model.
        losses: Iterable of loss identifiers to run. Defaults to cosine
            similarity only, e.g. pass
            ``['CosineSimilarity', 'MeanSquaredError']`` to compare both.

    Returns:
        None. Results are recorded through ``exps``.
    """
    for loss in losses:
        model = model_creator(loss)
        for epoch in range(epochs):
            # epochs=epoch + 1 with initial_epoch=epoch makes every fit() call
            # run exactly one epoch while keeping the displayed epoch counter
            # increasing; the returned history therefore has a single entry
            # per metric, read with index 0 below.
            h = model.fit(X_lstm_train, y_lstm_train,
                          validation_data=(X_lstm_test, y_lstm_test),
                          epochs=epoch + 1, initial_epoch=epoch, verbose=1)

            predict_lstm_train = model.predict(X_lstm_train, batch_size=64)
            predict_lstm_test = model.predict(X_lstm_test, batch_size=64)
            predict_lstm_train_cluster = cluster_model_kmeans.predict(predict_lstm_train)
            predict_lstm_test_cluster = cluster_model_kmeans.predict(predict_lstm_test)

            # Fraction of samples whose predicted vector lands in the same
            # cluster as the target vector.
            eq_train = (predict_lstm_train_cluster == y_lstm_train_cluster).mean()
            eq_test = (predict_lstm_test_cluster == y_lstm_test_cluster).mean()

            # A random UUID is used as the experiment identifier; the two
            # dicts carry the run parameters and the measured metrics.
            exp = Experiment(
                str(uuid.uuid4()),
                {
                    'architecture': model_descr,
                    'loss': loss,
                    'epochs': epoch + 1,
                },
                {
                    'cos': h.history['cosine_similarity'][0],
                    'mse': h.history['MSE'][0],
                    'eq': eq_train,
                    'val_cos': h.history['val_cosine_similarity'][0],
                    'val_mse': h.history['val_MSE'][0],
                    'val_eq': eq_test,
                },
            )
            exps.add_experiment(exp)

            # Drop the per-epoch arrays before the next epoch allocates new ones.
            del predict_lstm_train, predict_lstm_test, predict_lstm_train_cluster, predict_lstm_test_cluster, eq_train, eq_test, exp
            gc.collect()

        # Release the finished model before the next loss builds a new one.
        del model
        gc.collect()

In [None]:
def make_model(loss):
    """Build and compile the plain LSTM(512) baseline.

    The model takes variable-length sequences of 512-dimensional vectors and
    outputs the LSTM layer's result directly.

    Args:
        loss: Loss identifier forwarded to ``model.compile``.

    Returns:
        The compiled Keras model (its summary is printed as a side effect).
    """
    sequence_input = layers.Input(shape=(None, 512))
    encoded = layers.LSTM(512)(sequence_input)

    model = Model(inputs=sequence_input, outputs=encoded, name='03_recurrent')
    model.compile(
        loss=loss,
        optimizer='adam',
        metrics=['CosineSimilarity', 'MSE'],
    )
    model.summary()
    return model


# Train for 10 epochs and log every epoch under the 'LSTM512' label.
make_experiment(make_model, 'LSTM512', exps, epochs=10)

In [None]:
def make_model(loss):
    """Build and compile the self-attention + bidirectional LSTM variant.

    Pipeline: scaled attention of the input sequence over itself, a
    bidirectional LSTM with 128 units per direction, dropout of 0.1 and a
    final Dense layer with 512 outputs.

    Args:
        loss: Loss identifier forwarded to ``model.compile``.

    Returns:
        The compiled Keras model (its summary is printed as a side effect).
    """
    sequence_input = layers.Input(shape=(None, 512))

    # The same tensor is passed as query and value, i.e. self-attention.
    attended = layers.Attention(use_scale=True)([sequence_input, sequence_input])
    recurrent = layers.Bidirectional(layers.LSTM(128))(attended)
    regularized = layers.Dropout(0.1)(recurrent)
    projected = layers.Dense(512)(regularized)

    model = Model(inputs=sequence_input, outputs=projected, name='03_recurrent')
    model.compile(
        loss=loss,
        optimizer='adam',
        metrics=['CosineSimilarity', 'MSE'],
    )
    model.summary()
    return model


# Train for 10 epochs and log every epoch under this architecture label.
make_experiment(make_model, 'Attention BiLSTM(128) Drop0.1 Dense512', exps, epochs=10)

In [None]:
def make_model(loss):
    """Build and compile the bidirectional LSTM(256) variant.

    The model takes variable-length sequences of 512-dimensional vectors and
    outputs the result of a Bidirectional wrapper around an LSTM with 256
    units per direction.

    Args:
        loss: Loss identifier forwarded to ``model.compile``.

    Returns:
        The compiled Keras model (its summary is printed as a side effect).
    """
    sequence_input = layers.Input(shape=(None, 512))
    encoded = layers.Bidirectional(layers.LSTM(256))(sequence_input)

    model = Model(inputs=sequence_input, outputs=encoded, name='03_recurrent')
    model.compile(
        loss=loss,
        optimizer='adam',
        metrics=['CosineSimilarity', 'MSE'],
    )
    model.summary()
    return model


# Train for 10 epochs and log every epoch under the 'BiLSTM256' label.
make_experiment(make_model, 'BiLSTM256', exps, epochs=10)


In [None]:
def make_model(loss):
    """Build and compile the plain GRU(512) variant.

    The model takes variable-length sequences of 512-dimensional vectors and
    outputs the GRU layer's result directly.

    Args:
        loss: Loss identifier forwarded to ``model.compile``.

    Returns:
        The compiled Keras model (its summary is printed as a side effect).
    """
    sequence_input = layers.Input(shape=(None, 512))
    encoded = layers.GRU(512)(sequence_input)

    model = Model(inputs=sequence_input, outputs=encoded, name='03_recurrent')
    model.compile(
        loss=loss,
        optimizer='adam',
        metrics=['CosineSimilarity', 'MSE'],
    )
    model.summary()
    return model


# Train for 10 epochs and log every epoch under the 'GRU512' label.
make_experiment(make_model, 'GRU512', exps, epochs=10)


In [None]:

def make_model(loss):
    """Build and compile the bidirectional GRU(256) variant.

    The model takes variable-length sequences of 512-dimensional vectors and
    outputs the result of a Bidirectional wrapper around a GRU with 256 units
    per direction.

    Args:
        loss: Loss identifier forwarded to ``model.compile``.

    Returns:
        The compiled Keras model (its summary is printed as a side effect).
    """
    sequence_input = layers.Input(shape=(None, 512))
    encoded = layers.Bidirectional(layers.GRU(256))(sequence_input)

    model = Model(inputs=sequence_input, outputs=encoded, name='03_recurrent')
    model.compile(
        loss=loss,
        optimizer='adam',
        metrics=['CosineSimilarity', 'MSE'],
    )
    model.summary()
    return model


# Train for 10 epochs and log every epoch under the 'BiGRU256' label.
make_experiment(make_model, 'BiGRU256', exps, epochs=10)

In [None]:
def point_wise_feed_forward_network(d_model, dff):
    """Create the two-layer feed-forward sub-network of an encoder block.

    Args:
        d_model: Number of units of the second (output) Dense layer.
        dff: Number of units of the first, ReLU-activated Dense layer.

    Returns:
        A ``tf.keras.Sequential`` applying Dense(dff, relu) then Dense(d_model).
    """
    # Inner layer: last axis becomes dff, i.e. (batch_size, seq_len, dff).
    expand = tf.keras.layers.Dense(dff, activation='relu')
    # Output layer: last axis becomes d_model, i.e. (batch_size, seq_len, d_model).
    project = tf.keras.layers.Dense(d_model)
    return tf.keras.Sequential([expand, project])

class EncoderLayer(tf.keras.layers.Layer):
    """Transformer-style encoder block.

    Applies multi-head self-attention and a point-wise feed-forward network.
    Each sub-layer is followed by dropout, a residual connection and layer
    normalization.

    Because of the residual connection around the feed-forward network, the
    last axis of the input must have size ``d_model``.

    Args:
        d_model: Output size of the feed-forward network; also passed to the
            attention layer as ``key_dim``.
        num_heads: Number of attention heads.
        dff: Hidden size of the feed-forward network.
        rate: Dropout rate used after both sub-layers.
        **kwargs: Standard Keras ``Layer`` arguments such as ``name``.
    """

    def __init__(self, d_model, num_heads, dff, rate=0.1, **kwargs):
        super().__init__(**kwargs)

        # Kept so that get_config() can reproduce the constructor call.
        self.d_model = d_model
        self.num_heads = num_heads
        self.dff = dff
        self.rate = rate

        # NOTE(review): in Keras' MultiHeadAttention ``key_dim`` is the size of
        # EACH head, so every head works with d_model dimensions here. The
        # conventional transformer uses d_model // num_heads. Behaviour is kept
        # as-is so already logged experiments stay comparable; confirm intent.
        self.mha = layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
        self.ffn = point_wise_feed_forward_network(d_model, dff)

        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, x, training=None):
        """Run the block on ``x``.

        Args:
            x: Input sequence tensor, (batch_size, input_seq_len, d_model).
            training: Forwarded to the two dropout layers. ``None`` lets Keras
                decide from the current execution mode.

        Returns:
            Tensor of the same shape as ``x``.
        """
        # Self-attention: the input serves as query, value and key.
        attn_output = self.mha(x, x, x)  # (batch_size, input_seq_len, d_model)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(x + attn_output)  # (batch_size, input_seq_len, d_model)

        ffn_output = self.ffn(out1)  # (batch_size, input_seq_len, d_model)
        ffn_output = self.dropout2(ffn_output, training=training)
        out2 = self.layernorm2(out1 + ffn_output)  # (batch_size, input_seq_len, d_model)
        return out2

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update({
            'd_model': self.d_model,
            'num_heads': self.num_heads,
            'dff': self.dff,
            'rate': self.rate,
        })
        return config

In [None]:
def make_model(loss):
    """Build and compile the stacked-encoder variant.

    Four ``EncoderLayer(512, 16, 8)`` blocks are applied in sequence to
    variable-length sequences of 512-dimensional vectors, then the sequence
    axis is averaged away with global average pooling.

    Args:
        loss: Loss identifier forwarded to ``model.compile``.

    Returns:
        The compiled Keras model (its summary is printed as a side effect).
    """
    sequence_input = layers.Input(shape=(None, 512))

    hidden = sequence_input
    for _ in range(4):
        # A new block (with its own weights) is created on every iteration.
        hidden = EncoderLayer(512, 16, 8)(hidden)
    pooled = layers.GlobalAveragePooling1D()(hidden)

    model = Model(inputs=sequence_input, outputs=pooled, name='03_recurrent')
    model.compile(
        loss=loss,
        optimizer='adam',
        metrics=['CosineSimilarity', 'MSE'],
    )
    model.summary()
    return model


# Train for 10 epochs and log every epoch under this architecture label.
make_experiment(make_model, 'EncoderLayer(512,16,8)x4 GAP', exps, epochs=10)