In [None]:
# Mount Google Drive inside the Colab runtime so the shared project folder is reachable.
from google.colab import drive
drive.mount('/content/drive')

# Switch the working directory to the shared-drive project folder; the relative
# paths passed to load_pickle() later in this notebook are resolved against it.
import os
os.chdir('/content/drive/Shareddrives/Диплом Дерево диалогов/')

In [None]:
import uuid
import gc

import pandas as pd
import numpy as np
from experiment_collection import ExperimentCollectionRemote, Experiment
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model

from hse_dialog_tree.utils.cpu import get_processor_info
from hse_dialog_tree.utils.files import load_pickle

# Language tag for this run; it is not referenced again in the visible cells.
LANG = 'rus'
# Print whatever get_processor_info() reports (presumably the CPU of the runtime — confirm).
print(get_processor_info())

def point_wise_feed_forward_network(d_model, dff):
    """Build the two-layer feed-forward sub-network used inside ``EncoderLayer``.

    Args:
        d_model: Number of units of the final (linear) Dense layer.
        dff: Number of units of the hidden Dense layer with ReLU activation.

    Returns:
        A ``tf.keras.Sequential`` stacking ``Dense(dff, relu)`` and ``Dense(d_model)``.
    """
    hidden = tf.keras.layers.Dense(dff, activation='relu')  # (batch_size, seq_len, dff)
    projection = tf.keras.layers.Dense(d_model)  # (batch_size, seq_len, d_model)
    return tf.keras.Sequential([hidden, projection])

class EncoderLayer(tf.keras.layers.Layer):
    """Encoder block: multi-head self-attention followed by a feed-forward network.

    Each of the two sub-layers is followed by dropout, a residual addition and
    layer normalisation. Because of the residual additions the feature size of
    the input must equal ``d_model``.

    Args:
        d_model: Output width of the feed-forward network; also passed to
            ``MultiHeadAttention`` as ``key_dim`` (the size of each attention head).
        num_heads: Number of attention heads.
        dff: Hidden width of the feed-forward network.
        rate: Dropout rate applied after each sub-layer.
        **kwargs: Standard ``tf.keras.layers.Layer`` arguments (``name``, ``dtype``, ...).
    """

    def __init__(self, d_model, num_heads, dff, rate=0.1, **kwargs):
        super().__init__(**kwargs)

        # Keep the constructor arguments so get_config() can reproduce the layer.
        self.d_model = d_model
        self.num_heads = num_heads
        self.dff = dff
        self.rate = rate

        self.mha = layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
        self.ffn = point_wise_feed_forward_network(d_model, dff)

        self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = tf.keras.layers.Dropout(rate)
        self.dropout2 = tf.keras.layers.Dropout(rate)

    def call(self, x, training=None):
        """Apply the encoder block to ``x``.

        Args:
            x: Input tensor; the original comments describe it as
                (batch_size, input_seq_len, d_model).
            training: Forwarded to the dropout layers. ``None`` (the default)
                leaves the decision to Keras.

        Returns:
            Tensor produced by the second layer normalisation.
        """
        # Self-attention: the same tensor is used as query, value and key.
        attn_output = self.mha(x, x, x)  # (batch_size, input_seq_len, d_model)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(x + attn_output)  # (batch_size, input_seq_len, d_model)

        ffn_output = self.ffn(out1)  # (batch_size, input_seq_len, d_model)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)  # (batch_size, input_seq_len, d_model)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialised and re-created."""
        config = super().get_config()
        config.update({
            'd_model': self.d_model,
            'num_heads': self.num_heads,
            'dff': self.dff,
            'rate': self.rate,
        })
        return config

In [None]:
# Remote experiment collection; make_experiment() stores one record per trained epoch
# in it through exps.add_experiment().
# NOTE(review): 'HOST' and 'TOKEN' look like redacted placeholders — supply the real values
# from outside the notebook (e.g. environment variables) instead of committing them.
exps = ExperimentCollectionRemote('HOST',  
                                  '03_recurrent3',
                                  'TOKEN', 
                                 credentials=True)

In [None]:
# Training/validation inputs and targets plus the per-sample cluster references
# (cl_lstm_*) that make_experiment() compares predicted clusters against.
# NOTE(review): load_pickle presumably unpickles these archives — only load files from a trusted source.
X_lstm_train, y_lstm_train, X_lstm_test, y_lstm_test, cl_lstm_train, cl_lstm_test = load_pickle('lite/03_train_recurrent3.pkl.zip')
# Clustering model whose predict() maps model outputs to cluster ids in make_experiment().
cluster_model_kmeans = load_pickle('steps/03_kmeans/kmeans_v2_044.pkl.zip')

In [None]:
def _cluster_hit_stats(predicted_clusters, true_clusters):
    """Score predicted cluster ids against per-sample reference cluster arrays.

    Args:
        predicted_clusters: Iterable with one predicted cluster id per sample.
        true_clusters: Iterable with one array of reference cluster ids per sample.

    Returns:
        Tuple ``(hit_rate, mean_position)``: the fraction of samples whose
        predicted id occurs in the reference array, and the mean 0-based index
        of its first occurrence (a miss counts as ``len(reference)``).
    """
    hits = []
    positions = []
    for predicted, candidates in zip(predicted_clusters, true_clusters):
        candidates = np.asarray(candidates)
        matches = np.where(candidates == predicted)[0]
        found = len(matches) > 0
        hits.append(found)
        # A miss is ranked one past the last valid 0-based index. This is the value the
        # previous expression ``len(true + 1)`` evaluated to, so logged metrics stay comparable.
        # NOTE(review): if ``len(true) + 1`` was intended, change it here (one place now).
        positions.append(matches.min() if found else len(candidates))
    return np.mean(hits), np.mean(positions)


def make_experiment(model_creator, model_descr, exps, epochs=3):
    """Train a model with each loss and log per-epoch metrics to ``exps``.

    For every loss in ('CosineSimilarity', 'MeanSquaredError') a fresh model is
    created and trained one epoch at a time on the notebook-level
    ``X_lstm_train`` / ``y_lstm_train`` data, validating on ``X_lstm_test`` /
    ``y_lstm_test``. After each epoch the predictions are mapped to clusters
    with ``cluster_model_kmeans`` and compared with ``cl_lstm_train`` /
    ``cl_lstm_test``; one ``Experiment`` per (loss, epoch) is added to ``exps``.

    Args:
        model_creator: Callable taking a loss name and returning a compiled model
            whose history exposes 'cosine_similarity' and 'MSE' metrics.
        model_descr: Architecture description stored with every experiment.
        exps: Collection object providing ``add_experiment``.
        epochs: Number of epochs to train for each loss.
    """
    for loss in ['CosineSimilarity', 'MeanSquaredError']:
        model = model_creator(loss)
        for epoch in range(epochs):
            # Exactly one epoch per fit() call, continuing from the previous one,
            # so the history lists below hold a single value each.
            h = model.fit(X_lstm_train, y_lstm_train,
                          validation_data=(X_lstm_test, y_lstm_test),
                          epochs=epoch + 1, initial_epoch=epoch, verbose=1)

            predict_lstm_train = model.predict(X_lstm_train, batch_size=64)
            predict_lstm_test = model.predict(X_lstm_test, batch_size=64)
            predict_lstm_train_cluster = cluster_model_kmeans.predict(predict_lstm_train)
            predict_lstm_test_cluster = cluster_model_kmeans.predict(predict_lstm_test)

            acc_train, pos_train = _cluster_hit_stats(predict_lstm_train_cluster, cl_lstm_train)
            acc_test, pos_test = _cluster_hit_stats(predict_lstm_test_cluster, cl_lstm_test)

            exp = Experiment(
                str(uuid.uuid4()),
                {
                    'architecture': model_descr,
                    'loss': loss,
                    'epochs': epoch + 1,
                },
                {
                    'cos': h.history['cosine_similarity'][0],
                    'mse': h.history['MSE'][0],
                    'acc': acc_train,
                    'pos': pos_train,
                    'val_cos': h.history['val_cosine_similarity'][0],
                    'val_mse': h.history['val_MSE'][0],
                    'val_acc': acc_test,
                    'val_pos': pos_test,
                },
            )
            exps.add_experiment(exp)

            # Release the prediction arrays before the next epoch to limit peak memory.
            del predict_lstm_train, predict_lstm_test, predict_lstm_train_cluster, predict_lstm_test_cluster, exp
            gc.collect()

In [None]:
def make_model(loss):
    """Build and compile: LSTM(128) -> BatchNormalization -> ReLU -> Dense(512).

    Args:
        loss: Loss identifier handed to ``model.compile``.

    Returns:
        The compiled Keras model; its summary is printed as a side effect.
    """
    inputs = layers.Input(shape=(None, 512))
    hidden = layers.LSTM(units=128)(inputs)
    hidden = layers.BatchNormalization()(hidden)
    hidden = layers.ReLU()(hidden)
    outputs = layers.Dense(units=512)(hidden)

    model = Model(inputs=inputs, outputs=outputs, name='03_recurrent')
    model.compile(optimizer='adam', loss=loss, metrics=['CosineSimilarity', 'MSE'])
    model.summary()
    return model


make_experiment(model_creator=make_model, model_descr='LSTM128 BN ReLU Dense512', exps=exps)

In [None]:
def make_model(loss):
    """Build and compile: Bidirectional LSTM(128) -> BatchNormalization -> ReLU -> Dense(512).

    Args:
        loss: Loss identifier handed to ``model.compile``.

    Returns:
        The compiled Keras model; its summary is printed as a side effect.
    """
    inputs = layers.Input(shape=(None, 512))
    hidden = layers.Bidirectional(layers.LSTM(units=128))(inputs)
    hidden = layers.BatchNormalization()(hidden)
    hidden = layers.ReLU()(hidden)
    outputs = layers.Dense(units=512)(hidden)

    model = Model(inputs=inputs, outputs=outputs, name='03_recurrent')
    model.compile(optimizer='adam', loss=loss, metrics=['CosineSimilarity', 'MSE'])
    model.summary()
    return model


make_experiment(model_creator=make_model, model_descr='BiLSTM128 BN ReLU Dense512', exps=exps)

In [None]:
def make_model(loss):
    """Build and compile a model consisting of a single LSTM(512) layer.

    Args:
        loss: Loss identifier handed to ``model.compile``.

    Returns:
        The compiled Keras model; its summary is printed as a side effect.
    """
    inputs = layers.Input(shape=(None, 512))
    outputs = layers.LSTM(units=512)(inputs)

    model = Model(inputs=inputs, outputs=outputs, name='03_recurrent')
    model.compile(optimizer='adam', loss=loss, metrics=['CosineSimilarity', 'MSE'])
    model.summary()
    return model


make_experiment(model_creator=make_model, model_descr='LSTM512', exps=exps, epochs=10)

In [None]:
def make_model(loss):
    """Build and compile a model consisting of a single Bidirectional LSTM(256) layer.

    Args:
        loss: Loss identifier handed to ``model.compile``.

    Returns:
        The compiled Keras model; its summary is printed as a side effect.
    """
    inputs = layers.Input(shape=(None, 512))
    outputs = layers.Bidirectional(layers.LSTM(units=256))(inputs)

    model = Model(inputs=inputs, outputs=outputs, name='03_recurrent')
    model.compile(optimizer='adam', loss=loss, metrics=['CosineSimilarity', 'MSE'])
    model.summary()
    return model


make_experiment(model_creator=make_model, model_descr='BiLSTM256', exps=exps, epochs=10)

In [None]:
def make_model(loss):
    """Build and compile a model consisting of a single GRU(512) layer.

    Args:
        loss: Loss identifier handed to ``model.compile``.

    Returns:
        The compiled Keras model; its summary is printed as a side effect.
    """
    inputs = layers.Input(shape=(None, 512))
    outputs = layers.GRU(units=512)(inputs)

    model = Model(inputs=inputs, outputs=outputs, name='03_recurrent')
    model.compile(optimizer='adam', loss=loss, metrics=['CosineSimilarity', 'MSE'])
    model.summary()
    return model


make_experiment(model_creator=make_model, model_descr='GRU512', exps=exps, epochs=10)

In [None]:
def make_model(loss):
    """Build and compile a model consisting of a single Bidirectional GRU(256) layer.

    Args:
        loss: Loss identifier handed to ``model.compile``.

    Returns:
        The compiled Keras model; its summary is printed as a side effect.
    """
    inputs = layers.Input(shape=(None, 512))
    outputs = layers.Bidirectional(layers.GRU(units=256))(inputs)

    model = Model(inputs=inputs, outputs=outputs, name='03_recurrent')
    model.compile(optimizer='adam', loss=loss, metrics=['CosineSimilarity', 'MSE'])
    model.summary()
    return model


make_experiment(model_creator=make_model, model_descr='BiGRU256', exps=exps, epochs=10)

In [None]:
def make_model(loss):
    """Build and compile three Conv1D+Attention stages followed by global average pooling.

    The stages use (filters, kernel_size) = (64, 1), (128, 2), (512, 3).

    Args:
        loss: Loss identifier handed to ``model.compile``.

    Returns:
        The compiled Keras model; its summary is printed as a side effect.
    """
    def conv_attention(tensor, filters=512, kernel_size=1):
        # 'same'-padded convolution, then attention of the result over itself.
        projected = layers.Conv1D(filters=filters, kernel_size=kernel_size, padding='same')(tensor)
        return layers.Attention(use_scale=True)([projected, projected])

    inputs = layers.Input(shape=(None, 512))
    features = inputs
    for filters, kernel_size in ((64, 1), (128, 2), (512, 3)):
        features = conv_attention(features, filters, kernel_size)
    outputs = layers.GlobalAveragePooling1D()(features)

    model = Model(inputs=inputs, outputs=outputs, name='03_recurrent')
    model.compile(optimizer='adam', loss=loss, metrics=['CosineSimilarity', 'MSE'])
    model.summary()
    return model


make_experiment(
    model_creator=make_model,
    model_descr='SA(f,k)=(conv(f,k) Attention) SA(64,1) SA(128,2) SA(512,3) GAP',
    exps=exps,
)

In [None]:
def make_model(loss):
    """Build and compile three Conv1D+Attention stages followed by global average pooling.

    The stages use (filters, kernel_size) = (64, 5), (128, 3), (512, 2).

    Args:
        loss: Loss identifier handed to ``model.compile``.

    Returns:
        The compiled Keras model; its summary is printed as a side effect.
    """
    def conv_attention(tensor, filters=512, kernel_size=1):
        # 'same'-padded convolution, then attention of the result over itself.
        projected = layers.Conv1D(filters=filters, kernel_size=kernel_size, padding='same')(tensor)
        return layers.Attention(use_scale=True)([projected, projected])

    inputs = layers.Input(shape=(None, 512))
    features = inputs
    for filters, kernel_size in ((64, 5), (128, 3), (512, 2)):
        features = conv_attention(features, filters, kernel_size)
    outputs = layers.GlobalAveragePooling1D()(features)

    model = Model(inputs=inputs, outputs=outputs, name='03_recurrent')
    model.compile(optimizer='adam', loss=loss, metrics=['CosineSimilarity', 'MSE'])
    model.summary()
    return model


make_experiment(
    model_creator=make_model,
    model_descr='SA(f,k)=(conv(f,k) Attention) SA(64,5) SA(128,3) SA(512,2) GAP',
    exps=exps,
)

In [None]:
def make_model(loss):
    """Build and compile: BiLSTM(128) -> Dropout(0.1) -> Dense(128, relu) -> Attention -> Dense(512).

    Args:
        loss: Loss identifier handed to ``model.compile``.

    Returns:
        The compiled Keras model; its summary is printed as a side effect.
    """
    inputs = layers.Input(shape=(None, 512))
    hidden = layers.Bidirectional(layers.LSTM(units=128))(inputs)
    hidden = layers.Dropout(rate=0.1)(hidden)
    hidden = layers.Dense(units=128, activation='relu')(hidden)
    # NOTE(review): the BiLSTM is created without return_sequences=True, so the tensor
    # reaching Attention presumably has no time axis; dot-product attention would then be
    # computed between the samples of a batch rather than between time steps — confirm intent.
    hidden = layers.Attention(use_scale=True)([hidden, hidden])
    outputs = layers.Dense(units=512)(hidden)

    model = Model(inputs=inputs, outputs=outputs, name='03_recurrent')
    model.compile(optimizer='adam', loss=loss, metrics=['CosineSimilarity', 'MSE'])
    model.summary()
    return model


make_experiment(
    model_creator=make_model,
    model_descr='BiLSTM(128) Drop0.1 Dense128 Relu Attention Dense512',
    exps=exps,
)

In [None]:
def make_model(loss):
    """Build and compile: BiLSTM(128) -> Dropout(0.1) -> Dense(128, relu) -> 2x Attention -> Dense(512).

    Args:
        loss: Loss identifier handed to ``model.compile``.

    Returns:
        The compiled Keras model; its summary is printed as a side effect.
    """
    inputs = layers.Input(shape=(None, 512))
    hidden = layers.Bidirectional(layers.LSTM(units=128))(inputs)
    hidden = layers.Dropout(rate=0.1)(hidden)
    hidden = layers.Dense(units=128, activation='relu')(hidden)
    # NOTE(review): the BiLSTM is created without return_sequences=True, so the tensor
    # reaching Attention presumably has no time axis; dot-product attention would then be
    # computed between the samples of a batch rather than between time steps — confirm intent.
    for _ in range(2):
        hidden = layers.Attention(use_scale=True)([hidden, hidden])
    outputs = layers.Dense(units=512)(hidden)

    model = Model(inputs=inputs, outputs=outputs, name='03_recurrent')
    model.compile(optimizer='adam', loss=loss, metrics=['CosineSimilarity', 'MSE'])
    model.summary()
    return model


make_experiment(
    model_creator=make_model,
    model_descr='BiLSTM(128) Drop0.1 Dense128 Relu Attention Attention Dense512',
    exps=exps,
)

In [None]:
def make_model(loss):
    """Build and compile: BiLSTM(128) -> Dropout(0.1) -> Dense(128, relu) -> 3x Attention -> Dense(512).

    Args:
        loss: Loss identifier handed to ``model.compile``.

    Returns:
        The compiled Keras model; its summary is printed as a side effect.
    """
    inputs = layers.Input(shape=(None, 512))
    hidden = layers.Bidirectional(layers.LSTM(units=128))(inputs)
    hidden = layers.Dropout(rate=0.1)(hidden)
    hidden = layers.Dense(units=128, activation='relu')(hidden)
    # NOTE(review): the BiLSTM is created without return_sequences=True, so the tensor
    # reaching Attention presumably has no time axis; dot-product attention would then be
    # computed between the samples of a batch rather than between time steps — confirm intent.
    for _ in range(3):
        hidden = layers.Attention(use_scale=True)([hidden, hidden])
    outputs = layers.Dense(units=512)(hidden)

    model = Model(inputs=inputs, outputs=outputs, name='03_recurrent')
    model.compile(optimizer='adam', loss=loss, metrics=['CosineSimilarity', 'MSE'])
    model.summary()
    return model


make_experiment(
    model_creator=make_model,
    model_descr='BiLSTM(128) Drop0.1 Dense128 Relu Attention Attention Attention Dense512',
    exps=exps,
)

In [None]:
def make_model(loss):
    """Build and compile: Attention -> BiLSTM(128) -> Dropout(0.1) -> Dense(128, relu) -> Dense(512).

    Args:
        loss: Loss identifier handed to ``model.compile``.

    Returns:
        The compiled Keras model; its summary is printed as a side effect.
    """
    inputs = layers.Input(shape=(None, 512))
    # Attention of the input sequence over itself, applied before the recurrent layer.
    attended = layers.Attention(use_scale=True)([inputs, inputs])
    hidden = layers.Bidirectional(layers.LSTM(units=128))(attended)
    hidden = layers.Dropout(rate=0.1)(hidden)
    hidden = layers.Dense(units=128, activation='relu')(hidden)
    outputs = layers.Dense(units=512)(hidden)

    model = Model(inputs=inputs, outputs=outputs, name='03_recurrent')
    model.compile(optimizer='adam', loss=loss, metrics=['CosineSimilarity', 'MSE'])
    model.summary()
    return model


make_experiment(
    model_creator=make_model,
    model_descr='Attention BiLSTM(128) Drop0.1 Dense128 Relu Dense512',
    exps=exps,
)

In [None]:
def make_model(loss):
    """Build and compile: 2x Attention -> BiLSTM(128) -> Dropout(0.1) -> Dense(128, relu) -> Dense(512).

    Args:
        loss: Loss identifier handed to ``model.compile``.

    Returns:
        The compiled Keras model; its summary is printed as a side effect.
    """
    inputs = layers.Input(shape=(None, 512))
    attended = inputs
    # Two stacked attention layers, each attending the sequence over itself.
    for _ in range(2):
        attended = layers.Attention(use_scale=True)([attended, attended])
    hidden = layers.Bidirectional(layers.LSTM(units=128))(attended)
    hidden = layers.Dropout(rate=0.1)(hidden)
    hidden = layers.Dense(units=128, activation='relu')(hidden)
    outputs = layers.Dense(units=512)(hidden)

    model = Model(inputs=inputs, outputs=outputs, name='03_recurrent')
    model.compile(optimizer='adam', loss=loss, metrics=['CosineSimilarity', 'MSE'])
    model.summary()
    return model


make_experiment(
    model_creator=make_model,
    model_descr='Attention Attention BiLSTM(128) Drop0.1 Dense128 Relu Dense512',
    exps=exps,
)

In [None]:
def make_model(loss):
    """Build and compile: 3x Attention -> BiLSTM(128) -> Dropout(0.1) -> Dense(128, relu) -> Dense(512).

    Args:
        loss: Loss identifier handed to ``model.compile``.

    Returns:
        The compiled Keras model; its summary is printed as a side effect.
    """
    inputs = layers.Input(shape=(None, 512))
    attended = inputs
    # Three stacked attention layers, each attending the sequence over itself.
    for _ in range(3):
        attended = layers.Attention(use_scale=True)([attended, attended])
    hidden = layers.Bidirectional(layers.LSTM(units=128))(attended)
    hidden = layers.Dropout(rate=0.1)(hidden)
    hidden = layers.Dense(units=128, activation='relu')(hidden)
    outputs = layers.Dense(units=512)(hidden)

    model = Model(inputs=inputs, outputs=outputs, name='03_recurrent')
    model.compile(optimizer='adam', loss=loss, metrics=['CosineSimilarity', 'MSE'])
    model.summary()
    return model


make_experiment(
    model_creator=make_model,
    model_descr='Attention Attention Attention BiLSTM(128) Drop0.1 Dense128 Relu Dense512',
    exps=exps,
)

In [None]:
def make_model(loss):
    """Build and compile: Attention -> BiLSTM(128) -> Dropout(0.1) -> Dense(512).

    Args:
        loss: Loss identifier handed to ``model.compile``.

    Returns:
        The compiled Keras model; its summary is printed as a side effect.
    """
    inputs = layers.Input(shape=(None, 512))
    # Attention of the input sequence over itself, applied before the recurrent layer.
    attended = layers.Attention(use_scale=True)([inputs, inputs])
    hidden = layers.Bidirectional(layers.LSTM(units=128))(attended)
    hidden = layers.Dropout(rate=0.1)(hidden)
    outputs = layers.Dense(units=512)(hidden)

    model = Model(inputs=inputs, outputs=outputs, name='03_recurrent')
    model.compile(optimizer='adam', loss=loss, metrics=['CosineSimilarity', 'MSE'])
    model.summary()
    return model


make_experiment(
    model_creator=make_model,
    model_descr='Attention BiLSTM(128) Drop0.1 Dense512',
    exps=exps,
)

In [None]:
def make_model(loss):
    """Build and compile: 2x Attention -> BiLSTM(128) -> Dropout(0.1) -> Dense(512).

    Args:
        loss: Loss identifier handed to ``model.compile``.

    Returns:
        The compiled Keras model; its summary is printed as a side effect.
    """
    inputs = layers.Input(shape=(None, 512))
    attended = inputs
    # Two stacked attention layers, each attending the sequence over itself.
    for _ in range(2):
        attended = layers.Attention(use_scale=True)([attended, attended])
    hidden = layers.Bidirectional(layers.LSTM(units=128))(attended)
    hidden = layers.Dropout(rate=0.1)(hidden)
    outputs = layers.Dense(units=512)(hidden)

    model = Model(inputs=inputs, outputs=outputs, name='03_recurrent')
    model.compile(optimizer='adam', loss=loss, metrics=['CosineSimilarity', 'MSE'])
    model.summary()
    return model


make_experiment(
    model_creator=make_model,
    model_descr='Attention Attention BiLSTM(128) Drop0.1 Dense512',
    exps=exps,
)

In [None]:
def make_model(loss):
    """Build and compile: 3x Attention -> BiLSTM(128) -> Dropout(0.1) -> Dense(512).

    Args:
        loss: Loss identifier handed to ``model.compile``.

    Returns:
        The compiled Keras model; its summary is printed as a side effect.
    """
    inputs = layers.Input(shape=(None, 512))
    attended = inputs
    # Three stacked attention layers, each attending the sequence over itself.
    for _ in range(3):
        attended = layers.Attention(use_scale=True)([attended, attended])
    hidden = layers.Bidirectional(layers.LSTM(units=128))(attended)
    hidden = layers.Dropout(rate=0.1)(hidden)
    outputs = layers.Dense(units=512)(hidden)

    model = Model(inputs=inputs, outputs=outputs, name='03_recurrent')
    model.compile(optimizer='adam', loss=loss, metrics=['CosineSimilarity', 'MSE'])
    model.summary()
    return model


make_experiment(
    model_creator=make_model,
    model_descr='Attention Attention Attention BiLSTM(128) Drop0.1 Dense512',
    exps=exps,
)

In [None]:
def make_model(loss):
    """Build and compile: one EncoderLayer(d_model=512, num_heads=16, dff=8) -> global average pooling.

    Args:
        loss: Loss identifier handed to ``model.compile``.

    Returns:
        The compiled Keras model; its summary is printed as a side effect.
    """
    inputs = layers.Input(shape=(None, 512))
    encoded = EncoderLayer(d_model=512, num_heads=16, dff=8)(inputs)
    outputs = layers.GlobalAveragePooling1D()(encoded)

    model = Model(inputs=inputs, outputs=outputs, name='03_recurrent')
    model.compile(optimizer='adam', loss=loss, metrics=['CosineSimilarity', 'MSE'])
    model.summary()
    return model


make_experiment(model_creator=make_model, model_descr='EncoderLayer(512,16,8) GAP', exps=exps, epochs=10)

In [None]:
def make_model(loss):
    """Build and compile: four stacked EncoderLayer(d_model=512, num_heads=16, dff=8) -> global average pooling.

    Args:
        loss: Loss identifier handed to ``model.compile``.

    Returns:
        The compiled Keras model; its summary is printed as a side effect.
    """
    inputs = layers.Input(shape=(None, 512))
    encoded = inputs
    # Each iteration creates a new, independently weighted encoder block.
    for _ in range(4):
        encoded = EncoderLayer(d_model=512, num_heads=16, dff=8)(encoded)
    outputs = layers.GlobalAveragePooling1D()(encoded)

    model = Model(inputs=inputs, outputs=outputs, name='03_recurrent')
    model.compile(optimizer='adam', loss=loss, metrics=['CosineSimilarity', 'MSE'])
    model.summary()
    return model


make_experiment(model_creator=make_model, model_descr='EncoderLayer(512,16,8)x4 GAP', exps=exps, epochs=10)

In [None]:
def make_model(loss):
    """Build and compile: one EncoderLayer(d_model=512, num_heads=16, dff=4) -> global average pooling.

    Args:
        loss: Loss identifier handed to ``model.compile``.

    Returns:
        The compiled Keras model; its summary is printed as a side effect.
    """
    inputs = layers.Input(shape=(None, 512))
    encoded = EncoderLayer(d_model=512, num_heads=16, dff=4)(inputs)
    outputs = layers.GlobalAveragePooling1D()(encoded)

    model = Model(inputs=inputs, outputs=outputs, name='03_recurrent')
    model.compile(optimizer='adam', loss=loss, metrics=['CosineSimilarity', 'MSE'])
    model.summary()
    return model


make_experiment(model_creator=make_model, model_descr='EncoderLayer(512,16,4) GAP', exps=exps, epochs=10)

In [None]:
def make_model(loss):
    """Build and compile: one EncoderLayer(d_model=512, num_heads=16, dff=12) -> global average pooling.

    Args:
        loss: Loss identifier handed to ``model.compile``.

    Returns:
        The compiled Keras model; its summary is printed as a side effect.
    """
    inputs = layers.Input(shape=(None, 512))
    encoded = EncoderLayer(d_model=512, num_heads=16, dff=12)(inputs)
    outputs = layers.GlobalAveragePooling1D()(encoded)

    model = Model(inputs=inputs, outputs=outputs, name='03_recurrent')
    model.compile(optimizer='adam', loss=loss, metrics=['CosineSimilarity', 'MSE'])
    model.summary()
    return model


make_experiment(model_creator=make_model, model_descr='EncoderLayer(512,16,12) GAP', exps=exps, epochs=10)