# Notebook for Generating Summaries

In [1]:
# temp while cluster is full
# !pip install keras-tuner

In [2]:
import os
# Thread/worker budget for this notebook; also passed as `workers=` to the tuner search in NNTuner.search.
num_threads = 32
# Exported before pandas/numpy are imported further down in this cell — presumably so that
# numexpr picks the limit up when it is first loaded (TODO confirm against the numexpr docs).
os.environ['NUMEXPR_MAX_THREADS'] = str(num_threads)

import pandas as pd
import numpy as np
import pickle
import copy
from tqdm import tqdm
from tqdm import tqdm_notebook
from sentence_transformers import SentenceTransformer
import ipynb.fs

from sklearn.neighbors import KDTree

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import InputLayer
from tensorflow.keras.layers import GRU
from tensorflow.keras.layers import LSTM

import kerastuner as kt
from kerastuner.engine.hyperparameters import HyperParameters
from kerastuner.tuners import Hyperband

In [3]:
# Label columns used by resolve_input_params when the caller passes col_labels=None.
default_col_labels = [
    'cosine_similarity',
    'cos_sim_nearest_nug',
]
# Default input column name (not referenced by the other cells visible in this notebook).
default_input_col = "embedding"

## Keras NN Model

In [4]:
class NNTuner:
    """Hyperband search over small dense Keras networks.

    Wraps a keras-tuner ``Hyperband`` instance whose search space is defined by
    ``build_model``. Every model built during the search is also kept in ``self.models``.
    """

    def __init__(self, save_dir, save_name, input_dim=768, tuning_iterations=50, force_reload=False):
        """Create the underlying Hyperband tuner.

        Args:
            save_dir: forwarded to Hyperband as ``directory``.
            save_name: forwarded to Hyperband as ``project_name``.
            input_dim: length of each input vector fed to the network.
            tuning_iterations: forwarded to Hyperband as ``hyperband_iterations``.
            force_reload: forwarded to Hyperband as ``overwrite``. Per the original note,
                with overwrite False the tuner resumes from what is stored under
                ``project_name`` (see the keras-tuner Tuner documentation).
        """
        self.input_dim = input_dim
        self.models = []
        self.tuner = Hyperband(self.build_model, 
                          objective='mean_squared_error', 
                          max_epochs=25,
                          hyperband_iterations=tuning_iterations,
                          directory=save_dir,
                          project_name=save_name,
                          overwrite=force_reload)
        
    def build_model(self, hp):
        """Build and compile one candidate model from the hyperparameters ``hp``.

        Search space: 1-4 Dense layers, each with its own unit count (1..1024, step 32)
        and activation (relu/tanh/sigmoid); Adam learning rate in [1e-5, 0.1]; loss chosen
        from MSE, huber, binary and categorical cross-entropy. Mean squared error is
        always tracked as a metric because it is the tuner objective.

        Returns:
            The compiled ``Sequential`` model (also appended to ``self.models``).
        """
        model = Sequential()
        ilayer = InputLayer(input_shape=(self.input_dim,))
        model.add(ilayer)
        # NOTE(review): no dedicated output layer is added, so the output width equals the
        # unit count of the last tuned Dense layer — confirm this matches the label shape.
        for i in range(hp.Int('num_layers', min_value=1, max_value=4)):
            model.add(Dense(units=hp.Int('units_' + str(i),
                                        min_value=1, max_value=1024, step=32),
                            activation=hp.Choice('activ_' + str(i),
                                                ['relu', 'tanh', 'sigmoid'])))
        opt = tf.keras.optimizers.Adam(
                learning_rate=hp.Float('learning_rate', min_value=0.00001, max_value=0.1))           
        losses = hp.Choice('loss_func', ['MSE', 'huber', 'binary_crossentropy', 'categorical_crossentropy'])
        model.compile(optimizer=opt, loss=losses, metrics=['mean_squared_error'])  # add metrics here
        self.models.append(model)
        return model
    
    def search(self, inputs, labels, save_path=None, return_hyperparams=False):
        """Find optimal model given dataset

        Args:
            inputs: training inputs, forwarded to the tuner as ``x``.
            labels: training targets, forwarded to the tuner as ``y``.
            save_path: when given, the best model is saved there.
            return_hyperparams: when True, also return the best hyperparameters.

        Returns:
            The list returned by ``get_best_models(num_models=1)``; with
            ``return_hyperparams`` a tuple of that list and the list returned by
            ``get_best_hyperparameters(num_trials=1)``.
        """
        self.tuner.search(x=inputs, y=labels, verbose=1, use_multiprocessing=True, workers=num_threads)
        best_models = self.tuner.get_best_models(num_models=1)
        if save_path is not None:
            # `tf.keras.save` does not exist; saving is a method of the model itself
            best_models[0].save(save_path)
        if return_hyperparams:
            hyperparams = self.tuner.get_best_hyperparameters(num_trials=1)
            return best_models, hyperparams
        return best_models
        

## Feeding the Dataset

In [5]:
from .defs.corpus_loader import PathRetriever, load_embeddings, load_topics, read_df_file_type, save_df_file_type
from .defs.corpus_loader import convert_to_list

In [12]:
def resolve_input_params(path_ret, corpus_names, nested_dirs, col_labels, input_col=None):
    """Helper function to resolve the selection of input params that determine what data to load/generate

    Args:
        path_ret: object providing get_corpus_names() and get_nested_dirs(corpus_name, "embeddings").
        corpus_names: corpus names to use, or None for every corpus path_ret knows.
        nested_dirs: None (all dirs), a collection of dir names to keep, or an already
            resolved dict {corpus_name: [nested_dir, ...]} which is passed through.
        col_labels: column labels to use, or None for default_col_labels.
        input_col: optional input column that must be present in the returned labels.

    Returns:
        (corpus_names, nested_dirs, col_labels) where nested_dirs is a dict keyed by
        corpus name and col_labels is a new list (the caller's list is never modified).

    Raises:
        Exception: when there are no corpuses, or no nested dir matches the selection.
    """
    # resolve corpus_names
    if corpus_names is None:
        corpus_names = path_ret.get_corpus_names()
        if len(corpus_names) == 0:
            raise Exception("There are no corpuses to load from")
    # resolve col_labels; always work on a copy so neither the defaults nor the
    # caller's list are mutated
    if col_labels is None:  # our columns to generate files for
        col_labels = default_col_labels.copy()
    else:
        col_labels = list(col_labels)
    # the input column is required whether or not the labels were given explicitly
    if input_col is not None and input_col not in col_labels:
        col_labels.append(input_col)
    # resolve nested_dirs
    if not isinstance(nested_dirs, dict):  # a dict means output gets passed through again
        selected_dirs = nested_dirs
        nested_dirs = {}
        for corpus_name in corpus_names:  # get the nested dir for each corpus name
            available = path_ret.get_nested_dirs(corpus_name, "embeddings")
            if selected_dirs is not None:
                # add only selected nested_dirs for this corpus_name
                available = [x for x in available if x in selected_dirs]
            nested_dirs[corpus_name] = available
    # make sure there is at least one entry in nested_dirs
    if all(len(x) == 0 for x in nested_dirs.values()):
        raise Exception("There are no nested_dirs matching the selection")
    return corpus_names, nested_dirs, col_labels

def corpus_name_topic_ids(corpus_name, path_ret=None):
    """Return the distinct topic ids listed in the topic file of corpus_name.

    Args:
        corpus_name: corpus whose topic file is read.
        path_ret: object providing get_topic_path(corpus_name, verbose=...). This is a
            module-level function, so there is no `self` to take it from; when omitted
            a PathRetriever(None) is built, the same call InputLabelHandler makes when
            it is created without a proj_dir.

    Returns:
        list of the unique values of the topic frame's 'id' column, in order of appearance.
    """
    if path_ret is None:
        path_ret = PathRetriever(None)
    topic_path = path_ret.get_topic_path(corpus_name, verbose=False)
    topic_df = load_topics(topic_path, verbose=False)
    topic_ids = list(topic_df['id'].unique())
    return topic_ids

def find_combinations(path_df, corpus_names, nested_dirs, col_labels, add_topics=False,
                      force_reload=False):
    """Find the combinations that have not been generated/trained already in path_df and return as tuple

    Args:
        path_df: DataFrame with 'corpus_name', 'nested_dir' and 'col_label' columns
            (plus 'topic_id' when add_topics is used) listing what already exists.
        corpus_names: corpus names to consider.
        nested_dirs: dict {corpus_name: [nested_dir, ...]}.
        col_labels: column labels to consider.
        add_topics: when True, one combination is produced per topic id of the corpus
            and the existence check is done per topic.
        force_reload: when True, every combination is returned regardless of path_df.

    Returns:
        list of tuples (corpus_name, nested_dir, col_label) or, with add_topics,
        (corpus_name, nested_dir, col_label, topic_id).
    """
    topic_ids = {}
    if add_topics:  # find topic_ids for each corpus
        for corpus_name in corpus_names:
            topic_ids[corpus_name] = corpus_name_topic_ids(corpus_name)
    # get possible combinations
    combinations = []
    for corpus_name in corpus_names:
        for nested_dir in nested_dirs[corpus_name]:
            for col_label in col_labels:
                combo = (corpus_name, nested_dir, col_label)
                combo_rows = path_df[(path_df['corpus_name'] == corpus_name)
                                     & (path_df['nested_dir'] == nested_dir)
                                     & (path_df['col_label'] == col_label)]
                if add_topics:
                    # check each topic on its own: a combination with only some of its
                    # topics present must still report the missing ones
                    for topic_id in topic_ids[corpus_name]:
                        topic_rows = combo_rows[combo_rows['topic_id'] == topic_id]
                        if force_reload or len(topic_rows) == 0:
                            combinations.append(combo + (topic_id,))
                elif force_reload or len(combo_rows) == 0:
                    combinations.append(combo)  # append without topic_id
    return combinations

In [7]:
class InputLabelHandler:
    """Class that will load and store an instance of the dataset to be fed to a model"""
    def __init__(self, proj_dir=None, input_col_name="embedding"):
        if proj_dir is None:
            self.proj_dir = '/nfs/proj-repo/AAARG-dissertation'
        else:
            self.proj_dir = proj_dir
        self.default_file_type = ".hdf"
        self.path_ret = PathRetriever(proj_dir)
        self.label_options = ['cosine_similarity', 'cos_sim_nearest_nug']
        self.input_col_name = input_col_name
#         self.default_test_topics = [1,2,3,4,5,6,8,9,10]
        # label_path_df variables
        self.label_path_df_dir = self.path_ret.path_handler.dataset_dir
        self.label_path_df_path = os.path.join(self.label_path_df_dir, "label_path_df.hdf")
        self.label_path_df_cols = ['corpus_name', 'nested_dir', 'topic_id', 'col_label', 'path']
        
        
    def generate(self, corpus_names=None, nested_dirs=None, col_labels=None, emb_file_type=None,
                               force_reload=False, verbose=True):
        """Generate easily loadable inputs/labels files to be fed to NN when needed"""
        
        self.label_path_df = self.load_label_path_df(verbose=verbose)
        
        corpus_names, nested_dirs, col_labels = resolve_input_params(self.path_ret, corpus_names,
                                                                    nested_dirs, col_labels, input_col="embedding")
        
        if emb_file_type is None:  # target file type to load from
            emb_file_type = self.default_file_type
        
        if verbose:
            print("Retrieving the following: " + str(", ".join(col_labels)))
        
        for corpus_name in tqdm_notebook(corpus_names):
            if verbose:
                print("Generating from corpus: " + str(corpus_name))
            # get topic_ids from topic_df
            topic_path = self.path_ret.get_topic_path(corpus_name, verbose=False)
            topic_df = load_topics(topic_path, verbose=False)
            topic_ids = list(topic_df['id'].unique())
            for nested_dir in tqdm_notebook(nested_dirs[corpus_name]):
                if verbose:
                    print("Using inputs from " + str(nested_dir))
                for topic_id in tqdm_notebook(topic_ids):
                    untrained_cols = []
                    if not force_reload:
                        trained_cols = self.label_path_df
                        trained_cols = trained_cols[(trained_cols['corpus_name'] == corpus_name)
                                                   & (trained_cols['nested_dir'] == nested_dir)
                                                   & (trained_cols['topic_id'] == topic_id)]
                        trained_cols = list(trained_cols['col_label'])
                        # find untrained cols
                        untrained_cols = [x for x in col_labels if x not in trained_cols]
                    else:
                        untrained_cols = col_labels  # if force_reload train all
                    
                    if len(untrained_cols) > 0:
                    
                        emb_paths, nested_dir_path = self.path_ret.get_embedding_paths(corpus_name, nested_dir, 
                                                        file_type=emb_file_type, verbose=False, 
                                                        return_dir_path=True, topic_ids=[topic_id])
                        if len(emb_paths) == 0:
                            raise Exception("No paths for " + str(corpus_name) + ", " + str(nested_dir) + ", "
                                           + str(emb_file_type) + ", topic_id: " + str(topic_id))

                        # load labels and save to pickled file
                        save_paths = {}
                        for col_label in untrained_cols:
                            save_paths[col_label] = self.generate_path(nested_dir_path, topic_id, col_label)
                        # find which labels do not currently have files (or force reload them)
#                         target_labels = []
#                         for col_label, save_path in save_paths.items():
#                             if force_reload or not os.path.exists(save_path):  # force_reload will add all
#                                 target_labels.append(col_label)
                        # load the selected labels
                        loaded_labels = self.retrieve_col_data(emb_paths, untrained_cols, verbose=verbose)
                        # save the results to separate files
                        for label, label_data in loaded_labels.items():
                            label_path = save_paths[label]
                            with open(label_path, 'wb') as handle:
                                pickle.dump(label_data, handle, protocol=pickle.HIGHEST_PROTOCOL)
                            # save path in label_path_df to keep track of saved files
                            self.add_path_to_df(corpus_name, nested_dir, topic_id, label, label_path)
                            if verbose:
                                print("File saved to: " + str(label_path))
                        self.save_label_path_df()
                    else:
                        if verbose:
                            print("full trained for topic " + str(topic_id) + ": " + str(col_labels))
        print("Generated inputs and labels")
        
    def load(self, corpus_name, nested_dir, col_label, topic_ids=None, verbose=True):
        """Load selected generated input/label
        
        Return:
            dict where keys are equal to topic_ids, values equal to a list of the input/label data
        """
        paths = self.label_path_df
        paths = paths[paths['corpus_name'] == corpus_name & paths['nested_dir'] == nested_dir
                     & paths['col_label'] == col_label]
        if topic_ids is not None:  # otherwise load all
            for topic_id in topic_ids:  # issues with .isin method, using iterative for loop instead
                paths = paths[paths['topic_id'] == topic_id]  
        if len(paths) == 0:
            raise Exception("There are no paths to load the selected inputs/labels")
        
        loaded_data = {}
        for index, row in paths.iterrows():
            save_path = row['path']
            if not os.path.exists(save_path):
                raise Exception("Generated " + str(col_label) + " for " + str(corpus_name) 
                                + ", " + str(nested_dir) + " does not exist at: " + str(save_path))
            topic_data = None
            with open(save_path, 'rb') as handle:
                topic_data = pickle.load(handle)
            
            topic_id = int(row['topic_id'])
            topic_ids.append(topic_id)  # for verbose printing
            loaded_data[topic_id] = topic_data
        
        if verbose:
            print("Loaded " + str(col_label) + " for " + str(corpus_name) 
                                + ", " + str(nested_dir) + "\ntopic_ids loaded: " + str(topic_ids))
        return loaded_data
    
    def corpus_topic_ids(self, corpus_name):
        """Num topics for given corpus_name"""
        topic_ids = list(self.label_path_df[self.label_path_df['corpus_name'] == corpus_name]['topic_id'].unique())
        return topic_ids

                        
    def load_label_path_df(self, verbose=True):
        label_path_df = None
        if verbose:
            print("Loading label_path_df")
        if os.path.exists(self.label_path_df_path):
            label_path_df = read_df_file_type(self.label_path_df_path, verbose=True)
        else:
            label_path_df = pd.DataFrame(columns=self.label_path_df_cols)
            if verbose:
                print("label_path_df created from scratch")
        return label_path_df
    
    def add_path_to_df(self, corpus_name, nested_dir, topic_id, col_label, path):
        row = {"corpus_name":corpus_name, "nested_dir":nested_dir, "topic_id":topic_id, 
               "col_label":col_label, "path":path}
        self.label_path_df = self.label_path_df.append(row, ignore_index=True)
        
    def save_label_path_df(self):
        save_df_file_type(self.label_path_df, self.label_path_df_path, verbose=False)
                
    def retrieve_col_data(self, emb_paths, col_labels, verbose=True):
        # setup return variables
        labels = {}
        for col_label in col_labels:
            labels[col_label] = []
        # search through paths for labels
        pbar = None
        if verbose:
            pbar = tqdm_notebook(total=len(emb_paths))
        for emb_path in emb_paths['path']:
            emb_df = load_embeddings(emb_path, verbose=False)
            for col_label in col_labels:
                if col_label not in emb_df.columns:
                    raise ValueError("Target label " + str(col_label) + " is not in file at " + str(emb_path))
                # collect label values from df
                labs = list(emb_df[col_label])
                # put scalars in numpy arrays to fit keras output format
                for item in labs:
                    if np.isscalar(item):
                        item = np.array(item)
                labels[col_label].extend(labs)
            if verbose:
                pbar.update()
        return labels
    
    def generate_path(self, nested_dir_path, topic_id, col_label):
        filename = str(topic_id) + '_' + str(col_label) + ".pickle"  # e.g. 1_embeddings.pickle
        path = os.path.join(nested_dir_path, filename)
        return path

In [9]:
class NNTrainer:
    def __init__(self, proj_dir=None, nn_base_save_dir_name=None):
        if proj_dir is None:
            self.proj_dir = '/nfs/proj-repo/AAARG-dissertation'
        else:
            self.proj_dir = proj_dir
        self.input_handler = InputLabelHandler(self.proj_dir)
        self.nn_base_save_dir_name = nn_base_save_dir_name
        if self.nn_base_save_dir_name is None:
            self.nn_base_save_dir_name = "summarization_models"
        self.nn_base_save_dir_path = os.path.join(self.proj_dir, self.nn_base_save_dir_name)
        self.nn_path_df_name = "nn_path_df.hdf"
        self.nn_path_df_path = os.path.join(self.nn_base_save_dir_path, self.nn_path_df_name)
        self.nn_path_df_cols = ['corpus_name', 'nested_dir', 'col_label', 'dir_path']
        self.default_test_topics = [1,2,3,4,5,6,8,9,10]
    
    def train(self, corpus_names=None, nested_dirs=None, col_labels=None, tuning_iterations=100,
              train_topics = None, test_topics=None, input_col_name="embedding", force_reload=False, verbose=True):
        """
        1. Generate Data if needed
        2. Determine combinations to try
        3. Load combination
        4. Train network on it
        5. Generate summary on test topics
        5. Save tuned network, metrics, database entries
        
        """
        if test_topics is None:  # determine what topics to test on (rest is for training)
            test_topics = self.default_test_topics
            
        self.nn_path_df = self.load_nn_path_df(verbose=verbose)
        
        # generate data
        self.input_handler.generate(corpus_names=corpus_names, nested_dirs=nested_dirs, col_labels=col_labels,
                                   force_reload=force_reload, verbose=verbose)
        
        # get our dataset identifiers, used to load correct inputs/labels
        corpus_names, nested_dirs, col_labels = resolve_input_params(self.input_handler.path_ret,
                                                                     corpus_names, nested_dirs, col_labels,
                                                                    add_input_col=False)
        
        # load combination, train network
        for corpus_name in corpus_names:
            print("Training for corpus: " + str(corpus_name))
            for nested_dir in tqdm_notebook(nested_dirs):
                # check what col_labels for this nested_dir and corpus_name already trained
                untrained_cols = []
                if not force_reload:
                    trained_cols = self.nn_path_df
                    trained_cols = trained_cols[(trained_cols['corpus_name'] == corpus_name)
                                               & (trained_cols['nested_dir'] == nested_dir)]
                    trained_cols = list(trained_cols['col_label'])
                    # find untrained cols
                    untrained_cols = [x for x in col_labels if x not in trained_cols]
                else:
                    untrained_cols = col_labels  # if force_reload train all

                if len(untrained_cols) > 0:
                    # load inputs
                    if verbose:
                        print("Training neural networks on the following labels: " + str(untrained_cols))
                    topic_ids = self.input_handler.corpus_topic_ids(corpus_name)
                    if train_topics is None:  # then set train_topics to all not in test_topics
                        train_topics = [x for x in topic_ids if x not in test_topics]
                    X_input = self.input_handler.load(corpus_name, nested_dir, input_col_name, 
                                                      topic_ids=train_topics, verbose=verbose)
                    input_dim = len(X_input[0])
                    print("Training on nested_dir/emb_type: " + str(nested_dir))
                    
                    # train/tune a keras network with selected col_label
                    for col_label in tqdm_notebooks(untrained_cols):
                        print("Training on label type: " + str(col_label))
                        """
                        Add check here if not force_reload load pretrained model
                        Maybe some check if already generated summaries?
                        Exists column for partially trained networks? As in load before?

                        NOTE: Currently loading X and y full topic_ids, mistake

                        """
                        # load labels
                        y_label = self.input_handler.load(corpus_name, nested_dir, col_label,
                                                          topic_ids=train_topics, verbose=verbose)

                        save_dir, save_name = self.generate_nn_save_path(corpus_name, nested_dir, col_label,
                                                                        create_dir=True)
                        # generate optimised neural network
                        tuner = NNTuner(save_dir, save_name, tuning_iterations=tuning_iterations, 
                                        input_dim=input_dim, force_reload=force_reload)

                        best_model_path = os.paths.join(save_dir, "best_model")
                        best_model, best_hyperparams = tuner.search(X_input, y_labels, save_path=best_model_path, 
                                     return_hyperparams=True)

                        self.add_path_to_nn_path_df(corpus_name, nested_dir, input_col_name, col_label, save_dir,
                                                   save_name, best_hyperparams, best_model_path, verbose=verbose)
                else:
                    if verbose:
                        print("All target columns trained for " + str(nested_dir) + ", " + str(corpus_name))
    
    def load_nn_path_df(self, verbose=True):
        if os.path.exists(self.nn_path_df_path):
            nn_path_df = read_df_file_type(self.nn_path_df, verbose=verbose)
        else:
            nn_path_df = pd.DataFrame(columns=self.nn_path_df_cols)
            if verbose:
                print("nn_path_df created from scratch")
        return nn_path_df
    
    def add_to_nn_path_df(self, corpus_name, nested_dir, input_col_name, label_col_name, tuner_dir, tuner_name,
                          best_hyperparams, best_model_path, verbose=True):
        row = {"corpus_name":corpus_name, "nested_dir":nested_dir, "input_col_name":input_col_name,
              "label_col_names":label_col_name, "tuner_dir":tuner_dir, "tuner_name":tuner_name,
              "best_hyperparams":best_hyperparams, "best_model_path":best_model_path}
        
        self.nn_path_df = self.nn_path_df.append(row, ignore_index=True)
        save_df_file_type(self.nn_path_df, self.nn_path_df_path, verbose=verbose)
    
    def generate_nn_save_path(self, corpus_name, nested_dir, col_labels, create_dir=True):
        col_dir = "_".join(convert_to_list(col_labels))
        dir_list = [nn_base_save_dir_path, corpus_name, nested_dir, col_dir]
        # combine directories to form path of subdirectories, create dirs if necessary
        dir_path = None
        for cur_dir in dir_list:
            if dir_path is None:  # first iteration
                dir_path = dir_list[0]
            else:
                dir_path = os.paths.join(dir_path, cur_dir)
            if not os.path.exists(dir_path) and create_dir:
                os.makedirs(dir_path)
        # generate name
        save_name = "tuner_proj"
        return dir_path, save_name

In [10]:
# Corpora to train on in this run.
corpus_names = [
    "mine-trects-kba2014-filtered",
]

# Default project directory and model directory.
trainer = NNTrainer()

# Generates any missing input/label files first, then tunes the networks not yet recorded.
trainer.train(
    corpus_names=corpus_names,
    force_reload=False,
    verbose=True,
)

nn_path_df created from scratch
Loading label_path_df
loaded from .hdf file
Retrieving the following: cosine_similarity, cos_sim_nearest_nug, embedding


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Generating from corpus: mine-trects-kba2014-filtered


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

Using inputs from distilbert-base-nli-stsb-mean-tokens


HBox(children=(IntProgress(value=0, max=45), HTML(value='')))

full trained for topic 1: ['cosine_similarity', 'cos_sim_nearest_nug', 'embedding']
full trained for topic 2: ['cosine_similarity', 'cos_sim_nearest_nug', 'embedding']
full trained for topic 3: ['cosine_similarity', 'cos_sim_nearest_nug', 'embedding']
full trained for topic 4: ['cosine_similarity', 'cos_sim_nearest_nug', 'embedding']
full trained for topic 5: ['cosine_similarity', 'cos_sim_nearest_nug', 'embedding']
full trained for topic 6: ['cosine_similarity', 'cos_sim_nearest_nug', 'embedding']
full trained for topic 8: ['cosine_similarity', 'cos_sim_nearest_nug', 'embedding']
full trained for topic 9: ['cosine_similarity', 'cos_sim_nearest_nug', 'embedding']
full trained for topic 10: ['cosine_similarity', 'cos_sim_nearest_nug', 'embedding']
full trained for topic 11: ['cosine_similarity', 'cos_sim_nearest_nug', 'embedding']
full trained for topic 12: ['cosine_similarity', 'cos_sim_nearest_nug', 'embedding']
full trained for topic 13: ['cosine_similarity', 'cos_sim_nearest_nug', '

HBox(children=(IntProgress(value=0, max=66), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/23_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/23_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/23_embedding.pickle


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed-integer,key->block0_values] [items->Index(['corpus_name', 'nested_dir', 'topic_id', 'col_label', 'path'], dtype='object')]

  encoding=encoding,


HBox(children=(IntProgress(value=0, max=55), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/24_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/24_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/24_embedding.pickle


HBox(children=(IntProgress(value=0, max=25), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/25_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/25_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/25_embedding.pickle


HBox(children=(IntProgress(value=0, max=49), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/26_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/26_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/26_embedding.pickle


HBox(children=(IntProgress(value=0, max=6), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/27_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/27_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/27_embedding.pickle


HBox(children=(IntProgress(value=0, max=5), HTML(value='')))


File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/28_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/28_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/28_embedding.pickle


HBox(children=(IntProgress(value=0, max=6), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/29_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/29_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/29_embedding.pickle


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))


File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/30_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/30_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/30_embedding.pickle


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))


File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/31_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/31_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/31_embedding.pickle


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/32_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/32_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/32_embedding.pickle


HBox(children=(IntProgress(value=0, max=24), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/33_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/33_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/33_embedding.pickle


HBox(children=(IntProgress(value=0, max=19), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/34_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/34_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/34_embedding.pickle


HBox(children=(IntProgress(value=0, max=34), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/35_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/35_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/35_embedding.pickle


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))


File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/36_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/36_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/36_embedding.pickle


HBox(children=(IntProgress(value=0, max=5), HTML(value='')))


File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/37_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/37_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/37_embedding.pickle


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))


File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/38_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/38_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/38_embedding.pickle


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/39_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/39_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/39_embedding.pickle


HBox(children=(IntProgress(value=0, max=5), HTML(value='')))


File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/40_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/40_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/40_embedding.pickle


HBox(children=(IntProgress(value=0, max=39), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/41_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/41_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/41_embedding.pickle


HBox(children=(IntProgress(value=0, max=14), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/42_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/42_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/42_embedding.pickle


HBox(children=(IntProgress(value=0, max=28), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/43_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/43_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/43_embedding.pickle


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/44_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/44_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/44_embedding.pickle


HBox(children=(IntProgress(value=0, max=7), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/45_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/45_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/45_embedding.pickle


HBox(children=(IntProgress(value=0, max=6), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/46_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/46_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/distilbert-base-nli-stsb-mean-tokens/46_embedding.pickle

Using inputs from stsb-roberta-base


HBox(children=(IntProgress(value=0, max=45), HTML(value='')))

HBox(children=(IntProgress(value=0, max=6), HTML(value='')))


File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/1_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/1_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/1_embedding.pickle


HBox(children=(IntProgress(value=0, max=8), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/2_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/2_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/2_embedding.pickle


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))


File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/3_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/3_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/3_embedding.pickle


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))


File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/4_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/4_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/4_embedding.pickle


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))


File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/5_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/5_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/5_embedding.pickle


HBox(children=(IntProgress(value=0, max=7), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/6_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/6_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/6_embedding.pickle


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))


File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/8_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/8_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/8_embedding.pickle


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))


File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/9_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/9_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/9_embedding.pickle


HBox(children=(IntProgress(value=0, max=9), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/10_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/10_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/10_embedding.pickle


HBox(children=(IntProgress(value=0, max=9), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/11_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/11_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/11_embedding.pickle


HBox(children=(IntProgress(value=0, max=15), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/12_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/12_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/12_embedding.pickle


HBox(children=(IntProgress(value=0, max=46), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/13_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/13_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/13_embedding.pickle


HBox(children=(IntProgress(value=0, max=39), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/14_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/14_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/14_embedding.pickle


HBox(children=(IntProgress(value=0, max=8), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/15_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/15_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/15_embedding.pickle


HBox(children=(IntProgress(value=0, max=7), HTML(value='')))


File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/16_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/16_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/16_embedding.pickle


HBox(children=(IntProgress(value=0, max=27), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/17_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/17_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/17_embedding.pickle


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/18_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/18_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/18_embedding.pickle


HBox(children=(IntProgress(value=0, max=7), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/19_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/19_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/19_embedding.pickle


HBox(children=(IntProgress(value=0, max=9), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/20_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/20_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/20_embedding.pickle


HBox(children=(IntProgress(value=0, max=43), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/21_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/21_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/21_embedding.pickle


HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/22_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/22_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/22_embedding.pickle


HBox(children=(IntProgress(value=0, max=66), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/23_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/23_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/23_embedding.pickle


HBox(children=(IntProgress(value=0, max=55), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/24_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/24_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/24_embedding.pickle


HBox(children=(IntProgress(value=0, max=25), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/25_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/25_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/25_embedding.pickle


HBox(children=(IntProgress(value=0, max=49), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/26_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/26_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/26_embedding.pickle


HBox(children=(IntProgress(value=0, max=6), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/27_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/27_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/27_embedding.pickle


HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/28_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/28_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/28_embedding.pickle


HBox(children=(IntProgress(value=0, max=6), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/29_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/29_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/29_embedding.pickle


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/30_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/30_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/30_embedding.pickle


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))


File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/31_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/31_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/31_embedding.pickle


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/32_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/32_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/32_embedding.pickle


HBox(children=(IntProgress(value=0, max=24), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/33_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/33_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/33_embedding.pickle


HBox(children=(IntProgress(value=0, max=19), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/34_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/34_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/34_embedding.pickle


HBox(children=(IntProgress(value=0, max=34), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/35_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/35_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/35_embedding.pickle


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/36_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/36_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/36_embedding.pickle


HBox(children=(IntProgress(value=0, max=5), HTML(value='')))


File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/37_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/37_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/37_embedding.pickle


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/38_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/38_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/38_embedding.pickle


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/39_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/39_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/39_embedding.pickle


HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/40_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/40_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/40_embedding.pickle


HBox(children=(IntProgress(value=0, max=39), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/41_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/41_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/41_embedding.pickle


HBox(children=(IntProgress(value=0, max=14), HTML(value='')))

File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/42_cosine_similarity.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/42_cos_sim_nearest_nug.pickle
File saved to: /nfs/proj-repo/AAARG-dissertation/dataset/mine-trects-kba2014-filtered/embeddings/stsb-roberta-base/42_embedding.pickle


HBox(children=(IntProgress(value=0, max=28), HTML(value='')))

KeyboardInterrupt: 

In [None]:
# class NNTrainer:
#     def __init__(self, proj_dir=None):
#         if proj_dir is None:
#             self.proj_dir = '/nfs/proj-repo/AAARG-dissertation'
#         else:
#             self.proj_dir = proj_dir
#         self.paths = PathRetriever(self.proj_dir)
#         self.model_path = self.proj_dir + "/" + "test_nn"
        
#     def train(self, force_reload=False):
#         print("Creating NN")
#         if os.path.exists(self.model_path) and not force_reload:
#             nn = tf.keras.models.load_model(self.model_path)
#             print("loaded from file at " + str(self.model_path))
#         else:
#             tuner = NNTuner()
#             print("Getting X,y")
#             x,y = self.get_x_y()
#             print("Fitting NN")
#             nn = tuner.search(inputs=x,labels=y, save_path=self.model_path)
#             print("Completed fitting")
#         print("Comparing predictions")
#         results = self.compare_predict(nn)
#         print(self.format_results(results))
        
#     def compare_predict(self, nn):
#         knear = KNearest()
#         k_sents = knear.get_k_nearest(k=20)
#         results = []
        
#         embs = []
#         for index, sent in k_sents.iterrows():
#             emb = sent['embedding']
#             embs.append(emb)
#             result = [sent['cosine_similarity']]
#             results.append(result)
#         embs = np.asarray(embs)  # turn into matrix
#         preds = nn.predict(embs)
#         for result, pred in zip(results, preds):
#             result.append(pred)
        
# #         for index, sent in k_sents.iterrows():
# #             emb = sent['embedding']
# # #             emb = np.expand_dims(emb, axis=1)
# #             emb = (emb,)
# # #             print("shape emb: " + str(emb.shape))
# #             results[str(emb)] = []
# #             results[str(emb)].append(sent['cosine_similarity'])  # actual
# #             pred = nn.predict(emb)
# #             results[embedding].append(pred)  # prediction
        
#         return results
    
#     def format_results(self, results):
#         outstr = ""
#         for result in results:
#             outstr += "actual: " + str(result[0]) + "\n"
#             outstr += "pred: " + str(result[1]) + "\n"
#             outstr += "\n"
# #             outstr += str(emb) + "\n"
# #             outstr += "actual: " + str(result[0]) + "\n"
# #             outstr += "pred: " + str(result[1]) + "\n"
# #             outstr += "\n"
#         return outstr
        
#     def get_x_y(self):
#         corpus_name = "original-trects-kba2014-filtered"
#         nested_dir = 'distilbert-base-nli-stsb-mean-tokens'
#         x_y_paths = self.paths.get_embedding_paths(corpus_name, nested_dir)
#         x = []
#         y = []
#         for path in tqdm_notebook(list(x_y_paths['path'])):
#             emb_df = load_embeddings(path, verbose=False)
#             emb_x = list(emb_df['embedding'])
#             emb_y = list(emb_df['cosine_similarity'])
# #             # keras compatibility: wrap each scalar float in its own ndarray
#             emb_y = [np.asarray(num) for num in emb_y]
#             x.extend(emb_x)
#             y.extend(emb_y)
#         x = np.asarray(x)
#         y = np.asarray(y)
#         return x, y

In [None]:
# Build a trainer and run its training entry point.
# NOTE(review): the NNTrainer class definitions visible in the neighbouring cells
# are commented out; unless NNTrainer is defined elsewhere in the notebook this
# cell raises NameError on a fresh kernel -- confirm before running.
# force_reload=True presumably forces a fresh fit instead of loading a saved
# model -- verify against the live NNTrainer.train implementation.
trainer = NNTrainer()
trainer.train(force_reload=True)

In [None]:
# class NeuralNetwork:
#     def __init__(self, nn_config, learning_rate=0.0001, input_dim=768, output_dim=1, loss_func="huber",
#                 epochs=10):
#         # nn structure params
#         self.nn_config = nn_config  # currently just list of layer sizes, can expand to include diff types layers
#         self.learning_rate = learning_rate
#         self.input_dim = input_dim
#         self.output_dim = output_dim
#         self.loss_func = loss_func
        
#         # nn fit execution params
#         self.epochs = epochs
        
#         self.model = self.build_model()
        
#     def build_model(self):
#         model = Sequential()
#         ilayer = InputLayer(input_shape=(self.input_dim,))
#         model.add(ilayer)
#         for num_neurons in self.nn_config:
#             # GRU has better memory performance
#             # use tanh because cosine similarity lies between -1 and 1
#             model.add(Dense(num_neurons, activation='tanh'))  
#         # output layer
# #         model.add(Dense(self.output_dim, activation='tanh'))
#         # build model
#         opt = keras.optimizers.Adam(learning_rate=self.learning_rate)
#         model.compile(loss=self.loss_func, optimizer=opt)
#         return model
    
#     def fit(self, X, y=None, save_path=None):
#         if y is not None:
#             self.model.fit(x=X, y=y, epochs=self.epochs, verbose=1,
#                           use_multiprocessing=True, workers=32)
#         else:
#             self.model.fit(x=X, epochs=self.epochs, verbose=1,
#                           use_multiprocessing=True, workers=32)
#         if save_path is not None:
#             self.model.save(save_path)
    
#     def predict(self, s, a=None):              
#         if a==None:            
#             return self._predict_nn(s)
#         else:                        
#             return self._predict_nn(s)[a]
        
#     def _predict_nn(self,state_hat):                          
#         """
#         Predict the output of the neural network (note: these can be vectors)
#         """                
#         x = self.model.predict(state_hat)                                                    
#         return x

In [None]:
# class NNTrainer:
#     def __init__(self, proj_dir=None):
#         if proj_dir is None:
#             self.proj_dir = '/nfs/proj-repo/AAARG-dissertation'
#         else:
#             self.proj_dir = proj_dir
#         self.paths = PathRetriever(self.proj_dir)
#         self.nn_config = [752, 128]
#         self.model_path = self.proj_dir + "/" + "test_nn"
        
#     def train(self, force_reload=False):
#         print("Creating NN")
#         if os.path.exists(self.model_path) and not force_reload:
#             nn = tf.keras.models.load_model(self.model_path)
#             print("loaded from file at " + str(self.model_path))
#         else:
#             nn = HyperNNs(self.nn_config, epochs=1)
#             print("Getting X,y")
#             x,y = self.get_x_y()
#             print("Fitting NN")
#             nn.fit(x,y=y, save_path=self.model_path)
#             print("Completed fitting")
#         print("Comparing predictions")
#         results = self.compare_predict(nn)
#         print(self.format_results(results))
        
#     def compare_predict(self, nn):
#         knear = KNearest()
#         k_sents = knear.get_k_nearest(k=20)
#         results = []
        
#         embs = []
#         for index, sent in k_sents.iterrows():
#             emb = sent['embedding']
#             embs.append(emb)
#             result = [sent['cosine_similarity']]
#             results.append(result)
#         embs = np.asarray(embs)  # turn into matrix
#         preds = nn.predict(embs)
#         for result, pred in zip(results, preds):
#             result.append(pred)
        
# #         for index, sent in k_sents.iterrows():
# #             emb = sent['embedding']
# # #             emb = np.expand_dims(emb, axis=1)
# #             emb = (emb,)
# # #             print("shape emb: " + str(emb.shape))
# #             results[str(emb)] = []
# #             results[str(emb)].append(sent['cosine_similarity'])  # actual
# #             pred = nn.predict(emb)
# #             results[embedding].append(pred)  # prediction
        
#         return results
    
#     def format_results(self, results):
#         outstr = ""
#         for result in results:
#             outstr += "actual: " + str(result[0]) + "\n"
#             outstr += "pred: " + str(result[1]) + "\n"
#             outstr += "\n"
# #             outstr += str(emb) + "\n"
# #             outstr += "actual: " + str(result[0]) + "\n"
# #             outstr += "pred: " + str(result[1]) + "\n"
# #             outstr += "\n"
#         return outstr
        
#     def get_x_y(self):
#         corpus_name = "original-trects-kba2014-filtered"
#         nested_dir = 'distilbert-base-nli-stsb-mean-tokens'
#         x_y_paths = self.paths.get_embedding_paths(corpus_name, nested_dir)
#         x = []
#         y = []
#         for path in tqdm_notebook(list(x_y_paths['path'])):
#             emb_df = load_embeddings(path, verbose=False)
#             emb_x = list(emb_df['embedding'])
#             emb_y = list(emb_df['cosine_similarity'])
# #             # keras compatibility: wrap each scalar float in its own ndarray
#             emb_y = [np.asarray(num) for num in emb_y]
#             x.extend(emb_x)
#             y.extend(emb_y)
#         x = np.asarray(x)
#         y = np.asarray(y)
#         return x, y

In [None]:
# Build a trainer and run its training entry point.
# NOTE(review): the NNTrainer class definitions visible in the neighbouring cells
# are commented out; unless NNTrainer is defined elsewhere in the notebook this
# cell raises NameError on a fresh kernel -- confirm before running.
# force_reload=True presumably forces a fresh fit instead of loading a saved
# model -- verify against the live NNTrainer.train implementation.
trainer = NNTrainer()
trainer.train(force_reload=True)

## Simple K-Nearest

In [None]:
class KNearest:
    """Select the rows with the highest stored ``cosine_similarity``.

    Embedding files are located through ``PathRetriever`` and read with
    ``load_embeddings``; their contents are concatenated into a single frame
    and the ``k`` rows with the largest ``cosine_similarity`` are returned.
    """

    # Defaults used when the caller does not supply alternatives.
    DEFAULT_PROJ_DIR = '/nfs/proj-repo/AAARG-dissertation'
    CORPUS_NAME = "original-trects-kba2014-filtered"
    NESTED_DIR = 'distilbert-base-nli-stsb-mean-tokens'

    def __init__(self, proj_dir=None):
        """Create the path retriever for the project.

        Args:
            proj_dir: Root directory of the project. When ``None`` the
                class default ``DEFAULT_PROJ_DIR`` is used.
        """
        # The attribute must be set on both branches: previously an explicit
        # proj_dir was never stored, so the next line raised AttributeError.
        self.proj_dir = self.DEFAULT_PROJ_DIR if proj_dir is None else proj_dir
        self.paths = PathRetriever(self.proj_dir)

    def get_k_nearest(self, k=10, topic_ids=None):
        """Return the ``k`` rows with the largest ``cosine_similarity``.

        Args:
            k: Number of rows to keep.
            topic_ids: Forwarded to ``get_emb_paths``; ``None`` (the default)
                reproduces the previous hard-coded behaviour.

        Returns:
            The result of ``DataFrame.nlargest`` over all loaded embedding
            frames concatenated with a fresh index.

        Raises:
            ValueError: If no embedding paths were found (``pd.concat`` would
                otherwise raise the same exception type with a vaguer message).
        """
        emb_paths = self.get_emb_paths(topic_ids=topic_ids)

        # Read every file exactly once; the earlier version loaded each file
        # twice and threw the first copy away.
        frames = [
            load_embeddings(path, verbose=False)
            for path in tqdm_notebook(list(emb_paths['path']))
        ]
        if not frames:
            raise ValueError("No embedding files found to search")

        emb_df = pd.concat(frames, ignore_index=True)
        return emb_df.nlargest(k, columns=['cosine_similarity'])

    def get_emb_paths(self, topic_ids=None):
        """Look up the embedding paths for the configured corpus.

        Args:
            topic_ids: Passed through unchanged to
                ``PathRetriever.get_embedding_paths``.

        Returns:
            Whatever ``get_embedding_paths`` returns; ``get_k_nearest`` reads
            its ``'path'`` entry.
        """
        return self.paths.get_embedding_paths(
            self.CORPUS_NAME, self.NESTED_DIR, topic_ids=topic_ids
        )

In [None]:
# Use the default project directory and display the ten highest-scoring rows.
knear = KNearest()
knear.get_k_nearest(k=10)

In [None]:
# Scratch check: turn a 1-D array into a batch containing one row.
arr = np.array([1, 5, 6, 2, 2])
# NumPy has no `expand_dimensions`; the function is `np.expand_dims` and it
# needs an explicit axis. axis=0 prepends the new dimension -> shape (1, 5).
arr = np.expand_dims(arr, axis=0)
print(arr.shape)