Install dependencies

!pip3 install -r requirements.txt -U
!python3 -m spacy download en_core_web_md

Suppress warnings

In [1]:
import warnings
warnings.filterwarnings('ignore')

Import core lib

In [2]:
import sys
sys.path.append('./core')

Import dependencies

In [3]:
from models.predicate_extractor import PredicateExtractor

[nltk_data] Downloading package stopwords to /Users/toor/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [4]:
import numpy as np
import pandas as pd
import pickle
import re
import os
from collections import Counter
import matplotlib
import matplotlib.pyplot as plt
#import random

In [5]:
import ray
import ray.tune as tune
from ray.tune.schedulers import AsyncHyperBandScheduler, HyperBandScheduler
import multiprocessing

In [6]:
import tensorflow as tf
import tf_metrics
from tensorflow.core.util import event_pb2
from tensorflow.python.lib.io import tf_record
from sklearn.model_selection import KFold
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
import sklearn.preprocessing as preprocessing
from imblearn import over_sampling, under_sampling, combine

Using TensorFlow backend.


Suppress tensorflow warnings

In [7]:
# Raise the TensorFlow logger threshold so only messages at ERROR level or above are emitted.
tf.get_logger().setLevel('ERROR')

Define constants

In [8]:
# When True, the test split is listified and appended to the training datalist
# before cross-validation.
USE_TEST_SET = False
# When True, encode_dataset adds a 'main_predicate' feature next to 'anchorsent'.
USE_PATTERN_EMBEDDING = True

# Label assigned to rows that must be discarded (missing, bad or excluded labels).
ZERO_CLASS = 'none'
# Citation functions that are re-labelled as ZERO_CLASS and therefore dropped.
LABELS_TO_EXCLUDE = [
	#'cites',
	'cites_as_review',
	#'extends', 
	#'uses_data_from', 
	#'uses_method_in',
]

# Passed as num_epochs to the training input function.
TRAIN_EPOCHS = None
# Passed as max_steps to the TrainSpec.
MAX_STEPS = 10**4
EVALUATION_PER_TRAINING = 30
# Step counts must be integers: use floor division instead of true division,
# which produced the float 333.33...
EVALUATION_STEPS = MAX_STEPS // EVALUATION_PER_TRAINING
# Prefix of the per-fold model directories.
MODEL_DIR = './model'
# Embedding model identifier; also used as prefix of the embedding cache files.
TF_MODEL = 'USE_MLQA'
MODEL_OPTIONS = {'tf_model':TF_MODEL, 'use_lemma':False}

Define function for converting input datasets from csv to pandas dataframe

In [9]:
def get_dataframe(dataset_file):
	"""Load a tab-separated citation dataset and split it into features and targets.

	Rows are discarded when their 'citfunc' label is missing, equal to 'ERROR',
	listed in LABELS_TO_EXCLUDE, or when their 'anchorsent' is missing/empty.
	The targets are extracted only AFTER all row filtering, so that features
	and targets always stay aligned.

	Args:
		dataset_file: path of a TSV file with (at least) the columns
			'citfunc', 'anchorsent' and 'sectype'.

	Returns:
		A dict {'x': {'anchorsent': [...], 'sectype': [...]}, 'y': [...]} where
		every list has one entry per retained row.
	"""
	# Load dataset
	df = pd.read_csv(dataset_file, sep='\t')

	# Normalise the target labels; missing labels become the zero class
	df['citfunc'] = df['citfunc'].fillna(ZERO_CLASS).map(lambda label: label.strip())
	# Excluded labels and bad rows are mapped to the zero class too ...
	labels_to_drop = list(LABELS_TO_EXCLUDE) + ['ERROR']
	df.loc[df['citfunc'].isin(labels_to_drop), 'citfunc'] = ZERO_CLASS
	# ... and every zero-class row is removed
	df = df[df['citfunc'] != ZERO_CLASS]

	# Keep the target next to the features until row filtering is complete.
	# The explicit copy avoids writing into a view of the loaded frame.
	df = df[['anchorsent','sectype','citfunc']].copy()

	# Remove null values
	df['anchorsent'] = df['anchorsent'].fillna('')
	df['sectype'] = df['sectype'].fillna('none')
	df = df[df['anchorsent'] != '']

	# Extract target list (now guaranteed to match the remaining rows)
	target_list = df.pop('citfunc').values.tolist()

	# Clean the anchor sentences: drop [[...]] markers, replace non-ASCII runs
	# with a space, strip one pair of enclosing single quotes
	df['anchorsent'] = df['anchorsent'].map(lambda x: re.sub(r'\[\[.*\]\]','',x))
	df['anchorsent'] = df['anchorsent'].map(lambda x: re.sub(r'[^\x00-\x7F]+',' ',x))
	df['anchorsent'] = df['anchorsent'].map(lambda x: re.sub(r"^'(.*)'$",r'\1',x))

	# Print dataframe
	print('Dataframe')
	print(df)

	# Return dataset
	feature_list = df.columns.values.tolist()
	x_dict = {feature: df[feature].tolist() for feature in feature_list}
	return {'x':x_dict, 'y':target_list}

Define function for casting dataset to numpy arrays

In [10]:
def numpyfy_dataset(set):
	"""Convert, in place, every feature and the targets of a data-set to numpy arrays.

	Args:
		set: dict with a feature dict under 'x' and the targets under 'y'.
	"""
	converted_features = {}
	for feature_name, feature_values in set['x'].items():
		converted_features[feature_name] = np.array(feature_values)
	set['x'] = converted_features
	set['y'] = np.array(set['y'])

Define function for encoding a dataset, from string to numerical representations

In [11]:
def _load_or_compute_cache(cache_file, compute_fn):
	"""Return the object pickled in cache_file; if the file is missing, compute it, cache it and return it."""
	if os.path.isfile(cache_file):
		# NOTE: unpickling can execute arbitrary code; only load cache files
		# that were written by this notebook.
		with open(cache_file, 'rb') as f:
			return pickle.load(f)
	result = compute_fn()
	with open(cache_file, 'wb') as f:
		pickle.dump(result, f)
	return result

def encode_dataset(dataset):
	"""Encode, in place, every split of the dataset from strings to numbers.

	For each split: 'anchorsent' is replaced by its sentence embeddings, a
	'main_predicate' embedding feature is added when USE_PATTERN_EMBEDDING is
	set, 'sectype' is one-hot encoded (first category dropped) and the targets
	are label-encoded. Embeddings are cached on disk in files named after
	TF_MODEL and the split key. Finally every split is cast to numpy arrays.

	Args:
		dataset: dict mapping a split name (e.g. 'train', 'test') to a
			{'x': feature dict, 'y': target list} data-set.

	Returns:
		The number of distinct target classes found across all splits.
	"""
	# Build the extractor lazily and only once: it is needed only on a cache
	# miss, and the original code re-created it for every miss.
	# NOTE(review): assumes PredicateExtractor.embed/get_pattern_list do not
	# depend on per-instance state from earlier calls -- confirm.
	extractor_holder = []
	def get_extractor():
		if not extractor_holder:
			extractor_holder.append(PredicateExtractor(MODEL_OPTIONS))
		return extractor_holder[0]

	def embed_main_predicates(sentences):
		# For every sentence embed the predicate of its first pattern ('' if none)
		extractor = get_extractor()
		predicate_list = [
			next((pattern['predicate'] for pattern in extractor.get_pattern_list(text)), '')
			for text in sentences
		]
		return extractor.embed(predicate_list)

	# Embed anchor sentences into vectors
	for key, value in dataset.items():
		features = value['x']
		# Keep the raw sentences: they are needed for the predicate extraction
		raw_sentences = list(features['anchorsent'])
		# Embed anchor sentences
		features['anchorsent'] = _load_or_compute_cache(
			f'{TF_MODEL}.{key}.anchorsent.embedding_cache.pkl',
			lambda: get_extractor().embed(raw_sentences),
		)
		# Embed extra info
		if USE_PATTERN_EMBEDDING:
			features['main_predicate'] = _load_or_compute_cache(
				f'{TF_MODEL}.{key}.extra.embedding_cache.pkl',
				lambda: embed_main_predicates(raw_sentences),
			)

	# Encode labels (the encoder is fitted on all the splits together)
	label_encoder_target = LabelEncoder()
	label_encoder_target.fit([label for split in dataset.values() for label in split['y']])
	print('Label classes:', list(label_encoder_target.classes_))
	for split in dataset.values():
		split['y'] = label_encoder_target.transform(split['y'])

	# Encode sectypes
	all_sectypes = [sectype for split in dataset.values() for sectype in split['x']['sectype']]
	label_encoder_sectype = LabelEncoder()
	all_sectypes = label_encoder_sectype.fit_transform(all_sectypes)
	onehot_encoder_sectype = OneHotEncoder()
	onehot_encoder_sectype.fit(all_sectypes.reshape(-1, 1))
	print('Sectype classes:', list(label_encoder_sectype.classes_))
	for split in dataset.values():
		labeled_sectypes = label_encoder_sectype.transform(split['x']['sectype'])
		# The first one-hot column is dropped
		split['x']['sectype'] = onehot_encoder_sectype.transform(labeled_sectypes.reshape(-1, 1)).toarray()[:,1:]

	# Input features to numpy array
	for split in dataset.values():
		numpyfy_dataset(split)
	# Return number of target classes
	return len(label_encoder_target.classes_)

Define function for resampling the dataset

In [12]:
def resample_dataset(set, resampling_fn=None):
	"""Re-sample, in place, a data-set with an imbalanced-learn style sampler.

	Every sample is flattened into a single row (vector features are spread
	over several columns), the rows are re-sampled together with the targets,
	and finally the rows are split back into the original features.

	Args:
		set: dict with a feature dict under 'x' and the targets under 'y'.
		resampling_fn: zero-argument callable returning a sampler exposing
			fit_resample (or the legacy fit_sample). If None, nothing is done.
	"""
	if resampling_fn is None:
		return
	print('Dataset size before re-sampling:', len(set['y']))

	# Build combined features, remembering for every feature how many columns
	# it occupies and whether it was a vector (a vector of length 1 must stay a vector)
	feature_layout = {}
	flat_rows = []
	for sample in zip(*set['x'].values()):
		flat_row = []
		for position, data in enumerate(sample):
			if isinstance(data, (np.ndarray, list, tuple)):
				values = list(data)
				flat_row.extend(values)
				feature_layout[position] = (len(values), True)
			else:
				flat_row.append(data)
				feature_layout[position] = (1, False)
		flat_rows.append(flat_row)

	# Re-sample data
	# (np.object has been removed from NumPy: the builtin `object` is equivalent)
	flat_rows = np.array(flat_rows, dtype=object)
	sampler = resampling_fn()
	# fit_sample was renamed to fit_resample in imbalanced-learn; support both
	fit_resample = getattr(sampler, 'fit_resample', None) or sampler.fit_sample
	flat_rows, set['y'] = fit_resample(flat_rows, set['y'])

	# Separate features
	rebuilt_rows = []
	for flat_row in flat_rows:
		rebuilt_row = []
		start = 0
		for size, is_vector in feature_layout.values():
			chunk = flat_row[start:start+size]
			rebuilt_row.append(np.array(chunk, dtype=np.float32) if is_vector else chunk[0])
			start += size
		rebuilt_rows.append(rebuilt_row)

	for feature, values in zip(set['x'].keys(), zip(*rebuilt_rows)):
		set['x'][feature] = values
	print('Dataset size after re-sampling:', len(set['y']))
	numpyfy_dataset(set)

Define function for getting the dataframe feature shapes

In [13]:
def get_dataframe_feature_shape(df, feature):
	"""Build the numeric feature column of a feature, shaped like its first element.

	Args:
		df: mapping from feature name to an indexable collection of values.
		feature: name of the feature.

	Returns:
		A tf.feature_column numeric column whose shape is the shape of the
		first value when that value is a numpy array, otherwise a scalar shape.
	"""
	sample = df[feature][0]
	if type(sample) is np.ndarray:
		column_shape = sample.shape
	else:
		column_shape = ()
	return tf.feature_column.numeric_column(feature, shape=column_shape)

Define function to convert a data-set into a data-list

In [14]:
def listify_dataset(dataset):
	"""Convert a data-set into a list with one (features, target) pair per sample.

	Args:
		dataset: dict with a feature dict under 'x' and the targets under 'y'.

	Returns:
		A list of (features, target) tuples, where features is a tuple of
		(feature name, value) pairs.
	"""
	feature_names = dataset['x'].keys()
	samples = []
	for values, target in zip(zip(*dataset['x'].values()), dataset['y']):
		named_values = tuple(zip(feature_names, values))
		samples.append((named_values, target))
	return samples

Define function to convert a data-list back into a data-set

In [15]:
def dictify_datalist(datalist):
	"""Convert a list of (features, target) pairs back into a data-set.

	Args:
		datalist: list of (features, target) tuples, where features is a tuple
			of (feature name, value) pairs.

	Returns:
		A dict {'x': {feature name: numpy array}, 'y': numpy array}.
	"""
	feature_rows, targets = zip(*datalist)
	features = {}
	# Transposing the rows yields one column of (name, value) pairs per feature
	for column in zip(*feature_rows):
		names, values = zip(*column)
		features[names[0]] = np.array(values)
	return {'x': features, 'y': np.array(targets)}

Define the DNN classifier model

In [16]:
def build_model_fn(feature_columns, n_classes, config):
	"""Build the model_fn of a single-hidden-layer classifier for tf.estimator.Estimator.

	Args:
		feature_columns: feature columns used to build the input layer.
		n_classes: number of target classes (size of the output layer).
		config: dict providing 'UNITS', 'ACTIVATION_FUNCTION', 'LEARNING_RATE'
			and 'REGULARIZATION_STRENGTH'.

	Returns:
		A model_fn(features, labels, mode) returning a tf.estimator.EstimatorSpec.
	"""
	def model_fn(
		features, # This is batch_features from input_fn
		labels,   # This is batch_labels from input_fn
		mode):	# And instance of tf.estimator.ModeKeys, see below

		if mode == tf.estimator.ModeKeys.PREDICT:
			tf.logging.info("my_model_fn: PREDICT, {}".format(mode))
		elif mode == tf.estimator.ModeKeys.EVAL:
			tf.logging.info("my_model_fn: EVAL, {}".format(mode))
		elif mode == tf.estimator.ModeKeys.TRAIN:
			tf.logging.info("my_model_fn: TRAIN, {}".format(mode))

		# Create the layer of input
		input_layer = tf.feature_column.input_layer(features, feature_columns)

		# Hidden layer
		hidden_layer = tf.layers.Dense(config['UNITS'],
			activation=config['ACTIVATION_FUNCTION'],
		)(input_layer)

		# The dropout layer is active only when `training` is True and the
		# argument defaults to False: without passing it explicitly the layer
		# was a no-op in every mode, training included.
		hidden_layer = tf.layers.Dropout()(hidden_layer, training=(mode == tf.estimator.ModeKeys.TRAIN))

		logits = tf.layers.Dense(n_classes)(hidden_layer)

		# class_ids will be the model prediction for the class
		# The output node with the highest value is our prediction
		def sample(logits, random=True):
			if random:
				# Perturb the logits with noise derived from uniform samples before the argmax
				u = tf.random_uniform(tf.shape(logits), dtype=logits.dtype)
				logits -= tf.log(-tf.log(u))
			return tf.argmax(logits, axis=1)

		predictions = { 'class_ids': sample(logits, random=False) }

		# 1. Prediction mode
		# Return our prediction
		if mode == tf.estimator.ModeKeys.PREDICT:
			return tf.estimator.EstimatorSpec(mode, predictions=predictions)

		# Evaluation and Training mode

		# Calculate the loss
		loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
		loss += tf.losses.get_regularization_loss()

		# Calculate precision, recall and F1 between the true labels and our
		# predictions, for every averaging strategy
		y_true=labels
		y_pred=predictions['class_ids']
		average_type_list = ['micro','macro','weighted']
		metrics = {}
		for average in average_type_list:
			metrics[f'precision_{average}'] = tf_metrics.precision(y_true, y_pred, n_classes, average=average)
			metrics[f'recall_{average}'] = tf_metrics.recall(y_true, y_pred, n_classes, average=average)
			metrics[f'f1_{average}'] = tf_metrics.f1(y_true, y_pred, n_classes, average=average)

		# 2. Evaluation mode
		# Return our loss (which is used to evaluate our model) and the metrics
		# Obs: eval_metric_ops only sets values during mode == ModeKeys.EVAL
		# To set values during training, see tf.summary.scalar
		if mode == tf.estimator.ModeKeys.EVAL:
			return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)

		# If mode is not PREDICT nor EVAL, then we must be in TRAIN
		assert mode == tf.estimator.ModeKeys.TRAIN, "TRAIN is only ModeKey left"

		# 3. Training mode

		# Our objective (train_op) is to minimize loss
		# Provide global step counter (used to count gradient updates)
		optimizer = tf.train.ProximalAdagradOptimizer(learning_rate=config['LEARNING_RATE'], l2_regularization_strength=config['REGULARIZATION_STRENGTH'])
		train_op = optimizer.minimize(loss, global_step=tf.train.get_or_create_global_step())

		# For Tensorboard: metric[1] is the second element of each metric pair
		for metric_name, metric in metrics.items():
			tf.summary.scalar(metric_name, metric[1])

		# Return training operations: loss and train_op
		return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
	return model_fn

Define function for extracting summaries (statistics) from tensorboard events

In [17]:
def get_document_list(directory):
	"""Recursively collect the paths of all the files found under a directory.

	Args:
		directory: path of the directory to explore.

	Returns:
		A list of file paths (each prefixed by `directory`), in os.listdir
		order, with the content of a sub-directory inserted at its position.
	"""
	collected = []
	for entry in os.listdir(directory):
		entry_path = os.path.join(directory, entry)
		if os.path.isdir(entry_path):
			collected += get_document_list(entry_path)
		elif os.path.isfile(entry_path):
			collected.append(entry_path)
	return collected

In [18]:
def get_summary_results(summary_dir):
	"""Extract the scalar summaries stored in the event files under a directory.

	Args:
		summary_dir: directory (explored recursively) containing the event files.

	Returns:
		A list with one {'event_name': path, 'results': {tag: [(step, value), ...]}}
		dict per file whose name starts with 'events.', sorted by path so that
		the order is deterministic and does not depend on os.listdir.
	"""
	def my_summary_iterator(path):
		# Decode every serialized record of the file into an Event message
		for r in tf_record.tf_record_iterator(path):
			yield event_pb2.Event.FromString(r)

	result_list = []
	# Sort the paths: callers select results by position (e.g. the last one),
	# while the order returned by the file system is arbitrary
	document_list = sorted(get_document_list(summary_dir))
	for filename in document_list:
		print(filename)
		if not os.path.basename(filename).startswith('events.'):
			continue
		value_dict = {}
		for event in my_summary_iterator(filename):
			for value in event.summary.value:
				value_dict.setdefault(value.tag, []).append((event.step, value.simple_value))
		result_list.append({'event_name':filename, 'results':value_dict})
	return result_list

Define function for training and evaluating

In [19]:
def train_and_evaluate(config, trainset, testset, num_epochs, batch_size, max_steps, model_dir, feature_columns, n_classes):
	"""Train a custom estimator on trainset while periodically evaluating it on testset.

	Args:
		config: hyper-parameters dict forwarded to build_model_fn.
		trainset: data-set ({'x': dict of numpy arrays, 'y': numpy array}) used for training.
		testset: data-set used for evaluation.
		num_epochs: number of epochs of the training input function.
		batch_size: batch size of both input functions.
		max_steps: maximum number of training steps.
		model_dir: directory where checkpoints and summaries are written.
		feature_columns: feature columns forwarded to build_model_fn.
		n_classes: number of target classes.
	"""
	# Step counts have to be integers, whatever the type of the constant
	evaluation_steps = int(EVALUATION_STEPS)

	# Create a custom estimator using model_fn to define the model
	tf.logging.info("Before classifier construction")
	run_config = tf.estimator.RunConfig(
		model_dir=model_dir,
		# A checkpoint is saved every evaluation_steps training steps
		save_checkpoints_steps=evaluation_steps,
	)
	estimator = tf.estimator.Estimator(
		model_fn=build_model_fn(feature_columns, n_classes, config),
		config=run_config,
	)
	tf.logging.info("...done constructing classifier")

	# Build train input callback
	train_input_fn = tf.estimator.inputs.numpy_input_fn(
		x=trainset['x'],
		y=trainset['y'],
		num_epochs=num_epochs,
		batch_size=batch_size,
		shuffle=True
	)
	# Build test input callback: a single, ordered pass over the test set
	test_input_fn = tf.estimator.inputs.numpy_input_fn(
		x=testset['x'],
		y=testset['y'],
		num_epochs=1,
		batch_size=batch_size,
		shuffle=False
	)

	train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=max_steps)
	eval_spec = tf.estimator.EvalSpec(input_fn=test_input_fn, steps=evaluation_steps, start_delay_secs=0, throttle_secs=0)

	tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

Define function for plotting summary results

In [20]:
def plot_summary_results(summary_results):
	"""Plot every statistic of a summary against the training step, one subplot per statistic.

	Args:
		summary_results: dict mapping a statistic name to a list of
			(step, value) pairs. Nothing is plotted when it is empty.
	"""
	plt_height = len(summary_results)
	if plt_height == 0:
		# plt.subplots cannot build a grid with zero rows
		return
	plt.clf()
	# squeeze=False always returns a 2D array of axes: by default a single
	# subplot is returned as a bare Axes object, which cannot be indexed
	_, axes = plt.subplots(nrows=plt_height, sharex=True, figsize=(14,15*plt_height), squeeze=False)
	for ax, (stat, value_list) in zip(axes[:,0], summary_results.items()):
		step_list,value_list=zip(*value_list)
		ax.plot(step_list, value_list)
		ax.set(xlabel='step', ylabel=stat)
		ax.grid()
	plt.show()

Define function for cross-validating the model

In [21]:
def build_cross_validate_model(datalist):
	"""Build the trainable function that cross-validates the model on datalist.

	Args:
		datalist: list of (features, target) samples, as built by listify_dataset.

	Returns:
		A cross_validate_model(config, reporter) function that runs a k-fold
		cross-validation and, after every fold, reports mean and standard
		deviation of the best f1/precision/recall scores seen so far.
	"""
	def get_best_stat_dict(summary_results_list):
		# For every f1/precision/recall statistic collect, fold by fold, the
		# mean of its 3 highest values, then aggregate over the folds
		top_scores = {}
		for summary_results in summary_results_list:
			for stat, step_value_pairs in summary_results.items():
				_, values = zip(*step_value_pairs)
				if re.search(r'(f1|precision|recall)', stat) is None:
					continue
				top_three = sorted(values, reverse=True)[:3]
				top_scores.setdefault(stat, []).append(np.mean(top_three))
		return {
			stat: {'mean':np.mean(scores), 'std':np.std(scores)}
			for stat, scores in top_scores.items()
		}

	def cross_validate_model(config, reporter):
		# Perform k-fold cross-validation
		summary_results_list = []
		splitter = KFold(n_splits=config["N_SPLITS"], shuffle=True, random_state=1)
		for e, (train_index, test_index) in enumerate(splitter.split(datalist)):
			print(f'-------- Fold {e} --------')
			print(f'Train-set {e} indexes {train_index}')
			print(f'Test-set {e} indexes {test_index}')
			# Build the training set and re-sample it (after sentences embedding)
			trainset = dictify_datalist([datalist[u] for u in train_index])
			resample_dataset(trainset, resampling_fn=config["RESAMPLING_FN"])
			print(f'Train-set {e} distribution', Counter(trainset['y']))
			# Build the test set (never re-sampled)
			testset = dictify_datalist([datalist[u] for u in test_index])
			print(f'Test-set {e} distribution', Counter(testset['y']))

			# NOTE(review): the directory depends only on the fold index, not on
			# the configuration -- confirm that concurrent trials cannot end up
			# sharing the same checkpoints.
			model_dir = f'{MODEL_DIR}{e}'
			train_and_evaluate(
				config=config,
				trainset=trainset,
				testset=testset,
				num_epochs=TRAIN_EPOCHS,
				batch_size=config["BATCH_SIZE"],
				max_steps=MAX_STEPS,
				model_dir=model_dir,
				feature_columns=feature_columns,
				n_classes=n_classes
			)
			# Keep the results of the last event file of the evaluation
			summary_results = get_summary_results(f'./{model_dir}/eval')[-1]['results']
			summary_results_list.append(summary_results)
			print(f'Test-set {e} results:', summary_results)
			best_stat_dict = get_best_stat_dict(summary_results_list)
			# Report <metric>_<average>_mean and <metric>_<average>_std for every combination
			report = {}
			for metric in ('f1', 'precision', 'recall'):
				for average in ('macro', 'micro', 'weighted'):
					stat = best_stat_dict[f'{metric}_{average}']
					report[f'{metric}_{average}_mean'] = stat['mean']
					report[f'{metric}_{average}_std'] = stat['std']
			reporter(timesteps_total=e, **report)
			print(f'Average best statistics at fold {e}: {best_stat_dict}')
	return cross_validate_model

Load dataset 1

In [22]:
# Load the first dataset as a {'x': features, 'y': targets} data-set.
trainset = get_dataframe('training_all.csv')

Dataframe
                                             anchorsent       sectype
0     In summary, the open nature of the Internet as...  introduction
1     Finally, a common data transformation method u...          none
3     Hucaljuk and Rakipovic [15] included an expert...       results
4     The first step in the modelling process is to ...       results
5     Hucaljuk and Rakipovic [15] used a separate ex...       results
...                                                 ...           ...
1552  To support feature computation and combination...          none
1553  To query the lexical databases we use the open...          data
1554  To compute the score, we use a series of featu...          data
1555  Using the strategy described in Section 3.3, w...          none
1556  It is computed by comparing all patents in the...          none

[1376 rows x 2 columns]


Load dataset 2

In [23]:
# Load the second dataset as a {'x': features, 'y': targets} data-set.
testset = get_dataframe('test_groundtruth_all.csv')

Dataframe
                                            anchorsent       sectype
0    This is common in software design when the UI ...  introduction
1    The most related items intersect first, and th...          none
2    In this study, participants performed a hierar...          none
3    Card sorting software, xSort (Arroz, 2008) was...          none
4    The concept of using PA to analyze SD evaluati...  introduction
..                                                 ...           ...
295  Data words are commonly studied in XML literat...          none
296  It was shown in [7] that the language L={(ad1)...          none
297  Originally they were defined on words over inf...          none
298  What all of these languages (with the sole exc...  introduction
299  Note that besides the operators in Definition ...          none

[289 rows x 2 columns]


Encode dataset

In [None]:
# Encode both splits in place (trainset and testset are modified) and keep
# the number of target classes returned by the encoder.
n_classes = encode_dataset({'train':trainset, 'test':testset})

Label classes: ['cites', 'extends', 'uses_data_from', 'uses_method_in']
Sectype classes: ['acknowledgements', 'background', 'conclusion', 'data', 'discussion', 'introduction', 'materials', 'methods', 'model', 'motivation', 'none', 'related work', 'results', 'scenario']


Get feature columns

In [None]:
# One numeric feature column per training feature, shaped like the first training sample.
feature_columns = [get_dataframe_feature_shape(trainset['x'],feature) for feature in trainset['x'].keys()]
print(feature_columns)

[NumericColumn(key='anchorsent', shape=(512,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='sectype', shape=(13,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='main_predicate', shape=(512,), default_value=None, dtype=tf.float32, normalizer_fn=None)]


Merge dataset 1 and 2 (only when USE_TEST_SET is enabled), because they have different distributions and thus we have to build new train and test sets. Before merging, we convert the datasets into datalists, so that we can easily shuffle them.

In [None]:
# The datalist is the training list, extended with the test list when USE_TEST_SET is enabled.
trainlist = listify_dataset(trainset)
datalist = trainlist
if USE_TEST_SET:
	testlist = listify_dataset(testset)
	datalist = trainlist + testlist

Initialize ray

In [None]:
# Start ray with one CPU slot per available core.
# ignore_reinit_error allows re-running this cell without restarting the
# kernel: otherwise a second call fails because ray is already initialized.
ray.init(num_cpus=multiprocessing.cpu_count(), ignore_reinit_error=True)

2019-11-17 16:44:14,448	INFO resource_spec.py:205 -- Starting Ray with 1.9 GiB memory available for workers and up to 0.95 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).


{'node_ip_address': '172.20.10.13',
 'redis_address': '172.20.10.13:41115',
 'object_store_address': '/tmp/ray/session_2019-11-17_16-44-14_442941_4310/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2019-11-17_16-44-14_442941_4310/sockets/raylet',
 'webui_url': None,
 'session_dir': '/tmp/ray/session_2019-11-17_16-44-14_442941_4310'}

N.B. Do not use code imported with sys.path.append inside ray distributed code: https://stackoverflow.com/questions/54338013/parallel-import-a-python-file-from-sibling-folder

Perform automatic hyper-parameters tuning

In [None]:
experiment_name = 'hp_tuning'
local_dir = os.path.join('.','ray_results')
# Directory of the experiment: if it already exists the previous run is resumed
experiment_dir = os.path.join(local_dir,experiment_name)

# Hyper-parameters grid: every combination of the listed values is one trial.
# Commented-out values are alternatives that are currently disabled.
search_space = {
    "N_SPLITS": tune.grid_search([
        #3,
        #4,
        5,
    ]),
    "RESAMPLING_FN": tune.grid_search([
        None,
        #combine.SMOTEENN,
        combine.SMOTETomek,
        over_sampling.RandomOverSampler,
        over_sampling.SMOTE,
        over_sampling.ADASYN,
        #under_sampling.RandomUnderSampler,
        #under_sampling.EditedNearestNeighbours,
        under_sampling.TomekLinks,
    ]),
    "BATCH_SIZE": tune.grid_search([
        #2,
        3,
        4,
    ]),
    'UNITS': tune.grid_search([
        4,
        6,
        8,
        10,
        12,
    ]),
    'ACTIVATION_FUNCTION': tune.grid_search([
        #None,
        #tf.nn.relu,
        tf.nn.leaky_relu,
        #tf.nn.selu,
        #tf.nn.tanh,
    ]),
    #'LEARNING_RATE': tune.sample_from(lambda spec: 0.1*3*random.random()),
    'LEARNING_RATE': tune.grid_search([
        #0.3,
        0.1,
        #0.03,
        #0.01,
    ]),
    'REGULARIZATION_STRENGTH': tune.grid_search([
        #0.01,
        0.003,
        0.001,
        #0.0003,
        #0.0001,
    ]),
}

# The scheduler is configured to maximise the reported f1_macro_mean
scheduler = AsyncHyperBandScheduler(
    metric='f1_macro_mean',
    mode='max',
)

analysis = tune.run( # https://ray.readthedocs.io/en/latest/tune-package-ref.html#ray.tune.run
    build_cross_validate_model(datalist),
    num_samples=1, # Number of times to sample from the hyperparameter space. Defaults to 1. If grid_search is provided as an argument, the grid will be repeated num_samples of times.
    name=experiment_name,
    local_dir=local_dir,
    resume=os.path.isdir(experiment_dir),
    #global_checkpoint_period=15*60,
    #keep_checkpoints_num=3,
    config=search_space,
    scheduler=scheduler,
)

2019-11-17 16:44:23,511	INFO trial_runner.py:170 -- Resuming trial.
2019-11-17 16:44:23,512	INFO trial_runner.py:241 -- TrialRunner resumed, ignoring new add_experiment.


== Status ==
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 64.000: None | Iter 16.000: None | Iter 4.000: None | Iter 1.000: None
Resources requested: 0/4 CPUs, 0/0 GPUs, 0.0/1.9 GiB heap, 0.0/0.63 GiB objects
Memory usage on this node: 5.1/8.0 GiB
Result logdir: /Users/toor/Documents/University/PhD/Project/SCAR/software/experiment/DL for SCAR/code/ray_results/hp_tuning
Number of trials: 120 ({'PENDING': 120})
PENDING trials:
 - cross_validate_model_0_ACTIVATION_FUNCTION=<function leaky_relu at 0x13a2882f0>,BATCH_SIZE=3,LEARNING_RATE=0.1,N_SPLITS=5,REGULARIZATION_STRENGTH=0.003,RESAMPLING_FN=None,UNITS=4:	PENDING
 - cross_validate_model_1_ACTIVATION_FUNCTION=<function leaky_relu at 0x13a2882f0>,BATCH_SIZE=4,LEARNING_RATE=0.1,N_SPLITS=5,REGULARIZATION_STRENGTH=0.003,RESAMPLING_FN=None,UNITS=4:	PENDING
 - cross_validate_model_2_ACTIVATION_FUNCTION=<function leaky_relu at 0x13a2882f0>,BATCH_SIZE=3,LEARNING_RATE=0.1,N_SPLITS=5,REGULARIZATION_STRENGTH=0.001,RESAMPLING_FN=None,UNITS=4:	P

== Status ==
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 64.000: None | Iter 16.000: None | Iter 4.000: None | Iter 1.000: None
Resources requested: 1/4 CPUs, 0/0 GPUs, 0.0/1.9 GiB heap, 0.0/0.63 GiB objects
Memory usage on this node: 5.1/8.0 GiB
Result logdir: /Users/toor/Documents/University/PhD/Project/SCAR/software/experiment/DL for SCAR/code/ray_results/hp_tuning
Number of trials: 120 ({'RUNNING': 1, 'PENDING': 119})
PENDING trials:
 - cross_validate_model_1_ACTIVATION_FUNCTION=<function leaky_relu at 0x13a2882f0>,BATCH_SIZE=4,LEARNING_RATE=0.1,N_SPLITS=5,REGULARIZATION_STRENGTH=0.003,RESAMPLING_FN=None,UNITS=4:	PENDING
 - cross_validate_model_2_ACTIVATION_FUNCTION=<function leaky_relu at 0x13a2882f0>,BATCH_SIZE=3,LEARNING_RATE=0.1,N_SPLITS=5,REGULARIZATION_STRENGTH=0.001,RESAMPLING_FN=None,UNITS=4:	PENDING
 - cross_validate_model_3_ACTIVATION_FUNCTION=<function leaky_relu at 0x13a2882f0>,BATCH_SIZE=4,LEARNING_RATE=0.1,N_SPLITS=5,REGULARIZATION_STRENGTH=0.001,RESAMPLING_FN=N

[2m[36m(pid=4324)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=4324)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=4324)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=4324)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=4324)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=4324)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=4325)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=4325)[0m   _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
[2m[36m(pid=4325)[0m   _np_qint16 = np.dtype([("qint16", np.int16, 1)])
[2m[36m(pid=4325)[0m   _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
[2m[36m(pid=4325)[0m   _np_qint32 = np.dtype([("qint32", np.int32, 1)])
[2m[36m(pid=4325)[0m   np_resource = np.dtype([("resource", np.ubyte, 1)])
[2m[36m(pid=4323)[0m   _np_qint8 = np.dtype([("qint8", np.int8, 1)])
[2m[36m(pid=4323)[0

[2m[36m(pid=4323)[0m -------- Fold 0 --------
[2m[36m(pid=4323)[0m Train-set 0 indexes [   0    1    2 ... 1373 1374 1375]
[2m[36m(pid=4323)[0m Test-set 0 indexes [   3   12   19   27   37   47   48   49   51   56   58   60   65   73
[2m[36m(pid=4323)[0m    75   80   81   87   88   90   91   94   98  101  107  108  111  115
[2m[36m(pid=4323)[0m   119  120  126  131  133  135  142  159  163  167  169  177  181  186
[2m[36m(pid=4323)[0m   190  194  195  198  201  204  215  223  231  236  239  248  255  258
[2m[36m(pid=4323)[0m   259  267  268  270  280  283  288  292  298  301  302  303  304  309
[2m[36m(pid=4323)[0m   311  321  325  330  335  341  350  351  368  375  382  386  390  401
[2m[36m(pid=4323)[0m   403  404  408  409  419  422  424  428  432  435  452  478  480  481
[2m[36m(pid=4323)[0m   490  491  494  496  498  503  512  520  521  525  531  534  536  537
[2m[36m(pid=4323)[0m   541  547  549  554  558  559  561  571  585  596  597  602  616  6

[2m[36m(pid=4323)[0m W1117 16:44:29.665950 123145481334784 deprecation.py:323] From /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow/python/ops/losses/losses_impl.py:121: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
[2m[36m(pid=4323)[0m Instructions for updating:
[2m[36m(pid=4323)[0m Use tf.where in 2.0, which has the same broadcast rule as np.where
[2m[36m(pid=4323)[0m W1117 16:44:29.790632 123145481334784 deprecation_wrapper.py:119] From /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tf_metrics/__init__.py:152: The name tf.diag_part is deprecated. Please use tf.linalg.tensor_diag_part instead.
[2m[36m(pid=4323)[0m 
[2m[36m(pid=4323)[0m W1117 16:44:29.811521 123145481334784 deprecation.py:323] From /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tf_metrics/__init__.py:140: to_float 

[2m[36m(pid=4322)[0m W1117 16:44:30.355269 123145464324096 deprecation_wrapper.py:119] From /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tf_metrics/__init__.py:152: The name tf.diag_part is deprecated. Please use tf.linalg.tensor_diag_part instead.
[2m[36m(pid=4322)[0m 
[2m[36m(pid=4322)[0m W1117 16:44:30.368822 123145464324096 deprecation.py:323] From /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tf_metrics/__init__.py:140: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
[2m[36m(pid=4322)[0m Instructions for updating:
[2m[36m(pid=4322)[0m Use `tf.cast` instead.
[2m[36m(pid=4325)[0m -------- Fold 0 --------
[2m[36m(pid=4325)[0m Train-set 0 indexes [   0    1    2 ... 1373 1374 1375]
[2m[36m(pid=4325)[0m Test-set 0 indexes [   3   12   19   27   37   47   48   49   51   56   58   60   65   73
[2m[36m(pid=4325)[0m    75   80   81   87   88   90

[2m[36m(pid=4323)[0m 2019-11-17 16:44:34.966265: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=4323)[0m W1117 16:44:35.394752 123145481334784 deprecation.py:323] From /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow/python/training/monitored_session.py:875: start_queue_runners (from tensorflow.python.training.queue_runner_impl) is deprecated and will be removed in a future version.
[2m[36m(pid=4323)[0m Instructions for updating:
[2m[36m(pid=4323)[0m To construct input pipelines, use the `tf.data` module.
[2m[36m(pid=4322)[0m 2019-11-17 16:44:35.514277: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
[2m[36m(pid=4322)[0m W1117 16:44:35.990619 123145464324096 deprecation.py:323] From /Library/Frameworks/Python.framework/Vers

In [None]:
# Collect the results of the trials in a dataframe
analysis_df = analysis.dataframe()

def _short_resampler_name(value):
    """Keep only the last dotted component of a textual value, without its 2 trailing characters.

    Non-string cells (None, NaN, ...) are returned unchanged instead of
    raising on the missing `split` method.
    """
    if isinstance(value, str):
        return value.split('.')[-1][:-2]
    return value

analysis_df['config/RESAMPLING_FN'] = analysis_df['config/RESAMPLING_FN'].map(_short_resampler_name)
# Show the 10 best rows: highest timesteps_total first, then highest macro F1
best_stats = (
    analysis_df
    .sort_values(['timesteps_total','f1_macro_mean'], ascending=[False,False])
    .filter(regex='timesteps_total|macro|config|logdir')
    .iloc[:10]
)
best_stats.style