Alright now we may just be able to actually train the similarity model on a LIGO GPU. I have reworked the making of the binary pair tuples.
scottcoughlin2014 committed Oct 30, 2018
1 parent c5b7d32 commit 0e4951d
Showing 2 changed files with 84 additions and 51 deletions.
7 changes: 7 additions & 0 deletions gravityspy/ml/GS_utils.py
@@ -161,6 +161,13 @@ def inner_siamese_acc(y_true, y_pred):

return inner_siamese_acc

def eucl_dist_output_shape(shapes):
    # Output shape of a Lambda distance layer: one scalar distance per pair.
    shape1, shape2 = shapes
    return (shape1[0], 1)

def contrastive_loss(y_true, y_pred):
    # Contrastive loss: pull same-class pairs (y_true == 1) together and push
    # different-class pairs (y_true == 0) at least `margin` apart.
    margin = 1
    return K.mean(y_true * K.square(y_pred) +
                  (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))

def create_pairs3_gen(data, class_indices, batch_size):
""" Create the pairs
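These two helpers follow the standard Keras siamese pattern: a Lambda layer computes the Euclidean distance between two embeddings, eucl_dist_output_shape tells Keras that layer's output shape, and contrastive_loss is trained on the distance. Below is a minimal sketch of how they wire together, assuming a channels-first 3x140x170 input and a deliberately tiny stand-in embedding network; none of this is code from the commit.

import keras.backend as K
from keras.layers import Dense, Flatten, Input, Lambda
from keras.models import Model, Sequential

from GS_utils import contrastive_loss, eucl_dist_output_shape

def euclidean_distance(vects):
    x, y = vects
    # K.epsilon() keeps the sqrt (and its gradient) finite at zero distance.
    return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True),
                            K.epsilon()))

# Tiny stand-in for the shared embedding network (an assumption, not the
# VGG16-based model built in train_semantic_index.py).
base_network = Sequential([Flatten(input_shape=(3, 140, 170)),
                           Dense(64, activation='relu')])

input_a = Input(shape=(3, 140, 170))
input_b = Input(shape=(3, 140, 170))
distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)(
    [base_network(input_a), base_network(input_b)])

similarity_model = Model([input_a, input_b], distance)
similarity_model.compile(loss=contrastive_loss, optimizer='rmsprop')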
128 changes: 77 additions & 51 deletions gravityspy/ml/train_semantic_index.py
@@ -1,6 +1,7 @@
import keras.backend as K
K.set_image_data_format("channels_first")
from GS_utils import cosine_distance
from GS_utils import (cosine_distance, siamese_acc, eucl_dist_output_shape,
create_pairs3_gen)
from keras import regularizers
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.layers import Input, Dense, GlobalAveragePooling2D, Lambda
@@ -87,10 +88,9 @@ def pickle_trainingset(path_to_trainingset,

def make_model(data, model_folder='model',
unknown_classes_labels=['Whistle', 'Scratchy'],
multi_view=False,
multi_view=True,
batch_size=22, nb_epoch=10,
nb_classes=22, fraction_validation=.125, fraction_testing=None,
best_model_based_validset=0, image_size=[140, 170],
nb_classes=22, image_size=[140, 170],
random_seed=1986, verbose=True):
"""Train a Semantic Index.
@@ -138,6 +138,7 @@ def make_model(data, model_folder='model',
best_model_based_validset (int,optional):
"""
regularization = 1e-4
logger = log.Logger('Gravity Spy: Training '
'Semantic Index')

@@ -151,73 +152,99 @@ def make_model(data, model_folder='model',
logger.info('The size of the images being trained {0}'.format(image_size))

known_df = data.loc[~data.Label.isin(unknown_classes_labels)]
known_df = known_df.groupby('Label').apply(
    lambda x: x.sample(frac=0.1, random_state=random_seed)
).reset_index(drop=True).reset_index()
known_df = known_df.sample(frac=1, random_state=random_seed)

logger.info('Given unknown images here is what is to be considered in '
'the known '
'domain of samples which are {0}'.format(knwon_df.Label.unique()))
'domain of samples which are {0}'.format(known_df.Label.unique()))

logger.info('Selecting images to be considered in the unknown '
'domain of samples which are {0}'.format(unknown_classes_labels))
unknown_df = data.loc[data.Label.isin(unknown_classes_labels)]
unknown_df = unknown_df.groupby('Label').apply(
    lambda x: x.sample(frac=0.1, random_state=random_seed)
).reset_index(drop=True).reset_index()
unknown_df = unknown_df.sample(frac=1, random_state=random_seed)

known_x_1 = numpy.vstack(known_df['0.5.png'].values).reshape(
-1, 1, img_rows, img_cols)
unknownn_x_1 = numpy.vstack(unknown_df['0.5.png'].values).reshape(
-1, 1, img_rows, img_cols)
-1, 3, img_rows, img_cols)
unknown_x_1 = numpy.vstack(unknown_df['0.5.png'].values).reshape(
-1, 3, img_rows, img_cols)

known_x_2 = numpy.vstack(known_df['1.0.png'].values).reshape(
-1, 1, img_rows, img_cols)
unknownn_x_2 = numpy.vstack(unknown_df['1.0.png'].values).reshape(
-1, 1, img_rows, img_cols)
-1, 3, img_rows, img_cols)
unknown_x_2 = numpy.vstack(unknown_df['1.0.png'].values).reshape(
-1, 3, img_rows, img_cols)

known_x_3 = numpy.vstack(known_df['2.0.png'].values).reshape(
-1, 1, img_rows, img_cols)
-1, 3, img_rows, img_cols)
unknown_x_3 = numpy.vstack(unknown_df['2.0.png'].values).reshape(
-1, 1, img_rows, img_cols)
-1, 3, img_rows, img_cols)

known_x_4 = numpy.vstack(known_df['4.0.png'].values).reshape(
-1, 1, img_rows, img_cols)
-1, 3, img_rows, img_cols)
unknown_x_4 = numpy.vstack(unknown_df['4.0.png'].values).reshape(
-1, 1, img_rows, img_cols)

test_set_unlabelled_x_1 = image_data.filter(regex=("1.0.png")).iloc[0].iloc[0]
test_set_unlabelled_x_2 = image_data.filter(regex=("2.0.png")).iloc[0].iloc[0]
test_set_unlabelled_x_3 = image_data.filter(regex=("4.0.png")).iloc[0].iloc[0]
test_set_unlabelled_x_4 = image_data.filter(regex=("0.5.png")).iloc[0].iloc[0]
test_set_unlabelled_x_1 = numpy.concatenate((test_set_unlabelled_x_1[0].reshape(-1, 1, img_rows, img_cols),
test_set_unlabelled_x_1[1].reshape(-1, 1, img_rows, img_cols),
test_set_unlabelled_x_1[2].reshape(-1, 1, img_rows, img_cols)),
axis=1)
test_set_unlabelled_x_2 = numpy.concatenate((test_set_unlabelled_x_2[0].reshape(-1, 1, img_rows, img_cols),
test_set_unlabelled_x_2[1].reshape(-1, 1, img_rows, img_cols),
test_set_unlabelled_x_2[2].reshape(-1, 1, img_rows, img_cols)),
axis=1)
test_set_unlabelled_x_3 = numpy.concatenate((test_set_unlabelled_x_3[0].reshape(-1, 1, img_rows, img_cols),
test_set_unlabelled_x_3[1].reshape(-1, 1, img_rows, img_cols),
test_set_unlabelled_x_3[2].reshape(-1, 1, img_rows, img_cols)),
axis=1)
test_set_unlabelled_x_4 = numpy.concatenate((test_set_unlabelled_x_4[0].reshape(-1, 1, img_rows, img_cols),
test_set_unlabelled_x_4[1].reshape(-1, 1, img_rows, img_cols),
test_set_unlabelled_x_4[2].reshape(-1, 1, img_rows, img_cols)),
axis=1)
-1, 3, img_rows, img_cols)

if multi_view:
known_classes = concatenate_views(known_set_x_1, known_set_x_2,
known_set_x_3, known_set_x_4, [img_rows, img_cols], True)
unknown_classes = concatenate_views(unknown_set_x_1, unknown_set_x_2,
unknown_set_x_3, unknown_set_x_4, [img_rows, img_cols], True)
known_classes = concatenate_views(known_x_1, known_x_2,
known_x_3, known_x_4, [img_rows, img_cols], True)
unknown_classes = concatenate_views(unknown_x_1, unknown_x_2,
unknown_x_3, unknown_x_4, [img_rows, img_cols], True)
else:
# We are only using one duration for the similarity search
known_classes_image_array = known_set_x_2
unknown_classes_image_array = unknown_set_x_2

# Generate the Binary pairs for training.
train_generator = create_pairs3_gen(known_classes, known_classes_indices_for_metric_learning, size_of_batch)
valid_generator = create_pairs3_gen(unknown_classes, unknown_classes_indices_for_clustering, size_of_batch)
known_classes = known_x_2
unknown_classes = unknown_x_2

# create binary pairs: label 1 for same-class pairs, 0 for different-class pairs
all_pairs_known = numpy.array(list(combinations(known_df['index'], 2)))
same_pairs_known = numpy.concatenate(
    known_df.groupby('Label')['index'].apply(
        lambda x: list(combinations(x.values, 2))))
s_pairs_known = set(map(tuple, same_pairs_known))
a_pairs_known = set(map(tuple, all_pairs_known))
diff_pairs_known = numpy.array(list(a_pairs_known - s_pairs_known))

# Stack rows of (index_1, index_2, label): same-class pairs get label 1,
# different-class pairs get label 0.
tmp1 = numpy.ones((same_pairs_known.shape[0], 3))
tmp1[:, :-1] = same_pairs_known
tmp2 = numpy.zeros((diff_pairs_known.shape[0], 3))
tmp2[:, :-1] = diff_pairs_known
pair_labels_known = numpy.vstack([tmp1, tmp2])

all_pairs_unknown = numpy.array(list(combinations(unknown_df['index'], 2)))
same_pairs_unknown = numpy.concatenate(
    unknown_df.groupby('Label')['index'].apply(
        lambda x: list(combinations(x.values, 2))))
s_pairs_unknown = set(map(tuple, same_pairs_unknown))
a_pairs_unknown = set(map(tuple, all_pairs_unknown))
diff_pairs_unknown = numpy.array(list(a_pairs_unknown - s_pairs_unknown))

tmp1 = numpy.ones((same_pairs_unknown.shape[0], 3))
tmp1[:, :-1] = same_pairs_unknown
tmp2 = numpy.zeros((diff_pairs_unknown.shape[0], 3))
tmp2[:, :-1] = diff_pairs_unknown
pair_labels_unknown = numpy.vstack([tmp1, tmp2])
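For intuition, here is what this pair bookkeeping produces on a toy frame (hypothetical labels and indices, not Gravity Spy data): every same-class combination gets label 1, and the set difference leaves the cross-class combinations with label 0.

from itertools import combinations

import numpy
import pandas

df = pandas.DataFrame({'index': [0, 1, 2, 3],
                       'Label': ['Whistle', 'Whistle', 'Scratchy', 'Scratchy']})

all_pairs = numpy.array(list(combinations(df['index'], 2)))  # 6 pairs total
same_pairs = numpy.concatenate(
    df.groupby('Label')['index'].apply(
        lambda x: list(combinations(x.values, 2))))
diff_pairs = numpy.array(list(set(map(tuple, all_pairs)) -
                              set(map(tuple, same_pairs))))
# same_pairs -> [[2, 3], [0, 1]] (groupby sorts the labels), label 1
# diff_pairs -> the four remaining cross-class pairs, label 0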

def generate_pairs(image_data, pair_labels, batch_size):
    # Cycle endlessly over the (index_1, index_2, label) rows, yielding
    # batches of ([left images, right images], labels) for fit_generator.
    pairs1 = []
    pairs2 = []
    labels = []
    counter = 0
    while True:
        for pair in pair_labels:
            counter += 1
            # pair_labels rows are floats, so cast the indices back to int
            pairs1.append(image_data[int(pair[0])])
            pairs2.append(image_data[int(pair[1])])
            labels.append(pair[2])
            if counter == batch_size:
                yield ([numpy.asarray(pairs1, numpy.float32),
                        numpy.asarray(pairs2, numpy.float32)],
                       numpy.asarray(labels, numpy.int32))
                counter = 0
                pairs1 = []
                pairs2 = []
                labels = []

train_generator = generate_pairs(known_classes, pair_labels_known, batch_size)
valid_generator = generate_pairs(unknown_classes, pair_labels_unknown, batch_size)
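A quick sanity check is to pull a single batch from the generator and inspect the shapes; the channel/row/column numbers in the comment are assumptions that depend on what concatenate_views returns, not values from the commit.

(left, right), labels = next(train_generator)
print(left.shape, right.shape, labels.shape)
# e.g. (22, 3, 280, 340) (22, 3, 280, 340) (22,) for batch_size=22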

# Create the model
vgg16 = VGG16(weights='imagenet', include_top=False,
input_shape=data_x_1.shape[1:])
input_shape=known_classes.shape[1:])
x = vgg16.output
x = GlobalAveragePooling2D()(x)
# let's add a fully-connected layer
@@ -265,15 +292,14 @@ def make_model(data, model_folder='model',

# train
logger.info('training the model ...')
out_file.write('training the model ...' + '\n')

train_negative_factor = 1
test_negative_factor = 1
# the samples from data_x_2 should be separated for test and valid in future
# the samples from unknown_classes should be separated for test and valid in future

train_batch_num = (len(data_x_1) * (train_negative_factor + 1)) / size_of_batch
train_batch_num = (len(known_classes) * (train_negative_factor + 1)) / batch_size
logger.info('train batch num {0}'.format(train_batch_num))
valid_batch_num = (len(data_x_2) * (test_negative_factor + 1)) / size_of_batch
valid_batch_num = (len(unknown_classes) * (test_negative_factor + 1)) / batch_size

similarity_model.fit_generator(train_generator, validation_data=valid_generator, verbose=2,
steps_per_epoch=train_batch_num, validation_steps=valid_batch_num, epochs=nb_epoch)
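One caveat on the batch arithmetic above: under Python 3 the `/` division leaves train_batch_num and valid_batch_num as floats, and it counts images rather than the pairs the generator actually yields. A hedged fix-up, assuming the intent is one pass over every labelled pair per epoch:

# Assumption: one epoch should cover every (index_1, index_2, label) row once.
train_batch_num = int(numpy.ceil(len(pair_labels_known) / float(batch_size)))
valid_batch_num = int(numpy.ceil(len(pair_labels_unknown) / float(batch_size)))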
