In [1]:
import tensorflow as tf
import numpy as np
from tensorflow_addons.losses import metric_learning
import tensorflow_probability as tfp
from tensorflow.keras import layers, models, callbacks
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras import backend as K
import pickle
import os
tf.random.set_seed(1234)

# 0-based index into the list of physical GPUs that TensorFlow should use.
gpu_id = 1
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    if -len(gpus) <= gpu_id < len(gpus):
        # Restrict TensorFlow to the single physical GPU selected by gpu_id.
        try:
            tf.config.experimental.set_visible_devices(gpus[gpu_id], 'GPU')
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
        except RuntimeError as e:
            # Visible devices must be set before GPUs have been initialized
            print(e)
    else:
        # Previously this case crashed with an IndexError (e.g. on a single-GPU
        # host); report it and leave TensorFlow's default visibility untouched.
        print("gpu_id {} is out of range for {} physical GPU(s); "
              "leaving device visibility unchanged".format(gpu_id, len(gpus)))

def _pairwise_distances(feature_A, feature_B=None, squared=False):
    """
    Adapted from https://www.tensorflow.org/api_docs/python/tf/contrib/losses/metric_learning/triplet_semihard_loss
    Computes the pairwise distance matrix with numerical stability.
    output[i, j] = || feature_A[i, :] - feature_B[j, :] ||_2
    Args:
      feature_A: 2-D Tensor of size [number of data A, feature dimension].
      feature_B: 2-D Tensor of size [number of data B, feature dimension].
        When omitted, distances are computed between the rows of feature_A
        and the diagonal of the result is forced to exactly zero.
      squared: Boolean, whether or not to square the pairwise distances.
    Returns:
      pairwise_distances: 2-D Tensor of size [number of data A, number of data B].
    """
    # Record the self-comparison case *before* defaulting feature_B; checking
    # `feature_B is None` after the assignment below could never be true.
    self_comparison = feature_B is None
    if self_comparison:
        feature_B = feature_A

    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b, evaluated for all pairs at once.
    pairwise_distances_squared = tf.add(
        tf.reduce_sum(tf.square(feature_A), axis=[1], keepdims=True),
        tf.reduce_sum(tf.square(tf.transpose(feature_B)), axis=[0], keepdims=True),
    ) - 2.0 * tf.linalg.matmul(feature_A, tf.transpose(feature_B))

    # Deal with numerical inaccuracies. Set small negatives to zero.
    pairwise_distances_squared = tf.maximum(pairwise_distances_squared, 0.0)
    # Get the mask where the zero distances are at.
    error_mask = tf.less_equal(pairwise_distances_squared, 0.0)
    # Cast masks to the dtype of the distances rather than a fixed float32.
    dtype = pairwise_distances_squared.dtype

    # Optionally take the sqrt.
    if squared:
        pairwise_distances = pairwise_distances_squared
    else:
        # The tiny offset keeps the sqrt gradient finite at exactly zero.
        pairwise_distances = tf.sqrt(
            pairwise_distances_squared + tf.cast(error_mask, dtype) * 1e-16
        )

    # Undo conditionally adding 1e-16.
    pairwise_distances = tf.multiply(
        pairwise_distances, tf.cast(tf.logical_not(error_mask), dtype)
    )

    if self_comparison:
        num_data = tf.shape(feature_A)[0]
        # Explicitly set diagonals to zero.
        mask_offdiagonals = tf.ones_like(pairwise_distances) - tf.linalg.diag(
            tf.ones([num_data], dtype=dtype)
        )
        pairwise_distances = tf.multiply(pairwise_distances, mask_offdiagonals)

    return pairwise_distances
def get_consistency_distinction_loss(labels,embeddings):
    """Centroid-based consistency/distinction loss over one batch.

    Each distinct label in the batch forms a cluster whose centroid is the
    mean of its embeddings. Squared distances from every embedding to every
    centroid are computed and each row is divided by its own maximum. The
    loss then combines:

    * consistency: batch mean of each sample's distance to its own centroid;
    * distinction: batch mean of each sample's distance to the nearest
      centroid of a different cluster;
    * alpha: mean intra-cluster distance / (mean inter-cluster distance + eps).

    Args:
      labels: Tensor holding one label per sample; reshaped to [batch, 1].
      embeddings: 2-D float Tensor of size [batch, embedding dimension].
    Returns:
      Scalar Tensor: (1 + alpha) * consistency_loss - distinction_loss.
    """
    epsilon = 1e-7
    batch_size = tf.shape(labels)[0]
    labels = tf.reshape(labels, [batch_size, 1])
    clusters_labels, _, num_embeddings_per_cluster = tf.unique_with_counts(
        tf.reshape(labels, [batch_size])
    )

    # adjacency[i, k] is True when sample i carries the label of cluster k,
    # so every row contains exactly one True entry.
    adjacency = tf.equal(labels, tf.transpose(clusters_labels))
    membership = tf.cast(adjacency, dtype=tf.float32)

    # Per-cluster mean embedding: sum the members, then divide by the count.
    centroids = tf.linalg.matmul(membership, embeddings, transpose_a=True)
    centroids = tf.divide(
        centroids,
        tf.expand_dims(tf.cast(num_embeddings_per_cluster, dtype=tf.float32), axis=1),
    )

    distances = _pairwise_distances(
        feature_A=embeddings, feature_B=centroids, squared=True
    )
    # Scale each row by its maximum. divide_no_nan returns 0 instead of NaN
    # when a row is all zeros (e.g. a one-sample batch, or collapsed embeddings).
    distances = tf.math.divide_no_nan(
        distances, tf.reduce_max(distances, axis=1, keepdims=True)
    )

    # Distinction: hide each sample's own centroid behind the global maximum so
    # the row minimum picks the closest *other* centroid.
    distances_to_others = tf.where(adjacency, tf.reduce_max(distances), distances)
    distinction_loss = tf.reduce_mean(tf.reduce_min(distances_to_others, axis=1))

    mean_intra_class_distance = tf.reduce_mean(tf.boolean_mask(distances, adjacency))
    inter_class_distances = tf.boolean_mask(distances, tf.logical_not(adjacency))
    inter_class_count = tf.cast(tf.size(inter_class_distances), tf.float32)
    # A batch with a single cluster has no inter-class entries; a plain
    # reduce_mean over the empty selection would be NaN.
    mean_inter_class_distance = tf.math.divide_no_nan(
        tf.reduce_sum(inter_class_distances), inter_class_count
    )
    alpha = mean_intra_class_distance / (mean_inter_class_distance + epsilon)
    # With no other cluster the ratio is undefined: drop the extra weighting
    # instead of letting the epsilon-only denominator blow it up.
    alpha = alpha * tf.cast(inter_class_count > 0, tf.float32)

    # Consistency: masking leaves only the own-centroid entry in each row, so
    # the row maximum is the distance to the sample's own centroid; these are
    # then averaged over the batch.
    # (An earlier variant aggregated a per-cluster 95th percentile of these
    # distances with tfp.stats.percentile instead of the batch mean.)
    distance_to_own_centroid = tf.reduce_max(tf.multiply(distances, membership), axis=1)
    consistency_loss = tf.reduce_mean(distance_to_own_centroid)

    return  (1+alpha)*consistency_loss - distinction_loss

TensorFlow Addons offers no support for the nightly versions of TensorFlow. Some things might work, some other might not. 
If you encounter a bug, do not file an issue on GitHub.


4 Physical GPUs, 1 Logical GPU


In [2]:
# embeddings = tf.convert_to_tensor(np.random.randn(10,100),dtype=tf.float32)
# labels = tf.convert_to_tensor(np.random.randint(0,4,10),dtype=tf.float32)
# mask_for_equal = tf.math.equal(labels,tf.transpose(labels))
# pairwise_distances = _pairwise_distances(embeddings,squared=True)
# pairwise_distances = pairwise_distances/tf.reshape(tf.reduce_max(pairwise_distances,axis=1),[lshape[0],1])    
# pairwise_distance_for_consistency  = tf.multiply(pairwise_distances, tf.cast(mask_for_equal,tf.float32))
# counts_same_class = tf.reduce_sum(tf.cast(mask_for_equal,tf.float32),axis=1)
# total_distance_same_class = tf.reduce_sum(pairwise_distance_for_consistency,axis=1)
# mean_distance_same_class = total_distance_same_class/(counts_same_class-1+epsilon)


In [5]:

def get_trained_model(X_train,y_train,n_timesteps,n_channels,window_size,filepath):
    """Build, train and return the two-output model from get_model.

    The data is split 80/20 (stratified on y_train) into training and
    validation parts. Training is stopped early on 'val_final_acc' and the
    best checkpoint written to `filepath` is loaded back before returning.

    Args:
      X_train: training inputs; the second and last axes are expected to match
        n_timesteps and n_channels.
      y_train: integer class label per row of X_train.
      n_timesteps: length of the time axis of one input window.
      n_channels: number of channels of one input window.
      window_size: unused here; kept so existing callers keep working.
      filepath: where the best checkpoint is saved and re-loaded from.
    Returns:
      The trained Keras model with the best checkpoint's weights loaded.
    """
    n_classes = len(np.unique(y_train))
    model =  get_model(input_shape=(n_timesteps,n_channels),n_classes=n_classes)
    checkpoint = callbacks.ModelCheckpoint(filepath, monitor='val_final_acc', verbose=1, save_best_only=True, mode='max',save_weights_only=False)
    es = callbacks.EarlyStopping(monitor='val_final_acc', mode='max', verbose=0,patience=40)
    callbacks_list = [es,checkpoint]
    train_x,val_x,train_y,val_y = train_test_split(X_train,y_train,test_size=.2,stratify=y_train)
    # The same labels feed both heads: the softmax output and the embedding loss.
    model.fit(train_x,[train_y,train_y],validation_data=(val_x,[val_y,val_y]), epochs=2000, batch_size=500,verbose=1,callbacks=callbacks_list,shuffle=True)
    model.load_weights(filepath)
    val_y_pred = model.predict(val_x)
    # A multi-output model returns one array per output; keep the first
    # (class probabilities). Checking the type is reliable, whereas comparing
    # len() with the number of validation rows breaks for tiny validation sets.
    if isinstance(val_y_pred,(list,tuple)):
        val_y_pred = val_y_pred[0]
    print('validation accuracy',accuracy_score(val_y,val_y_pred.argmax(axis=1)),end=',')
    return model
import tensorflow_addons as tfa
def get_model(input_shape=(400,3),n_classes=1):
    """Assemble and compile the two-output Conv1D + GRU network.

    Outputs, in order:
      'final'   - softmax over n_classes units, trained with sparse
                  categorical cross-entropy (loss weight 5, 'acc' metric);
      'feature' - L2-normalised n_classes-wide vector, trained with
                  get_consistency_distinction_loss (loss weight 1).

    Args:
      input_shape: shape of one input sample passed to layers.Input.
      n_classes: width of both the 'feature' and the 'final' heads.
    Returns:
      The compiled Keras model.
    """
    conv_options = dict(activation='relu', kernel_initializer='normal', padding='same')

    inputs = layers.Input(shape=input_shape)

    # Convolutional front end followed by a recurrent summary of the sequence.
    hidden = layers.Conv1D(128, 2, **conv_options)(inputs)
    hidden = layers.MaxPooling1D(2)(hidden)
    hidden = layers.Conv1D(128, 2, **conv_options)(hidden)
    hidden = layers.BatchNormalization()(hidden)
    hidden = layers.Dropout(.2)(hidden)
    hidden = layers.GRU(128, return_sequences=False, activation='tanh')(hidden)
    hidden = layers.Flatten()(hidden)
    hidden = layers.Dense(350, activation='relu')(hidden)

    # Embedding head: dense projection, then unit-length rows.
    pre_embedding = layers.Dense(n_classes, activation='relu', name='feature_before')(hidden)
    embedding = layers.Lambda(
        lambda tensor: K.l2_normalize(tensor, axis=1), name='feature'
    )(pre_embedding)

    # Classification head sits on top of the normalised embedding.
    pre_softmax = layers.Dense(n_classes, activation='relu', name='final_before')(embedding)
    probabilities = layers.Activation(activation='softmax', name='final')(pre_softmax)

    model = models.Model(inputs, [probabilities, embedding])
    model.compile(
        loss={
            'final': tf.keras.losses.SparseCategoricalCrossentropy(),
            'feature': get_consistency_distinction_loss,
        },
        loss_weights={'final': 5, 'feature': 1},
        optimizer='adam',
        metrics={'final': ['acc']},
    )
    return model
def get_X_y_dict(training_data,user_dict = None):
    """Turn a frame of per-user windows into model inputs and integer labels.

    Adds (or overwrites) a 'label' column on `training_data` in place.

    Args:
      training_data: DataFrame with a 'user' column and a 'final_data' column
        whose cells are arrays that can be concatenated along the first axis.
      user_dict: optional mapping user -> integer label. When None, one is
        built by numbering the users in order of first appearance.
    Returns:
      (X, y, user_dict): the concatenated 'final_data' arrays, the array of
      labels taken from the 'label' column, and the mapping that was used.
    """
    if user_dict is None:
        distinct_users = training_data['user'].unique()
        user_dict = dict(zip(distinct_users, range(len(distinct_users))))

    def _label_of(user):
        # Plain lookup: a user missing from the mapping raises KeyError.
        return user_dict[user]

    training_data['label'] = training_data['user'].apply(_label_of)

    windows = [cell for cell in training_data['final_data']]
    X = np.concatenate(windows)
    y = np.array(training_data['label'].values)
    return X, y, user_dict
# --- Configuration -----------------------------------------------------------
activity_label = 'Walking'
window_size = 20
base_directory = './data/mORAL_dataset_for_python_upload_09072020/'

# --- Load the pre-processed train/test frames, ordered by timestamp ----------
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
# The `with` blocks close the file handles, which the bare open() calls leaked.
with open(os.path.join(base_directory,'processed_data',activity_label,'train.p'),'rb') as train_file:
    training_data = pickle.load(train_file).sort_values('timestamp').reset_index(drop=True)
with open(os.path.join(base_directory,'processed_data',activity_label,'test.p'),'rb') as test_file:
    testing_data = pickle.load(test_file).sort_values('timestamp').reset_index(drop=True)

# --- Output locations --------------------------------------------------------
result_directory = os.path.join(base_directory,'results',activity_label)
# exist_ok avoids the check-then-create race of isdir() followed by makedirs().
os.makedirs(result_directory, exist_ok=True)
model_directory = os.path.join(result_directory,'activity_{}_window_size_new_v4_{}.h5'.format(activity_label,window_size))

# --- Train -------------------------------------------------------------------
X_train,y_train,user_dict = get_X_y_dict(training_data)
# Reuse the training mapping so test labels agree with the training labels.
X_test,y_test,user_dict =  get_X_y_dict(testing_data,user_dict)
trained_model = get_trained_model(X_train,y_train,n_timesteps=X_train.shape[1],n_channels=X_train.shape[-1],window_size=window_size,filepath=model_directory)

# --- Evaluate on the test set and persist the results ------------------------
y_pred_test = trained_model.predict(X_test)
# NOTE(review): y_pred_test[0] is the model's first output ('final', the softmax
# probabilities); the L2-normalised 'feature' head is y_pred_test[1]. Confirm
# which of the two the 'embedding' column is meant to hold.
testing_data['embedding'] = list(y_pred_test[0])
testing_data['prediction'] = list(y_pred_test[0].argmax(axis=1))
print(accuracy_score(testing_data['label'],testing_data['prediction']))
with open(os.path.join(result_directory,'activity_{}_window_size_new_v4_{}.p'.format(activity_label,window_size)),'wb') as result_file:
    pickle.dump(testing_data,result_file)

Epoch 1/2000

Epoch 00001: val_final_acc improved from -inf to 0.10526, saving model to ./data/mORAL_dataset_for_python_upload_09072020/results/Walking/activity_Walking_window_size_new_v4_20.h5
Epoch 2/2000

Epoch 00002: val_final_acc did not improve from 0.10526
Epoch 3/2000

Epoch 00003: val_final_acc did not improve from 0.10526
Epoch 4/2000

Epoch 00004: val_final_acc improved from 0.10526 to 0.11393, saving model to ./data/mORAL_dataset_for_python_upload_09072020/results/Walking/activity_Walking_window_size_new_v4_20.h5
Epoch 5/2000

Epoch 00005: val_final_acc did not improve from 0.11393
Epoch 6/2000

Epoch 00006: val_final_acc did not improve from 0.11393
Epoch 7/2000

Epoch 00007: val_final_acc improved from 0.11393 to 0.11585, saving model to ./data/mORAL_dataset_for_python_upload_09072020/results/Walking/activity_Walking_window_size_new_v4_20.h5
Epoch 8/2000

Epoch 00008: val_final_acc improved from 0.11585 to 0.13960, saving model to ./data/mORAL_dataset_for_python_upload_09

In [None]:
# Show the test-set accuracy as this cell's output; it is computed from the
# 'label' and 'prediction' columns of testing_data filled in by the training cell.
accuracy_score(testing_data['label'],testing_data['prediction'])