In [1]:
import numpy as np
import tensorflow as tf

from model import *
from dataset import *

  from ._conv import register_converters as _register_converters


In [2]:
# set up options

options = {}
options['margin'] = 50000
options['lrate'] = 0.05
options['dim'] = 250
options['hp_epochs'] = 200
options['ic_epochs'] = 50
options['batch_size'] = 256
options['dataset'] = 'awa2'
options['num_base_classes'] = 50
options['abs'] = True

In [3]:
# fixing the seed
np.random.seed(12345)
tf.set_random_seed(12345)

In [4]:
# HYPERNYM PREDICTION DATA
hypernyms, name2index = load_hypernym_dataset(options['dataset'], 
                                              options['num_base_classes'])

options['num_all_classes'] = len(name2index)

# one hot encoding for each class with hierarchy 
hypernyms_per_class = get_hypernyms_per_class(
        hypernyms, options['num_base_classes'], options['num_all_classes'])

print('Number of base and parent classes:', len(name2index))
print('Number of hypernym pairs:', len(hypernyms))

Number of base and parent classes: 106
Number of hypernym pairs: 1332


In [5]:
# IMAGE CLASSIFICATION DATA
X, labels = load_data()
X_train, X_val, X_test, y_train, y_val, y_test = split_data(X, labels)

print('Dataset size:', X.shape)
print('Train size:', len(X_train))
print('Validation size:', len(X_val))
print('Test size:', len(X_test))

Dataset size: (37322, 4096)
Train size: 29857
Validation size: 3733
Test size: 3732


In [6]:
# Setting up our Adam optimizer 
optimizer = tf.train.AdamOptimizer(learning_rate=options['lrate'])

In [7]:
# HYPERNYM PREDICTION MODEL
pos_ch = tf.placeholder(tf.int64, shape=[None])
pos_pr = tf.placeholder(tf.int64, shape=[None])
neg_ch = tf.placeholder(tf.int64, shape=[None])
neg_pr = tf.placeholder(tf.int64, shape=[None])

hypernym_model = get_hypernym_model(pos_ch, pos_pr, neg_ch, neg_pr, options)

(h_acc, pos_acc, neg_acc), _, h_loss = hypernym_model

# hypernym prediction model only updates W_c weights
h_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "hyp")    
h_train_op = optimizer.minimize(h_loss, var_list=h_vars)

In [8]:
# IMAGE CLASSIFICATION MODEL
im = tf.placeholder(tf.float64, shape=[None, X.shape[1]])
y = tf.placeholder(tf.int32, shape=[None])

classification_model = get_classification_model(im, y, options)

cls_acc, _, cls_loss, cls_pred = classification_model

# top 10 predictions
flat_hit_pred = get_prediction(im, 10, options) 

# get errors for all classes
cls_all_errors = get_classification_errors_all_classes(im, options) 

# image classification model only updates W_i weights
cls_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "cls")        
cls_train_op = optimizer.minimize(cls_loss, var_list=cls_vars) 

In [9]:
# define our tf session
sess = tf.Session()


# initialize weights
init = tf.global_variables_initializer()
sess.run(init)
sess.run(tf.local_variables_initializer())

In [10]:
# Train the hypernym prediction model

hp_steps = 0

h_p_acc_list = []
h_n_acc_list = []
h_acc_list = []
h_loss_list = []

for epoch in range(1, options['hp_epochs']):
    print("Epoch:", epoch)


    h_p_acc_avg = 0
    h_n_acc_avg = 0
    h_acc_avg = 0
    h_loss_avg = 0

    for batch_idxs in get_batch_idxs(len(hypernyms), options['batch_size']):
        positive = hypernyms[batch_idxs]
        negative = generate_negative_hypernyms(len(positive), options['num_all_classes'])

        #feed to training and get results
        _, curr_loss, curr_p_acc, curr_n_acc, curr_acc = sess.run(
            [h_train_op, h_loss, pos_acc, neg_acc, h_acc], feed_dict= {
                pos_ch: positive[:,0], # children
                pos_pr: positive[:,1], # parents
                neg_ch: negative[:,0],
                neg_pr: negative[:,1]
            })
    
        # update average loss and accuracy
        h_p_acc_avg += curr_p_acc * len(batch_idxs) / float(len(hypernyms))
        h_n_acc_avg += curr_n_acc * len(batch_idxs) / float(len(hypernyms))
        h_acc_avg += curr_acc * len(batch_idxs) / float(len(hypernyms))
        h_loss_avg += curr_loss * len(batch_idxs) / float(len(hypernyms))

        if hp_steps % 10 == 0:
            print("Steps: %05d Currernt loss %f " % (hp_steps, curr_loss))

        hp_steps += 1

    print("Train Positive Accuracy: ", h_p_acc_avg )
    print("Train Negative Accuracy: ", h_n_acc_avg)
    print("Train Accuracy: ", h_acc_avg)
    
    h_p_acc_list.append(h_p_acc_avg)
    h_n_acc_list.append(h_n_acc_avg)
    h_acc_list.append(h_acc_avg)
    h_loss_list.append(h_loss_avg)


Epoch: 1
Steps: 00000 Currernt loss 12799999.041901 
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.0
Train Accuracy:  0.5
Epoch: 2
Steps: 00010 Currernt loss 12793704.795062 
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.0
Train Accuracy:  0.5
Epoch: 3
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.0
Train Accuracy:  0.5
Epoch: 4
Steps: 00020 Currernt loss 12773693.931111 
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.0
Train Accuracy:  0.5
Epoch: 5
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.0
Train Accuracy:  0.5
Epoch: 6
Steps: 00030 Currernt loss 12734652.869457 
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.0
Train Accuracy:  0.5
Epoch: 7
Steps: 00040 Currernt loss 12671128.373767 
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.0
Train Accuracy:  0.5
Epoch: 8
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.0
Train Accuracy:  0.5
Epoch: 9
Steps: 00050 Currernt loss 12571064.250095 
Train P

Steps: 00370 Currernt loss 3755686.833205 
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.5510510510510511
Train Accuracy:  0.7755255255255256
Epoch: 63
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.5457957957957957
Train Accuracy:  0.7728978978978978
Epoch: 64
Steps: 00380 Currernt loss 2734438.414653 
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.5630630630630631
Train Accuracy:  0.7815315315315314
Epoch: 65
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.5968468468468469
Train Accuracy:  0.7984234234234235
Epoch: 66
Steps: 00390 Currernt loss 3064749.512524 
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.5870870870870871
Train Accuracy:  0.7935435435435435
Epoch: 67
Steps: 00400 Currernt loss 3842985.867972 
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.5660660660660661
Train Accuracy:  0.783033033033033
Epoch: 68
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.5900900900900902
Train Accuracy:  0.795045045

Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.7237237237237237
Train Accuracy:  0.8618618618618619
Epoch: 119
Steps: 00710 Currernt loss 2313952.145577 
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.7710210210210209
Train Accuracy:  0.8855105105105106
Epoch: 120
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.7462462462462464
Train Accuracy:  0.8731231231231231
Epoch: 121
Steps: 00720 Currernt loss 2023344.570827 
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.7657657657657657
Train Accuracy:  0.882882882882883
Epoch: 122
Steps: 00730 Currernt loss 2707063.534070 
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.7635135135135136
Train Accuracy:  0.8817567567567567
Epoch: 123
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.7852852852852853
Train Accuracy:  0.8926426426426426
Epoch: 124
Steps: 00740 Currernt loss 2212299.862314 
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.7717717717717718
Train Accuracy:  0.885

Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.7867867867867868
Train Accuracy:  0.8933933933933934
Epoch: 176
Steps: 01050 Currernt loss 2074678.126957 
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.789039039039039
Train Accuracy:  0.8945195195195195
Epoch: 177
Steps: 01060 Currernt loss 2103957.765470 
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.7785285285285286
Train Accuracy:  0.8892642642642642
Epoch: 178
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.8228228228228228
Train Accuracy:  0.9114114114114112
Epoch: 179
Steps: 01070 Currernt loss 1877532.524092 
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.7995495495495496
Train Accuracy:  0.8997747747747749
Epoch: 180
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.8093093093093093
Train Accuracy:  0.9046546546546546
Epoch: 181
Steps: 01080 Currernt loss 1907638.403119 
Train Positive Accuracy:  1.0
Train Negative Accuracy:  0.8175675675675675
Train Accuracy:  0.908

In [11]:
# Train the model end evaluate on our validation set
ic_loss_list = []
ic_train_acc_list = []
ic_val_acc_list = []

ic_steps = 0
for epoch in range(1, options['ic_epochs'] + 1):
    print("Epoch:", epoch)
    
    # variables to keep track of our average loss and accuracy
    ic_loss_avg = 0
    train_acc_avg = 0

    # run batched training
    for batch_idxs in get_batch_idxs(len(X_train), options['batch_size']):
        
        _, accur, curr_cls_loss = sess.run(
            [cls_train_op, cls_acc, cls_loss], feed_dict= {
                im: X_train[batch_idxs], 
                y: y_train[batch_idxs]
            })
        
        ic_steps += 1
        
        # update average loss and accuracy
        ic_loss_avg += curr_cls_loss * len(batch_idxs) / float(len(X_train))
        train_acc_avg += accur * len(batch_idxs) / float(len(X_train))

        if ic_steps % 10 == 0:
            print("Steps: %05d Accuracy: %f" % (
                ic_steps, accur))

    print()
    print("Train Accuracy: %f Loss: %f\n" % (
            train_acc_avg, 
            ic_loss_avg
        )
    )

    ic_loss_list.append(ic_loss_avg)
    ic_train_acc_list.append(train_acc_avg)
    
    # variables to keep track of our average val loss and accuracy
    val_acc_avg = 0
    
    # get validation accuracy in batches
    for batch_idxs in get_batch_idxs(len(X_val), options['batch_size']):
        [curr_val_accur] = sess.run(
            [cls_acc], feed_dict={ 
                im: X_val[batch_idxs], 
                y: y_val[batch_idxs]
            }
        )

        val_acc_avg += curr_val_accur * len(batch_idxs) / float(len(X_val))

    print("\nValidation Accuracy: %f \n" % (val_acc_avg))
    
    ic_val_acc_list.append(val_acc_avg)

Epoch: 1
Steps: 00010 Accuracy: 0.644531
Steps: 00020 Accuracy: 0.679688
Steps: 00030 Accuracy: 0.699219
Steps: 00040 Accuracy: 0.675781
Steps: 00050 Accuracy: 0.726562
Steps: 00060 Accuracy: 0.781250
Steps: 00070 Accuracy: 0.734375
Steps: 00080 Accuracy: 0.718750
Steps: 00090 Accuracy: 0.812500
Steps: 00100 Accuracy: 0.816406
Steps: 00110 Accuracy: 0.777344

Train Accuracy: 0.722946 Loss: 140399554.645086


Validation Accuracy: 0.803107 

Epoch: 2
Steps: 00120 Accuracy: 0.781250
Steps: 00130 Accuracy: 0.800781
Steps: 00140 Accuracy: 0.812500
Steps: 00150 Accuracy: 0.828125
Steps: 00160 Accuracy: 0.785156
Steps: 00170 Accuracy: 0.828125
Steps: 00180 Accuracy: 0.839844
Steps: 00190 Accuracy: 0.835938
Steps: 00200 Accuracy: 0.824219
Steps: 00210 Accuracy: 0.781250
Steps: 00220 Accuracy: 0.820312
Steps: 00230 Accuracy: 0.847656

Train Accuracy: 0.815789 Loss: 88252469.236899


Validation Accuracy: 0.834450 

Epoch: 3
Steps: 00240 Accuracy: 0.796875
Steps: 00250 Accuracy: 0.843750
Steps: 0

Steps: 02090 Accuracy: 0.894531
Steps: 02100 Accuracy: 0.941406

Train Accuracy: 0.910842 Loss: 38201629.974572


Validation Accuracy: 0.874096 

Epoch: 19
Steps: 02110 Accuracy: 0.898438
Steps: 02120 Accuracy: 0.886719
Steps: 02130 Accuracy: 0.921875
Steps: 02140 Accuracy: 0.910156
Steps: 02150 Accuracy: 0.894531
Steps: 02160 Accuracy: 0.929688
Steps: 02170 Accuracy: 0.894531
Steps: 02180 Accuracy: 0.910156
Steps: 02190 Accuracy: 0.910156
Steps: 02200 Accuracy: 0.910156
Steps: 02210 Accuracy: 0.921875
Steps: 02220 Accuracy: 0.945312

Train Accuracy: 0.915732 Loss: 37110712.263007


Validation Accuracy: 0.879721 

Epoch: 20
Steps: 02230 Accuracy: 0.921875
Steps: 02240 Accuracy: 0.902344
Steps: 02250 Accuracy: 0.906250
Steps: 02260 Accuracy: 0.941406
Steps: 02270 Accuracy: 0.921875
Steps: 02280 Accuracy: 0.917969
Steps: 02290 Accuracy: 0.898438
Steps: 02300 Accuracy: 0.894531
Steps: 02310 Accuracy: 0.906250
Steps: 02320 Accuracy: 0.929688
Steps: 02330 Accuracy: 0.941406
Steps: 02340 Acc

Steps: 04140 Accuracy: 0.925781
Steps: 04150 Accuracy: 0.937500
Steps: 04160 Accuracy: 0.937500
Steps: 04170 Accuracy: 0.960938
Steps: 04180 Accuracy: 0.957031
Steps: 04190 Accuracy: 0.929688
Steps: 04200 Accuracy: 0.937500
Steps: 04210 Accuracy: 0.917969

Train Accuracy: 0.937335 Loss: 26780453.796290


Validation Accuracy: 0.884275 

Epoch: 37
Steps: 04220 Accuracy: 0.953125
Steps: 04230 Accuracy: 0.929688
Steps: 04240 Accuracy: 0.953125
Steps: 04250 Accuracy: 0.937500
Steps: 04260 Accuracy: 0.941406
Steps: 04270 Accuracy: 0.933594
Steps: 04280 Accuracy: 0.949219
Steps: 04290 Accuracy: 0.953125
Steps: 04300 Accuracy: 0.902344
Steps: 04310 Accuracy: 0.898438
Steps: 04320 Accuracy: 0.941406

Train Accuracy: 0.938909 Loss: 26352083.406085


Validation Accuracy: 0.888294 

Epoch: 38
Steps: 04330 Accuracy: 0.941406
Steps: 04340 Accuracy: 0.945312
Steps: 04350 Accuracy: 0.957031
Steps: 04360 Accuracy: 0.910156
Steps: 04370 Accuracy: 0.941406
Steps: 04380 Accuracy: 0.917969
Steps: 04390 Acc

In [12]:
#Calculate hierarchical recall and precision

index2name = {v:k for k,v in name2index.items()}

ground_truth = []
predicted = []
idxs_order = []

val_errors = []
for batch_idxs in get_batch_idxs(len(X_val), options['batch_size']):
    [val_pred, curr_val_errors] = sess.run(
        [flat_hit_pred, cls_all_errors], feed_dict={ 
            im: X_val[batch_idxs], 
            y: y_val[batch_idxs]
        }
    )

    ground_truth.extend(y_val[batch_idxs])
    predicted.extend(val_pred)

    val_errors.extend(curr_val_errors)

y_val_hier = [hypernyms_per_class[int(l)] for l in ground_truth]

prec = []
recall = []
for idx in range(len(X_val)):
    pred_hier = val_errors[idx] <= min(val_errors[idx][:options['num_base_classes']])
    curr_precision = sum(y_val_hier[idx] * pred_hier) / sum(pred_hier)
    curr_recall = sum(y_val_hier[idx] * pred_hier) / sum(y_val_hier[idx])

    prec.append(curr_precision)
    recall.append(curr_recall)

print("Hierarchial results:\tPrecision: %.2f\tRecall: %.2f" % (np.mean(prec),np.mean(recall)))

Hierarchial results:	Precision: 0.88	Recall: 0.93


In [13]:
# calculate flat hit accuracy
flat_hit_at_1 = 0
flat_hit_at_3 = 0
flat_hit_at_5 = 0
flat_hit_at_10 = 0

for i in range(len(X_val)):
    try:
        found_idx = list(predicted[i]).index(ground_truth[i])
    except:
        found_idx = 9999999

    if found_idx < 1:
        flat_hit_at_1 += 1.0 / len(X_val)
    if found_idx < 3:
        flat_hit_at_3 += 1.0 / len(X_val)
    if found_idx < 5:
        flat_hit_at_5 += 1.0 / len(X_val)
    if found_idx < 10:
        flat_hit_at_10 += 1.0 / len(X_val)


print("Flat hit accuracy:\t@ 1: %.4f\t@ 3: %.4f\t@ 5: %.4f\t@ 10: %.4f" % (
    flat_hit_at_1,
    flat_hit_at_3,
    flat_hit_at_5,
    flat_hit_at_10
))

Flat hit accuracy:	@ 1: 0.8888	@ 3: 0.9657	@ 5: 0.9778	@ 10: 0.9896
