This Jupyter Notebook performs the Simulation Experiments of Section III-C

In [None]:
%load_ext autoreload
%autoreload 2

from copy import copy
from nltk.corpus import wordnet as wn
import matplotlib.pyplot as plt
import anytree as at
import sys
import random
import numpy as np
from scipy.stats import beta
import pandas as pd
from matplotlib2tikz import save as tikz_save
import datetime
import time

from caal import attributelearning as al

Creation of the Entity-Category Tree from AwA2 and WordNet.

(Code assumes you're running this notebook from the notebooks folder)

In [None]:
dataset_name = 'AwA2'
entities = []
entities_id = []

# Each row of classes_wn.txt is read as "<integer id> <name>": the name has
# '+' replaced by '_', and the id is shifted down by one so it can be used as
# a 0-based row index into the predicate matrix.
with open('../dataset/'+ dataset_name + '/classes_wn.txt', 'r') as classes_file:
    for row in classes_file:
        fields = row.split()
        entity = fields[1].replace('+','_')
        entity_id = int(fields[0]) - 1
        entities.append(entity)
        entities_id.append(entity_id)

In [None]:
# Build the entity-category tree rooted at the WordNet synset 'mammal.n.01'.
# similarity_tree_gamma is passed straight to CategoryTree; the same value
# (0.7) is used by query_similarity further down in this notebook.
ct = al.CategoryTree('mammal.n.01', similarity_tree_gamma=0.7)
# Register every dataset entity as a leaf of the tree.
ct.add_leaves(entities)
# NOTE(review): presumably prunes/collapses redundant intermediate categories —
# confirm against caal.attributelearning.
ct.simplify_tree()

In [None]:
attributes = list()
attributes_id = list()

# Each row of predicates.txt is read as "<integer id> <name>"; the id is
# shifted down by one so it can be used as a 0-based column index.
with open('../dataset/'+ dataset_name + '/predicates.txt', 'r') as f:
    for line in f:
        fields = line.split()
        attribute = fields[1]
        attribute_id = int(fields[0]) - 1
        attributes.append(attribute)
        attributes_id.append(attribute_id)

# Hand the path to loadtxt instead of an open() handle: loadtxt then opens and
# closes the file itself, so no file descriptor is left dangling.
full_table = np.loadtxt('../dataset/'+ dataset_name + '/predicate-matrix-binary.txt')

# Keep only the rows/columns of the loaded entities and attributes, in the
# order in which they were read.
binary_table = full_table[np.asarray(entities_id, dtype=int), :]
binary_table = binary_table[:, np.asarray(attributes_id, dtype=int)]

In [None]:
# Print a text rendering of the whole category tree, labelling every node with
# its `description` attribute.
print(at.RenderTree(ct.root, style=at.ContStyle()).by_attr("description"))

In [None]:
# Tree distance between every ordered pair of distinct entities.
w = at.Walker()
paths = list()
for entity in entities:
    for other in entities:
        # Compare names by value: `is not` tests object identity, which is not
        # a reliable way to tell two strings apart.
        if entity != other:
            paths.append(w.walk(ct.node_dictionary[ct.leaves_to_wn[entity]],
                                ct.node_dictionary[ct.leaves_to_wn[other]]))

# Walker.walk returns (upwards, common, downwards); the distance is the number
# of nodes stepped through on the way up plus on the way down.
distances = [(len(path[0]) + len(path[2])) for path in paths]

min_distance = min(distances)
max_distance = max(distances)
print('min distance between entities: {}'.format(min_distance))
print('max distance between entities: {}'.format(max_distance))

*Testing the method's assumption*: entities in the same category share attribute values

In [None]:
# For every (attribute, category) pair, count how many leaf entities under the
# category have / do not have the attribute and store the Bernoulli variance
# p*(1-p) of the positive fraction (0 means all leaves agree).
hypo_goodness = dict()
categories = list()

# The leaves below a category do not depend on the attribute, so resolve their
# rows in binary_table once instead of once per attribute.
category_rows = dict()
for node in at.LevelOrderIter(ct.root):
    if node.is_leaf:
        continue  # if we are not in a leaf, we must be in a category
    if node.name not in category_rows:
        categories.append(node.name)
    category_rows[node.name] = np.asarray(
        [entities.index(desc.wn_synset.lemma_names()[0].lower())
         for desc in node.descendants if desc.is_leaf],
        dtype=int)

for attribute_id, attribute in enumerate(attributes):
    for category in categories:
        rows = category_rows[category]
        pos = int(np.count_nonzero(binary_table[rows, attribute_id]))
        neg = len(rows) - pos
        hypo_goodness[attribute, category,'pos'] = pos
        hypo_goodness[attribute, category,'neg'] = neg
        fraction = pos/(neg+pos)
        hypo_goodness[attribute, category,'var'] = fraction*(1-fraction)

# One column per collected category, so the matrix always matches the
# `categories` list that is later used for the DataFrame column labels.
hypo_goodness_mat = np.zeros([len(attributes),len(categories)])

for category_id, category in enumerate(categories):
    for attribute_id, attribute in enumerate(attributes):
        hypo_goodness_mat[attribute_id, category_id] = hypo_goodness[attribute, category,'var']

# Row position -> attribute name, used to label the DataFrame rows.
attributes_index_dataframe = dict(enumerate(attributes))

In [None]:
import seaborn as sns

cm = sns.light_palette("green", as_cmap=True)

# Rows are attributes, columns are categories.
df = pd.DataFrame(hypo_goodness_mat, index=attributes, columns=categories)

# round() and .style return new objects, so they must be the cell's last
# expression to be displayed; `df` itself keeps full precision.
df.style.background_gradient(cmap=cm).format('{:.2f}')

In [None]:
# Column-wise mean of the binary table: one value per attribute, averaged over
# all entities.
mean_response_per_attribute = binary_table.mean(axis=0)
mean_response_per_attribute

Let's simulate 4 Active Learners on the entire dataset:
- Random
- Learner C: greedy lowest entropy (closest to the decision boundary)
- Learner M: query similarity (closest to previous query)
- Learner H: hybrid greedy-similarity (phi at 0.8)

In [None]:
def query_similarity(ct, current_index, previous_index, entity_list, gamma=0.7):
    """Compute the Query Similarity (Eq. 6) between two consecutive queries.

    The similarity is exp(-gamma * d), where d is the distance returned by
    ``ct.entities_distance(current_index, previous_index)``.

    Parameters
    ----------
    ct : object exposing ``entities_distance(i, j)``
        The category tree used to measure the distance between entities.
    current_index, previous_index : int
        Indices of the current and of the previous queried entity.
    entity_list : list
        Unused; kept so that existing callers keep working.
    gamma : float, optional
        Decay rate of the similarity with the distance (default 0.7).

    Returns
    -------
    float
        The similarity; 1.0 when the distance is zero.
    """
    return np.exp(-gamma*ct.entities_distance(current_index, previous_index))

In [None]:
## GENERAL PARAMETERS
attribute_index = 0 # starting from which attribute
attribute_number = len(attributes) # number of attributes to learn
trials = 5 # number of run for each learner and attribute
hard_performance_threshold = 0.49 # decides when to make the hard decision

entropies = dict() # analysing entropy values for the greedy learner

# Results, indexed as result[learner][attribute] -> array (query step, trial)
performances = dict()
hard_performances = dict()
query_similarities = dict()
learners = ['random','similar','greedy','hybrid']

for learner in learners:
    performances[learner] = dict()
    hard_performances[learner] = dict()
    query_similarities[learner] = dict()

    print('======================== Learning with {} learner'.format(learner))

    for attribute in range(attribute_index,attribute_index + attribute_number):
        ct.reset_learning()

        perf = -1*np.ones([len(entities),trials]) # performance over time
        hard_perf = np.zeros([len(entities),trials]) # hard performance over time
        qsim = np.zeros([len(entities),trials]) # similarity over time
        learned = -1*np.ones([len(entities),trials]) # vector of the learned attributes, ordered as AwA2
        entr = list()

        print('============ Learning about {}:{} with {}'.format(attribute,attributes[attribute], learner))

        for trial in range(0, trials):
            print('{}: run {}'.format(learner,trial + 1))
            previous_index = None
            ct.reset_learning()
            shuffled_entities = copy(entities)
            random.shuffle(shuffled_entities)

            # NOTE(review): the select_* methods presumably remove the chosen
            # entity from shuffled_entities (only the 'random' branch pops
            # explicitly) — otherwise this loop would not terminate; confirm.
            while len(shuffled_entities) != 0:
                # select question
                # Learner names are compared with ==; `is` would test object
                # identity and only works by accident of string interning.
                if learner == 'ere':
                    awa_index = ct.select_ere_query(shuffled_entities)
                elif learner == 'greedy':
                    awa_index, eee = ct.select_greedy_query(shuffled_entities)
                    entr.append(eee)
                elif learner == 'similar':
                    awa_index, ddd = ct.select_closest_query(shuffled_entities, previous_index)
                elif learner == 'hybrid':
                    awa_index, sss = ct.select_hybrid_query(shuffled_entities, previous_index,min_distance,max_distance,phi=0.8)
                elif learner == 'random':
                    awa_index = ct.leaves.index(shuffled_entities[-1])
                    shuffled_entities.pop()
                else:
                    print('ERROR: unknown learner')
                    break

                # 0-based index of the query that has just been selected
                step = len(entities) - len(shuffled_entities) - 1

                # compute the question similarity (undefined for the first query)
                if previous_index is not None:
                    qsim[step, trial] = query_similarity(ct, awa_index, previous_index, entities)
                previous_index = awa_index

                # ask question
                print('Asking about {} and {}'.format(entities[awa_index], attributes[attribute]))
                learned[awa_index,trial] = binary_table[awa_index, attribute]

                # update backward the tree
                ct.node_dictionary[ct.leaves_to_wn[entities[awa_index]]].push_information(learned[awa_index, trial] == 1)

                # compute the current prediction power
                predicted = np.array(learned[:,trial], copy=True)
                hard_predicted = np.array(learned[:,trial], copy=True)
                for i in range(0,len(predicted)):
                    if predicted[i]==-1:
                        # not asked yet: score the tree's belief against the truth
                        theta = ct.node_dictionary[ct.leaves_to_wn[entities[i]]].theta
                        predicted[i] = theta if binary_table[i, attribute] == 1 else (1 - theta)
                        if theta > 1 - hard_performance_threshold:
                            # confident "yes": correct iff the truth is 1
                            hard_predicted[i] = 1 if binary_table[i, attribute] == 1 else 0
                        elif theta < hard_performance_threshold:
                            # confident "no": correct iff the truth is 0
                            hard_predicted[i] = 1 if binary_table[i, attribute] == 0 else 0
                        else:
                            # too close to 0.5: no hard decision, counted as wrong
                            hard_predicted[i] = 0
                    else:
                        # already asked: the answer is known
                        predicted[i] = 1
                        hard_predicted[i] = 1
                perf[step, trial] = np.sum(predicted)
                hard_perf[step, trial] = np.sum(hard_predicted)

        performances[learner][attribute] = perf
        hard_performances[learner][attribute] = hard_perf
        query_similarities[learner][attribute] = qsim

        if learner == 'greedy':
            entropies[attribute] = entr

In [None]:
# Helper functions for plotting

def compute_statistics_on_performance(performance, many_trial=False):
    """Average and spread of a measure at every query step.

    Parameters
    ----------
    performance : dict
        Maps a key (the attribute) to a 2-D array of shape (steps, trials).
    many_trial : bool, optional
        If False, all the trials of all the entries are pooled side by side.
        If True, every entry is first averaged over its trials, so each entry
        contributes a single column.

    Returns
    -------
    (avg, std) : tuple of 1-D arrays
        Row-wise mean and standard deviation of the pooled matrix.

    Raises
    ------
    ValueError
        If ``performance`` is empty.
    """
    if not performance:
        raise ValueError('performance is empty: nothing to aggregate')
    if many_trial:
        columns = [np.average(value, axis=1).reshape((-1,1)) for value in performance.values()]
    else:
        columns = list(performance.values())
    # Stack once instead of growing the matrix with a new copy per entry.
    matrix = np.hstack(columns)
    avg = np.average(matrix,axis=1)
    std = np.std(matrix,axis=1)
    return avg, std

def compute_similarity_score(similarity, many_trial=False, floor=None):
    """Total similarity of every entry of ``similarity``.

    With ``many_trial`` False each entry is summed as a whole (after
    subtracting ``floor`` element-wise when given) and the returned std array
    is empty. With ``many_trial`` True each entry is summed per trial (axis 0),
    ``floor`` is subtracted from those per-trial sums when given, and the mean
    and standard deviation over the trials are returned.

    Returns a pair of numpy arrays ``(avg, std)`` with one item per entry.
    """
    means = []
    spreads = []
    for scores in similarity.values():
        if many_trial:
            per_trial = np.sum(scores, axis=0)
            if floor is not None:
                per_trial = per_trial - floor
            means.append(np.average(per_trial))
            spreads.append(np.std(per_trial))
        elif floor is None:
            means.append(np.sum(scores))
        else:
            means.append(np.sum(scores - floor))
    return np.array(means), np.array(spreads)

def compute_statistics_per_attribute(measure, remove_floor = 0):
    """Mean and standard deviation, over the trials, of the summed measure.

    For every entry of ``measure``, ``remove_floor`` is subtracted and the
    result is summed along axis 0, giving one total per trial. Returns two
    numpy arrays (mean, std) with one item per entry.
    """
    totals = [np.sum(value - remove_floor, axis=0) for value in measure.values()]
    means = np.array([np.average(total) for total in totals])
    spreads = np.array([np.std(total) for total in totals])
    return means, spreads

def plot_performance(avg, std, floor, plt, c, label_choice, plot_standard_deviation=True):
    """Draw ``avg - floor`` against 1..len(avg) on the given pyplot-like object.

    ``floor`` set to None means no offset. When ``plot_standard_deviation`` is
    True a translucent band of one standard deviation around the curve is
    added with ``fill_between``. ``c`` is the colour, ``label_choice`` the
    legend label.
    """
    baseline = np.zeros(np.shape(avg)) if floor is None else floor
    plt.plot(range(1,len(avg)+1), avg - baseline, color=c, label=label_choice)
    if not plot_standard_deviation:
        return
    upper = avg + std - baseline
    lower = avg - std - baseline
    plt.fill_between(range(1,len(avg)+1), upper, lower, where=lower <= upper,
                     facecolor=c, alpha=0.1, interpolate=True)
        
def compute_mean_and_std(measure, remove_floor = 0):
    """Overall mean and standard deviation of the per-trial totals.

    Every entry of ``measure`` has ``remove_floor`` subtracted and is summed
    along axis 0; the mean and the standard deviation are then taken over all
    the totals of all the entries together.
    """
    totals = [np.sum(value - remove_floor, axis=0) for value in measure.values()]
    return np.mean(totals), np.std(totals)

A couple of plots to visualize the performance of the different learners

In [None]:
plt.figure(figsize=(16,24))
plt.subplot(5, 1, 1)

# ---------------------------------------------------------------------------
# PERFORMANCE PLOT
# ---------------------------------------------------------------------------
colors = {'ere':'r','similar':'g','greedy':'r','random':'m', 'hybrid':'b'}
if 'random' not in learners:
    learners.append('random')
plot_std = False

avg_performances = dict()
std_performances = dict()

for learner in learners:
    avg_performances[learner], std_performances[learner] = compute_statistics_on_performance(
        performances[learner], many_trial=True)

# Reference curve subtracted from every learner: a straight line from
# 1 + (N-1)/2 after the first query up to N after the last one.
flat_performance = np.linspace(1 + (len(entities) - 1)/2, len(entities), num=len(entities))

# plot_performance draws its curves on x = 1..N, so the zero baseline uses the
# same query numbers (it was previously shifted to 0..N-1).
query_numbers = range(1, len(flat_performance) + 1)
zero_baseline = np.zeros(len(flat_performance))

for learner in learners:
    plot_performance(avg_performances[learner], std_performances[learner], flat_performance,
                     plt, colors[learner], learner, plot_standard_deviation=plot_std)

plt.plot(query_numbers, zero_baseline, label='flat', color='k', linestyle='--')
plt.title('performance - improvement wrt flat performance')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

# ---------------------------------------------------------------------------
# HARD PERFORMANCE PLOT
# ---------------------------------------------------------------------------
plt.subplot(5, 1, 2)
avg_hard_performances = dict()
std_hard_performances = dict()

for learner in learners:
    avg_hard_performances[learner], std_hard_performances[learner] = compute_statistics_on_performance(
        hard_performances[learner], many_trial=True)

flat_hard_performance = np.linspace(1 + (len(entities) - 1)/2, len(entities), num=len(entities))

for learner in learners:
    plot_performance(avg_hard_performances[learner], std_hard_performances[learner],
                     flat_hard_performance, plt, colors[learner], learner,
                     plot_standard_deviation=plot_std)

plt.plot(query_numbers, zero_baseline, label='flat', color='k', linestyle='--')

plt.title('hard performance - improvement wrt flat performance')

# ---------------------------------------------------------------------------
# PERFORMANCE FOR EACH ATTRIBUTE
# ---------------------------------------------------------------------------
ax = plt.subplot(5,1,3)
performance_per_attribute = dict()
std_performance_per_attribute = dict()

for learner in learners:
    performance_per_attribute[learner], std_performance_per_attribute[learner] = \
        compute_statistics_per_attribute(hard_performances[learner])

for learner in learners:
    plot_performance(performance_per_attribute[learner], std_performance_per_attribute[learner],
                     None, plt, colors[learner], learner, plot_standard_deviation=plot_std)

plt.title('hard performance for each attribute - integral of the curve over time')
plt.xlabel('attribute')

# One tick per plotted point: plot_performance places item k at x = k + 1.
major_ticks = np.arange(1, len(attributes) + 1, 1)

ax.set_xticks(major_ticks)
ax.grid()

# ---------------------------------------------------------------------------
# QUERY SIMILARITY PLOT
# ---------------------------------------------------------------------------
avg_query_similarities = dict()
std_query_similarities = dict()

for learner in learners:
    avg_query_similarities[learner], std_query_similarities[learner] = \
        compute_statistics_on_performance(query_similarities[learner], many_trial=True)

plt.subplot(5, 1, 4)

for learner in learners:
    plot_performance(avg_query_similarities[learner], std_query_similarities[learner],
                     np.zeros(np.shape(avg_query_similarities[learner])), plt, colors[learner],
                     learner, plot_standard_deviation=plot_std)

plt.title('query similarity over time')
plt.xlabel('number of queries')

# ---------------------------------------------------------------------------
# QUERY SIMILARITY for EACH ATTRIBUTE
# ---------------------------------------------------------------------------
ax = plt.subplot(5, 1, 5)
similarity_score = dict()
std_score = dict()

for learner in learners:
    similarity_score[learner], std_score[learner] = \
        compute_statistics_per_attribute(query_similarities[learner])

for learner in learners:
    plot_performance(similarity_score[learner], std_score[learner], None, plt, colors[learner],
                     learner, plot_standard_deviation=plot_std)

plt.title('sum of query similarity')
plt.xlabel('attribute')
major_ticks = np.arange(1, len(attributes) + 1, 1)

ax.set_xticks(major_ticks)
ax.grid()

Plot behind Figure 2

In [None]:
plt.style.use('seaborn-notebook')
plt.figure(figsize=(12,8))
avg_hard_performances = dict()
std_hard_performances = dict()

color_tuned = {'similar':'xkcd:jungle green','greedy':'xkcd:red orange','random':'xkcd:medium purple', 'hybrid':'xkcd:medium blue'}

# Pool every trial of every attribute (many_trial left at its default False).
for learner in learners:
    avg_hard_performances[learner], std_hard_performances[learner] = compute_statistics_on_performance(hard_performances[learner])

flat_hard_performance = np.linspace(1 + (len(entities) - 1)/2, len(entities), num=len(entities))

# NOTE: plot_std is defined in the overview-plots cell above.
for learner in ['similar', 'greedy', 'hybrid']:
    plot_performance(avg_hard_performances[learner], std_hard_performances[learner],
                     flat_hard_performance, plt, color_tuned[learner], learner,
                     plot_standard_deviation=plot_std)

# Zero baseline on the same x = 1..N query numbers used by plot_performance
# (it was previously shifted to 0..N-1 and sized via a leaked loop variable).
plt.plot(range(1, len(flat_hard_performance) + 1), np.zeros(len(flat_hard_performance)),
         label='flat', color='xkcd:steel grey')

plt.legend(bbox_to_anchor=(.8, 1), loc=2, borderaxespad=0.)
plt.xlabel('Query number')
plt.ylabel('Correct predictions')

# Timestamped file name (month-day_hour-minute) so that runs do not overwrite
# each other.
ts = time.time()
giorno = datetime.datetime.fromtimestamp(ts).strftime('%m%d_%H%M')
tikz_save('plots/performance_{}.tikz'.format(giorno))

Plot the Cumulative Query Similarity score $\mathcal{S}$

In [None]:
plt.figure(figsize=(12,6))
similarity_score = dict()
std_score = dict()

color_tuned = {'similar':'xkcd:jungle green','greedy':'xkcd:red orange','random':'xkcd:steel grey', 'hybrid':'xkcd:medium blue'}

# Per attribute: mean and std over the trials of the summed query similarity.
for learner in learners:
    similarity_score[learner], std_score[learner] = \
        compute_statistics_per_attribute(query_similarities[learner])

for learner in learners:
    plot_performance(similarity_score[learner], std_score[learner], None, plt,
                     color_tuned[learner], learner, plot_standard_deviation=True)

plt.title('sum of query similarity')
plt.xlabel('attribute')
# Attributes are plotted at x = 1..len(attributes); derive the limit from the
# data instead of hard-coding the AwA2 attribute count.
plt.xlim([1, len(attributes)])
ax = plt.gca()
ax.grid()

# Timestamped file name so that runs do not overwrite each other.
ts = time.time()
giorno = datetime.datetime.fromtimestamp(ts).strftime('%m%d_%H%M')
tikz_save('plots/similarity_{}.tikz'.format(giorno))

# Overall mean / std of the cumulative similarity, one scalar per learner.
sim_score = dict()
std_sim_score = dict()
for learner in learners:
    sim_score[learner], std_sim_score[learner] = \
        compute_mean_and_std(query_similarities[learner])
print(sim_score)
print(std_sim_score)

In [None]:
# Cumulative hard performance above the flat baseline, per learner and
# attribute: mean and std over the trials.
performance_per_attribute = dict()
std_performance_per_attribute = dict()

flat_performance = np.linspace(1 + (len(entities) - 1)/2, len(entities), num=len(entities))

# One copy of the flat baseline per trial column. The row count follows
# len(entities) instead of a hard-coded 50.
floor = np.repeat(flat_performance.reshape(-1, 1), trials, axis=1)

for learner in learners:
    avg = list()
    std = list()
    for value in hard_performances[learner].values():
        gain = value - floor
        # After the last query the gain over the baseline must sum to zero
        # across the trials.
        assert np.sum(gain[-1,:]) == 0
        per_trial_total = np.sum(gain, axis=0)
        avg.append(np.average(per_trial_total))
        std.append(np.std(per_trial_total))
    performance_per_attribute[learner] = avg
    std_performance_per_attribute[learner] = std

In [None]:
plt.figure(figsize=(16,4))

# One column per learner (in `learners` order) plus a last column holding the
# original attribute index, so the labels can follow the rows when sorting.
matrix = np.hstack(
    [np.array(performance_per_attribute[learner]).reshape(-1,1) for learner in learners]
    + [np.arange(0, len(attributes), 1).reshape(-1,1)])

# Sort by the greedy learner's column, looked up by name rather than by a
# hard-coded position in `learners`.
greedy_column = learners.index('greedy')
matrix = matrix[matrix[:,greedy_column].argsort()]

for i, learner in enumerate(learners):
    plt.plot(range(0,len(matrix[:,i])), matrix[:,i], color=colors[learner], label=learner)

plt.title('sum of query hard performance per attribute')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.xlabel('attribute from worst performing to best (according to the greedy learner)')

# Tick positions must be a flat 1-D sequence.
major_ticks = np.arange(0, len(attributes), 1)

ax = plt.gca()
ticks_label = [attributes[int(t)] for t in matrix[:,len(learners)].tolist()]
plt.xticks(major_ticks,ticks_label,rotation='vertical')
plt.grid()

Plot behind Figure 3

In [None]:
plt.figure(figsize=(16,4))

selected_learners = ['greedy','similar','hybrid']

# Two columns per selected learner (mean, std) followed by a last column with
# the original attribute index, so the labels can follow the rows when sorting.
blocks = list()
for learner in selected_learners:
    blocks.append(np.array(performance_per_attribute[learner]).reshape(-1,1))
    blocks.append(np.array(std_performance_per_attribute[learner]).reshape(-1,1))
blocks.append(np.arange(0, len(attributes), 1).reshape(-1,1))
matrix = np.hstack(blocks)

# Sort by the greedy learner's mean column, looked up by name.
matrix = matrix[matrix[:,2*selected_learners.index('greedy')].argsort()]

positions = range(0, matrix.shape[0])

for i, learner in enumerate(selected_learners):
    avg = matrix[:,2*i]
    std = matrix[:,2*i+1]
    plt.plot(positions, avg, color=color_tuned[learner], label=learner)
    y1 = avg + std
    y2 = avg - std
    plt.fill_between(positions, y1, y2, where=y2 <= y1, facecolor=color_tuned[learner], alpha=0.1,interpolate=True)

# Zero line: no gain over the flat baseline.
plt.plot(positions, np.zeros(matrix.shape[0]), color='xkcd:steel grey')
plt.title('sum of query hard performance per attribute')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.xlabel('attribute from worst performing to best (according to the greedy learner)')

# Tick positions must be a flat 1-D sequence; the x range follows the number
# of attributes instead of a hard-coded 84.
major_ticks = np.arange(0, len(attributes), 1)
plt.xlim([0, len(attributes) - 1])

ax = plt.gca()
ticks_label = [attributes[int(t)] for t in matrix[:,len(selected_learners)*2].tolist()]
plt.xticks(major_ticks,ticks_label,rotation='vertical')
plt.grid()

# Timestamped file name so that runs do not overwrite each other.
ts = time.time()
giorno = datetime.datetime.fromtimestamp(ts).strftime('%m%d_%H%M')
tikz_save('plots/cumperf_{}.tikz'.format(giorno))