In [1]:
import preprocessing as preproc
from utils import *
import collections
import numpy as np
import nltk
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import stopwords
from tqdm import tqdm


Using Theano backend.
Using gpu device 0: GeForce GTX 1080 (CNMeM is disabled, cuDNN 5110)


In [2]:
class MetricsData(object):
    """Bundle the evaluation results computed for a single word.

    Attributes are set verbatim from the constructor arguments:
      word         -- the word the metrics refer to
      appearances  -- value passed by the caller as the number of instances
      metrics_dict -- mapping of metric name to metric value
    """

    def __init__(self, word, appearances, metrics_dict):
        # Plain record: no validation or copying is performed.
        self.metrics_dict = metrics_dict
        self.appearances = appearances
        self.word = word
        

In [3]:
def status(truth_and_prediction):
    """Classify one (ground truth, prediction) pair as a confusion-matrix cell.

    Args:
        truth_and_prediction: a two-item iterable ``(groundTruthValue,
            predictedValue)``; both items are evaluated for truthiness.

    Returns:
        "TP", "FN", "FP" or "TN".

    Raises:
        ValueError / TypeError: if the argument cannot be unpacked into
            exactly two items.
    """
    # Unpack inside the body: tuple parameters in a signature are
    # Python-2-only syntax (removed by PEP 3113). Callers still pass a
    # single pair, exactly as before.
    groundTruthValue, predictedValue = truth_and_prediction

    if groundTruthValue:
        return "TP" if predictedValue else "FN"

    return "FP" if predictedValue else "TN"

def get_metrics(true_word_mask,predicted_word_mask):
    """Compute precision and recall of a predicted mask against the true mask.

    Args:
        true_word_mask: iterable of truthy/falsy ground-truth flags.
        predicted_word_mask: iterable of truthy/falsy predicted flags.
            The masks are paired with ``zip``, so surplus entries of the
            longer one are ignored.

    Returns:
        dict with keys "precision" and "recall" (floats). A metric whose
        denominator would be zero is reported as 0.0.
    """
    # Count the outcomes in a single pass. True negatives are not needed by
    # either metric, so they are not tracked.
    tp = fp = fn = 0
    for truth, predicted in zip(true_word_mask, predicted_word_mask):
        if truth and predicted:
            tp += 1
        elif predicted:
            fp += 1
        elif truth:
            fn += 1

    # max(1, ...) keeps the division defined when there are no positives.
    precision = float(tp)/max(1,(tp+fp))
    recall = float(tp)/max(1,(tp+fn))

    return {"precision":precision,"recall":recall}

def containsWord(sentence, word):
    """Tell whether ``word`` occurs in ``sentence`` as a whole word.

    Args:
        sentence: either a caption string or a container of tokens.
        word: the word (or whitespace-separated phrase) to look for.

    Returns:
        bool. For string sentences the match is on whitespace-delimited
        tokens, so "red" does not match "covered". For any other container
        the plain ``in`` membership test is used. Matching is case-sensitive.
    """
    if not hasattr(sentence, "split"):
        # Already tokenised (list/set/...): membership is word-level.
        return word in sentence

    query_tokens = word.split()
    if not query_tokens:
        # An empty query is not a word that can appear in a caption.
        return False

    # A bare `word in sentence` on strings is a substring test and would
    # count partial-word hits. Normalise whitespace and pad with spaces so
    # only whole tokens (or whole-token phrases) can match.
    padded_sentence = " " + " ".join(sentence.split()) + " "
    padded_query = " " + " ".join(query_tokens) + " "
    return padded_query in padded_sentence

def get_word_mask(captions,word):
    """Return one containsWord(caption, word) result per caption, in order."""
    mask = []
    for caption in captions:
        mask.append(containsWord(caption, word))
    return mask

def computeMetricsData(word):
    """Build the MetricsData record for ``word``.

    Reads the module-level ``true_captions`` and ``predicted_captions``
    lists, which must be defined before this function is called.

    Args:
        word: the word whose presence is compared between the true and the
            predicted captions.

    Returns:
        MetricsData holding the word, the number of true captions that
        contain it, and the precision/recall dict from get_metrics.
    """
    # Use the `word` argument itself. The previous body read a global named
    # `metric_word`, which only existed because Python 2 list comprehensions
    # leak their loop variable into the enclosing scope.
    true_word_mask = get_word_mask(true_captions,word)
    predicted_word_mask = get_word_mask(predicted_captions,word)

    # Booleans sum as 0/1, so this is the count of true captions with the word.
    nr_instances = sum(true_word_mask)
    metrics_dict = get_metrics(true_word_mask,predicted_word_mask)

    return MetricsData(word,nr_instances,metrics_dict)

def displayMetrics(metric_data_arr):
    """Print word, instance count, precision and recall for every entry.

    Each entry must expose ``word``, ``appearances`` and a ``metrics_dict``
    with the keys "precision" and "recall". A blank line follows each entry.
    """
    # (printed prefix, metrics_dict key) in output order.
    report_rows = (("\tPrecision: ", "precision"), ("\tRecall: ", "recall"))

    for entry in metric_data_arr:
        print(entry.word+" => "+str(entry.appearances)+" instances")
        for prefix, key in report_rows:
            print(prefix+str(entry.metrics_dict[key]))
        print("")

def displayAggMetrics(metric_data_arr):
    """Print every metric averaged over the entries, weighted by appearances.

    Args:
        metric_data_arr: sequence of objects exposing ``appearances`` and
            ``metrics_dict``. The metric names are taken from the first
            entry's ``metrics_dict``.

    Prints one "Weighted <metric>: <value>" line per metric. Nothing is
    printed for an empty sequence. When the total number of appearances is
    zero the weighted value is reported as 0.0 instead of dividing by zero.
    """
    if len(metric_data_arr) == 0:
        # Nothing to aggregate, and no first entry to take metric names from.
        return

    total_instances = sum(metric_data.appearances for metric_data in metric_data_arr)

    for key in metric_data_arr[0].metrics_dict:
        metric_sum = sum(metric_data.appearances*metric_data.metrics_dict[key] for metric_data in metric_data_arr)
        # With zero total weight every term of metric_sum is zero as well.
        weighted_metric = float(metric_sum)/total_instances if total_instances else 0.0
        print("Weighted "+str(key)+str(": ") + str(weighted_metric))
        
def bleu_score_metric(reality, prediction):
    """Return NLTK's sentence-level BLEU of ``prediction`` against ``reality``.

    ``reality`` is used as the only reference.
    NOTE(review): sentence_bleu works on token sequences; if plain strings
    are passed here the n-grams are presumably character-level -- confirm
    that the callers tokenise as intended.
    """
    references = [reality]
    return nltk.translate.bleu_score.sentence_bleu(references, prediction)

def compute_bleu_score_metric(predicted_captions, real_captions):
    """Average the sentence BLEU of each prediction against its reference.

    Args:
        predicted_captions: indexable sequence of predictions.
        real_captions: iterable whose elements are themselves iterables;
            they are flattened by one level and the i-th item of the
            flattened list is the reference for ``predicted_captions[i]``.
            NOTE(review): if an element is a plain string, ``list()`` splits
            it into characters -- confirm the shape callers pass in.

    Returns:
        ``np.average`` of the per-caption BLEU scores.

    Raises:
        IndexError: if the flattened references are fewer than the
            predictions.
    """
    # Local import keeps this block self-contained.
    from itertools import chain

    # chain.from_iterable flattens in linear time. The previous
    # reduce(list.__add__, ...) re-copied the accumulated list on every step,
    # failed on empty input, and relied on the Python-2-only builtin `reduce`.
    real_captions = list(chain.from_iterable(list(real_caption) for real_caption in tqdm(real_captions)))
    bleu_scores = [bleu_score_metric(real_captions[i], predicted_captions[i]) for i in tqdm(range(len(predicted_captions)))]
    return np.average(bleu_scores)

def get_nr_repetitions(predicted_caption):
    """Return half the summed counts of the words occurring more than once.

    The counts come from ``preproc.most_common_words([predicted_caption])``,
    which is iterated as (word, count) pairs.
    NOTE(review): the final ``/ 2`` is integer division on Python 2 and true
    division on Python 3 -- confirm which one is intended.
    """
    word_counts = preproc.most_common_words([predicted_caption])

    repeated_total = 0
    for (_word, occurrences) in word_counts:
        if occurrences > 1:
            repeated_total += occurrences

    return repeated_total / 2

# Load Data

In [None]:
# Path configuration. The *_folder and *_data_path names are not defined in
# this notebook -- presumably they come from `from utils import *`.
base_path = app_3_length_15_data_path

train_path = base_path + train_folder
val_path = base_path + val_folder

# The "test" captions are read from the validation batches.
test_raw_captions_path = val_path + batch_folder + captions_folder
test_indexed_captions_path = val_path + batch_folder + indexed_captions_folder

# File name of the stored predictions that are evaluated below.
specific_captions = "app_3_length_15_past_word_70_epoch_predicted_captions"

In [None]:
# Only the raw captions are used; the indexed ones are discarded.
test_captions_raw, _ = preproc.get_captions_raw_and_indexed(
    test_raw_captions_path,
    test_indexed_captions_path,
)
# Keep the first item of every raw caption entry as the ground truth.
true_captions = [list(raw_entry)[0] for raw_entry in test_captions_raw]

# Cut the stored predictions to at most the number of ground-truth captions.
max_size = len(true_captions)
predictions_path = val_path+predictions_folder+specific_captions
predicted_captions = preproc.load_obj(predictions_path)[:max_size]


# Show both sizes so a length mismatch is visible at a glance.
for caption_list in (true_captions, predicted_captions):
    print(len(caption_list))


# Specific words metrics

In [None]:
# Hand-picked words for which per-word precision/recall are computed.
metric_words = ['tennis','snow','train','cat','red','yellow','food','kitchen']
# One MetricsData record per word, in the same order as metric_words.
metric_data_arr = [computeMetricsData(metric_word) for metric_word in tqdm(metric_words)]

In [None]:
# Print instance count, precision and recall for each selected word.
displayMetrics(metric_data_arr)

In [None]:
# Print each metric averaged over the selected words, weighted by appearances.
displayAggMetrics(metric_data_arr)

# All words metrics

In [None]:
# Every distinct word of the ground-truth captions. most_common_words is
# presumably a sequence of (word, count) pairs, hence dict(...).keys() -- TODO confirm.
all_words = dict(preproc.most_common_words(true_captions)).keys()
# One MetricsData record per vocabulary word, then the appearance-weighted averages.
all_metric_data_arr = [computeMetricsData(metric_word) for metric_word in tqdm(all_words)]
displayAggMetrics(all_metric_data_arr)

# BLEU Metric

In [None]:
# Average sentence-level BLEU of the predictions against the ground truth.
# NOTE(review): compute_bleu_score_metric flattens every element of its second
# argument with list(), while true_captions already holds a single item per
# caption (list(...)[0]) -- confirm this is the shape the function expects.
compute_bleu_score_metric(predicted_captions,true_captions)

# Repetition Metric

In [None]:
# Repetition count of every predicted caption, summed over the whole set.
repetitions_per_caption = [get_nr_repetitions(predicted_caption) for predicted_caption in tqdm(predicted_captions)]
total_nr_repetitions = sum(repetitions_per_caption)

# Number of whitespace-separated words over all predicted captions.
words_per_caption = (len(caption.split()) for caption in tqdm(predicted_captions))
total_nr_words = sum(words_per_caption)

# Repetitions per predicted word.
repetition_score = float(total_nr_repetitions) / total_nr_words

print("Total nr repetitions = "+str(total_nr_repetitions))
print("Total nr words = "+str(total_nr_words))
print("Repetition score = "+str(repetition_score))
