In [1]:
import pandas as pd
import numpy as np


In [3]:
# Load the two annotation tables compared in this notebook.
# NOTE(review): the names suggest an RRF-fused ranking vs. a simpler baseline — confirm.
annotations_rrf = pd.read_csv('annotations_rrf.csv')
annotations_simple = pd.read_csv('annotations_simple.csv')


In [4]:
# Count annotated rows per (ingredients, keywords) query.
# groupby drops rows whose key contains NaN by default, so ingredient-only and
# keyword-only queries are not listed in this count.
annotations_rrf.groupby(['ingredients', 'keywords']).size()

ingredients                                                                                                                keywords                          
almond flour, eggs, coconut oil                                                                                            gluten-free, dessert                  10
beans                                                                                                                      crock pot                             10
beef, bell peppers, onion                                                                                                  quick, stir-fry                       10
chicken, garlic, onion, tomato, basil, oregano, parsley, thyme, rosemary, lemon, olive oil, butter, salt, pepper, paprika  spicy                                 10
chicken, garlic, rice                                                                                                      spicy, quick                          10
eggplant, tomato, feta

In [5]:
# Show the RRF annotation table for a visual sanity check.
annotations_rrf

Unnamed: 0,ingredients,keywords,url,recipe,score,label
0,"broccoli, chicken",,https://www.food.com/recipe/chicken-broccoli-a...,"{'_id': {'$oid': '67d41dd8293985ce9b43d92b'}, ...",0.999399,Relevant
1,"broccoli, chicken",,https://www.food.com/recipe/elvas-chicken-and-...,"{'_id': {'$oid': '67d44d48293985ce9b450790'}, ...",0.999206,Relevant
2,"broccoli, chicken",,https://www.food.com/recipe/finicky-feline-die...,"{'_id': {'$oid': '67d4518b293985ce9b452520'}, ...",0.998995,Relevant
3,"broccoli, chicken",,https://www.food.com/recipe/chicken-divan-386117,"{'_id': {'$oid': '67d41918293985ce9b43bba5'}, ...",0.998896,Not Relevant
4,"broccoli, chicken",,https://www.food.com/recipe/chicken-and-brocco...,"{'_id': {'$oid': '67d416a4293985ce9b43abfd'}, ...",0.998513,Relevant
...,...,...,...,...,...,...
95,"eggplant, tomato, feta cheese, olives, cucumbe...","Mediterranean, vegan, fresh, quick",https://www.food.com/recipe/greek-walnut-salsa...,"{'_id': {'$oid': '67d4625d293985ce9b459a83'}, ...",0.166667,Not Relevant
96,"eggplant, tomato, feta cheese, olives, cucumbe...","Mediterranean, vegan, fresh, quick",https://www.food.com/recipe/greek-salad-with-b...,"{'_id': {'$oid': '67d46267293985ce9b459aca'}, ...",0.142857,Relevant
97,"eggplant, tomato, feta cheese, olives, cucumbe...","Mediterranean, vegan, fresh, quick",https://www.food.com/recipe/horiatiki-salata-1...,"{'_id': {'$oid': '67d4701a293985ce9b45fa09'}, ...",0.125000,Relevant
98,"eggplant, tomato, feta cheese, olives, cucumbe...","Mediterranean, vegan, fresh, quick",https://www.food.com/recipe/baked-veggie-quesa...,"{'_id': {'$oid': '67d3f03b293985ce9b42b994'}, ...",0.111111,Relevant


In [6]:
# Count annotated rows per (ingredients, keywords) query.
# groupby drops rows whose key contains NaN by default, so ingredient-only and
# keyword-only queries are not listed in this count.
annotations_simple.groupby(['ingredients', 'keywords']).size()

ingredients                                                                                                                keywords                          
almond flour, eggs, coconut oil                                                                                            gluten-free, dessert                  10
beans                                                                                                                      crock pot                             10
beef, bell peppers, onion                                                                                                  quick, stir-fry                       10
chicken, garlic, onion, tomato, basil, oregano, parsley, thyme, rosemary, lemon, olive oil, butter, salt, pepper, paprika  spicy                                 10
chicken, garlic, rice                                                                                                      spicy, quick                          10
eggplant, tomato, feta

In [7]:
def compute_ap(query_scored, k=10):
    """Compute average precision at ``k`` for a single query.

    Parameters
    ----------
    query_scored : list of dict
        One dict per retrieved result, each holding a sortable ``'score'`` and
        a ``'label'``. A result counts as a hit only when its label is exactly
        ``"Relevant"``.
    k : int, default 10
        Number of top-scored results to evaluate.

    Returns
    -------
    float
        Mean of precision@rank taken at the ranks of the relevant results
        inside the top ``k``. ``0.0`` when the top ``k`` holds no relevant
        result (which includes an empty input).
    """
    top_k = sorted(query_scored, key=lambda x: x['score'], reverse=True)[:k]
    # Walk the truncated ranking itself: indexing it with
    # range(len(query_scored)) overruns as soon as more than k results are given.
    relevancy = np.array([1 if item['label'] == "Relevant" else 0 for item in top_k])

    n_relevant = relevancy.sum()
    if n_relevant == 0:
        # Avoid 0/0 -> NaN; same convention as compute_ap_df.
        return 0.0

    # precision[i] is the fraction of hits among the first i + 1 results.
    precision = np.cumsum(relevancy) / np.arange(1, len(relevancy) + 1)
    return np.sum(precision * relevancy) / n_relevant

def compute_map(querries_all, k=10):
    """Mean average precision over a collection of queries.

    ``querries_all`` holds one entry per query; each entry is the list of
    scored result dicts that ``compute_ap`` accepts. ``k`` is forwarded to
    ``compute_ap`` unchanged. Returns the arithmetic mean of the per-query
    average precisions.
    """
    per_query_ap = [compute_ap(results, k) for results in querries_all]
    return np.mean(per_query_ap)

def compute_ap_df(query_scored, k=10):
    """Average precision at ``k`` for one query stored as a dataframe.

    ``query_scored`` needs a ``'score'`` column (higher ranks first) and a
    ``'label'`` column; only the exact label ``"Relevant"`` counts as a hit.
    Returns ``0.0`` when none of the top ``k`` rows is relevant.
    """
    top_k = query_scored.sort_values(by='score', ascending=False).head(k)
    hits = np.array([int(label == "Relevant") for label in top_k['label'].values])

    n_hits = np.sum(hits)
    if n_hits == 0:
        return 0.0

    # Precision after each rank, then averaged over the ranks that are hits.
    ranks = np.arange(len(hits)) + 1
    precision_at_rank = np.cumsum(hits) / ranks
    return np.sum(precision_at_rank * hits) / n_hits

def compute_map_df(annotations, k=10):
    """Mean average precision over every query in an ungrouped dataframe.

    Rows are grouped on the ``"ID"`` column, ``compute_ap_df`` is evaluated on
    each group with the given ``k``, and the per-group values are averaged.
    An empty frame has no queries to score, so NaN is returned for it.
    """
    if annotations.empty:
        return np.nan

    per_query_ap = annotations.groupby("ID").apply(
        lambda group: compute_ap_df(group, k=k),
        include_groups=False,
    )
    return np.mean(per_query_ap.values)
    

def compute_score_for_annotations(annotations, k=10):
    """Compute MAP@k separately for the three kinds of query in ``annotations``.

    Parameters
    ----------
    annotations : pandas.DataFrame
        Needs the columns ``'ingredients'``, ``'keywords'``, ``'label'`` and
        ``'score'``. The input frame is not modified.
    k : int, default 10
        Cut-off forwarded to ``compute_map_df``.

    Returns
    -------
    tuple
        ``(map_ingredient, map_keyword, map_both)``:
        rows with missing keywords, rows with missing ingredients, and rows
        where both are present. A group without rows yields whatever
        ``compute_map_df`` returns for an empty frame (NaN).

    Notes
    -----
    A row where both ``ingredients`` and ``keywords`` are missing matches the
    first two masks and is therefore counted in both of those groups.
    """
    # Build a fresh frame with .assign instead of writing a new column through
    # .loc on a column subset, which is the chained-assignment pattern that
    # triggers SettingWithCopyWarning.
    # The ID identifies a query as "<ingredients>_<keywords>"; missing values
    # are stringified so every row still gets an ID.
    scored = annotations[['ingredients', 'keywords', 'label', 'score']].assign(
        ID=annotations['ingredients'].astype(str) + '_' + annotations['keywords'].astype(str)
    )

    has_ingredients = scored['ingredients'].notna()
    has_keywords = scored['keywords'].notna()

    # Mean average precision for each of the three query kinds.
    map_ingredient = compute_map_df(scored[~has_keywords], k=k)
    map_keyword = compute_map_df(scored[~has_ingredients], k=k)
    map_both = compute_map_df(scored[has_ingredients & has_keywords], k=k)
    return map_ingredient, map_keyword, map_both

In [8]:
 
# MAP@10 of the RRF annotations, split by query kind.
map_ingredient, map_keyword, map_both = compute_score_for_annotations(annotations_rrf, k=10)

print(
    "Annotations RRF",
    f"MAP for ingredients only: {map_ingredient}",
    f"MAP for keywords only: {map_keyword}",
    f"MAP for both: {map_both}",
    sep="\n",
)

Annotations RRF
MAP for ingredients only: 0.8326719576719577
MAP for keywords only: 1.0
MAP for both: 0.5892937977660201


In [9]:
 
# MAP@10 of the simple annotations, split by query kind.
map_ingredient, map_keyword, map_both = compute_score_for_annotations(annotations_simple, k=10)

print(
    "Annotations Simple",
    f"MAP for ingredients only: {map_ingredient}",
    f"MAP for keywords only: {map_keyword}",
    f"MAP for both: {map_both}",
    sep="\n",
)

Annotations Simple
MAP for ingredients only: 0.8495995002939448
MAP for keywords only: 1.0
MAP for both: 0.5974289808515999
