In [1]:
import os
import re
import numpy as np
import transformers
from sklearn.metrics.pairwise import cosine_similarity
from transformers import GPT2Tokenizer, AutoTokenizer
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


## Defining some utility functions

In [2]:
def print_raw_recipe(full_raw_recipe):
    '''
    Print a raw recipe (containing the special tokens) to be easier to read
    '''
    markdown = re.sub("<RECIPE_(START|END)>", "", full_raw_recipe)
    recipe_n_title = markdown.split("<TITLE_START>")
    title = "# " + recipe_n_title[1].replace("<TITLE_END>", "") + " #\n"
    markdown = recipe_n_title[0].replace("<INPUT_START>", "## Input ingredients ##\n`").replace("<INPUT_END>", "`\n")
    markdown = markdown.replace("<NEXT_INPUT>", "`\n`").replace("<INGR_START>","## Ingredients ##\n* ").replace("<NEXT_INGR>","\n* ").replace("<INGR_END>", "\n")
    markdown = markdown.replace("<INSTR_START>", "## Instructions ##\n1) ")

    # Count each instruction
    count = 2
    while markdown.find("<NEXT_INSTR>") != -1:
        markdown = markdown.replace("<NEXT_INSTR>", f"\n{count}) ", 1)
        count += 1

    markdown = markdown.replace("<INSTR_END>", "\n")
    markdown = re.sub("$ +#", "#", markdown)
    markdown = re.sub("( +`|` +)", "`", markdown)
    print('\n' + title + markdown)

## Creating the dataset

In [3]:
local_path = os.path.normpath(os.getcwd() + os.sep + os.pardir)
sample_path = local_path + "/results/2023-07-25_16-14-49/sample_gpt2.txt"
finetuned_path = local_path + "/results/2023-07-25_16-14-49/finetuned_gpt2.txt"
print(sample_path)
print(finetuned_path)

/home/hazot/code/recipe-generation-project/results/2023-07-25_16-14-49/sample_gpt2.txt
/home/hazot/code/recipe-generation-project/results/2023-07-25_16-14-49/finetuned_gpt2.txt


In [4]:
data_dir="data"

data = {
    "sample": [],
    "finetuned": [],
    "vanilla": []
}

In [5]:
with open(sample_path, 'r') as f:
    content = f.readlines()
    data["sample"] = [content[i * 2].replace('\n','') for i in range(len(content) // 2)]

In [6]:
with open(finetuned_path, 'r') as f:
    content = f.readlines()
    data["finetuned"] = [content[i * 2].replace('\n','') for i in range(len(content) // 2)]

In [7]:
print(len(data['sample']))
print(len(data['finetuned']))

100
1000


In [12]:
print_raw_recipe(data['finetuned'][21])


#  Spiced Christmas Gingersnaps   #
 ## Input ingredients ##
`Mix`
`baking soda`
`cream of tartar`
`egg`
`cinnamon`
`ginger`
`cloves`
`nutmeg`
`salt`
`white sugar`
`butter`
`sugar`
`cinnamon`
 ## Ingredients ##
*  2-1/2 qt. (26 cups) Nestle Quik Original Baking Mix 
*  1 tsp. baking soda 
*  1 tsp. cream of tartar 
*  1 egg, beaten 
*  1 tsp. cinnamon 
*  1 tsp. ginger 
*  1 tsp. cloves 
*  1/4 tsp. nutmeg 
*  1/2 tsp. salt 
*  1 cup white sugar 
*  2 Tbsp. butter or margarine 
*  1 cup sugar 
*  1 tsp. cinnamon 
 ## Instructions ##
1)  Combine dry mix, baking soda, cream of tartar, egg, spices and salt; mix well. 
2)  Gradually add sugar, stirring until well blended. 
3)  Shape dough into 1-in. balls. 
4)  Place on ungreased cookie sheets. 
5)  Press into 1/2-in. circles with bottom of glass dipped in sugar. 
6)  Bake at 325° for 8-10 minutes or until lightly browned. 
7)  Cool 1 minute on cookie sheets; remove to wire racks. 
8)  Sift 2 Tbsp. of the sugar and cinnamon over each cook

## Cosine similarity

In [None]:
tokenizer = AutoTokenizer.from_pretrained(local_path + '/checkpoints/gpt2/checkpoint-gpt2/')

In [None]:
sample_tensor = [tokenizer.encode(recipe) for recipe in data['sample']]
finetuned_tensor = [tokenizer.encode(recipe) for recipe in data['finetuned']]

In [None]:
avg = 0
for k, rec1 in enumerate(sample_tensor):
    best = 0
    for i in range(0,10):
        rec2 = finetuned_tensor[k*10 + i]
        
        # pad
        pad_len = np.abs(len(rec1) - len(rec2))
        if len(rec1) < len(rec2):
            rec1.extend([0]*pad_len)
        else:
            rec2.extend([0]*pad_len)
        
        cos = cosine_similarity([rec1], [rec2])
        best = max(best, cos)
    avg += best

avg = avg/len(sample_tensor)
print("avg: ", avg)

## Language check

In [None]:
import language_tool_python
tool = language_tool_python.LanguageTool('en-US')
#tool.disable_spellchecking()
results = tool.check(data["finetuned"][0])
results_filtered = [result for result in results if result.ruleId!='WHITESPACE_RULE' ]

In [None]:
import language_tool_python
tool = language_tool_python.LanguageTool('en-US')
#tool.disable_spellchecking()

avg = 0
dataset = "finetuned"

for rec in tqdm(data[dataset], desc="Iteration", disable=False, position=0, leave=True):
    results = tool.check(rec)
    results_filtered = [result for result in results if result.ruleId!='WHITESPACE_RULE' ]
    avg += len(results_filtered)

print(avg / len(data[dataset]))

## Readibility

* textstat.smog_index(test_data)
* textstat.flesch_kincaid_grade(test_data)
* textstat.coleman_liau_index(test_data)
* textstat.automated_readability_index(test_data)
* textstat.dale_chall_readability_score(test_data)
* textstat.difficult_words(test_data)
* textstat.linsear_write_formula(test_data)
* textstat.gunning_fog(test_data)
* textstat.text_standard(test_data)

In [None]:
import numpy as np
import textstat
from scipy import stats

In [None]:
ret = []
dataset = "finetuned"

for rec in data[dataset]:
    result = textstat.flesch_reading_ease(rec)
    #print(result)
    ret.append(result)
    
print(np.mean(ret), np.median(ret), stats.mode(ret))

In [None]:
ret = []
dataset = "sample"

for rec in data[dataset]:
    result = textstat.smog_index(rec)
    #print(result)
    ret.append(result)
    
print(np.mean(ret), np.median(ret), stats.mode(ret))

In [None]:
ret = []
dataset = "finetuned"

for rec in data[dataset]:
    result = textstat.gunning_fog(rec)
    #print(result)
    ret.append(result)
    
print(np.mean(ret), np.median(ret), stats.mode(ret))

In [None]:
ret = []
dataset = "sample"

for rec in data[dataset]:
    result = textstat.dale_chall_readability_score(rec)
    #print(result)
    ret.append(result)
    
print(np.mean(ret), np.median(ret), stats.mode(ret))

## Translation

In [None]:
import nltk
import nltk.translate.bleu_score as bleu
from nltk.translate.bleu_score import SmoothingFunction

import nltk.translate.gleu_score as gleu
import nltk.translate.meteor_score as meteor
from jiwer import wer, mer

In [None]:
def wer_count(hyp, ref, print_matrix=False):
    N = len(hyp)
    M = len(ref)
    L = np.zeros((N,M))
    for i in range(0, N):
        for j in range(0, M):
            if min(i,j) == 0:
                L[i,j] = max(i,j)
            else:
                deletion = L[i-1,j] + 1
                insertion = L[i,j-1] + 1
                sub = 1 if hyp[i] != ref[j] else 0
                substitution = L[i-1,j-1] + sub
                L[i,j] = min(deletion, min(insertion, substitution))
    return int(L[N-1, M-1])

def bleu_score(recipe, refer):
    hyp = recipe
    refs = refer
    smoothie = SmoothingFunction().method4
    score_ref_a = bleu.sentence_bleu(refs, hyp, smoothing_function=smoothie)
    return score_ref_a

def gleu_score(recipe, refer):
    hyp = recipe
    refs = refer
    score_ref_a = gleu.sentence_gleu(refs, hyp)
    return score_ref_a

def wer_score(recipe, refer):
    hyp = recipe
    refs = refer

    mn = 99999
    for ref in refs:
        b = wer(ref, hyp)
        mn = min(mn, b)
       
    return mn

In [None]:
ret = []
for k, rec1 in enumerate(tqdm(data["sample"])):
    rec2 = data["finetuned"][k*10: k*10 + 10]
    res = bleu_score(rec1, rec2)
    ret.append(res)

np.mean(ret)

In [None]:
ret = []
for k, rec1 in enumerate(tqdm(data["sample"])):
    rec2 = data["finetuned"][k*10: k*10 + 10]
    res = gleu_score(rec1, rec2)
    ret.append(res)

np.mean(ret)

In [None]:
ret = []
for k, rec1 in enumerate(tqdm(data["sample"])):
    rec2 = data["finetuned"][k*10: k*10 + 10]
    res = wer_score(rec1, rec2)
    ret.append(res)

np.mean(ret)

# Ingredients evaluations

#### Test if all the ingredients from the input list are used inside the generated instructions

In [13]:
def get_input_ingredients_list(recipe):
    '''
    Gets the list of input ingredients from a raw recipe
    '''
    ingr_start_index = recipe.find("<INPUT_START>")
    ingr_end_index = recipe.find("<INPUT_END>")

    ingredients_sequence = " ".join(recipe[ingr_start_index + len("<INPUT_START>"):ingr_end_index].strip().split())  # Find the input ingredients list sequence
    ingredients_list = ingredients_sequence.split("<NEXT_INPUT>")  # split the ingredients when the next input token is reached
    return [x.strip() for x in ingredients_list]  # strip whitespaces before and after ingredients


def get_instructions(recipe):
    '''
    Gets the string sequence of instructions from a raw recipe
    '''
    instr_start_index = recipe.find("<INSTR_START>")
    instr_end_index = recipe.find("<INSTR_END>")

    instruction_sequence = " ".join(recipe[instr_start_index + len("<INSTR_START>"):instr_end_index].strip().split())  # Find the input ingredients list sequence
    instructions = instruction_sequence.split("<NEXT_INSTR>")  # split the ingredients when the next input token is reached
    instructions = [x.strip() for x in instructions]  # strip whitespaces before and after ingredients
    return " ".join(instructions)


def input_ingredients_coverage_in_instructions(recipe):
    '''
    Returns the percentage of the number of ingredients from the input list that are actually present in the instructions for one recipe.
    '''
    ingredients = get_input_ingredients_list(recipe)
    number_of_ingredients = len(ingredients)  # keeps the number of ingredients before removing duplicates
    instructions = get_instructions(recipe).lower()

    ingredients = list(dict.fromkeys(ingredients))  # remove duplicate ingredients to reduce bias
    nb_ingr_found = sum([1 if ingredient.lower() in instructions else 0 for ingredient in ingredients])  # Gets the number of ingredients found in the instructions

    return nb_ingr_found/number_of_ingredients


def evaluate_recipes_input_ingredients_coverage_in_instructions(recipes):
    '''
    Evaluation on all the generated recipes (finetuned) for the coverage of the input list in the instructions.
    Returns a list of percentage for the number of ingredients from the input list that are actually present in the instructions.
    '''
    results = []
    for recipe in recipes:
        results.append(input_ingredients_coverage_in_instructions(recipe))
    return results


In [17]:
res = evaluate_recipes_input_ingredients_coverage_in_instructions(data['finetuned'])
print('Average percentage of input ingredients covered:', np.mean(res))
nb_of_zeros = sum([1 if x == 0.0 else 0 for x in res])
print('Number of 0% coverage of input ingredients:', nb_of_zeros)

Average percentage of input ingredients covered: 0.5537150981253905
Number of 0% coverage of input ingredients: 57


#### ONLY TESTING PURPOSE

In [18]:
# Checking on the samples to see how many zeros there are, therfore checking how many "mix, combine, there are still in the training data".
res = evaluate_recipes_input_ingredients_coverage_in_instructions(data['sample'])
print('Average percentage of input ingredients covered:', np.mean(res))
nb_of_zeros = sum([1 if x == 0.0 else 0 for x in res])
print('Number of 0% coverage of input ingredients:', nb_of_zeros)

Average percentage of input ingredients covered: 0.5261598679098679
Number of 0% coverage of input ingredients: 11


In [19]:
for i, x in enumerate(res):
    if x == 0.0:
        print(i)

0
15
16
29
30
39
49
61
69
73
75


In [25]:
print_raw_recipe(data['sample'][15])


#  Double Chocolate Oatmeal Cookies   #
 ## Input ingredients ##
`sugar`
`margarine`
`egg`
`water`
`vanilla`
`flour`
`cocoa`
`soda`
`salt`
`oats`
`chocolate chips`
 ## Ingredients ##
*  1 1/2 c. sugar 
*  1 c. margarine 
*  1 egg 
*  1/4 c. water 
*  1 tsp. vanilla 
*  1 1/4 c. flour 
*  1/3 c. cocoa 
*  1/2 tsp. soda 
*  1/2 tsp. salt 
*  3 c. oats (quick cooking) 
*  1 pkg. chocolate chips 
 ## Instructions ##
1)  Mix first 5 ingredients well. 
2)  Add the rest at once. 
3)  Put rounded teaspoonfuls 2 inches apart on ungreased pan. 
4)  Bake at 350° for 10 to 12 minutes. 
 


#### Test if all the ingredients from the input list are in the ingredient list

In [74]:
def get_listed_ingredients_list(recipe):
    '''
    Gets the string sequence of listed ingredients (list with quantities) from a raw recipe
    '''
    ingr_start_index = recipe.find("<INGR_START>")
    ingr_end_index = recipe.find("<INGR_END>")

    ingredients_sequence = " ".join(recipe[ingr_start_index + len("<INGR_START>"):ingr_end_index].strip().split())  # Find the input ingredients list sequence
    ingredients_list = ingredients_sequence.split("<NEXT_INGR>")  # split the ingredients when the next input token is reached
    ingredients_list = [x.strip() for x in ingredients_list]  # strip whitespaces before and after ingredients
    return " ".join(ingredients_list)


def input_ingredients_coverage_in_listed_ingredients(recipe):
    '''
    Returns the percentage of the number of ingredients from the input list that are actually present in the listed ingredients (list with quantities) for one recipe.
    '''
    input_ingredients = get_input_ingredients_list(recipe)  # Gets input ingredients (without quantities)
    number_of_ingredients = len(input_ingredients)  # keeps the number of ingredients before removing duplicates
    
    listed_ingredients = get_listed_ingredients_list(recipe).lower()  # Gets listed ingredients (the one with quanities)

    ingredients = list(dict.fromkeys(input_ingredients))  # remove duplicate ingredients to reduce bias
    nb_ingredients_found = sum([1 if input_ingredient.lower() in listed_ingredients else 0 for input_ingredient in input_ingredients])  # Gets the number of ingredients found in the listed ingredients
    return nb_ingredients_found / number_of_ingredients


def evaluate_recipes_input_ingredients_coverage_in_listed_ingredients(recipes):
    '''
    Evaluation on all the generated recipes (finetuned) for the coverage of the input list in the listed ingredients (list with quantities).
    Returns a list of percentage for the number of ingredients from the input list that are actually present in the listed ingredients.
    '''
    results = []
    for recipe in recipes:
        results.append(input_ingredients_coverage_in_listed_ingredients(recipe))
    return results

In [75]:
res = evaluate_recipes_input_ingredients_coverage_in_listed_ingredients(data['finetuned'])
print('Average percentage of input ingredients covered:', np.mean(res))
indx_where_not_one = [i for i, x in enumerate(res) if x != 1.0]
print('Number of non-100% coverage of input ingredients:', len(indx_where_not_one))

Average percentage of input ingredients covered: 0.9887254493485531
Number of non-100% coverage of input ingredients: 77


In [76]:
input_ingredients_coverage_in_listed_ingredients(data['finetuned'][13])

0.9090909090909091

In [77]:
print_raw_recipe(data['finetuned'][13])


#  Apricot Spice Cookies Recipe   #
 ## Input ingredients ##
`butter`
`sugar`
`eggs`
`sour cream`
`Apricot brandy`
`Vanilla`
`rum`
`All-purpose`
`Salt`
`Baking soda`
`Black pepper`
 ## Ingredients ##
*  1/2 c. Unsalted butter 
*  1 c. Sugar 
*  3 Large eggs 
*  1 c. Lowfat sour cream 
*  1/2 c. Apricot brandy 
*  1 tsp Vanilla 
*  1/4 tsp Nutmeg (or possibly 1/4 c.) 
*  2 1/4 c. All-purpose flour 
*  1 tsp Salt 
*  1/2 tsp Baking soda 
*  1/2 tsp Black pepper 
 ## Instructions ##
1)  Heat oven to 350 degrees. 
2)  In large mixing bowl, cream butter and sugar. 
3)  Add in Large eggs, sour cream, Apricot brandy and vanilla. 
4)  In medium bowl, sift together flour, salt, baking soda and pepper. 
5)  Add in to egg mix alternately with flour mix. 
6)  Stir till just mixed. 
7)  Drop by teaspoonful on ungreased cookie sheet. 
8)  Bake at 350 degrees for 12 to 14 min. 
 


#### Test if there are any duplicates ingredients in the input list

In [82]:
def evaluate_duplicated_input_ingredients(recipes):
    '''
    Returns percentage of recipes without duplicated inputs
    '''
    count = 0
    for recipe in recipes:
        ingredients = get_input_ingredients_list(recipe)
        filtered_list = list(dict.fromkeys(ingredients))
        if len(ingredients) == len(filtered_list):
            count +=1
    return count / len(recipes)

In [83]:
evaluate_duplicated_input_ingredients(data['finetuned'])

0.757

In [None]:
ingredients = ['test', 'test', 'x', 'a', 'b', 'c']

In [None]:
l2 = list(dict.fromkeys(ingredients))
l2

#### Test if there are any duplicates ingredients in the ingredient list

#### Test if all the ingredients found in the generated instructions are mentioned in the input list

#### Test if all the ingredients found in the generated instructions are mentioned in the ingredients list