In [24]:
import ast
import html
import re
import string

import pandas as pd
import unidecode
from IPython.display import HTML
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [25]:
# Load the cleaned recipe data and discard its 'Unnamed: 0' column in one step
df = pd.read_csv('Cleaned_data.csv').drop(columns='Unnamed: 0')

In [26]:
# Get rid of duplicate ingredients in Ingredients_final, keeping the first occurrence of each word.
# dict.fromkeys() preserves insertion order, so the column comes out identical on every run;
# a set() would yield a different word order each time the kernel is restarted.
df['Ingredients_final'] = df['Ingredients_final'].apply(lambda x: ' '.join(dict.fromkeys(x.split())))

In [27]:
# Fit the TF-IDF vocabulary on the recipe ingredients and encode them in a single pass.
# fit_transform() is given the same unicode-cast array for both steps, so the model is
# fitted and the recipes are encoded from exactly the same documents.
tfidf = TfidfVectorizer() # Set TfidfVectorizer()
tfidf_recipe = tfidf.fit_transform(df['Ingredients_final'].values.astype('U')) # Create model and encodings

In [28]:
# Define function that parses user input ingredients
def ingredient_parser(ingredients):
    """Reduce raw ingredient strings to one space-separated string of core ingredient words.

    Each ingredient is split on spaces and hyphens; every piece is stripped of
    punctuation, kept only if purely alphabetic, lower-cased, transliterated
    with unidecode, lemmatized, and finally dropped if it appears in the
    measure-word or filler-word lists below.

    Parameters
    ----------
    ingredients : iterable of str
        One string per ingredient, e.g. ['2 cup white rice', 'spring onion'].

    Returns
    -------
    str
        The surviving words of all ingredients joined by single spaces.
        Ingredients that reduce to nothing contribute nothing to the result.
    """
    measures = ['teaspoon', 't', 'tsp.', 'tsp', 'tablespoon', 'T', 'tbl.', 'tb', 'tbsp.','tbsp', 'fluid ounce', 'fl oz', 'gill', 'cup', 'c', 'pint', 'p', 'pt', 'fl pt', 'quart', 'q', 'qt', 'fl qt', 'gallon', 'g', 'gal', 'ml', 'milliliter', 'millilitre', 'cc', 'mL', 'l', 'liter', 'litre', 'L', 'dl', 'deciliter', 'decilitre', 'dL', 'bulb', 'level', 'heaped', 'rounded', 'whole', 'pinch', 'medium', 'slice', 'pound', 'lb', '#', 'ounce', 'oz', 'mg', 'milligram', 'milligramme', 'g', 'gram', 'gramme', 'kg', 'kilogram', 'kilogramme', 'x', 'of', 'mm', 'millimetre', 'millimeter', 'cm', 'centimeter', 'centimetre', 'm', 'meter', 'metre', 'inch', 'in', 'milli', 'centi', 'deci', 'hecto', 'kilo']
    bad_words = ['oil', 'fresh', 'olive', 'a', 'red', 'sauce', 'clove', 'or', 'pepper', 'bunch', 'salt', 'and', 'leaf', 'chilli', 'large', 'extra', 'water', 'white', 'ground', 'dried', 'sprig', 'small', 'free', 'handful', 'sugar', 'from', 'virgin', 'soy', 'black', 'chopped', 'vinegar', 'green', 'piece', 'seed', 'for', 'sustainable', 'range', 'cornstarch', 'higher', 'welfare', 'to', 'peeled', 'sesame', 'flour', 'tin', 'finely', 'the', 'freshly', 'bean', 'quality', 'few', 'ripe', 'parsley', 'sea', 'stock', 'source', 'flat', 'vegetable', 'smoked', 'organic', 'spring', 'fillet', 'sliced', 'plus', 'corn', 'plain', 'stick', 'cooking', 'light', 'picked', 'mixed', 'your', 'powder', 'bay', 'optional', 'baby', 'serve', 'stalk', 'unsalted', 'plum', 'natural', 'fat', 'fish', 'sweet', 'skin', 'such', 'juice', 'ask', 'brown', 'into', 'on', 'yellow', 'roughly', 'cut', 'good', 'dark', 'chili', 'orange', 'cherry', 'grated', 'frozen', 'bread', 'trimmed', 'breast', 'low', 'knob', 'dusting', 'salad', 'bell', 'cooked', 'runny', 'deseeded', 'balsamic', 'with', 'paste', 'bouillon', 'curry', 'streaky', 'use', 'pin', 'rasher', 'nut', 'cream', 'if', 'groundnut', 'soft', 'you', 'squash', 'tamari', 'chinese', 'zest', 'baking', 'grating', 'bone', 'hot', 'steak', 'boiling', 'minced', 'thigh', 'can', 'other', 'colour', 'shiitake', 'puree', 'dry', 'halved', 'skinless', 'spice', 'amount', 'chive', 'tinned', 'english', 'butternut', 'splash', 'shoulder', 'king', 'leftover', 'washed', 'firm', 'thick', 'flake', 'stir', 'broth', 'caper', 'big', 'dijon', 'is', 'little', 'pastry', 'five', 'sized', 'fishmonger', 'deep', 'removed', 'any', 'cube', 'frying', 'raw']

    # One set for both lists makes the final membership test a single hash lookup
    stop_words = set(measures) | set(bad_words)

    translator = str.maketrans('', '', string.punctuation) # Get rid of punctuations using maketrans
    lemmatizer = WordNetLemmatizer() # Set lemmatizer
    new_ingred_list = [] # Empty list for parsed ingredients

    # Loop through each ingredient
    for i in ingredients:
        items = []
        # Split on spaces and hyphens first, then strip punctuation from each piece.
        # str.translate() returns a new string, so its result has to be kept; stripping
        # after the split keeps hyphenated words as separate words.
        for piece in re.split(' |-', i):
            word = piece.translate(translator)
            if not word.isalpha(): # Get rid of non alphabet words (and empty pieces)
                continue
            word = unidecode.unidecode(word.lower()) # Lower case, then unidecode
            word = lemmatizer.lemmatize(word) # Lemmatize
            if word not in stop_words: # Take out measure words and bad words
                items.append(word)

        if items: # Skip ingredients with nothing left so no stray spaces are produced
            new_ingred_list.append(' '.join(items))

    return ' '.join(new_ingred_list) # Return new parsed ingredients

In [29]:
# Worked example: score every recipe against a sample list of user ingredients
input_i = [
    'rice',
    'chicken',
    'egg',
    'spring onion',
    'beetroot',
]

# Clean the raw ingredients with ingredient_parser()
input_parsed = ingredient_parser(input_i)
# Encode the cleaned string with the already-fitted tfidf vectorizer
ingredients_tfidf = tfidf.transform(raw_documents=[input_parsed])
# Cosine similarity of each recipe encoding against the encoded input
cos_sim = cosine_similarity(X=tfidf_recipe, Y=ingredients_tfidf)
cos_sim

array([[0.11710823],
       [0.        ],
       [0.08470417],
       ...,
       [0.07795387],
       [0.13567231],
       [0.07026621]])

In [30]:
# Rank the recipes on a scored copy so this example does not add a column to the shared df
# (re-running or skipping this cell then leaves df exactly as it was loaded).
# cos_sim is flattened to one score per recipe row before it is attached.
df.assign(Rec_score=cos_sim.ravel()).sort_values('Rec_score', ascending=False).head(5)

Unnamed: 0,Title,Ingredients,Servings,Difficulty,Calories,Time,URL,Ingredients_parsed,Ingredients_final,Rec_score
1023,Ginger Fried Rice (薑炒飯),"['2 oz ginger', '3 oz chicken', '3 egg', '1 ts...",4 servings,unknown,unknown,00:30,https://www.madewithlau.com/recipes/ginger-fri...,ginger chicken egg salt white rice water stalk...,chicken egg ginger rice onion,0.608761
115,Potato rostis with beetroot horseradish,"['2 large potatoes', '½ a red onion', '1 clove...",4 rostis,Super easy,220.0,00:25,https://www.jamieoliver.com/recipes/vegetables...,large potato a red onion clove garlic cumin se...,potato cumin horseradish beetroot creamed garl...,0.379857
922,Sliced Fish Congee (魚片粥),"['10 oz fish', '0.50 lb rice', '6 cup water', ...",4 servings,unknown,unknown,00:45,https://www.madewithlau.com/recipes/sliced-fis...,fish rice water ginger stalk green onion cilan...,cilantro egg ginger rice onion,0.358017
89,Mustard chicken Milanese,"['4 x 120 g free-range chicken breasts', '4 ta...",4,Showing off,640.0,00:45,https://www.jamieoliver.com/recipes/chicken-re...,free range chicken breast english mustard powd...,cheddar mustard chicken beetroot breadcrumb ea...,0.350187
105,"Farro, cauliflower & asparagus salad","['200 g farro (see tip)', '1 cauliflower', '25...",4 - 6,Not too tricky,566.0,00:40,https://www.jamieoliver.com/recipes/vegetables...,farro cauliflower asparagus olive oil large fr...,farro wine raisin cauliflower herb dill beetro...,0.321246


In [31]:
# Define a function that gets cosine similarity scores with each recipe, taking in ingredient parameters
def get_scores(i):
    """Return the cosine similarity between the given ingredients and every recipe.

    The ingredients `i` are cleaned with ingredient_parser(), encoded with the
    fitted `tfidf` vectorizer, and compared against the `tfidf_recipe` encodings.
    The result of cosine_similarity() is returned unchanged.
    """
    # Encode the parsed ingredients as a single document
    query_encoding = tfidf.transform([ingredient_parser(i)])
    # Compare every recipe encoding with the encoded query
    return cosine_similarity(tfidf_recipe, query_encoding)

In [32]:
# Define a function that recommends N recipes based on highest cos_sim scores
def get_recommendations(scores, N=5):
    """Return the N recipes with the highest scores.

    `scores` is attached to a copy of the global `df` as a 'Rec_score' column
    (df itself is left untouched), the copy is sorted by that column in
    descending order, and the first N rows are returned with only the
    columns needed for display.
    """
    output_columns = ['Title', 'Ingredients','Servings', 'Difficulty', 'Calories', 'Time', 'URL', 'Rec_score']
    # assign() works on a copy of df, so the scores never leak into the shared frame
    ranked = df.assign(Rec_score=scores).sort_values(by='Rec_score', ascending=False)
    return ranked.head(N)[output_columns]

In [33]:
# Final function that takes in user ingredients and outputs N highest scoring recipes
def recipe_recommender():
    """Interactively collect ingredients and show the N best-matching recipes.

    Prompts for ingredients one at a time until the user enters "done", then
    prompts for how many recipes to recommend. Each recommended recipe is shown
    as a bold title followed by a bulleted ingredient list.

    Returns
    -------
    pandas.DataFrame
        The recommendations returned by get_recommendations(), without the
        'Ingredients' column (those are printed above instead).
    """
    user_ingreds = [] # Empty list to store users ingredients

    while True:
        # Strip surrounding whitespace so an entry such as 'done ' still ends the loop
        user_input = input('Please enter an ingredient, when finished enter "done":').strip().lower()
        if user_input == 'done': # When user enters 'done', while loop exits
            break
        if user_input: # Ignore blank entries
            user_ingreds.append(user_input)

    # Ask user for number of recipes to be recommended
    while True:
        try:
            N = int(input('How many recipes would you like to be recommended?:'))
        # If not number, loop back and ask user again
        except ValueError:
            print('Please enter a number!')
            continue

        # Zero or a negative count makes no sense here: head(N) with a negative N
        # would return all but the last |N| recipes rather than none
        if N > 0:
            break
        print('Please enter a number greater than zero!')

    # Calculate scores and recommendations
    scores = get_scores(user_ingreds) # Get scores using get_scores() function
    recommendations = get_recommendations(scores, N) # Get recommendations using get_recommendations() function

    # Output results
    print(f'Ingredients inputted: {user_ingreds}') # Print ingredients given
    print('----------------------------------------')

    # Iterate through each recommendation
    for _, recipe in recommendations.iterrows():
        # Look columns up by label: integer keys on a label-indexed Series are
        # treated as positions only through deprecated pandas behaviour.
        # The title is escaped so characters such as '&' or '<' display literally.
        display(HTML(f"<b>{html.escape(str(recipe['Title']))}</b>")) # Display recipe name in bold

        # The Ingredients cell holds the string form of a Python list; parse it back
        for ingred in ast.literal_eval(recipe['Ingredients']):
            print(f'- {ingred}') # Print ingredients in bullet pointed list
        print('----------------------------------------')

    # Drop 'Ingredients' (already displayed above) and return the rest of the information
    return recommendations.drop(columns='Ingredients')

In [35]:
# Run the interactive recommender: it prompts for ingredients and a recipe count,
# and the DataFrame it returns is shown as this cell's output
recipe_recommender()

Ingredients inputted: ['rice', 'chicken', 'egg', 'spring onion', 'beetroot']
----------------------------------------


- 2 oz ginger
- 3 oz chicken
- 3 egg
- 1 tsp salt (for eggs)
- 2 cup white rice (uncooked)
- 2 cup water
- 2 stalk green onion
- 2 tbsp oil
- 1 tsp salt (for cooking)
- 1 tbsp water
- 1 tsp cornstarch
- 0.50 tsp salt
----------------------------------------


- 2 large potatoes
- ½ a red onion
- 1 clove of garlic
- 1½ teaspoons cumin seeds
- 3 tablespoons vegetable oil
- 2 medium beetroots
- 2 tablespoons creamed horseradish
----------------------------------------


- 10 oz fish
- 0.50 lb rice
- 6 cup water
- 1 oz ginger
- 2 stalk green onion
- 0.25 oz cilantro
- 1 egg (egg whites only)
- 1 tbsp cornstarch
- 0.50 tsp salt
- 0.25 tsp white pepper
- 1 tbsp water
- 1 tbsp oil
- 1 tbsp oil
- 1 tsp salt
- 1 tsp fish sauce (or light soy sauce)
----------------------------------------


- 4 x 120 g free-range chicken breasts
- 4 tablespoons English mustard powder , plus 4 teaspoons
- 50 g mature Cheddar cheese
- 4 slices of higher-welfare smoked ham
- 4 tablespoons plain flour
- 7 large free-range eggs
- 200 g breadcrumbs
- olive oil
- 1 eating apple
- 2 medium beetroot
- 1 tablespoon cider vinegar
- extra virgin olive oil
- 100 g watercress
- 12 cornichons
----------------------------------------


- 200 g farro (see tip)
- 1 cauliflower
- 250 g asparagus
- olive oil
- 4 large free-range eggs
- 2 tablespoons pine nuts
- 3 tablespoons raisins
- 1 handful of beetroot leaves, or other small leaves
- HERB DRESSING
- a few sprigs of fresh dill
- a few sprigs of fresh flat-leaf parsley
- a few fresh chives
- 90 ml extra virgin olive oil
- 2 tablespoons red wine vinegar
----------------------------------------


Unnamed: 0,Title,Servings,Difficulty,Calories,Time,URL,Rec_score
1023,Ginger Fried Rice (薑炒飯),4 servings,unknown,unknown,00:30,https://www.madewithlau.com/recipes/ginger-fri...,0.608761
115,Potato rostis with beetroot horseradish,4 rostis,Super easy,220.0,00:25,https://www.jamieoliver.com/recipes/vegetables...,0.379857
922,Sliced Fish Congee (魚片粥),4 servings,unknown,unknown,00:45,https://www.madewithlau.com/recipes/sliced-fis...,0.358017
89,Mustard chicken Milanese,4,Showing off,640.0,00:45,https://www.jamieoliver.com/recipes/chicken-re...,0.350187
105,"Farro, cauliflower & asparagus salad",4 - 6,Not too tricky,566.0,00:40,https://www.jamieoliver.com/recipes/vegetables...,0.321246
