### Recipe Recommender Model
A more complicated model that I'll be using for my Recipe Book application. Its job is to look at the recipes that the user has created or favourited in the past and use them to recommend other people's recipes while the user browses.

Input: User-made recipes (name, description, ingredients, and steps), along with recipes that other people have made.

Output: A score for each recipe indicating how relevant it seems to the user.

Success Rate Goal: 90% accuracy/confidence.

In [None]:
# Imports
import torch
import torch.nn as nn

import numpy as np
import random
import time
import copy

import matplotlib.pyplot as plt

from transformers import BertTokenizer, BertModel

In [None]:
# Setting torch to use GPU acceleration if possible.
device = torch.device("cpu")

if torch.cuda.is_available():
    device = torch.device("cuda")

torch.set_default_device(device)
print(f"Using device: {torch.get_default_device()}")

In [None]:
# ====================== DATA COLLECTION ======================

In [None]:
# Load the raw dataset: one whitespace-stripped entry per line of data.txt.
lines = []

with open("data.txt", "r") as file:
    lines.extend(rawLine.strip() for rawLine in file)

In [None]:
# Testing - sanity check on the loaded dataset. Looks at every fourth line
# starting from line 0 (the lines the parsing cell reads as recipe names) and
# prints the index of any that is longer than 100 characters.
for cIndex in range(0, len(lines), 4):
    if len(lines[cIndex]) > 100:
        print(cIndex)

In [None]:
# Group the loaded lines into recipes. Each recipe takes four consecutive
# lines, in order: name, description, ingredients, steps. A trailing group with
# fewer than four lines is ignored.
recipes = [
    {
        "name": lines[start],
        "description": lines[start + 1],
        "ingredients": lines[start + 2],
        "steps": lines[start + 3]
    }
    for start in range(0, len(lines) - 3, 4)
]

In [None]:
# Build the randomised training and testing samples.
SAMPLES = 1000
TRAIN_RATIO = 0.9

RECIPE_MEDIAN = 5
RECIPE_VARIANCE = 3

RECIPE_TEST_MEDIAN = 10
RECIPE_TEST_VARIANCE = 5

dataSamples = []

for _ in range(SAMPLES):
    # Group sizes are drawn uniformly from [median - variance, median + variance].
    recipeCount = random.randint(RECIPE_MEDIAN - RECIPE_VARIANCE, RECIPE_MEDIAN + RECIPE_VARIANCE)
    testCount = random.randint(RECIPE_TEST_MEDIAN - RECIPE_TEST_VARIANCE, RECIPE_TEST_MEDIAN + RECIPE_TEST_VARIANCE)

    # A sample is [group 0, group 1]. The first `recipeCount` picks land in
    # group 1 and the remaining `testCount` picks land in group 0.
    # NOTE(review): Model.compute reads element 0 as the user's recipes and
    # scores element 1, so the `recipeCount` picks are the ones being scored.
    # Confirm this is the intended assignment.
    laterPicks = []
    firstPicks = []

    for position in range(recipeCount + testCount):
        # Copy the recipe so the per-sample "favourites" value never leaks
        # back into the shared `recipes` list.
        picked = copy.deepcopy(random.choice(recipes))
        picked["favourites"] = random.randint(0, 1000)

        if position < recipeCount:
            firstPicks.append(picked)
        else:
            laterPicks.append(picked)

    dataSamples.append([laterPicks, firstPicks])

# The leading TRAIN_RATIO share of the samples is for training, the rest for testing.
splitAt = int(SAMPLES * TRAIN_RATIO)
xTrain = dataSamples[:splitAt]
xTest = dataSamples[splitAt:]

In [None]:
# ====================== MODEL CONSTRUCTION (PROOF OF CONCEPT) ======================

In [None]:
class Model:
    """Proof-of-concept recommender that scores recipes by BERT text similarity.

    Each candidate recipe is compared with every one of the user's recipes.
    For each recipe field the two texts are embedded with `bert-base-uncased`
    and the cosine similarity of the embeddings is multiplied by that field's
    weight. The summed similarity is then scaled by a popularity factor
    derived from the candidate's "favourites" count.
    """

    # Tag - Multiplier: weight of each recipe field in the similarity total.
    TAG_WEIGHTS = {
        "name": 1.5,
        "description": 1.0,
        "ingredients": 0.8,
        "steps": 0.5,
    }

    def __init__(self):
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.model = BertModel.from_pretrained('bert-base-uncased')

    def _embed(self, text, cache):
        """Return the embedding of `text`, computing it at most once per cache.

        The embedding is the hidden state at the first token position of the
        model output (the [CLS] token for BERT). Input longer than 512 tokens
        is truncated.
        """
        if text not in cache:
            inputs = self.tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)

            # Inference only, so skip gradient tracking.
            with torch.no_grad():
                outputs = self.model(**inputs)

            cache[text] = outputs.last_hidden_state[:, 0, :]

        return cache[text]

    def _recipe_similarity(self, recipe1, recipe2, cache):
        """Return the weighted sum of per-field cosine similarities as a float."""
        total = 0.0

        for tag, weight in self.TAG_WEIGHTS.items():
            # Compare like with like: each field of one recipe against the
            # same field of the other.
            emb1 = self._embed(recipe1[tag], cache)
            emb2 = self._embed(recipe2[tag], cache)

            total += nn.functional.cosine_similarity(emb1, emb2).item() * weight

        return total

    def compute(self, recipes):
        """Score the candidate recipes against the user's recipes.

        Args:
            recipes: A two-element sequence. `recipes[0]` holds the user's
                recipes and `recipes[1]` the candidates to score. Every
                recipe is a dict with string values under "name",
                "description", "ingredients" and "steps". Candidates also
                need a numeric "favourites" entry.

        Returns:
            Deep copies of the candidates, each with an added "score" entry,
            sorted by ascending score (the highest-scoring recipe is last).
            The input is not modified. With no user recipes every score is 0.

        Raises:
            KeyError: If a recipe lacks one of the required entries.
        """
        userRecipes = recipes[0]
        recipeScores = copy.deepcopy(recipes[1])

        # Each text takes part in many pairwise comparisons, so embed it once
        # per call instead of re-running BERT for every pair.
        cache = {}

        for recipe in recipeScores:
            similarity = sum(
                (self._recipe_similarity(recipe, userRecipe, cache) for userRecipe in userRecipes),
                0.0,
            )

            # Popularity boost: log-scaled so the factor grows slowly with the
            # favourites count, and +1 so zero favourites leaves the score unchanged.
            recipe["score"] = similarity * (np.log10(recipe["favourites"] + 1) + 1)

        return sorted(recipeScores, key=lambda scored: scored["score"])

In [None]:
# Instantiate the recommender; the constructor loads the pretrained
# 'bert-base-uncased' tokenizer and model.
model = Model()

In [None]:
# Smoke test of the proof-of-concept model: score the first training sample.
model.compute(xTrain[0])