# Recommendation system

The recommendation system will maximize the number of product that are saved from expiration and the client preferences. The recommendation system need thus 3 parts, one that compute the number of saved products, one that compute the preferences of the client and one function that joins the 2 variables and find the maximum.

We will a first import all recipes id with their ingredients, and 'clean' the ingredients column to get only a list of ingredient id.

In [580]:
import sqlite3
import pandas as pd
import ast

In [581]:
# import the data

conn = sqlite3.connect('Data/Supermarket.db')

query = "SELECT id, ingredients FROM Recipes"

df_recipe = pd.read_sql_query(query, conn)

conn.commit()
conn.close()

In [582]:
def extract_ingredients(row: str) -> list:
    row = ast.literal_eval(row)
    ingr_list = []

    for ingr in row:
        ingr_list.extend([ingr['id']])

    return ingr_list

In [583]:
df_recipe['ingredients'] = df_recipe['ingredients'].apply(extract_ingredients)

## Compute expiration date parametter

Here I will use sql codes. Pandas would be easier and probably even more efficient in this case but I just wanted to do _AS IF_ I had to use SQL.

In [584]:
# A view is created with the ingredients and their product closer to expiration date
# A view will change when the database is updated (with new command, and so, new stocks for example)

conn = sqlite3.connect('Data/Supermarket.db')

conn.execute("DROP VIEW IF EXISTS ingredients_close_expiration")
query = ("CREATE VIEW ingredients_close_expiration AS " 
         "SELECT ingredient_id, MAX(urgent) AS max_urgent " 
         "FROM Stocks " 
         "GROUP BY ingredient_id")

conn.execute(query)

conn.commit()
conn.close()

In [585]:
def exp_coef(ingr_ids: list) -> int:
    """ 
    the function take a list of ingredients ids as input
    return the 'coefficient' of saved product
    coef = sum(urgent_column) for each ingredients
      
    """
    global conn
    list_ingr_ids = '(' + str(ingr_ids)[1:-1] + ')'

    query = ("SELECT SUM(max_urgent) " 
             "FROM  ingredients_close_expiration " 
             f"WHERE ingredient_id IN {list_ingr_ids}")
    coef = conn.execute(query).fetchone()

    #return coef[0]
    return coef[0] if coef is not None else 0


In [586]:
conn = sqlite3.connect('Data/Supermarket.db')

df_recipe['saved_coef'] = df_recipe['ingredients'].apply(exp_coef)

conn.commit()
conn.close()

## User taste suggestion
This part of the suggestion algorithm will use naive bayes and the commands table to give a coefficient for each recepie (the higher the coefficient the more likely the user will like the recepie)

$$ P(like\_recipe(i)|all\_recipe\_already\_liked) = P(all\_recipe\_already\_liked|like\_recipe(i)) * P(like\_recipe(i))$$

with:
$$P(like\_recipe(i)) = \frac{number\_people\_who\_liked\_the\_recipe(i)}{total\_number\_of\_people }$$

$$ P(\text{{all\_recipe\_already\_liked}}|\text{{like\_recipe}}(i)) = \prod_{recipes} \frac{{\text{{number\_people\_who\_liked\_both\_recipe + 1}}}}{{\text{{number\_of\_people\_like\_recipe}}(i)}} $$ 


In [587]:
def a_priori(n_people, recipe_id):
    global conn

    query = ("SELECT number_id " 
            "FROM  id_liked_recipe "
            f"WHERE recepie_id = {recipe_id}")
    response = conn.execute(query).fetchone()
    n_like_tested = response[0]

    p_like_tested = n_like_tested / n_people

    return p_like_tested

In [588]:
def likelihood(recipe_id, user_recipes):

    global conn
    likelihood = 1

    query = ("SELECT number_id " 
            "FROM  id_liked_recipe "
            f"WHERE recepie_id = {recipe_id}") 
    response = conn.execute(query).fetchone()
    liked_recipe_i = response[0]

    query = ("SELECT user_ids " 
            "FROM  id_liked_recipe "
            f"WHERE recepie_id = {recipe_id}")
    response = conn.execute(query).fetchone()
    users_id_recipe_i = list(response[0])

    for recipe in user_recipes:

        query = ("SELECT user_ids " 
        "FROM  id_liked_recipe "
        f"WHERE recepie_id = {recipe}")
        response = conn.execute(query).fetchone()
        user_id_other_recipe = list(response[0])

        common_elements = set(users_id_recipe_i) & set(user_id_other_recipe)
        liked_both = len(common_elements)

        likelihood *= (1 + liked_both) / (liked_recipe_i)

    return likelihood


In [589]:
def taste_coef(recipe_id: int) ->float:
    """  
    """
    global conn, user_id, n_people, user_recipes

    p_a_priori = a_priori(n_people, recipe_id)
    likely = likelihood(recipe_id, user_recipes)

    return p_a_priori * likely

    


In [590]:
conn = sqlite3.connect('Data/Supermarket.db')

#in the app the user id will be a parameter
user_id = 20

#number of people remains the same for each line
query = ("SELECT COUNT(*) " 
        "FROM  Users ")
response = conn.execute(query).fetchone()
n_people = response[0]

# recipe id the user already liked will remains constant too
query = ("SELECT recepie_id " 
        "FROM  Commands "
        f"WHERE user_id = {user_id}")
response = conn.execute(query).fetchall()
user_recipes = [row[0] for row in response]

#create a view that will store all user id that like the recipe for each recipe
#conn.execute("DROP VIEW IF EXISTS id_liked_recipe")
#query = ("CREATE VIEW id_liked_recipe AS " 

conn.execute("DROP TABLE IF EXISTS id_liked_recipe")
query = ("CREATE TABLE id_liked_recipe AS "
         "SELECT recepie_id, GROUP_CONCAT(user_id, ',') AS user_ids, COUNT(user_id) AS number_id " 
         "FROM Commands " 
         "GROUP BY recepie_id;")
conn.execute(query)

df_recipe['taste_coef'] = df_recipe['id'].apply(taste_coef)

conn.commit()
conn.close()
df_recipe


Unnamed: 0,id,ingredients,saved_coef,taste_coef
0,715495,"[11333, 11297, 10211821, 18334, 10211529, 1041...",20,0.000021
1,665282,"[14412, 93684, 18375, 10719335, 14412, 1001, 2...",40,0.000020
2,632197,"[1002050, 19336, 1123, 20081, 4073, 19081, 1095]",18,0.000026
3,658536,"[11135, 11246, 11215, 4053, 6194, 1082047, 100...",22,0.000023
4,639836,"[15261, 9150, 12118, 93740, 10012108, 2047, 10...",32,0.000028
...,...,...,...,...
1005,716202,"[99017, 1032035, 18372, 2031, 16157, 2009, 989...",42,0.000021
1006,632071,"[6150, 18350, 10211215, 11333, 10023572, 6168,...",37,0.000034
1007,648474,"[11304, 20444, 11124, 10211215, 5062, 4582, 99...",35,0.000019
1008,662376,"[16018, 10011693, 11124, 2009, 98839, 1002014,...",66,0.000018
