# Recommendation system

The recommendation system maximizes both the number of products saved from expiration and the match with the client's preferences. It therefore needs three parts: one that computes the number of saved products, one that computes the client's preferences, and one that combines the two scores and finds the maximum.

We first import all recipe ids with their ingredients, and 'clean' the ingredients column to keep only a list of ingredient ids.

In [612]:
import sqlite3
import pandas as pd
import ast

In [613]:
def extract_ingredients(row: str) -> list:
    """Return the ingredient ids found in a serialized ingredient list.

    `row` is the string form of a Python list of dictionaries. It is parsed
    with `ast.literal_eval`, and the value stored under the 'id' key of each
    dictionary is collected in order.
    """
    parsed = ast.literal_eval(row)
    return [entry['id'] for entry in parsed]

In [614]:
# Load every recipe id together with its raw (serialized) ingredient list.

conn = sqlite3.connect('Data/Supermarket.db')
try:
    query = "SELECT id, ingredients FROM Recipes"
    df_recipe = pd.read_sql_query(query, conn)
finally:
    # Read-only query: nothing to commit, but always release the connection,
    # even when the query fails.
    conn.close()

In [615]:
# Replace each serialized ingredient list by the plain list of its ingredient ids.
ingredient_ids = df_recipe['ingredients'].apply(extract_ingredients)
df_recipe['ingredients'] = ingredient_ids

## Compute expiration date parameter

Here I will use SQL. Pandas would be easier and probably even more efficient in this case, but I wanted to do it _AS IF_ I had to use SQL.

In [616]:
# Create a view holding, for each ingredient, the highest `urgent` value found
# among its rows in Stocks.

conn = sqlite3.connect('Data/Supermarket.db')
try:
    conn.execute("DROP VIEW IF EXISTS ingredients_close_expiration")
    query = ("CREATE VIEW ingredients_close_expiration AS "
             "SELECT ingredient_id, MAX(urgent) AS max_urgent "
             "FROM Stocks "
             "GROUP BY ingredient_id")
    conn.execute(query)
    conn.commit()
finally:
    # Release the connection even if one of the statements fails.
    conn.close()

In [617]:
def exp_coef(ingr_ids: list) -> int:
    """Return the 'saved product' coefficient of a list of ingredients.

    The coefficient is the sum of `max_urgent` over the rows of the
    `ingredients_close_expiration` view whose `ingredient_id` is in
    `ingr_ids`. The query runs on the module-level connection `conn`, which
    must be open when the function is called.

    Returns 0 when `ingr_ids` is empty or when none of the ids appears in the
    view. The ids must be values sqlite3 can bind (e.g. plain `int`).
    """
    ingr_ids = list(ingr_ids)
    if not ingr_ids:
        return 0

    # One '?' placeholder per id, so the values are bound by sqlite3 instead
    # of being pasted into the SQL text.
    placeholders = ', '.join('?' * len(ingr_ids))
    query = ("SELECT SUM(max_urgent) "
             "FROM ingredients_close_expiration "
             f"WHERE ingredient_id IN ({placeholders})")
    row = conn.execute(query, ingr_ids).fetchone()

    # An aggregate query always yields one row; SUM() is NULL (None) when no
    # row matched, so the value itself has to be checked, not only the row.
    total = row[0] if row is not None else None
    return total if total is not None else 0


In [618]:
conn = sqlite3.connect('Data/Supermarket.db')
try:
    # exp_coef reads the module-level `conn`, so it has to stay open for the
    # whole apply.
    df_recipe['saved_coef'] = df_recipe['ingredients'].apply(exp_coef)
finally:
    # exp_coef only issues a SELECT, so there is nothing to commit; always
    # release the connection, even when a row fails.
    conn.close()

## User taste suggestion
This part of the suggestion algorithm uses naive Bayes and the Commands table to give a coefficient to each recipe (the higher the coefficient, the more likely the user will like the recipe).

$$ P(like\_recipe(i) \mid all\_recipe\_already\_liked) \propto P(all\_recipe\_already\_liked \mid like\_recipe(i)) \cdot P(like\_recipe(i)) $$

with:
$$P(like\_recipe(i)) = \frac{number\_people\_who\_liked\_the\_recipe(i)}{total\_number\_of\_people }$$

$$ P(all\_recipe\_already\_liked \mid like\_recipe(i)) = \prod_{r \in recipes\_already\_liked} \frac{number\_people\_who\_liked\_both\_recipes(i, r) + 1}{number\_people\_who\_liked\_the\_recipe(i)} $$


In [619]:
def a_priori(n_people, recipe_id):
    """Return the a priori probability that a person likes `recipe_id`.

    It is the `number_id` stored for the recipe in `id_liked_recipe`, divided
    by `n_people`. A recipe with no row in that table counts as liked by
    nobody, which gives 0.0.

    The query runs on the module-level connection `conn`; `recipe_id` must be
    a value sqlite3 can bind (e.g. a plain `int`).
    """
    # The id is bound as a parameter rather than formatted into the SQL text.
    query = ("SELECT number_id "
             "FROM id_liked_recipe "
             "WHERE recepie_id = ?")
    row = conn.execute(query, (recipe_id,)).fetchone()

    # fetchone() returns None when the recipe has no row in the table.
    if row is None:
        return 0.0

    return row[0] / n_people

In [620]:
def likelihood(recipe_id, user_recipes):
    """Return the likelihood term of the naive Bayes taste score.

    For each recipe in `user_recipes` the factor
    ``(1 + liked_both) / liked_recipe_i`` is multiplied in, where
    `liked_recipe_i` is the `number_id` stored for `recipe_id` in
    `id_liked_recipe` and `liked_both` is the number of user ids listed for
    both recipes.

    Returns 0.0 when `recipe_id` has no row in `id_liked_recipe` (or a count
    of 0). A recipe of `user_recipes` without a row is treated as sharing no
    user with `recipe_id`. With an empty `user_recipes` the result is 1.

    The queries run on the module-level connection `conn`; ids must be values
    sqlite3 can bind (e.g. plain `int`).
    """
    row = conn.execute(
        "SELECT number_id, user_ids "
        "FROM id_liked_recipe "
        "WHERE recepie_id = ?",
        (recipe_id,),
    ).fetchone()

    # No row (or a zero count) means there is nothing to divide by.
    if row is None or not row[0]:
        return 0.0

    liked_recipe_i = row[0]
    # Built once: this set is the same for every recipe of the user.
    users_recipe_i = set(row[1].split(','))

    other_query = ("SELECT user_ids "
                   "FROM id_liked_recipe "
                   "WHERE recepie_id = ?")

    result = 1
    for recipe in user_recipes:
        other = conn.execute(other_query, (recipe,)).fetchone()
        if other is None or other[0] is None:
            users_other = set()
        else:
            users_other = set(other[0].split(','))

        liked_both = len(users_recipe_i & users_other)
        result *= (1 + liked_both) / liked_recipe_i

    return result


In [621]:
def taste_coef(recipe_id: int) -> float:
    """Score how likely the current user is to like recipe `recipe_id`.

    The score is the a priori probability of the recipe multiplied by the
    likelihood computed from the recipes the user already liked. It reads the
    module-level `n_people` and `user_recipes`.
    """
    prior = a_priori(n_people, recipe_id)
    return prior * likelihood(recipe_id, user_recipes)

    


In [622]:
conn = sqlite3.connect('Data/Supermarket.db')

# in the app the user id will be a parameter
user_id = 79  # 3000 not working

# the number of people is the same for every recipe
query = ("SELECT COUNT(*) "
         "FROM Users")
response = conn.execute(query).fetchone()
n_people = response[0]

# the recipes the user already liked are the same for every recipe too;
# user_id is bound as a parameter because it is meant to come from outside
query = ("SELECT recepie_id "
         "FROM Commands "
         "WHERE user_id = ?")
response = conn.execute(query, (user_id,)).fetchall()
user_recipes = [row[0] for row in response]

# create a table that stores, for each recipe, the ids of the users found in
# Commands for it (comma separated) and how many there are
conn.execute("DROP TABLE IF EXISTS id_liked_recipe")
query = ("CREATE TABLE id_liked_recipe AS "
         "SELECT recepie_id, GROUP_CONCAT(user_id, ',') AS user_ids, COUNT(user_id) AS number_id "
         "FROM Commands "
         "GROUP BY recepie_id;")
conn.execute(query)

# add an index on recepie_id (author's note: this drastically increases the speed)
query = "DROP INDEX IF EXISTS idx_recepie_id;"
conn.execute(query)
query = "CREATE INDEX idx_recepie_id ON id_liked_recipe (recepie_id);"
conn.execute(query)


<sqlite3.Cursor at 0x134163225c0>

In [623]:
try:
    # taste_coef queries through the module-level `conn`, which must still be
    # open here.
    df_recipe['taste_coef'] = df_recipe['id'].apply(taste_coef)
    conn.commit()
finally:
    # Release the connection even if scoring one of the recipes fails.
    conn.close()

## suggestion model
In the end, we normalize the two columns (halving the saved-products one), add them, and the recipe with the maximum sum is the suggested recipe.

In [624]:
def _min_max_scale(values):
    """Rescale `values` linearly so that its minimum maps to 0 and its maximum to 1.

    When every value is identical (or the range is NaN) the division would be
    0/0 and fill the column with NaN, so a column of zeros is returned instead.
    """
    lowest = values.min()
    spread = values.max() - lowest
    if not spread > 0:
        return pd.Series(0.0, index=values.index)
    return (values - lowest) / spread


# The saved-products score is halved (/2 to balance coefs): it ranges over
# [0, 0.5] while the taste score ranges over [0, 1].
df_recipe['saved_coef'] = _min_max_scale(df_recipe['saved_coef']) / 2
df_recipe['taste_coef'] = _min_max_scale(df_recipe['taste_coef'])
df_recipe['suggestion_coef'] = df_recipe['saved_coef'] + df_recipe['taste_coef']

In [625]:
# Keep every recipe whose combined score equals the best score (ties included).
best_score = df_recipe["suggestion_coef"].max()
is_best = df_recipe["suggestion_coef"] == best_score
recommendation = df_recipe[is_best]
print(recommendation)

# Also show the 20 recipes with the highest taste score.
top_by_taste = df_recipe.sort_values(by = ['taste_coef'], ascending=False).head(20)
print(top_by_taste)

         id                          ingredients  saved_coef  taste_coef   
791  636589  [11485, 1159, 1226, 1085, 10211821]    0.065934         1.0  \

     suggestion_coef  
791         1.065934  
          id                                        ingredients  saved_coef   
791   636589                [11485, 1159, 1226, 1085, 10211821]    0.065934  \
119   664786  [10023618, 11960, 14412, 11352, 11282, 11960, ...    0.175824   
56    636576  [4053, 1032009, 11294, 11215, 99231, 11485, 20...    0.153846   
94    654534  [1012, 11282, 11215, 11216, 2043, 1002013, 101...    0.247253   
183   661557  [99233, 2014, 14106, 11165, 10211215, 10015157...    0.170330   
573   639599  [14106, 10511297, 10211215, 1082047, 1088, 151...    0.170330   
416   645032  [10012023, 98846, 12006, 98863, 98874, 98994, ...    0.148352   
621   656227  [1002030, 9112, 11109, 4641, 10011282, 2047, 1...    0.098901   
33    636593  [93610, 11457, 11282, 11485, 12147, 10123, 989...    0.137363   
240   63180