<title>Food Recommendation</title>

In [344]:
import json
import sys
import pandas as pd
import numpy as np
from IPython.display import display
from itertools import combinations
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.metrics.pairwise import euclidean_distances

from scipy.stats import skewnorm

import matplotlib.pyplot as plt

Extract required data from JSON

In [345]:
# JSON text is UTF-8; without an explicit encoding, open() falls back to the
# platform's locale encoding, which can mangle non-ASCII category names
# such as "Entrées".
with open('food_datasets/test.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Flatten the nested structure (data['all'] -> menu categories -> items) into
# one record per food, keeping only items that carry a calorie value.
food_list = []
for entry in data['all']:
    for menu_item in entry['menu']:
        for food_item in menu_item['items']:
            calories = food_item['nutrition'].get('calories')
            if calories is not None:
                food_list.append({
                    'type': menu_item['category'],
                    'name': food_item['name'],
                    'calories': float(calories),
                })


# Alternative loader for 'food_datasets/food_samples.json', where the top
# level is already a list of categories (kept disabled):
# with open('food_datasets/food_samples.json', 'r') as file:
#     data = json.load(file)
# food_list = []
# for category in data:
#     for food_item in category['items']:
#         if 'calories' in food_item['nutrition'] and food_item['nutrition']['calories'] != None:
#             food = {
#                 'type': category['category'],
#                 'name': food_item['name'],
#                 'calories': float(food_item['nutrition']['calories']),
#             }
#             food_list.append(food)


# One row per food with columns: type, name, calories.
food_df = pd.DataFrame(food_list)

Display the Food DataFrame

In [346]:
# Render the flattened food table (columns: type, name, calories).
display(food_df)

Unnamed: 0,type,name,calories
0,Entrées,Beef Harissa Rice Bowl,260.0
1,Entrées,Grilled BBQ Pork Chops,180.0
2,Sides,Baked Potato Wedges,90.0
3,Pizza,Classic Cheese Pizza,340.0
4,Pizza,"Onion, Jalapeno & Sausage Pizza",350.0
5,Pizza,Pepperoni Pizza,370.0
6,Entrées,Chicken Teriyaki,130.0
7,Sides,Spicy Vegetable Lo Mein,180.0
8,Sides,Brown Rice,110.0
9,Sides,Jasmine Rice,130.0


In [347]:
# Swap the category strings in `type` for integer codes. The fitted encoder
# stays available as `le`, so the codes can be mapped back to category names
# (e.g. with `le.inverse_transform`).
le = LabelEncoder().fit(food_df['type'])
food_df['type'] = le.transform(food_df['type'])

In [348]:
# Show the table again: `type` now holds the integer codes written by the label-encoding step.
display(food_df)

Unnamed: 0,type,name,calories
0,3,Beef Harissa Rice Bowl,260.0
1,3,Grilled BBQ Pork Chops,180.0
2,9,Baked Potato Wedges,90.0
3,6,Classic Cheese Pizza,340.0
4,6,"Onion, Jalapeno & Sausage Pizza",350.0
5,6,Pepperoni Pizza,370.0
6,3,Chicken Teriyaki,130.0
7,9,Spicy Vegetable Lo Mein,180.0
8,9,Brown Rice,110.0
9,9,Jasmine Rice,130.0


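Since we are recommending based on the user's daily calorie intake, a distance-based method is a good way to choose the foods. In this case, we use the $L_p$ norm with $p=2$; for a single calorie value this reduces to the absolute difference between the target calories and the food's calories.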

In [349]:
def find_foods(input_calories, c=1, debug=False, random_state=None, foods=None):
    """
    Pick ``c`` foods whose calories together roughly use up ``input_calories``.

    The calorie budget is split into ``c`` random, skewed portions (each one
    drawn from whatever is left of the budget). For every portion, the food
    whose calorie value is closest to it is selected. For scalar calorie
    values the L2 distance is simply the absolute difference.

    : input_calories - total calorie budget to distribute; must be >= 0
    : c - number of foods to recommend (one per calorie split)
    : debug - if True, print the generated calorie splits
    : random_state - optional seed, ``numpy.random.Generator`` or
        ``numpy.random.RandomState`` for reproducible splits; ``None`` keeps
        drawing from NumPy's global random state
    : foods - optional DataFrame with ``name`` and ``calories`` columns;
        defaults to the notebook-level ``food_df``
    : returns - list of ``c`` tuples ``(distance, food_name)`` in split order.
        The same food may appear more than once. If there is no food with a
        calorie value to choose from, every tuple is ``(0, None)``.
    : raises ValueError - if ``input_calories`` is negative
    """
    if input_calories < 0:
        raise ValueError("input_calories must be non-negative")

    # Build a single generator up front: passing the same integer seed to
    # every skewnorm.rvs call would restart the stream and repeat one draw.
    if random_state is None or isinstance(
        random_state, (np.random.Generator, np.random.RandomState)
    ):
        rng = random_state
    else:
        rng = np.random.default_rng(random_state)

    # random split of input calories to get different combinations of food
    def skewed_random(a, b, skewness=2, size=None):
        # usually one food of a meal should have larger weight (e.g. primary dish)
        # therefore we need to skew the splits
        loc = (a + b) / 2    # centre of the interval [a, b]
        scale = (b - a) / 6  # interval spans roughly +/- 3 scale units
        return skewnorm.rvs(skewness, loc, scale, size, random_state=rng)

    # generate c random calories splits (skewed)
    cal_seg = []
    remaining = input_calories
    for _ in range(c):
        if remaining > 0:
            draw = skewed_random(0, remaining, skewness=-2)
            # The skew-normal has unbounded tails. Clamp so a split is never
            # negative and never exceeds what is left (a negative remainder
            # would be an invalid scale for the next draw).
            cal = float(np.clip(draw, 0, remaining))
        else:
            cal = 0.0
        cal_seg.append(cal)
        remaining -= cal

    if debug:
        print(cal_seg)

    # Rows without a calorie value can never be the nearest match.
    table = food_df if foods is None else foods
    candidates = table[['name', 'calories']].dropna(subset=['calories'])
    if candidates.empty:
        return [(0, None)] * c

    names = candidates['name'].to_numpy()
    calories = candidates['calories'].to_numpy(dtype=float)

    # find c foods closest to the c splits
    cdist = []
    for cal in cal_seg:
        dists = np.abs(calories - cal)
        best = int(np.argmin(dists))  # ties resolve to the earliest row
        cdist.append((dists[best], names[best]))

    return cdist

Let's say we want to recommend three foods for the user's lunch

In [350]:
# Lunch budget: presumably 40% of a 2200-calorie daily intake.
lunch_cal = 0.4 * 2200

# Request three foods; debug=True also prints the calorie splits that were drawn.
foods = find_foods(lunch_cal, c=3, debug=True)

# Each result is a (distance, name) pair; labels are printed in bold via ANSI escapes.
for pick in foods:
    dist, food_name = pick
    print(f"\033[1mFood Name\033[0m: {food_name} \t \033[1mDist\033[0m: {dist}")


[89.4013616891857, 344.59499543108745, 151.88083959006087]
[1mFood Name[0m: Baked Potato Wedges 	 [1mDist[0m: 0.5986383108142945
[1mFood Name[0m: Classic Cheese Pizza 	 [1mDist[0m: 4.594995431087455
[1mFood Name[0m: Grilled Chicken Breast 	 [1mDist[0m: 8.119160409939127
