In [2]:
import json

import numpy as np
import pandas as pd
import pymongo
import scipy
from bson import ObjectId, json_util
from pandas.io.json import json_normalize
from pymongo import MongoClient as Connection
from scipy import sparse, spatial
from scipy.spatial.distance import cdist, pdist, squareform
from sklearn.metrics.pairwise import cosine_similarity

# Disable scientific notation: render floats with two decimal places
pd.options.display.float_format = '{:.2f}'.format

# Show the result of every expression in a cell, not only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"



In [3]:
# display multiple outputs in one row
import pandas as pd
import numpy as np
from IPython.display import display, HTML

# Stylesheet that switches elements with class `output` to a row-wise flex layout
CSS = """
.output {
    flex-direction: row;
}
"""

# Wrap the stylesheet in a <style> tag; as the cell's last expression it is rendered
HTML('<style>{}</style>'.format(CSS))

In [4]:
# Open a MongoDB client with its default connection settings and select the
# `recipes_without_reviews` collection of the `recipe_db` database.
connection = Connection()
db = connection.recipe_db
input_data = db.recipes_without_reviews

# Fetch every document of the collection and round-trip it through bson's
# json_util / json so that `data` consists of plain JSON-compatible objects.
data = json.loads(json_util.dumps(input_data.find()))

In [5]:
# Flatten the nested `ingredients` records: one row per ingredient entry,
# columns prefixed with `ingredients_`, plus the recipe `id` as metadata.
ing = json_normalize(
    data,
    record_path='ingredients',
    meta='id',
    record_prefix='ingredients_',
    errors='ignore',
)

# Flatten the nested `nutritions` records: one row per nutrient entry,
# carrying the listed recipe-level fields along as metadata columns.
nutritions = json_normalize(
    data,
    record_path='nutritions',
    meta=['id', 'prep_time', 'rating', 'rating_count', 'ready_in_time', 'review_count'],
)


In [6]:
#  ------  Build and clean the ingredients frame

# Index every ingredient row by (recipe id, ingredient id)
ingredients = ing.set_index(['id', 'ingredients_id'])

# Rows of all ingredients that occur in at most 5 rows overall
ingredients_eqles_5_ing = ingredients.groupby('ingredients_id').filter(
    lambda rows: len(rows) <= 5
)

# Drop every recipe that uses at least one of those rarely used ingredients
ingredients_filt = ingredients.drop(
    labels=ingredients_eqles_5_ing.index.get_level_values('id').values,
    level='id',
)

# Drop every recipe that contains an ingredient whose ingredients_id is 0
ingredients_eqal_zero = ingredients_filt.loc[
    ingredients_filt.index.get_level_values('ingredients_id') == 0
]
ingredients_filt = ingredients_filt.drop(
    labels=ingredients_eqal_zero.index.get_level_values('id').values,
    level='id',
)


In [8]:
#  ------ Build and clean the nutrition frame

# Keep only the nutrition rows of recipes that survived the ingredient filtering
id_overlap_mask = nutritions['id'].isin(ingredients_filt.index.get_level_values('id'))
nutritions_filt = nutritions[id_overlap_mask]

# One row per recipe id, one column per nutrient name (pivot_table's default
# aggregation of `amount`); the outer 'amount' column level is removed and
# recipes with any missing nutrient value are dropped.
nutrition_db = (
    nutritions_filt
    .pivot_table(index=['id'], columns=['name'], values=['amount'])
    .droplevel(0, axis=1)
    .dropna()
)

# Re-align the ingredient rows with the nutrition table, because dropping the
# NA rows above may have removed further recipes
id_overlap_mask = ingredients_filt.index.get_level_values('id').isin(nutrition_db.index)
ingredients_db = ingredients_filt.loc[id_overlap_mask]

# (no check implemented) compare the number of recipe ids in both frames


In [9]:


# Turn the (id, ingredients_id) index levels back into ordinary columns.
# Re-assigning instead of mutating in place avoids modifying a frame that was
# obtained by boolean-mask slicing.
ingredients_db = ingredients_db.reset_index()

# Recipe x ingredient indicator matrix: one-hot encode every ingredient row,
# then collapse all rows of a recipe with a column-wise maximum.
# `.max()` is used instead of `.apply(max)`: the latter only worked because
# pandas silently substitutes its own reduction for the Python builtin, a
# behaviour that is deprecated in recent pandas versions.
recipe_db = pd.get_dummies(ingredients_db['ingredients_id']).groupby(
    ingredients_db['id']).max()



In [10]:
new_ingredients = ingredients_db.copy()

# Total ingredient weight (grams) per recipe
gramms_ingredients = new_ingredients.groupby("id")["ingredients_grams"].sum().reset_index()

# IQR fences are computed on the weight column only. Previously the quantiles
# were taken over the whole frame, so the numeric recipe `id` column was
# tested for "outliers" as well and could remove recipes because of their id.
weight_only = gramms_ingredients[["ingredients_grams"]]
Q1 = weight_only.quantile(0.25)
Q3 = weight_only.quantile(0.75)
IQR = Q3 - Q1

# Filter out all recipes whose total weight lies outside the 1.5 * IQR fences
is_weight_outlier = (
    (weight_only < (Q1 - 1.5 * IQR)) | (weight_only > (Q3 + 1.5 * IQR))
).any(axis=1)
df = gramms_ingredients[~is_weight_outlier].copy()

# Keep only recipes whose weight is strictly between 500 g and 2373.58225 g.
# Explicit comparisons are used because `between(..., inclusive=False)` is not
# accepted by newer pandas versions.
total_grams = df['ingredients_grams']
df_start_at_fivehundret = df[(total_grams > 500) & (total_grams < 2373.58225)].set_index('id')

# Nutrition rows of the recipes that passed the weight filter
id_overlap_mask = nutritions['id'].isin(df_start_at_fivehundret.index)
nutritions_filt_gramm = nutritions.loc[id_overlap_mask]

# New nutrition table based on the filtering above: one row per recipe id,
# one column per nutrient name (outer 'amount' column level removed)
nutrition_db2 = (
    nutritions_filt_gramm
    .pivot_table(index=['id'], columns=['name'], values=['amount'])
    .droplevel(0, axis=1)
)



In [11]:
# Random baseline: 10 recipes drawn from the weight-filtered nutrition table.
# A fixed seed makes the draw reproducible across kernel restarts.
subset_random_top10 = nutrition_db2.sample(n=10, random_state=0)

In [12]:
# Hard-coded list of ten recipe ids
erg_2_fat_10_naiv = [
    20458,
    231233,
    38004,
    228134,
    35469,
    23434,
    24683,
    12066,
    30007,
    72277,
]

In [13]:
# Recipe ids used as the user profile below (alias of the hard-coded id list)
subset_fat_id = erg_2_fat_10_naiv

In [14]:
# Keep only those recipes of the ingredient matrix that also have a row in
# the weight-filtered nutrition table
mask = recipe_db.index.isin(nutrition_db2.index)
recipe_db = recipe_db[mask]


In [15]:
# Ingredient vectors of the recipes that make up the user profile
user_recipes_fat = recipe_db.loc[recipe_db.index.isin(subset_fat_id)]
#user_recipes_normal = recipe_db[recipe_db.index.isin(subset_normal_id)]

In [16]:
# Top 10 most common ingredients, listed as: ingredient id  # name (figure
# from the original listing, presumably the number of uses)
drop_id_list = [
    16421,  # salt (2125)
    4342,   # garlic (1506)
    4397,   # onion (1412)
    16406,  # ground black pepper (1185)
    16157,  # butter (1016)
    6307,   # olive oil (944)
    6494,   # skinless bone less chicken breast (786)
    2496,   # water (749)
    16238,  # grated Parmesan cheese (574)
    16317,  # eggs (538)
]





In [17]:
# Remove the overly common ingredient columns from the recipe matrix ...
new_recipe_db = recipe_db.drop(columns=drop_id_list)

# ... and remove the user-profile recipes (rows) from the candidate pool
new_recipe_db_wo_userrecipes_fat = new_recipe_db.drop(index=subset_fat_id)

In [18]:
# Remove the ingredient columns listed in drop_id_list (the most common basic
# ingredients) from the user-profile recipes as well
new_user_recipes_fat = user_recipes_fat.drop(columns=drop_id_list)


In [19]:
# Draw 10 rows from the user-profile recipes. The seed is fixed so that the
# selection (and the row order) is the same on every run.
sample_fat = new_user_recipes_fat.sample(n=10, random_state=0).copy()


In [20]:
## Jaccard - fat

# Pairwise Jaccard distance between every sampled user recipe (rows) and
# every candidate recipe (columns), based on the ingredient indicator vectors
result_array = cdist(sample_fat, new_recipe_db_wo_userrecipes_fat, 'jaccard')
result_w_filter_fat = pd.DataFrame(
    result_array,
    columns=new_recipe_db_wo_userrecipes_fat.index.values,
    index=sample_fat.index.values,
)

# Average distance of each candidate to the user recipes, smallest first.
# `.mean()` replaces the former "sum, then divide by a hard-coded 10", so the
# value stays correct if the number of user recipes changes. The column keeps
# its historical name `jaccard_distance_sum` because later cells refer to it.
result_w_filter_10_fat = (
    result_w_filter_fat.mean()
    .to_frame('jaccard_distance_sum')
    .sort_values(by='jaccard_distance_sum')
)
result_w_filter_10_fat[0:10]


Unnamed: 0,jaccard_distance_sum
103503,0.88
240619,0.88
53075,0.88
23105,0.89
14622,0.89
8945,0.89
16700,0.89
26656,0.89
14587,0.89
216902,0.9


In [21]:
# Ids (index labels) of the first ten rows of the Jaccard result table
result_w_filter_10_fat[0:10].index.to_numpy()

array([103503, 240619,  53075,  23105,  14622,   8945,  16700,  26656,
        14587, 216902])

In [22]:
####### results

In [23]:
# User recipes (fat profile): ids of the sampled user-profile recipes
sample_fat.index.to_numpy()

array([ 23434,  35469,  12066,  38004,  20458,  72277, 231233, 228134,
        30007,  24683])

In [24]:
###### Jaccard, fat profile, naive top 10: ids of the first ten result rows
result_w_filter_10_fat[0:10].index.to_numpy()

array([103503, 240619,  53075,  23105,  14622,   8945,  16700,  26656,
        14587, 216902])

In [25]:
##### Random top 10: ids of the randomly sampled baseline recipes
subset_random_top10.index.to_numpy()

array([ 14629, 175666, 222850,  15030, 229764,  72715, 241165,  54611,
        24009,  85007])

In [65]:
# Summary statistics of the nutrition table `nutrition_db`
nutrition_db.describe()

name,Calcium,Calories,Calories from Fat,Carbohydrates,Cholesterol,Dietary Fiber,Fat,Folate,Iron,Magnesium,Niacin Equivalents,Potassium,Protein,Saturated Fat,Sodium,Sugars,Thiamin,Vitamin A - IU,Vitamin B6,Vitamin C
count,4062.0,4062.0,4062.0,4062.0,4062.0,4062.0,4062.0,4062.0,4062.0,4062.0,4062.0,4062.0,4062.0,4062.0,4062.0,4062.0,4062.0,4062.0,4062.0,4062.0
mean,146.25,436.38,205.35,29.33,96.54,2.63,22.82,60.76,3.17,51.62,12.3,537.35,27.76,8.69,846.6,6.73,0.3,1221.15,0.51,16.72
std,159.44,208.87,142.06,23.54,64.78,2.64,15.78,60.62,2.61,32.33,7.29,325.03,13.36,7.23,736.41,9.18,0.3,2557.36,0.34,26.92
min,0.93,13.63,0.93,0.0,0.0,0.0,0.1,0.0,0.07,0.86,0.04,4.66,0.45,0.02,2.68,0.0,0.0,0.0,0.0,0.0
25%,41.7,293.54,110.93,10.18,58.52,0.72,12.33,15.83,1.76,31.96,7.14,330.89,19.11,3.47,403.29,1.7,0.1,226.36,0.27,2.04
50%,82.98,402.2,179.09,24.77,83.22,1.91,19.9,39.22,2.73,45.61,10.96,474.24,26.44,6.95,719.32,3.83,0.2,569.15,0.44,6.49
75%,191.85,543.93,270.07,43.31,126.23,3.67,30.01,88.14,3.95,63.72,16.49,660.27,34.3,11.96,1084.21,7.82,0.39,1118.61,0.69,19.57
max,1264.33,4709.2,3455.29,236.72,979.78,27.83,383.92,519.54,55.89,590.39,109.02,6063.16,273.22,103.44,22099.37,136.51,3.26,40386.87,5.98,507.28


In [66]:
# Eight selected nutrient columns of `nutrition_db`
w_outlier_nut = nutrition_db.loc[:, ['Calories', 'Fat', 'Protein', 'Carbohydrates', 'Dietary Fiber', 'Iron', 'Calcium', 'Magnesium']]

In [67]:
# Summary statistics of the nutrition table `nutrition_db2`
nutrition_db2.describe()

name,Calcium,Calories,Calories from Fat,Carbohydrates,Cholesterol,Dietary Fiber,Fat,Folate,Iron,Magnesium,Niacin Equivalents,Potassium,Protein,Saturated Fat,Sodium,Sugars,Thiamin,Vitamin A - IU,Vitamin B6,Vitamin C
count,3770.0,3770.0,3770.0,3770.0,3770.0,3770.0,3770.0,3770.0,3770.0,3770.0,3770.0,3770.0,3770.0,3770.0,3770.0,3770.0,3770.0,3770.0,3770.0,3770.0
mean,147.62,437.41,206.09,29.14,97.13,2.6,22.9,60.51,3.15,51.42,12.44,532.02,28.05,8.75,851.17,6.73,0.3,1168.79,0.51,16.29
std,159.99,183.08,125.84,22.82,60.48,2.51,13.98,59.88,2.44,29.11,6.89,267.47,12.01,6.8,643.54,8.81,0.3,2321.84,0.31,24.36
min,2.25,51.34,1.32,0.0,0.0,0.0,0.15,0.0,0.2,1.0,0.04,21.36,0.75,0.02,5.2,0.0,0.0,0.0,0.0,0.0
25%,41.9,302.98,116.53,10.23,60.39,0.75,12.95,15.88,1.81,33.23,7.4,345.46,19.98,3.63,418.68,1.77,0.11,236.94,0.28,2.15
50%,83.17,407.19,182.76,24.79,85.16,1.95,20.31,38.7,2.77,45.93,11.16,480.73,26.87,7.17,734.11,3.91,0.2,581.03,0.45,6.66
75%,193.6,541.44,271.86,43.26,126.76,3.67,30.21,88.07,3.93,63.26,16.56,654.21,34.32,12.14,1092.86,7.84,0.4,1113.54,0.68,19.57
max,1264.33,1828.19,989.1,236.72,708.85,27.83,109.9,519.54,55.89,590.39,47.58,1931.49,99.73,59.31,7322.29,104.82,2.35,38664.7,3.84,270.41


In [68]:
# Eight selected nutrient columns of `nutrition_db2`
wo_outlier_nut = nutrition_db2.loc[:, ['Calories', 'Fat', 'Protein', 'Carbohydrates', 'Dietary Fiber', 'Iron', 'Calcium', 'Magnesium']]

In [69]:
# Summary statistics of the selected nutrients taken from `nutrition_db2`
wo_outlier_nut.describe()

name,Calories,Fat,Protein,Carbohydrates,Dietary Fiber,Iron,Calcium,Magnesium
count,3770.0,3770.0,3770.0,3770.0,3770.0,3770.0,3770.0,3770.0
mean,437.41,22.9,28.05,29.14,2.6,3.15,147.62,51.42
std,183.08,13.98,12.01,22.82,2.51,2.44,159.99,29.11
min,51.34,0.15,0.75,0.0,0.0,0.2,2.25,1.0
25%,302.98,12.95,19.98,10.23,0.75,1.81,41.9,33.23
50%,407.19,20.31,26.87,24.79,1.95,2.77,83.17,45.93
75%,541.44,30.21,34.32,43.26,3.67,3.93,193.6,63.26
max,1828.19,109.9,99.73,236.72,27.83,55.89,1264.33,590.39


In [94]:
# Summary statistics of the selected nutrients taken from `nutrition_db`
w_outlier_nut.describe()

name,Calories,Fat,Protein,Carbohydrates,Dietary Fiber,Iron,Calcium,Magnesium
count,4062.0,4062.0,4062.0,4062.0,4062.0,4062.0,4062.0,4062.0
mean,436.38,22.82,27.76,29.33,2.63,3.17,146.25,51.62
std,208.87,15.78,13.36,23.54,2.64,2.61,159.44,32.33
min,13.63,0.1,0.45,0.0,0.0,0.07,0.93,0.86
25%,293.54,12.33,19.11,10.18,0.72,1.76,41.7,31.96
50%,402.2,19.9,26.44,24.77,1.91,2.73,82.98,45.61
75%,543.93,30.01,34.3,43.31,3.67,3.95,191.85,63.72
max,4709.2,383.92,273.22,236.72,27.83,55.89,1264.33,590.39


In [70]:
# Reference tables:
#https://www.ncbi.nlm.nih.gov/books/NBK56068/table/summarytables.t4/?report=objectonly
#https://www.ncbi.nlm.nih.gov/books/NBK56068/table/summarytables.t1/?report=objectonly
#https://www.ncbi.nlm.nih.gov/books/NBK56068/table/summarytables.t5/?report=objectonly
# Men aged 31 - 50: amount per day divided by 3 meals.
# NOTE(review): the 0 and 1 entries (e.g. Cholesterol, Potassium, Sodium)
# look like placeholders rather than real reference values - confirm.

# Target values keyed by nutrient name. Keying by name (instead of a bare
# positional list) ties every value to its column, so a changed column order
# in nutrition_db2 can no longer silently shift values to the wrong nutrient.
recommended_per_meal = {
    "Calcium": 266.67,
    "Calories": 716,
    "Calories from Fat": 0,
    "Carbohydrates": 103.7,
    "Cholesterol": 0,
    "Dietary Fiber": 12.67,
    "Fat": 10,  # an earlier value of 16.67 was replaced by 10
    "Folate": 133.3,
    "Iron": 2,
    "Magnesium": 116.67,
    "Niacin Equivalents": 4,
    "Potassium": 1,
    "Protein": 51.75,
    "Saturated Fat": 1,
    "Sodium": 1,
    "Sugars": 1,
    "Thiamin": 0.3,
    "Vitamin A - IU": 208,
    "Vitamin B6": 0.36,
    "Vitamin C": 25,
}

# Fail loudly if the nutrient columns and the reference values do not match
# one-to-one; a missing value would otherwise become NaN in the profile.
_unmatched = set(nutrition_db2.columns).symmetric_difference(recommended_per_meal)
if _unmatched:
    raise ValueError(
        "nutrient columns and reference values differ: {}".format(
            sorted(map(str, _unmatched))))

# Single-row frame (row label 'index') with the same columns, in the same
# order, as nutrition_db2
recommenden_nut_low_fat = pd.DataFrame(
    recommended_per_meal, index=['index'], columns=nutrition_db2.columns)

In [71]:
# Nutrient names in the positional order used by both weight vectors below
_NUTRIENT_ORDER = (
    "Calcium",
    "Calories",
    "Calories from Fat",
    "Carbohydrates",
    "Cholesterol",
    "Dietary Fiber",
    "Fat",
    "Folate",
    "Iron",
    "Magnesium",
    "Niacin Equivalents",
    "Potassium",
    "Protein",
    "Saturated Fat",
    "Sodium",
    "Sugars",
    "Thiamin",
    "Vitamin A - IU",
    "Vitamin B6",
    "Vitamin C",
)

# Uniform weighting: every nutrient has weight 1
no_weight = [1 for _ in _NUTRIENT_ORDER]

# Fat-focused weighting: weight 1 everywhere except for the nutrients below
_FAT_WEIGHT_OVERRIDES = {
    "Calories": 10,
    "Carbohydrates": 10,
    "Fat": 10000,
    "Protein": 10,
}
weighted_fat = [_FAT_WEIGHT_OVERRIDES.get(nutrient, 1) for nutrient in _NUTRIENT_ORDER]



In [72]:
# First 500 rows of the Jaccard result table (despite the `_100` in the name),
# with their 0-based rank stored in `pos`; the index is labelled 'index'.
result_w_filter_fat_100 = (
    result_w_filter_10_fat.iloc[:500]
    .assign(pos=lambda frame: np.arange(len(frame)))
    .rename_axis('index')
)

In [73]:
# Nutrition rows of the recipes kept in result_w_filter_fat_100
nutrition_fat_naiv = nutrition_db2.loc[nutrition_db2.index.isin(result_w_filter_fat_100.index)]

In [74]:
# Euclidean distance on the nutrient values: Minkowski distance with p=2 and
# the `weighted_fat` weights, between the reference profile (rows) and each
# candidate recipe (columns)
result_array = cdist(
    recommenden_nut_low_fat, nutrition_fat_naiv, metric='minkowski', p=2, w=weighted_fat)
euclid_distance = pd.DataFrame(
    result_array,
    index=recommenden_nut_low_fat.index.values,
    columns=nutrition_fat_naiv.index.values,
)

# Distance per candidate (summed over the rows of the reference frame),
# smallest first
euclid_distance_sum_fat_naiv = (
    euclid_distance.sum()
    .to_frame('euclid_distance_sum')
    .sort_values(by='euclid_distance_sum')
)
euclid_distance_sum_fat_naiv[0:10]



Unnamed: 0,euclid_distance_sum
9043,1057.73
18440,1272.45
61824,1279.47
21699,1362.52
20004,1371.5
159456,1372.93
26640,1379.87
11961,1380.25
62212,1385.98
48477,1391.2


In [75]:
# Combine nutrient distance and Jaccard distance/rank per recipe id
# (inner join on the index of both frames)
jaccard_euclid_joined_fat_naiv = euclid_distance_sum_fat_naiv.join(
    result_w_filter_fat_100, how='inner')

In [76]:
# First ten rows of the joined distance table
jaccard_euclid_joined_fat_naiv[0:10]

Unnamed: 0,euclid_distance_sum,jaccard_distance_sum,pos
9043,1057.73,0.95,360
18440,1272.45,0.93,134
61824,1279.47,0.94,251
21699,1362.52,0.93,163
20004,1371.5,0.95,327
159456,1372.93,0.96,475
26640,1379.87,0.95,375
11961,1380.25,0.96,490
62212,1385.98,0.92,51
48477,1391.2,0.92,60


In [77]:
# Nutrition rows of the recipes in the first ten rows of the joined table
nutrition_top10_fat_naiv = nutrition_db2.loc[nutrition_db2.index.isin(jaccard_euclid_joined_fat_naiv.index[:10])]

In [78]:
# Restrict the top-10 table to eight selected nutrients and summarise them
nutrition_top10_fat_naiv = nutrition_top10_fat_naiv.loc[:, ['Calories', 'Fat', 'Protein', 'Carbohydrates', 'Dietary Fiber', 'Iron', 'Calcium', 'Magnesium']]
nutrition_top10_fat_naiv.describe()

name,Calories,Fat,Protein,Carbohydrates,Dietary Fiber,Iron,Calcium,Magnesium
count,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0
mean,441.38,13.69,24.04,54.71,5.42,4.39,160.9,61.28
std,64.21,3.43,7.34,12.67,5.58,1.63,86.59,31.1
min,316.96,7.94,11.78,39.48,1.03,2.6,14.76,15.32
25%,414.03,10.88,21.37,46.64,2.7,3.07,99.26,43.03
50%,458.06,14.59,23.09,50.17,2.9,4.14,172.27,58.63
75%,467.09,16.1,26.77,63.11,4.1,5.26,207.37,71.01
max,546.36,17.66,37.92,80.65,17.09,7.72,282.49,132.42


In [79]:
# Recipe ids contained in the top-10 nutrition table
nutrition_top10_fat_naiv.index.to_numpy()

array([  9043,  11961,  18440,  20004,  21699,  26640,  48477,  61824,
        62212, 159456])

In [80]:
# Eight selected nutrient columns of the candidate nutrition table
nutrition_fat_naiv_copy = nutrition_fat_naiv.loc[:, ['Calories', 'Fat', 'Protein', 'Carbohydrates', 'Dietary Fiber', 'Iron', 'Calcium', 'Magnesium']]

In [81]:
# Summary statistics of the selected nutrients for the candidate recipes
nutrition_fat_naiv_copy.describe()

name,Calories,Fat,Protein,Carbohydrates,Dietary Fiber,Iron,Calcium,Magnesium
count,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0
mean,507.62,28.1,27.86,35.73,3.24,3.37,293.79,53.79
std,175.36,12.53,10.74,18.85,2.63,1.52,184.09,23.85
min,92.89,1.81,3.68,1.15,0.0,0.49,14.76,8.57
25%,375.38,18.7,20.01,23.46,1.46,2.26,153.91,35.69
50%,481.01,26.15,26.77,33.23,2.66,3.12,268.4,50.88
75%,612.43,35.06,33.57,46.82,4.19,4.26,396.7,66.11
max,1203.63,79.29,74.17,127.42,19.62,10.34,1264.33,151.16


In [82]:
# One candidate recipe with a Fat value between 10 and 15 (both inclusive),
# drawn with a fixed seed so the pick is reproducible
subset_fat_asd = nutrition_fat_naiv[(nutrition_fat_naiv['Fat'] >= 10) & (nutrition_fat_naiv['Fat'] <= 15)].sample(n=1, random_state=0).copy()

In [83]:
# Shape of the candidate rows whose Fat value lies between 1 and 20 (both inclusive)
nutrition_fat_naiv[nutrition_fat_naiv['Fat'].between(1, 20)].shape

(153, 20)

In [84]:
# Random baseline: nutrition rows of 10 recipes sampled from the candidate
# pool. The seed is fixed so the baseline is reproducible between runs.
nutrition_random_10 = nutrition_db2[nutrition_db2.index.isin(new_recipe_db_wo_userrecipes_fat.sample(n=10, random_state=0).index.to_numpy())]

In [85]:
# Restrict the random baseline to eight selected nutrients and summarise them
nutrition_random_10 = nutrition_random_10.loc[:, ['Calories', 'Fat', 'Protein', 'Carbohydrates', 'Dietary Fiber', 'Iron', 'Calcium', 'Magnesium']]
nutrition_random_10.describe()

name,Calories,Fat,Protein,Carbohydrates,Dietary Fiber,Iron,Calcium,Magnesium
count,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0
mean,478.81,25.55,27.72,31.72,3.16,3.22,189.52,53.16
std,235.48,17.03,12.99,19.73,2.96,2.14,229.37,31.16
min,170.64,6.36,4.11,10.65,0.09,0.66,19.58,15.73
25%,304.05,11.35,18.69,14.63,1.41,1.93,44.25,32.93
50%,461.29,22.57,30.61,26.3,1.94,2.16,93.22,43.0
75%,667.02,32.89,35.3,44.97,4.62,4.39,289.92,66.28
max,878.44,58.37,44.78,64.0,9.11,6.74,736.59,110.71


In [86]:
# Recipe ids contained in the random baseline table
nutrition_random_10.index.to_numpy()

array([ 12782,  18349,  21291,  24162,  26508,  52464,  74708,  77927,
       111840, 163162])

In [87]:
# Nutrition rows of the user-profile recipes
nutrition_user_recipes_fat = nutrition_db2.loc[nutrition_db2.index.isin(user_recipes_fat.index)]

In [88]:
# Restrict the user-profile nutrition table to eight selected nutrients
nutrition_user_recipes_fat = nutrition_user_recipes_fat.loc[:, ['Calories', 'Fat', 'Protein', 'Carbohydrates', 'Dietary Fiber', 'Iron', 'Calcium', 'Magnesium']]

In [89]:
# Summary statistics of the selected nutrients for the user-profile recipes
nutrition_user_recipes_fat.describe()

name,Calories,Fat,Protein,Carbohydrates,Dietary Fiber,Iron,Calcium,Magnesium
count,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0
mean,601.26,37.14,32.71,34.18,3.45,3.54,322.9,61.13
std,94.67,1.3,12.12,16.57,1.97,1.0,186.45,30.91
min,481.47,35.04,16.83,12.24,0.64,1.36,65.01,29.87
25%,514.65,36.38,24.73,18.64,2.3,3.03,204.59,36.7
50%,606.23,36.65,30.65,33.28,2.74,3.78,322.06,54.9
75%,681.18,38.01,39.36,50.31,4.62,4.19,401.17,67.54
max,728.84,39.27,52.23,55.31,6.82,4.58,599.82,123.06


In [90]:
# Nutrition rows of the recipes in the first ten rows of the Jaccard result table
nutrition_jaccard_fat = nutrition_db2.loc[nutrition_db2.index.isin(result_w_filter_10_fat.index[:10])]

In [91]:
# Restrict the Jaccard top-10 nutrition table to eight selected nutrients
nutrition_jaccard_fat = nutrition_jaccard_fat.loc[:, ['Calories', 'Fat', 'Protein', 'Carbohydrates', 'Dietary Fiber', 'Iron', 'Calcium', 'Magnesium']]

In [92]:
# Summary statistics of the selected nutrients for the Jaccard top-10 recipes
nutrition_jaccard_fat.describe()

name,Calories,Fat,Protein,Carbohydrates,Dietary Fiber,Iron,Calcium,Magnesium
count,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0
mean,570.74,35.18,31.38,32.84,3.73,3.44,389.92,62.67
std,216.69,17.37,10.31,15.2,2.29,1.51,210.14,20.68
min,286.68,12.44,19.55,6.76,0.42,1.34,163.86,31.31
25%,395.82,20.95,26.17,28.43,2.31,2.53,262.84,49.87
50%,553.77,34.44,29.92,30.97,3.61,3.11,344.69,57.99
75%,697.41,45.21,35.07,38.64,5.05,4.43,440.99,76.13
max,978.25,69.1,55.09,58.86,8.2,5.84,918.7,99.35
