In [3]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix
# Widen text columns so long food descriptions are not truncated in rendered
# frames. Use the canonical lower-case option key rather than relying on
# pandas' pattern-based option lookup to resolve a differently-cased name.
pd.set_option("display.max_colwidth", 100)

In [4]:
# Read the food table from the project's Data directory and preview the
# first five rows.
foods = pd.read_csv(filepath_or_buffer="Data/foods.csv")
foods.head(5)

Unnamed: 0,Food_ID,Category,Description,Data.Alpha Carotene,Data.Beta Carotene,Data.Beta Cryptoxanthin,Data.Carbohydrate,Data.Cholesterol,Data.Choline,Data.Fiber,...,Data.Major Minerals.Phosphorus,Data.Major Minerals.Potassium,Data.Major Minerals.Sodium,Data.Major Minerals.Zinc,Data.Vitamins.Vitamin A - RAE,Data.Vitamins.Vitamin B12,Data.Vitamins.Vitamin B6,Data.Vitamins.Vitamin C,Data.Vitamins.Vitamin E,Data.Vitamins.Vitamin K
0,1,Milk,"Milk, human",0,7,0,6.89,14,16.0,0.0,...,14,51,17,0.17,61,0.05,0.011,5.0,0.08,0.3
1,2,Milk,"Milk, NFS",0,4,0,4.87,8,17.9,0.0,...,103,157,39,0.42,59,0.56,0.06,0.1,0.03,0.2
2,3,Milk,"Milk, whole",0,7,0,4.67,12,17.8,0.0,...,101,150,38,0.41,32,0.54,0.061,0.0,0.05,0.3
3,4,Milk,"Milk, low sodium, whole",0,7,0,4.46,14,16.0,0.0,...,86,253,3,0.38,29,0.36,0.034,0.9,0.08,0.3
4,5,Milk,"Milk, calcium fortified, whole",0,7,0,4.67,12,17.8,0.0,...,101,150,38,0.41,32,0.54,0.061,0.0,0.05,0.3


In [5]:
# Read the user table from the project's Data directory and preview the
# first five rows.
users = pd.read_csv(filepath_or_buffer="Data/users.csv")
users.head(5)

Unnamed: 0,User-ID,Name,Location,Country,Age
0,1,stockton,california,usa,18
1,2,porto,v.n.gaia,portugal,17
2,3,santa monica,california,usa,61
3,4,albacete,wisconsin,spain,26
4,5,melbourne,victoria,australia,14


In [6]:
# Read the (user, food, rating) table from the project's Data directory and
# preview the first five rows.
ratings = pd.read_csv(filepath_or_buffer="Data/ratings.csv")
ratings.head(5)

Unnamed: 0,User_ID,Food_ID,Rating
0,1.0,88.0,4.0
1,1.0,46.0,3.0
2,1.0,24.0,5.0
3,1.0,25.0,4.0
4,2.0,49.0,1.0


In [7]:
# Attach the food attributes to every rating row. This is an inner join on
# Food_ID, so ratings whose Food_ID has no match in `foods` are dropped.
foods_with_rating = pd.merge(ratings, foods, how="inner", on="Food_ID")
foods_with_rating.head(5)

Unnamed: 0,User_ID,Food_ID,Rating,Category,Description,Data.Alpha Carotene,Data.Beta Carotene,Data.Beta Cryptoxanthin,Data.Carbohydrate,Data.Cholesterol,...,Data.Major Minerals.Phosphorus,Data.Major Minerals.Potassium,Data.Major Minerals.Sodium,Data.Major Minerals.Zinc,Data.Vitamins.Vitamin A - RAE,Data.Vitamins.Vitamin B12,Data.Vitamins.Vitamin B6,Data.Vitamins.Vitamin C,Data.Vitamins.Vitamin E,Data.Vitamins.Vitamin K
0,1.0,88.0,4.0,Frozen yogurt bar,"Frozen yogurt bar, vanilla",0,9,0,21.6,13,...,89,156,63,0.28,49,0.07,0.04,0.7,0.09,0.3
1,1.0,46.0,3.0,Yogurt,"Yogurt, Greek, NS as to type of milk or flavor",0,6,0,3.94,10,...,137,141,34,0.6,90,0.52,0.055,0.8,0.04,0.2
2,1.0,24.0,5.0,Milk,"Milk, dry, reconstituted, whole",0,8,0,5.86,15,...,118,203,60,0.52,39,0.5,0.046,1.3,0.09,0.3
3,1.0,25.0,4.0,Milk,"Milk, dry, reconstituted, low fat (1%)",0,2,0,4.55,4,...,85,157,51,0.37,65,0.35,0.032,0.6,0.03,0.1
4,2.0,49.0,1.0,Yogurt,"Yogurt, whole milk, plain",0,5,0,4.66,13,...,95,155,46,0.59,27,0.37,0.032,0.5,0.06,0.2


In [8]:
# Count the non-null ratings each food description received and expose the
# result as a two-column frame: Description, "Number Of Ratings".
number_of_ratangs = (
    foods_with_rating
    .groupby("Description")["Rating"]
    .count()
    .rename("Number Of Ratings")
    .reset_index()
)
number_of_ratangs.head(5)

Unnamed: 0,Description,Number Of Ratings
0,"Almond milk, sweetened",1
1,"Almond milk, sweetened, chocolate",1
2,"Almond milk, unsweetened",1
3,"Almond milk, unsweetened, chocolate",2
4,"Buttermilk, fat free (skim)",3


In [9]:
# Append the per-description rating count to every rating row (inner join on
# Description).
final_ratings = pd.merge(
    left=foods_with_rating,
    right=number_of_ratangs,
    how="inner",
    on="Description",
)
final_ratings.head(5)

Unnamed: 0,User_ID,Food_ID,Rating,Category,Description,Data.Alpha Carotene,Data.Beta Carotene,Data.Beta Cryptoxanthin,Data.Carbohydrate,Data.Cholesterol,...,Data.Major Minerals.Potassium,Data.Major Minerals.Sodium,Data.Major Minerals.Zinc,Data.Vitamins.Vitamin A - RAE,Data.Vitamins.Vitamin B12,Data.Vitamins.Vitamin B6,Data.Vitamins.Vitamin C,Data.Vitamins.Vitamin E,Data.Vitamins.Vitamin K,Number Of Ratings
0,1.0,88.0,4.0,Frozen yogurt bar,"Frozen yogurt bar, vanilla",0,9,0,21.6,13,...,156,63,0.28,49,0.07,0.04,0.7,0.09,0.3,1
1,1.0,46.0,3.0,Yogurt,"Yogurt, Greek, NS as to type of milk or flavor",0,6,0,3.94,10,...,141,34,0.6,90,0.52,0.055,0.8,0.04,0.2,5
2,1.0,24.0,5.0,Milk,"Milk, dry, reconstituted, whole",0,8,0,5.86,15,...,203,60,0.52,39,0.5,0.046,1.3,0.09,0.3,3
3,1.0,25.0,4.0,Milk,"Milk, dry, reconstituted, low fat (1%)",0,2,0,4.55,4,...,157,51,0.37,65,0.35,0.032,0.6,0.03,0.1,4
4,2.0,49.0,1.0,Yogurt,"Yogurt, whole milk, plain",0,5,0,4.66,13,...,155,46,0.59,27,0.37,0.032,0.5,0.06,0.2,6


In [10]:
# Build the food x user rating matrix: one row per food description, one
# column per user. Repeated (food, user) pairs are averaged; pairs with no
# rating are left as NaN.
pivot_food = pd.pivot_table(
    final_ratings,
    values="Rating",
    index="Description",
    columns="User_ID",
    aggfunc="mean",
)
pivot_food.head(5)

User_ID,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,...,91.0,92.0,93.0,94.0,95.0,96.0,97.0,98.0,99.0,100.0
Description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Almond milk, sweetened",,,,,,,,,,,...,,,,,,,,,,
"Almond milk, sweetened, chocolate",,,,,,,,,,,...,,,,,,,,10.0,,
"Almond milk, unsweetened",,,,,,,,,,,...,,,,,,,,,,
"Almond milk, unsweetened, chocolate",,,,,,,,,,,...,,,,,,,,,,
"Buttermilk, fat free (skim)",,,,,,,,,,,...,,,,,,,,,,


In [11]:
# Replace missing ratings with 0 so every food row is a fully numeric vector.
# The name is rebound to the filled copy instead of mutating with
# inplace=True, which keeps the step chainable and avoids silently changing
# an object that a previous cell has already displayed.
pivot_food = pivot_food.fillna(0)
pivot_food.head()

User_ID,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,...,91.0,92.0,93.0,94.0,95.0,96.0,97.0,98.0,99.0,100.0
Description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Almond milk, sweetened",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Almond milk, sweetened, chocolate",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,0.0
"Almond milk, unsweetened",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Almond milk, unsweetened, chocolate",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Buttermilk, fat free (skim)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# Store the rating matrix in compressed-sparse-row form for the neighbour model.
food_sparse = csr_matrix(pivot_food.to_numpy())

In [13]:
# Brute-force nearest-neighbour index over the food rows; every other
# estimator setting is left at its default. `fit` returns the estimator, so
# the construction and fitting can be chained; the bare name echoes it as the
# cell output.
model = NearestNeighbors(algorithm="brute").fit(food_sparse)
model

In [14]:
def recommemd_food(food_name, n_neighbors=16):
    """Print the foods whose rating vectors are closest to ``food_name``.

    The food is looked up in the index of the notebook-level ``pivot_food``
    table, its row is sent to the notebook-level fitted ``model``, and the
    returned neighbours are printed as a numbered list. A neighbour whose
    label equals ``food_name`` is printed as the header line instead of
    being numbered.

    Parameters
    ----------
    food_name : str
        Label to look up in ``pivot_food.index``. If the label occurs more
        than once, the first occurrence is used.
    n_neighbors : int, default 16
        Number of rows requested from the model (this count includes the
        queried food itself when the model returns it). Values larger than
        the number of rows in ``pivot_food`` are clamped to that row count.

    Returns
    -------
    None
        The recommendations are written to standard output.

    Raises
    ------
    IndexError
        If ``food_name`` is not present in ``pivot_food.index``.
    """
    matches = np.flatnonzero(pivot_food.index == food_name)
    if matches.size == 0:
        # Same exception type the bare ``[0][0]`` lookup would produce, but
        # with a message that names the missing food.
        raise IndexError(f"{food_name!r} was not found in pivot_food.index")
    food_id = matches[0]

    # Requesting more neighbours than there are rows makes kneighbors fail,
    # so never ask for more than the table holds.
    k = min(n_neighbors, len(pivot_food.index))
    query = pivot_food.iloc[food_id, :].values.reshape(1, -1)
    _, suggestion = model.kneighbors(query, n_neighbors=k)

    # A single query row was sent, so only the first row of indices is relevant.
    rank = 1
    for label in pivot_food.index[suggestion[0]]:
        if label == food_name:
            print("Recommanded to :", label, "\n")
        else:
            print(f"{rank}) {label} \n \n")
            rank += 1

In [15]:
# Example query: print the recommendations for one food from the catalogue.
Name = "Milk, calcium fortified, whole"
recommemd_food(food_name=Name)

Recommanded to : Milk, calcium fortified, whole 

1) Yogurt, low fat milk, plain 
 

2) Hot chocolate / Cocoa, made with no sugar added dry mix and whole milk 
 

3) Yogurt, Greek, low fat milk, plain 
 

4) Milk, evaporated, fat free (skim) 
 

5) Eggnog 
 

6) Infant formula, ready-to-feed (Enfamil EnfaCare) 
 

7) Milk, NFS 
 

8) Infant formula, powder, made with tap water (Similac Expert Care Alimentum) 
 

9) Chocolate milk, made from no sugar added dry mix with fat free milk  (Nesquik) 
 

10) Frozen yogurt, soft serve, chocolate 
 

11) Almond milk, sweetened, chocolate 
 

12) Infant formula, powder, made with water, NFS (Similac Sensitive) 
 

13) Infant formula, powder, made with water, NFS (Similac Expert Care Alimentum) 
 

14) Infant formula, powder, made with water, NFS (Similac Go and Grow) 
 

15) Frozen yogurt, NFS 
 

