In [266]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import NearestNeighbors
import warnings
warnings.filterwarnings('ignore', module='sklearn')

In [267]:
df = pd.read_csv('data/Nutritions_US_Adjusted.csv', encoding='latin1')

In [270]:
categorical_columns = df.select_dtypes(include=['object', 'category']).columns
numerical_columns = df.select_dtypes(include=['int64', 'float64']).columns
print('categorical_columns')
print(categorical_columns )
print("=" * 20)
print('numerical_columns')
print(numerical_columns)


categorical_columns
Index(['Name', 'GmWtDesc1', 'GmWtDesc2'], dtype='object')
numerical_columns
Index(['ID', 'Water', 'Kcal', 'Protein', 'Lipid', 'Ash', 'Carbohydrates',
       'Fiber', 'Sugar', 'Calcium', 'Iron', 'Magnesium', 'Phosphorus',
       'Potassium', 'Sodium', 'Zinc', 'Copper', 'Manganese', 'Selenium',
       'Vit_C', 'Thiamin', 'Riboflavin', 'Niacin', 'PantoAcid', 'VitB6',
       'Folate', 'FolicAcid', 'FoodFolate', 'FolateDFE', 'CholineTot',
       'VitB12', 'VitAIU', 'VitARAE', 'Retinol', 'AlphaCarot', 'BetaCarot',
       'BetaCrypt', 'Lycopene', 'LutZea', 'VitE', 'VitDg', 'VitDIU', 'VitK',
       'FASat', 'FAMono', 'FAPoly', 'Cholestrl', 'GmWt1', 'GmWt2'],
      dtype='object')


In [271]:
class Meal:
    def __init__(self, df):
        self.df = df
        self.df_norm = df.copy()

        #dropping categorical columns
        self.df_norm = self.df_norm.drop(df.select_dtypes(include=['object', 'category']).columns,axis=1)
        
        # Normalization
        self.df_scalers = {}
        for c in self.df_norm:
            scaler = MinMaxScaler()
            scaler.fit(self.df_norm[[c]])
            self.df_scalers[c] = scaler
            self.df_norm[c]=scaler.transform(self.df_norm[[c]])


        self.display_columns = df.columns
        self.number_suggestions = 5


    def set_display_columns(display_columns):
        return display_columns

    def select_columns(self, dict_point_columns, df_target):
        temp_df = df_target[dict_point_columns].copy()
        temp_df = temp_df.dropna()
        return temp_df

    def scale_point_dict(self, dict_point, df_point_columns):
        point = {column:self.df_scalers[column].transform([[value]])[0][0] for column, value in dict_point.items()}
        return [point[column] for column in df_point_columns]

    def get_nearest_meal(self,dict_point, k=None,df_target=None):
        dict_point_columns = dict_point.keys()

        if k is None:
            k = self.number_suggestions
        
        if df_target is None:
            df_target = self.df_norm

        temp_df = self.select_columns(dict_point_columns, df_target)
        point = self.scale_point_dict(dict_point, temp_df.columns)
    
        model = NearestNeighbors(n_neighbors=k, algorithm='auto').fit(temp_df)

        distances, indices = model.kneighbors(np.array(point).reshape(1,-1))
        indices = temp_df.iloc[indices[0]].index
        nearest_points = df.iloc[indices]

        print(f"The nearest distances are:")
        display(distances)
        print(f"The nearest points to {point} are:")

        display(nearest_points[self.display_columns])

        return nearest_points[self.display_columns]
        

    
    def suggest_meals(self, ingredients, strict_search=False):
        ingredients = [i.lower() for i in ingredients]

        if strict_search:
            masks = [self.df["Name"].str.contains(word, case=False, na=False) for word in ingredients]
            df_target = self.df.copy()
            mask = masks[0]
            for m in masks[1:]:
                mask *= m
            
            df_target = df_target[mask]

        else:
            regex_pattern = '|'.join(ingredients)
            mask = self.df["Name"].str.contains(regex_pattern, na=False, case=False)
            df_target = self.df[mask]


        display(df_target)
        return df_target
    
    def suggest_meals_with_focus(self, ingredients, point, k, strict_search=False):
        if strict_search:
            masks = [self.df["Name"].str.contains(word, case=False, na=False) for word in ingredients]
            df_target = self.df.copy()
            mask = masks[0]
            for m in masks[1:]:
                mask *= m
            
            df_target = df_target[mask]
        else:
            regex_pattern = '|'.join(ingredients)
            mask = self.df["Name"].str.contains(regex_pattern, na=False, case=False)
            df_target = self.df_norm[mask].copy()

        if k > len(df_target.index):
            k = len(df_target.index)

        return self.get_nearest_meal(point, k, df_target)



In [272]:
engine = Meal(df)
rs1 = engine.get_nearest_meal({'Lipid':16,'Protein':26, "Calcium":15}, k=4)
rs2 = engine.suggest_meals(['goat','CHEESE'],strict_search=True)
rs3 = engine.suggest_meals_with_focus(['Cheese'],{'Protein':30,'Kcal':500, "Sugar":1}, k=4, strict_search=True)

The nearest distances are:


array([[0.00222607, 0.00349893, 0.00356165, 0.00412741]])

The nearest points to [0.16, 0.2943840579710145, 0.0020369364475828354] are:


Unnamed: 0,ID,Name,Water,Kcal,Protein,Lipid,Ash,Carbohydrates,Fiber,Sugar,...,VitDIU,VitK,FASat,FAMono,FAPoly,Cholestrl,GmWt1,GmWtDesc1,GmWt2,GmWtDesc2
859,5037,"CHICKEN,BROILERS OR FRYERS,DK MEAT,MEAT&SKN,CK...",58.63,253,25.97,15.78,0.92,0.0,0.0,0.0,...,0,0.0,4.37,6.19,3.49,91,101.0,"1 unit, (yield from 1 lb ready-to-cook chicken)",167.0,".5 chicken, bone removed"
7309,23229,"BEEF,RIB EYE STEK,BNLES,LIP OFF,LN & FAT,0 FAT...",57.42,248,26.29,15.9,1.05,0.0,0.0,0.0,...,5,1.6,7.226,7.786,0.81,77,85.0,3 oz,266.0,1 steak
7573,23502,"USDA COMMODITY,BF,GROUND BULK/COARSE GROUND,FR...",56.49,259,26.06,16.34,0.95,0.0,0.0,0.0,...,0,0.0,5.744,7.504,0.62,89,28.35,1 oz,0.0,0
2642,10195,"PORK,FRSH,LOIN,CNTR RIB (CHOPS),BNLESS,LN&FAT,...",58.15,255,26.29,15.79,1.2,0.0,0.0,0.0,...,0,0.0,6.12,7.21,1.32,73,85.0,3 oz,81.0,"1 chop, excluding refuse (yield from 1 raw cho..."


Unnamed: 0,ID,Name,Water,Kcal,Protein,Lipid,Ash,Carbohydrates,Fiber,Sugar,...,VitDIU,VitK,FASat,FAMono,FAPoly,Cholestrl,GmWt1,GmWtDesc1,GmWt2,GmWtDesc2
138,1156,"CHEESE,GOAT,HARD TYPE",29.01,452,30.52,35.59,3.72,2.17,0.0,2.17,...,26,3.0,24.609,8.117,0.845,105,28.35,1 oz,0.0,0
139,1157,"CHEESE,GOAT,SEMISOFT TYPE",45.52,364,21.58,29.84,2.94,0.12,0.0,0.12,...,22,2.5,20.639,6.808,0.709,79,28.35,1 oz,0.0,0
140,1159,"CHEESE,GOAT,SOFT TYPE",60.75,264,18.52,21.08,1.58,0.0,0.0,0.0,...,15,1.8,14.575,4.807,0.501,46,28.35,1 oz,0.0,0


The nearest distances are:


array([[67.3460345 , 72.16561998, 72.50544123, 72.50635914]])

The nearest points to [0.3396739130434783, 0.5543237250554324, 0.01002004008016032] are:


Unnamed: 0,ID,Name,Water,Kcal,Protein,Lipid,Ash,Carbohydrates,Fiber,Sugar,...,VitDIU,VitK,FASat,FAMono,FAPoly,Cholestrl,GmWt1,GmWtDesc1,GmWt2,GmWtDesc2
8729,43396,"CHEESE,COTTAGE,LOWFAT,1% MILKFAT,W/VEG",83.5,67,10.9,1.0,1.6,3.0,0.0,3.0,...,0,2.6,0.619,0.282,0.039,3,113.0,4 oz,226.0,1 cup
13,1014,"CHEESE,COTTAGE,NONFAT,UNCRMD,DRY,LRG OR SML CURD",81.01,72,10.34,0.29,1.71,6.66,0.0,1.85,...,0,0.0,0.169,0.079,0.003,7,145.0,"1 cup, (not packed)",113.0,4 oz
15,1016,"CHEESE,COTTAGE,LOWFAT,1% MILKFAT",82.48,72,12.39,1.02,1.39,2.72,0.0,2.72,...,0,0.1,0.645,0.291,0.031,4,113.0,4 oz,226.0,"1 cup, (not packed)"
8712,43352,"CHEESE,COTTAGE,LOWFAT,1% MILKFAT,NO NA",83.5,72,12.4,1.0,0.4,2.7,0.0,2.7,...,0,0.1,0.632,0.284,0.031,4,113.0,4 oz,226.0,1 cup
