In [25]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import NearestNeighbors
import warnings
warnings.filterwarnings('ignore', module='sklearn')

In [26]:
# Load the nutrition table from disk; the file is decoded as latin1.
df = pd.read_csv(
    'data/Nutritions_US_Adjusted.csv',
    encoding='latin1',
)

In [34]:
class Meal:
    """Food recommender working on a nutrition table.

    Every numeric/boolean column of the table is rescaled with its own
    ``MinMaxScaler``. Foods can be filtered by words found in their ``Name``
    column and ranked by distance to a requested nutrient profile in the
    rescaled space.

    Attributes:
        df: the table passed to the constructor (never modified here).
        df_norm: rescaled copy of the numeric/boolean columns of ``df``;
            shares the index of ``df``.
        df_scalers: dict mapping each column of ``df_norm`` to its fitted scaler.
        display_columns: columns kept in the frames returned by
            ``get_nearest_meal``.
        number_suggestions: neighbour count used when ``k`` is not given.
    """

    # Columns hidden from result tables by default.
    _HIDDEN_COLUMNS = ['Ash', 'Folate', 'FoodFolate', 'FolateDFE', 'VitARAE', 'VitDg']

    def __init__(self, df):
        """Store ``df`` and build the rescaled copy plus one scaler per column.

        Args:
            df: table with a ``Name`` column and numeric nutrient columns.
        """
        self.df = df

        # Keep only columns a scaler can handle (text/categorical ones are
        # left out). select_dtypes returns a new frame, so ``df`` is untouched.
        self.df_norm = df.select_dtypes(include=['number', 'bool']).copy()

        # One scaler per column so a single requested value can be rescaled
        # later without knowing the other columns.
        self.df_scalers = {}
        for column in self.df_norm.columns:
            scaler = MinMaxScaler()
            # Fit on a plain array: the scaler is later applied to bare
            # numbers, and mixing named/unnamed inputs makes sklearn warn.
            values = self.df_norm[[column]].to_numpy()
            self.df_norm[column] = scaler.fit_transform(values).ravel()
            self.df_scalers[column] = scaler

        # errors='ignore' lets the class work on tables lacking these columns.
        self.display_columns = df.columns.drop(self._HIDDEN_COLUMNS, errors='ignore')
        self.number_suggestions = 5

    def set_display_columns(self, display_columns):
        """Replace the columns shown in nearest-meal results.

        Args:
            display_columns: column labels of ``self.df`` to keep.

        Returns:
            The value that was stored.
        """
        self.display_columns = display_columns
        return self.display_columns

    def select_columns(self, dict_point_columns, df_target):
        """Return the requested columns of ``df_target`` without incomplete rows.

        Args:
            dict_point_columns: iterable of column labels (e.g. ``dict.keys()``).
            df_target: frame to take the columns from.

        Returns:
            A new frame holding only those columns, rows containing NaN dropped.
        """
        # list() so that dict views and other iterables are valid indexers.
        return df_target[list(dict_point_columns)].dropna()

    def scale_point_dict(self, dict_point, df_point_columns):
        """Rescale raw nutrient values with the scalers fitted in ``__init__``.

        Args:
            dict_point: mapping column name -> raw value.
            df_point_columns: column order wanted for the output.

        Returns:
            List of rescaled values ordered like ``df_point_columns``.

        Raises:
            KeyError: if a column has no fitted scaler.
        """
        point = {
            column: self.df_scalers[column].transform([[value]])[0][0]
            for column, value in dict_point.items()
        }
        return [point[column] for column in df_point_columns]

    def get_nearest_meal(self, dict_point, k=None, df_target=None):
        """Find the foods closest to a nutrient profile and display them.

        Args:
            dict_point: mapping column name -> raw target value.
            k: number of foods wanted; defaults to ``self.number_suggestions``.
                Lowered automatically when fewer candidates are available.
            df_target: rescaled candidate frame whose index labels refer to
                rows of ``self.df``; defaults to ``self.df_norm``.

        Returns:
            The matching rows of ``self.df`` restricted to
            ``self.display_columns``, nearest first. Empty when there is no
            candidate.

        Raises:
            ValueError: if ``dict_point`` is empty or ``k`` is smaller than 1.
            KeyError: if a requested column is missing from ``df_target``.
        """
        if not dict_point:
            raise ValueError("dict_point must contain at least one column")

        if k is None:
            k = self.number_suggestions
        if k < 1:
            raise ValueError("k must be a positive integer")

        if df_target is None:
            df_target = self.df_norm

        candidates = self.select_columns(dict_point.keys(), df_target)

        if len(candidates.index) == 0:
            print("No meals match the request.")
            return self.df.loc[candidates.index][self.display_columns]

        # Rows with missing values were dropped above, so the clamp has to
        # happen here: sklearn refuses n_neighbors larger than the sample count.
        k = min(k, len(candidates.index))

        point = self.scale_point_dict(dict_point, candidates.columns)

        model = NearestNeighbors(n_neighbors=k, algorithm='auto').fit(candidates.to_numpy())
        distances, positions = model.kneighbors(np.array(point).reshape(1, -1))

        # kneighbors returns positions inside ``candidates``; turn them into
        # index labels and look those up in this instance's own table.
        labels = candidates.index[positions[0]]
        nearest_points = self.df.loc[labels][self.display_columns]

        print("The nearest distances are:")
        display(distances)
        print(f"The nearest points to {point} are:")
        display(nearest_points)

        return nearest_points

    def _name_mask(self, ingredients, strict_search=False):
        """Build a boolean mask over ``self.df`` from words in ``Name``.

        Words are matched case-insensitively and interpreted as regular
        expressions. An empty list applies no filter (every named row passes).
        """
        if isinstance(ingredients, str):
            # A bare string is one ingredient, not a sequence of characters.
            ingredients = [ingredients]
        words = list(ingredients)
        names = self.df["Name"]

        if not words:
            return names.notna()

        if strict_search:
            # Every word must appear in the name.
            mask = pd.Series(True, index=names.index)
            for word in words:
                mask = mask & names.str.contains(word, case=False, na=False)
            return mask

        # Any word may appear in the name.
        return names.str.contains('|'.join(words), case=False, na=False)

    def suggest_meals(self, ingredients, strict_search=False):
        """Display and return the foods whose name mentions the ingredients.

        Args:
            ingredients: words to look for in ``Name`` (a single string is
                treated as one word).
            strict_search: if True every word must be present, otherwise one
                is enough.

        Returns:
            The matching rows of ``self.df``.
        """
        df_target = self.df[self._name_mask(ingredients, strict_search)]
        display(df_target)
        return df_target

    def suggest_meals_with_focus(self, ingredients, point, k=None, strict_search=False):
        """Rank the foods matching ``ingredients`` by closeness to ``point``.

        Args:
            ingredients: words to look for in ``Name``.
            point: mapping column name -> raw target value.
            k: number of foods wanted; defaults to ``self.number_suggestions``
                and is lowered when fewer foods match.
            strict_search: if True every word must be present, otherwise one
                is enough.

        Returns:
            Same as ``get_nearest_meal``, restricted to the matching foods.
        """
        # The mask is built on self.df; df_norm shares its index, so it can
        # be applied directly to the rescaled frame.
        df_target = self.df_norm[self._name_mask(ingredients, strict_search)]
        return self.get_nearest_meal(point, k, df_target)



In [35]:
# Build the recommender on the loaded table, then list the foods whose
# name contains every one of the given words.
engine = Meal(df)
rs1 = engine.suggest_meals(
    ['goat', 'CHEESE'],
    strict_search=True,
)

Unnamed: 0,ID,Name,Water,Kcal,Protein,Lipid,Ash,Carbohydrates,Fiber,Sugar,...,VitDIU,VitK,FASat,FAMono,FAPoly,Cholestrl,GmWt1,GmWtDesc1,GmWt2,GmWtDesc2
138,1156,"CHEESE,GOAT,HARD TYPE",29.01,452,30.52,35.59,3.72,2.17,0.0,2.17,...,26,3.0,24.609,8.117,0.845,105,28.35,1 oz,0.0,0
139,1157,"CHEESE,GOAT,SEMISOFT TYPE",45.52,364,21.58,29.84,2.94,0.12,0.0,0.12,...,22,2.5,20.639,6.808,0.709,79,28.35,1 oz,0.0,0
140,1159,"CHEESE,GOAT,SOFT TYPE",60.75,264,18.52,21.08,1.58,0.0,0.0,0.0,...,15,1.8,14.575,4.807,0.501,46,28.35,1 oz,0.0,0


In [36]:
# Ask for the four foods nearest to the given Lipid / Protein / Calcium values.
rs2 = engine.get_nearest_meal(
    {'Lipid': 16, 'Protein': 26, "Calcium": 15},
    k=4,
)

The nearest distances are:


array([[0.00222607, 0.00349893, 0.00356165, 0.00412741]])

The nearest points to [0.16, 0.2943840579710145, 0.0020369364475828354] are:


Unnamed: 0,ID,Name,Water,Kcal,Protein,Lipid,Carbohydrates,Fiber,Sugar,Calcium,...,VitDIU,VitK,FASat,FAMono,FAPoly,Cholestrl,GmWt1,GmWtDesc1,GmWt2,GmWtDesc2
859,5037,"CHICKEN,BROILERS OR FRYERS,DK MEAT,MEAT&SKN,CK...",58.63,253,25.97,15.78,0.0,0.0,0.0,15,...,0,0.0,4.37,6.19,3.49,91,101.0,"1 unit, (yield from 1 lb ready-to-cook chicken)",167.0,".5 chicken, bone removed"
7309,23229,"BEEF,RIB EYE STEK,BNLES,LIP OFF,LN & FAT,0 FAT...",57.42,248,26.29,15.9,0.0,0.0,0.0,10,...,5,1.6,7.226,7.786,0.81,77,85.0,3 oz,266.0,1 steak
7573,23502,"USDA COMMODITY,BF,GROUND BULK/COARSE GROUND,FR...",56.49,259,26.06,16.34,0.0,0.0,0.0,9,...,0,0.0,5.744,7.504,0.62,89,28.35,1 oz,0.0,0
2642,10195,"PORK,FRSH,LOIN,CNTR RIB (CHOPS),BNLESS,LN&FAT,...",58.15,255,26.29,15.79,0.0,0.0,0.0,5,...,0,0.0,6.12,7.21,1.32,73,85.0,3 oz,81.0,"1 chop, excluding refuse (yield from 1 raw cho..."


In [37]:
# Restrict the search to foods with SOUP in their name, then take the four
# nearest to the given Protein / Kcal / Carbohydrates values.
rs3 = engine.suggest_meals_with_focus(
    ['SOUP'],
    {'Protein': 2, 'Kcal': 30, 'Carbohydrates': 1},
    k=4,
    strict_search=False,
)

The nearest distances are:


array([[0.01564939, 0.01598605, 0.01653936, 0.0167151 ]])

The nearest points to [0.022644927536231887, 0.03325942350332594, 0.01] are:


Unnamed: 0,ID,Name,Water,Kcal,Protein,Lipid,Carbohydrates,Fiber,Sugar,Calcium,...,VitDIU,VitK,FASat,FAMono,FAPoly,Cholestrl,GmWt1,GmWtDesc1,GmWt2,GmWtDesc2
1593,6970,"SOUP,CHICK BROTH,LO NA,CND",96.0,16,2.0,0.6,1.2,0.0,0.13,4,...,0,0.0,0.179,0.274,0.126,0,240.0,1 cup,0.0,0
1471,6448,"SOUP,OYSTER STEW,CND,PREP W/ EQ VOLUME H2O",94.86,24,0.87,1.59,1.69,0.0,0.0,9,...,0,0.0,1.04,0.38,0.07,6,241.0,"1 cup, (8 fl oz)",586.0,"1 can, (10.5 oz), prepared"
1511,6547,"SOUP,BF MUSHROOM,CND,PREP W/ EQ VOLUME H2O",92.6,30,2.37,1.23,2.6,0.1,0.0,2,...,0,0.0,0.61,0.51,0.05,3,244.0,"1 cup, (8 fl oz)",593.0,"1 can, (10.75 oz), prepared"
1445,6413,"SOUP,CHICK BROTH,CND,PREP W/ EQ VOLUME H2O",95.95,16,2.02,0.57,0.38,0.0,0.29,4,...,0,0.0,0.16,0.24,0.11,0,244.0,"1 cup, (8 fl oz)",593.0,"1 can, (10.75 oz), prepared"
