In [78]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics.pairwise import cosine_similarity

import warnings
warnings.filterwarnings('ignore')

In [79]:
def calculate_bmi(height, weight):
    """Return weight / (height / 100)**2, rounded to two decimals.

    Parameters
    ----------
    height : scalar or array-like (e.g. ndarray, Series)
        Height value; it is divided by 100 before being squared, so it is
        expected in centimetres if the result is to be a kg/m^2 BMI.
    weight : scalar or array-like
        Weight value (presumably kilograms -- confirm against callers).

    Returns
    -------
    The rounded BMI, scalar or element-wise depending on the inputs.
    """
    # Bind a new local instead of `height /= 100`: the augmented assignment
    # rescales mutable inputs (ndarray, Series) in place, which silently
    # corrupts the caller's data.
    height_scaled = height / 100
    bmi = weight / np.square(height_scaled)
    return round(bmi, 2)

In [80]:
def get_min_max(col_name, df):
    """Compute the quartiles and 1.5*IQR fences of one DataFrame column.

    Parameters
    ----------
    col_name : label of the column in ``df`` to summarise.
    df : DataFrame holding that column.

    Returns
    -------
    tuple ``(lower_fence, q1, q3, upper_fence)`` where the fences are
    ``q1 - 1.5*IQR`` and ``q3 + 1.5*IQR``.
    """
    column = df[col_name]
    lower_quartile = column.quantile(0.25)
    upper_quartile = column.quantile(0.75)

    # Inter-quartile range drives how far the fences sit from the quartiles.
    spread = upper_quartile - lower_quartile
    lower_fence = lower_quartile - (1.5 * spread)
    upper_fence = upper_quartile + (1.5 * spread)

    return lower_fence, lower_quartile, upper_quartile, upper_fence


In [81]:
# Read the food table from 'food_new.csv' and print its schema (column names,
# non-null counts, dtypes) as a quick sanity check.
# NOTE(review): `food_df` is not referenced again in the visible cells; the
# same file is loaded a second time as `food_data` further down.
food_df = pd.read_csv('food_new.csv')
food_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 169 entries, 0 to 168
Data columns (total 12 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Food_items     169 non-null    object 
 1   Calories       169 non-null    int64  
 2   Proteins       169 non-null    float64
 3   Carbohydrates  169 non-null    float64
 4   Sugars         169 non-null    float64
 5   Fibre          169 non-null    float64
 6   Fats           169 non-null    float64
 7   VitaminD       169 non-null    float64
 8   Calcium        169 non-null    int64  
 9   Iron           169 non-null    float64
 10  Sodium         169 non-null    int64  
 11  Potassium      169 non-null    int64  
dtypes: float64(7), int64(4), object(1)
memory usage: 16.0+ KB


In [82]:
# Read the food table from disk.
food_data = pd.read_csv('food_new.csv')

# Separate the label from the features:
#   y -> the 'Food_items' column (the value the classifier predicts)
#   X -> every remaining column (the nutritional values)
y = food_data['Food_items']
X = food_data.drop(columns='Food_items')


In [83]:
# Create a random forest classifier with 100 trees and train it on the entire dataset
# random_state=42 fixes the forest's randomness so re-runs give the same model.
# NOTE(review): the model is fitted on every row of X/y -- no hold-out split is
# made here, so nothing in this cell measures how well it generalises.
tree_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
tree_classifier.fit(X, y)

RandomForestClassifier(random_state=42)

In [32]:
# BMI categories and the integer weight class assigned to each:
#   bmi < 18.5             -> underweight         -> 0
#   18.5 <= bmi <= 24.9    -> normal weight       -> 1
#   bmi > 24.9             -> overweight / obese  -> 2

# Nutrient targets for each category:
#   underweight -> high proteins, carbohydrates and calories
#   overweight  -> low calories and fats; high proteins and fibre
#   normal      -> balanced amounts of all nutrients


In [84]:
# Seed the per-nutrient range table. Both bounds start at the same value, so
# every range is initially a single point until a later cell widens it.
nutrient_names = ['Calories', 'Proteins', 'Carbohydrates', 'Sugars', 'Fibre', 'Fats',
                  'VitaminD', 'Calcium', 'Iron', 'Sodium', 'Potassium']
baseline_values = [61, 0.14, 14.08, 4.04, 2.0, 1.28, 0, 13, 0.28, 0, 104]

user_input = pd.DataFrame({
    'nutrient': nutrient_names,
    'min_value': baseline_values,
    'max_value': list(baseline_values),  # independent copy of the same numbers
})

print(user_input)

         nutrient  min_value  max_value
0        Calories      61.00      61.00
1        Proteins       0.14       0.14
2   Carbohydrates      14.08      14.08
3          Sugars       4.04       4.04
4           Fibre       2.00       2.00
5            Fats       1.28       1.28
6        VitaminD       0.00       0.00
7         Calcium      13.00      13.00
8            Iron       0.28       0.28
9          Sodium       0.00       0.00
10      Potassium     104.00     104.00


In [95]:
# Classify the user by BMI, then replace the ranges of selected nutrients in
# `user_input` with bands derived from the food table's quartiles.
bmi = calculate_bmi(172, 49)

# Weight class: 0 = underweight, 1 = normal weight, 2 = overweight / obese.
# NOTE(review): the normal band ends at 24.9 inclusive, so a BMI strictly
# between 24.9 and 25 (e.g. 24.95) lands in class 2 -- confirm this is intended.
if bmi < 18.5:
    weight = 0
elif bmi <= 24.9:
    weight = 1
else:
    weight = 2

# Nutrients whose ranges depend on the weight class.
available_cols = ['Calories', 'Proteins', 'Carbohydrates', 'Fibre']

# NOTE(review): every listed nutrient gets the same band for a given class
# (high for underweight, low for overweight). The notes cell above asks for
# *high* proteins/fibre and low fats when overweight -- confirm which is wanted.
for col in available_cols:
    q_min, q1, q3, q_max = get_min_max(col, food_data)

    if weight == 0:
        # Underweight: upper band, from the third quartile to the upper fence.
        lower, upper = q3, q_max
    elif weight == 1:
        # Normal: the inter-quartile band.
        lower, upper = q1, q3
    else:
        # Overweight: lower band, from the lower fence to the first quartile.
        lower, upper = q_min, q1

    # Address the row by nutrient name with a single .loc write. The previous
    # chained form (user_input['min_value'][i] = ...) relied on row positions
    # (hence the special case for Fibre) and is not guaranteed to write through
    # to the DataFrame; under pandas Copy-on-Write it never does.
    row_mask = user_input['nutrient'] == col
    user_input.loc[row_mask, 'min_value'] = lower
    user_input.loc[row_mask, 'max_value'] = upper

    

In [96]:
user_input

Unnamed: 0,nutrient,min_value,max_value
0,Calories,279.0,609.0
1,Proteins,10.56,24.9
2,Carbohydrates,25.4,56.255
3,Sugars,4.04,4.04
4,Fibre,1.9,4.75
5,Fats,1.28,1.28
6,VitaminD,0.0,0.0
7,Calcium,13.0,13.0
8,Iron,0.28,0.28
9,Sodium,0.0,0.0


In [27]:
# Gives only one food option based on nutrients.
# Fixed nutrient profile describing the user's preferences; the keys are in the
# same order as the feature columns the classifier was fitted on.
user_preferences = {'Calories': 61,
                    'Proteins': 0.14,
                    'Carbohydrates':14.08,
                    'Sugars': 4.04,
                    'Fibre':2.0,
                    'Fats':1.28,
                    'VitaminD':0,
                    'Calcium':13,
                    'Iron':0.28,
                    'Sodium':0,
                    'Potassium':104}

# Predict the meal category based on the user's preferences.
# The one-row frame gets its own name: rebinding `user_input` here would replace
# the nutrient/min_value/max_value table that the multi-option cell below reads,
# making a top-to-bottom run fail with a KeyError.
preference_frame = pd.DataFrame(user_preferences, index=[0])
predicted_category = tree_classifier.predict(preference_frame)[0]

# Get the foods whose label equals the predicted category
recommended_foods = food_data[food_data['Food_items'] == predicted_category]

# Compute the cosine similarity between the user's preferences and the
# nutritional values of the recommended foods (result has shape (1, n_foods))
similarity_scores = cosine_similarity(preference_frame, recommended_foods.drop('Food_items', axis=1))

# Get the top 5 recommended foods, most similar first: argsort is ascending,
# so reverse it before taking the first five positions.
best_positions = np.argsort(similarity_scores[0])[::-1][:5]
top_recommendations = recommended_foods.iloc[best_positions]['Food_items'].tolist()

# Display the recommended foods to the user
print(f"Based on your preferences, we recommend the following {predicted_category} foods:")
for food in top_recommendations:
    print(food)

Based on your preferences, we recommend the following Pineapple foods:
Pineapple


In [108]:
# Gives multiple food options based on nutrients:
# for each weight-dependent nutrient, keep the foods whose value lies inside
# the user's [min_value, max_value] range and report the predicted labels.

# Index the range table by nutrient name so each lookup is by label instead of
# by row position (the old code needed an `i + 1` special case for Fibre).
nutrient_ranges = user_input.set_index('nutrient')

for col in available_cols:
    min_value = nutrient_ranges.loc[col, 'min_value']
    max_value = nutrient_ranges.loc[col, 'max_value']

    # Filter the food data by the nutrient range values (bounds inclusive)
    filtered_data = food_data[(food_data[col] >= min_value) & (food_data[col] <= max_value)]
    if filtered_data.empty:
        # Nothing falls inside this range; move on to the next nutrient.
        continue

    # Predict a label for every food that survived the filter
    predicted_category = tree_classifier.predict(filtered_data.drop('Food_items', axis=1))

    # Foods whose label equals the first prediction. Not used by the print
    # below; retained so the variable keeps being updated as before.
    recommended_foods = filtered_data[filtered_data['Food_items'] == predicted_category[0]]

    # Only the first five predictions are shown.
    print(f"For {col} range of {min_value} to {max_value}, we recommend {predicted_category[0:5]} foods")
    print('--------------------------------------------------------------------------------')

For Calories range of 279.0 to 609.0, we recommend ['Flavored milk, whole' 'Not included in a food category'
 'Cream cheese, sour cream, whipped cream' 'Cakes and pies' 'Cheese'] foods
--------------------------------------------------------------------------------
For Proteins range of 10.56 to 24.9, we recommend ['Cheese' 'Cottage/ricotta cheese' 'Pizza' 'Cheese sandwiches'
 'Baby food: meat and dinners'] foods
--------------------------------------------------------------------------------
For Carbohydrates range of 25.4 to 56.254999999999995, we recommend ['Flavored milk, whole' 'Not included in a food category' 'Cakes and pies'
 'Cheese sandwiches' 'Deli and cured meat sandwiches'] foods
--------------------------------------------------------------------------------
For Fibre range of 1.9 to 4.75, we recommend ['Other dark green vegetables' 'Deli and cured meat sandwiches'
 'Bean, pea, legume dishes' 'Vegetable dishes'
 'Vegetable sandwiches/burgers'] foods
----------------------