In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

In [3]:
# Load Data
df = pd.read_csv("/content/drive/MyDrive/Capstone/dataset.csv")
df.count()

RecipeId                 22717
Name                     22717
CookTime                 22717
PrepTime                 22717
TotalTime                22717
RecipeIngredientParts    22717
Calories                 22717
FatContent               22717
SaturatedFatContent      22717
CholesterolContent       22717
SodiumContent            22717
CarbohydrateContent      22717
FiberContent             22717
SugarContent             22717
ProteinContent           22717
RecipeInstructions       22717
ImageUrl                 22717
dtype: int64

In [4]:
# Fill missing values
df[["RecipeId","Name","Calories", "FatContent", "SaturatedFatContent", "CholesterolContent",
             "SodiumContent", "CarbohydrateContent", "FiberContent", "SugarContent", "ProteinContent"]].fillna(value=df[["RecipeId","Name","Calories", "FatContent", "SaturatedFatContent", "CholesterolContent",
             "SodiumContent", "CarbohydrateContent", "FiberContent", "SugarContent", "ProteinContent"]].mean(), inplace=True)

# Remove duplicates
df.drop_duplicates(inplace=True)

# Convert data types
df[["Calories", "FatContent", "SaturatedFatContent", "CholesterolContent",
             "SodiumContent", "CarbohydrateContent", "FiberContent", "SugarContent", "ProteinContent"]] = df[["Calories", "FatContent", "SaturatedFatContent", "CholesterolContent",
             "SodiumContent", "CarbohydrateContent", "FiberContent", "SugarContent", "ProteinContent"]].astype('float')

  "SodiumContent", "CarbohydrateContent", "FiberContent", "SugarContent", "ProteinContent"]].mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[["RecipeId","Name","Calories", "FatContent", "SaturatedFatContent", "CholesterolContent",


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 22717 entries, 0 to 22716
Data columns (total 17 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   RecipeId               22717 non-null  int64  
 1   Name                   22717 non-null  object 
 2   CookTime               22717 non-null  object 
 3   PrepTime               22717 non-null  object 
 4   TotalTime              22717 non-null  object 
 5   RecipeIngredientParts  22717 non-null  object 
 6   Calories               22717 non-null  float64
 7   FatContent             22717 non-null  float64
 8   SaturatedFatContent    22717 non-null  float64
 9   CholesterolContent     22717 non-null  float64
 10  SodiumContent          22717 non-null  float64
 11  CarbohydrateContent    22717 non-null  float64
 12  FiberContent           22717 non-null  float64
 13  SugarContent           22717 non-null  float64
 14  ProteinContent         22717 non-null  float64
 15  Re

In [6]:
data = df[["Calories", "FatContent", "SaturatedFatContent", "CholesterolContent",
             "SodiumContent", "CarbohydrateContent", "FiberContent", "SugarContent", "ProteinContent"]]

# Preprocess Data
scaler = StandardScaler()
scaled_data = scaler.fit_transform(data)

# Split data into training and test sets
train_data, test_data = train_test_split(scaled_data, train_size=0.8, random_state=1)

In [9]:
# Define Model Architecture
num_features = train_data.shape[1]

model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(256, activation='relu', input_shape=(num_features,)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(num_features),
])

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 256)               2560      
                                                                 
 dense_4 (Dense)             (None, 128)               32896     
                                                                 
 dense_5 (Dense)             (None, 64)                8256      
                                                                 
 dense_6 (Dense)             (None, 9)                 585       
                                                                 
Total params: 44,297
Trainable params: 44,297
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Compile and Train Model
opt = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=opt, loss='mse', metrics=['accuracy'])

model.fit(train_data, train_data, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f0f81f3b010>

In [11]:
# Generate Recommendations
num_recommendations = 10

# Get the latent representation of the input data
latent_features = model.predict(train_data)

# Calculate cosine similarity between each pair of latent features
similarity_matrix = np.dot(latent_features, latent_features.T)
norms = np.linalg.norm(latent_features, axis=1, keepdims=True)
similarity_matrix /= np.dot(norms, norms.T)



In [12]:
similarity_matrix

array([[ 0.9999999 , -0.25630778,  0.3018314 , ..., -0.5978832 ,
        -0.64850926, -0.02889896],
       [-0.25630778,  1.0000002 ,  0.31288385, ..., -0.19813134,
         0.5170585 , -0.6131357 ],
       [ 0.3018314 ,  0.31288385,  1.0000001 , ..., -0.6825695 ,
         0.29529133, -0.8054305 ],
       ...,
       [-0.5978832 , -0.19813134, -0.6825695 , ...,  1.        ,
        -0.04505737,  0.6737233 ],
       [-0.64850926,  0.5170585 ,  0.29529133, ..., -0.04505737,
         0.99999994, -0.6081946 ],
       [-0.02889896, -0.6131357 , -0.8054305 , ...,  0.6737233 ,
        -0.6081946 ,  0.9999999 ]], dtype=float32)

In [13]:
# Generate recommendations for each item

data.reset_index(drop=True, inplace=True)
recommendations = {}
for i in range(len(data)):
    similar_indices = similarity_matrix[i].argsort()[:-1]  # Remove -1 from the indexing
    similar_items = [(similarity_matrix[i][j], j) for j in similar_indices]
    recommendations[i] = similar_items[:num_recommendations]  # Adjust indexing to include num_recommendations

# Exclude the item itself from recommendations
for key, value in recommendations.items():
    recommendations[key] = value[1:]

IndexError: ignored

In [None]:
# Print recommendations for a sample item
sample_item_index = 0
sample_item_recommendations = recommendations[sample_item_index]
print("Recommendations for Item:", sample_item_index)
for similarity, item_index in sample_item_recommendations:
    print("Similarity:", similarity)
    print("Item:", item_index)
    print("Name:", df.iloc[item_index]["Name"])
    print("Calories:", df.iloc[item_index]["Calories"])
    print()

In [None]:
# Generate user input
height = 170  # user's height in cm
weight = 70  # user's weight in kg
max_calories = 2000  # user's maximum daily calorie intake

#user_input = pd.DataFrame({'Height': [height], 'Weight': [weight], 'maxCalories': [max_calories]})
user_input = [[height,weight,max_calories]]

# Preprocess user input
user_input = scaler.transform(user_input)

# Make predictions for user input
predictions = model.predict(user_input)

# Get the top 5 recommended items for each meal
top_recommendations = {}
for meal in ['Breakfast', 'Lunch', 'Dinner']:
    # Filter the dataset based on meal type
    meal_data = data[data['Meal'] == meal]

    # Calculate the similarity scores between user input and meal items
    similarity_scores = cosine_similarity(user_input, meal_data[feature_columns])

    # Get the indices of top 5 recommended items
    top_indices = similarity_scores.argsort()[0][-5:][::-1]

    # Get the names of the recommended items
    recommendations = meal_data.iloc[top_indices]['Name'].tolist()

    # Store the recommendations for the meal
    top_recommendations[meal] = recommendations

# Print the recommendations
for meal, recommendations in top_recommendations.items():
    print(f"Top 5 Recommendations for {meal}:")
    for i, recommendation in enumerate(recommendations):
        print(f"{i+1}. {recommendation}")
    print()