In [11]:
from tensorflow.keras.models import load_model 
import pandas as pd 
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Seed NumPy's legacy global random generator so NumPy-based randomness is repeatable.
# NOTE(review): none of the visible cells seeds TensorFlow separately — confirm whether that is needed.
np.random.seed(42) 

In [12]:
# Load the saved Keras model; 'model.h5' is a relative path, so it is resolved
# against the kernel's current working directory.
model = load_model('model.h5')

In [13]:
# Raw string: the Windows backslashes stay literal. Without the r-prefix, '\p', '\d'
# and '\i' are invalid escape sequences that only work by accident and emit a warning
# on current Python versions. The resulting path value is unchanged.
# NOTE(review): this is an absolute, machine-specific path — consider a configurable data dir.
df = pd.read_csv(r'D:\proand\data\ingredient.csv')

In [34]:
# Learn the vocabulary from every recipe's ingredient text, then turn each
# recipe into a sequence of integer token ids.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['ingredients'])
encoded_ingredients = tokenizer.texts_to_sequences(df['ingredients'])

# The longest encoded recipe defines the common sequence length; shorter
# sequences are padded at the end ('post') up to that length.
max_length = max(map(len, encoded_ingredients))
padded_ingredients = pad_sequences(
    encoded_ingredients,
    maxlen=max_length,
    padding='post',
)

In [72]:
def _split_ingredients(text):
    '''Split a comma-separated ingredient string into a set of stripped, non-empty names.'''
    # Non-string values (e.g. NaN from a blank CSV cell) are treated as "no ingredients".
    if not isinstance(text, str):
        return set()
    stripped = (part.strip() for part in text.split(','))
    return {name for name in stripped if name}


def calculate_matching_percentage(user_input_ingredients, recipe_ingredients):
    '''
    Calculate the percentage of a recipe's ingredients that appear in the user's list.

    Parameters:
        user_input_ingredients: comma-separated ingredient names supplied by the user.
        recipe_ingredients: comma-separated ingredient names of one recipe.

    Returns:
        float in [0, 100]: matched recipe ingredients / total recipe ingredients * 100.
        0.0 when the recipe has no ingredients (avoids dividing by zero and avoids
        scoring two empty strings as a 100% match).

    Whitespace around each name is ignored, so 'a,b' and 'a, b ' are equivalent.
    Matching is case-sensitive.
    '''
    user_ingredients = _split_ingredients(user_input_ingredients)
    recipe_set = _split_ingredients(recipe_ingredients)
    if not recipe_set:
        return 0.0
    matching_ingredients = user_ingredients & recipe_set
    return len(matching_ingredients) / len(recipe_set) * 100


# Function to recommend dishes based on user input
def predict(user_input, dataset, threshold=0.4, top_n=10):
    '''
    Recommend the rows of `dataset` whose ingredients best match `user_input`.

    Parameters:
        user_input: comma-separated ingredient string from the user.
        dataset: DataFrame with an 'ingredients' column (one recipe per row).
        threshold: minimum matching percentage a recipe needs to be kept. It is
            compared directly with the 0-100 value returned by
            calculate_matching_percentage, so the default 0.4 means 0.4 %, not 40 %.
        top_n: maximum number of recipes to return.

    Returns:
        (recommended_dishes, score_list): a list of `dataset` rows (as Series) ordered
        by matching percentage and then by model score, both descending, and the list
        of their matching percentages rounded to integers.

    Relies on the notebook-level `tokenizer`, `max_length` and `model`.
    '''
    # Nothing to rank; return early instead of handing the model a zero-row batch.
    if len(dataset) == 0:
        return [], []

    recipe_texts = dataset['ingredients']

    # Tokenize and pad user input
    encoded_input = tokenizer.texts_to_sequences([user_input])
    padded_input = pad_sequences(encoded_input, maxlen=max_length, padding='post')

    # Repeat the user input to match the number of samples in the dataset
    repeated_padded_input = np.repeat(padded_input, len(dataset), axis=0)

    # Encode the recipes of the dataset that was actually passed in, so the model
    # inputs stay row-aligned with `dataset` even when it is not the frame the
    # notebook-level padded array was built from.
    padded_recipes = pad_sequences(
        tokenizer.texts_to_sequences(recipe_texts),
        maxlen=max_length,
        padding='post',
    )

    # Predictions
    predictions = model.predict(
        [repeated_padded_input, np.zeros((repeated_padded_input.shape[0], 1)), padded_recipes]
    )

    # Keep (row position, matching percentage, model score) for recipes above the threshold
    candidates = []
    for position, recipe_text in enumerate(recipe_texts):
        matching_percentage = calculate_matching_percentage(user_input, recipe_text)
        if matching_percentage >= threshold:
            # We assume rating is the second element of predictions
            candidates.append((position, matching_percentage, predictions[position][1]))

    # Sort by matching percentage and rating
    candidates.sort(key=lambda entry: (entry[1], entry[2]), reverse=True)
    top_candidates = candidates[:top_n]

    score_list = [round(percentage) for _, percentage, _ in top_candidates]
    recommended_dishes = [dataset.iloc[position] for position, _, _ in top_candidates]

    return recommended_dishes, score_list



In [73]:
# Example query: a comma-separated list of ingredient / tag names.
user_input = 'broil, dairy free, meat, peanut free, pork, pork rib, soy free'
# predict returns (matching dataset rows, rounded matching percentages);
# the defaults threshold=0.4 and top_n=10 apply here.
names, scores = predict(user_input, df)



In [76]:
# Each entry of `names` is a dataset row (a Series labelled by column name).
# Use .iloc for positional access: plain integer indexing (row[0]) on a
# string-labelled Series relies on a deprecated positional fallback.
# Position 0 is used as the dish name and position 1 as its rating.
name_list = [dish.iloc[0] for dish in names]
rating_list = [dish.iloc[1] for dish in names]

# Build the summary table in one step, one row per recommended dish.
result = pd.DataFrame({
    'Name': name_list,
    'Rating': rating_list,
    'Matching_Percentage': scores,
})


In [77]:
result

Unnamed: 0,Name,Rating,Matching_Percentage
0,Sweet-and-Sour Balsamic-Glazed Spareribs,4.375,78
1,Meatballs: The Spuntino Way,4.375,60
2,Breakfast Banh Mi Sandwich with Eggs and Sausage,4.375,50
3,Fluffy Cathead Biscuits With Honey Butter,5.0,50
4,Pork Sausage with Braised Purple Cabbage and A...,4.375,38
5,Sweet and Sour Pork,3.125,33
6,Lamb Bacon,3.75,33
7,Croque Madame Casserole With Ham and GruyAre,4.375,25
8,Lobster Fra Diavolo,3.75,25
9,Lemon-Blueberry Poke Cake,3.75,22
