In [1]:
import pandas as pd

filepath = '/Users/hitakiran/Desktop/FA24-Group6/Research/13k-recipes.csv'  
data = pd.read_csv(filepath)

#First 1000 rows
data = data.head(1000)

#Take out unwanted columns
data = data.drop(columns=['Unnamed: 0', 'Ingredients', 'Image_Name'])

#Clean the "Cleaned_Ingredients" column
cleanedIngredients = []
for row in data['Cleaned_Ingredients']:
    #Convert the string representation of a list into an actual list
    ingredientsList = eval(row) 
    #Remove single quotes & whitespace
    cleanedList = [ingredient.replace("'", "").strip() for ingredient in ingredientsList]
    #Put cleaned ingredients back into a single string
    cleanedIngredients.append(", ".join(cleanedList))

data['Cleaned_Ingredients'] = cleanedIngredients
data

Unnamed: 0,Title,Instructions,Cleaned_Ingredients
0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"Pat chicken dry with paper towels, season all ...","1 (3½–4-lb.) whole chicken, 2¾ tsp. kosher sal..."
1,Crispy Salt and Pepper Potatoes,Preheat oven to 400°F and line a rimmed baking...,"2 large egg whites, 1 pound new potatoes (abou..."
2,Thanksgiving Mac and Cheese,Place a rack in middle of oven; preheat to 400...,"1 cup evaporated milk, 1 cup whole milk, 1 tsp..."
3,Italian Sausage and Bread Stuffing,Preheat oven to 350°F with rack in middle. Gen...,"1 (¾- to 1-pound) round Italian loaf, cut into..."
4,Newton's Law,Stir together brown sugar and hot water in a c...,"1 teaspoon dark brown sugar, 1 teaspoon hot wa..."
...,...,...,...
995,Winter of Our Content,"Combine the brandy, lemon juice, maple syrup, ...","3/4 ounce Laird’s Straight Apple Brandy, 3/4 o..."
996,Perfect Circle,"Up to 24 hours before serving, make the batch....","1 1/2 cups chilled fino or manzanilla sherry, ..."
997,All She Wrote,"At least 2 hours before serving, make the batc...","2 1/4 cups chilled Punt e Mes, 1 cup plus 2 ta..."
998,Mr. Tingles' Punch,At least 24 hours before you plan to serve the...,"1 (750 ml) bottle light rum, 2 tablespoons Sic..."


In [2]:
measurements = ["g", "kg", "lb", "oz", "cup", "cups", "tsp", "tbsp", "tablespoon", "teaspoon", "ml", "l", "liter", "liters", "dash", "pinch", "pound", "ounce"]
fractions = ["½", "¼", "¾", "⅓", "⅔"] 

cleanedIngredientsFinal = []
for row in data['Cleaned_Ingredients']:
    ingredientsList = row.split(", ") #Split ingredients
    cleanedList = []
    
    for ingredient in ingredientsList: #Remove numbers and fractions
        words = ingredient.split()  #Split into individual words
        filteredWords = [
            word for word in words 
            if not (word.isdigit() or word in fractions or word.lower() in measurements)
        ]
        #Putting filtered words back into a cleaned ingredient string
        cleanedList.append(" ".join(filteredWords).strip())
    
    #Combine cleaned ingredients to string
    cleanedIngredientsFinal.append(", ".join(cleanedList))

# Update the column
data['Cleaned_Ingredients'] = cleanedIngredientsFinal
data

Unnamed: 0,Title,Instructions,Cleaned_Ingredients
0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"Pat chicken dry with paper towels, season all ...","(3½–4-lb.) whole chicken, 2¾ tsp. kosher salt,..."
1,Crispy Salt and Pepper Potatoes,Preheat oven to 400°F and line a rimmed baking...,"large egg whites, new potatoes (about inch in ..."
2,Thanksgiving Mac and Cheese,Place a rack in middle of oven; preheat to 400...,"evaporated milk, whole milk, tsp. garlic powde..."
3,Italian Sausage and Bread Stuffing,Preheat oven to 350°F with rack in middle. Gen...,"(¾- to 1-pound) round Italian loaf, cut into 1..."
4,Newton's Law,Stir together brown sugar and hot water in a c...,"dark brown sugar, hot water, oz. bourbon, oz. ..."
...,...,...,...
995,Winter of Our Content,"Combine the brandy, lemon juice, maple syrup, ...","3/4 Laird’s Straight Apple Brandy, 3/4 lemon j..."
996,Perfect Circle,"Up to 24 hours before serving, make the batch....","1/2 chilled fino or manzanilla sherry, 1/2 Cam..."
997,All She Wrote,"At least 2 hours before serving, make the batc...","1/4 chilled Punt e Mes, plus tablespoons dry v..."
998,Mr. Tingles' Punch,At least 24 hours before you plan to serve the...,"(750 ml) bottle light rum, tablespoons Sichuan..."


In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

#user input with the dataset => vectorization
allIngredients = data['Cleaned_Ingredients'].tolist()

#TF-IDF vectorizer - vectorize the ingredients
vectorizer = TfidfVectorizer()
matrix = vectorizer.fit_transform(allIngredients)

def recommend_meal(userIngredients, data, matrix, vectorizer):

    #vectorize user input
    userVec = vectorizer.transform([userIngredients])

    #cosine similarity
    scores = cosine_similarity(userVec, matrix).flatten()


    #get top 10 and return instructions and meal names
    topIndices = topIndices = scores.argsort()[-15:][::-1]

    return data.iloc[topIndices][['Title', 'Instructions']]


In [4]:
#EXAMPLE
userIngredients = "tomato, butter, rice"

recommendedMeals = recommend_meal(userIngredients, data, matrix, vectorizer).reset_index(drop=True)

# display
print("\nTop 15 Recommended Meals:")
recommendedMeals


Top 15 Recommended Meals:


Unnamed: 0,Title,Instructions
0,Tomato Brown Butter,"Place the tomato in a blender and purée, then ..."
1,Tomato and Parmesan Risotto,Bring broth to a simmer in a medium saucepan; ...
2,Salt-and-Pepper Fish,Rinse rice in several changes of cold water in...
3,Red Pesto Rigatoni,"Cook anchovies, garlic, oil, and walnuts in a ..."
4,Big-Batch Rice,Bring 18 cups water to a boil in a large cover...
5,Irish Weekend Fry-Up,Heat 1 tablespoon oil and 1 tablespoon butter ...
6,Eggplant Parm,Preheat the oven to 400°F. Grease a baking she...
7,Tandoori Chicken Bowls,Place racks in upper and lower thirds of oven;...
8,Maple Barbecue Grilled Chicken,Heat olive oil in a small saucepan over medium...
9,Crispy-Skin Salmon with Miso-Honey Sauce,"Whisk ginger, miso, honey, vinegar, oil, and 1..."
