In [1]:
import pandas as pd

### Scraped data from allrecipes, and feature modification

In [2]:
data = pd.read_csv('data/recipe_data.csv')
data

Unnamed: 0,ingredients,directions,title,category,rating,rating_count,cook_time,total_time,yield,servings,calories,fat,carbs,protein,verb_count,ingredient_count,yield_servings_merge
0,"['2 tablespoons extra-virgin olive oil, divide...",['Preheat the oven to 350 degrees F (175 degre...,Lower-Carb Healthy Vegetable Casserole,Recipes,5.0,5,35 mins,55 mins,8 servings,8,126.0,9g,7g,5g,15,13,8
1,"['¼ cup sliced fresh strawberries', '1 ½ fluid...","['Place strawberries, tequila, lime juice, tri...",Classic Frozen Strawberry Margarita,Recipes,4.9,16,,10 mins,,1,206.0,0g,21g,0g,4,8,1
2,['1 ½ cups warm water (110 to 115 degrees F/43...,"['Combine water, honey, and yeast in a stand m...",Golden Egg Loaves (or Braids),Recipes,4.3,3,25 mins,2 hrs 55 mins,2 loaves,24,186.0,5g,30g,6g,24,9,24
3,"['1 cup chocolate graham crackers', '¼ cup whi...",['Preheat the oven to 375 degrees F (190 degre...,Chocolate Graham Crust,Desserts,4.0,1,10 mins,50 mins,1 pie crust,8,103.0,7g,11g,1g,5,4,8
4,"['2 cups lukewarm water (105 degrees F, 40 deg...",['Combine water and yeast in a large mixing bo...,No-Knead Skillet Olive Bread,Recipes,4.5,8,30 mins,2 hrs 35 mins,10 servings,10,239.0,5g,42g,6g,19,9,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19944,"['1 ⅓ cups French-fried onions, crushed', '1 l...","['Gather all ingredients.', 'Dotdash Meredith ...",Crunchy French Onion Chicken,Recipes,4.5,339,20 mins,30 mins,,4,623.0,40g,32g,28g,10,3,4
19945,"['2 summer squash, ends trimmed', '¼ cup oliv...",['Preheat the oven to 450 degrees F (230 degre...,Roasted Summer Squash,Side Dish,4.7,71,5 mins,20 mins,,4,139.0,14g,4g,1g,8,5,4
19946,"['1 (8 ounce) container ricotta cheese', '2 la...","['Gather all ingredients.', 'Dotdash Meredith ...",Ricotta Gnocchi,Recipes,4.5,712,15 mins,1 hr,,5,442.0,26g,27g,22g,21,14,5
19947,"['2 cups sliced fresh peaches', '2 cups sliced...",['Preheat oven to 350 degrees F (175 degrees C...,Fresh Fruit Basket Cobbler,Recipes,4.5,2,20 mins,1 hr,1 8x13-inch dish,8,358.0,8g,69g,5g,13,10,8


Only modified variables: counting number of verbs, ingredients, and yield or number of servings
(using NLTK)

In [8]:
new_features = data[['title', 'verb_count', 'ingredient_count', 'yield_servings_merge']]
new_features

Unnamed: 0,title,verb_count,ingredient_count,yield_servings_merge
0,Lower-Carb Healthy Vegetable Casserole,15,13,8
1,Classic Frozen Strawberry Margarita,4,8,1
2,Golden Egg Loaves (or Braids),24,9,24
3,Chocolate Graham Crust,5,4,8
4,No-Knead Skillet Olive Bread,19,9,10
...,...,...,...,...
19944,Crunchy French Onion Chicken,10,3,4
19945,Roasted Summer Squash,8,5,4
19946,Ricotta Gnocchi,21,14,5
19947,Fresh Fruit Basket Cobbler,13,10,8


In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [7]:
# finding similar recipes
# cosine similarity

# Combine 'Title', 'Ingredients', 'Directions' into a single text column for each recipe
data['combined_features'] = data['title'] + ' ' + data['ingredients'].apply(lambda x: ' '.join(x)) + ' ' + data['directions']

# Use TF-IDF to vectorize the combined features
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(data['combined_features'])

# Calculate cosine similarity between recipes
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Function to get similar recipes based on cosine similarity
def get_similar_recipes(recipe_id, similarity_matrix, threshold=0.3):
    similar_recipes = []
    for idx, score in enumerate(similarity_matrix[recipe_id]):
        if idx != recipe_id and score >= threshold:
            similar_recipes.append((idx, score))
    return similar_recipes

# Example: Get similar recipes for Recipe 1 (adjust the recipe_id accordingly)
recipe_id = 5  # Change the recipe ID to find similar recipes for a different recipe
similar_recipes = get_similar_recipes(recipe_id, cosine_sim)

# Display similar recipes
print(f"Similar recipes for {data['title'][recipe_id]}:")
for recipe in similar_recipes:
    print(f"Recipe: {data['title'][recipe[0]]}, Similarity Score: {recipe[1]}")

Similar recipes for Turkey Pot Pie:
Recipe: Chocolate Graham Crust, Similarity Score: 0.318379299117105
Recipe: Southern Pecan Pie, Similarity Score: 0.3934382131018406
Recipe: White Chicken Chili Pot Pie, Similarity Score: 0.32421085646996656
Recipe: Easy Pot Pie with Pie Crust, Similarity Score: 0.5381926898908417
Recipe: Butter Coconut Pie, Similarity Score: 0.49731549167629485
Recipe: Raspberry Pie, Similarity Score: 0.5484929679348689
Recipe: English Honey-Roasted Turkey, Similarity Score: 0.30919132697123564
Recipe: Berry Custard Pie, Similarity Score: 0.3202610816046148
Recipe: Quiche, Similarity Score: 0.30576575938989664
Recipe: Sarah Contona's Sweet Potato Pie, Similarity Score: 0.3293188639279943
Recipe: Brown Sugar Pie II, Similarity Score: 0.3669569256759787
Recipe: Caramel Pecan Apple Pie, Similarity Score: 0.40815705220306897
Recipe: Ricotta Pie (Old Italian Recipe), Similarity Score: 0.3458102113883574
Recipe: Mini Pumpkin Pies, Similarity Score: 0.32210817214447063
Rec