In [1]:
!pip install nltk pandas scikit-learn

import pandas as pd
import numpy as np
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

# Load the dataset (assuming you've uploaded it to Colab)
# Note: Recipe1M+ is large, so we'll work with a sample
try:
    recipes = pd.read_csv('recipes.csv')  # Replace with actual file name
except:
    print("Please upload the Recipe1M+ dataset to Colab first")
    print("You can download it from Kaggle: https://www.kaggle.com/datasets/shuyangli94/food-com-recipes-and-user-interactions")



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Please upload the Recipe1M+ dataset to Colab first
You can download it from Kaggle: https://www.kaggle.com/datasets/shuyangli94/food-com-recipes-and-user-interactions


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


In [4]:
import nltk
nltk.download('all', halt_on_error=False)  # Downloads all NLTK data (optional)

[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/abc.zip.
[nltk_data]    | Downloading package alpino to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/alpino.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger_eng to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping
[nltk_data]    |       taggers/averaged_perceptron_tagger_eng.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping
[nltk_data]    |       taggers/averaged_perceptron_tagger_ru.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger_rus to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |  

True

In [5]:
# Sample data if Recipe1M+ isn't available
sample_data = {
    'title': ['Pasta Carbonara', 'Vegetable Stir Fry', 'Chocolate Chip Cookies', 'Greek Salad', 'Beef Tacos'],
    'ingredients': [
        'pasta, eggs, bacon, parmesan cheese, black pepper',
        'broccoli, carrots, bell peppers, soy sauce, garlic, ginger',
        'flour, butter, sugar, eggs, chocolate chips, vanilla extract',
        'cucumber, tomatoes, red onion, feta cheese, olives, olive oil',
        'ground beef, taco shells, lettuce, tomato, cheese, sour cream'
    ],
    'instructions': ['Cook pasta...', 'Stir fry vegetables...', 'Mix ingredients...', 'Combine vegetables...', 'Cook beef...'],
    'dietary_info': ['contains meat, dairy', 'vegetarian, vegan', 'vegetarian, contains dairy', 'vegetarian, contains dairy', 'contains meat, dairy']
}

recipes = pd.DataFrame(sample_data)

# Preprocessing functions
def preprocess_text(text):
    # Convert to lowercase
    text = text.lower()
    # Remove punctuation
    text = re.sub(r'[^\w\s]', '', text)
    # Tokenize
    tokens = word_tokenize(text)
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word not in stop_words]
    # Lemmatize
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in tokens]
    return ' '.join(tokens)

# Preprocess ingredients
recipes['processed_ingredients'] = recipes['ingredients'].apply(preprocess_text)

In [6]:
# Create TF-IDF vectorizer
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(recipes['processed_ingredients'])

# Get feature names (ingredients)
ingredient_names = tfidf.get_feature_names_out()

In [7]:
def recommend_recipes(input_ingredients, dietary_restrictions=None, top_n=5):
    # Preprocess input
    processed_input = preprocess_text(input_ingredients)
    input_vector = tfidf.transform([processed_input])

    # Calculate cosine similarity
    cosine_sim = cosine_similarity(input_vector, tfidf_matrix)
    sim_scores = list(enumerate(cosine_sim[0]))

    # Filter by dietary restrictions if provided
    if dietary_restrictions:
        valid_indices = []
        for idx, score in sim_scores:
            recipe_dietary = str(recipes.iloc[idx]['dietary_info']).lower()
            if all(restriction not in recipe_dietary for restriction in dietary_restrictions):
                valid_indices.append((idx, score))
        sim_scores = valid_indices

    # Sort by similarity score
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get top N recommendations
    top_indices = [i[0] for i in sim_scores[:top_n]]
    return recipes.iloc[top_indices]

In [8]:
# Example 1: Basic ingredient-based recommendation
print("Recommendations for 'pasta, cheese, eggs':")
print(recommend_recipes('pasta, cheese, eggs')[['title', 'ingredients']])

# Example 2: With dietary restrictions (vegetarian)
print("\nVegetarian recommendations for 'cheese, vegetables':")
print(recommend_recipes('cheese, vegetables', dietary_restrictions=['meat'])[['title', 'ingredients', 'dietary_info']])

# Example 3: Vegan recommendations
print("\nVegan recommendations for 'vegetables, soy sauce':")
print(recommend_recipes('vegetables, soy sauce', dietary_restrictions=['meat', 'dairy', 'eggs'])[['title', 'ingredients', 'dietary_info']])

Recommendations for 'pasta, cheese, eggs':
                    title                                        ingredients
0         Pasta Carbonara  pasta, eggs, bacon, parmesan cheese, black pepper
2  Chocolate Chip Cookies  flour, butter, sugar, eggs, chocolate chips, v...
4              Beef Tacos  ground beef, taco shells, lettuce, tomato, che...
3             Greek Salad  cucumber, tomatoes, red onion, feta cheese, ol...
1      Vegetable Stir Fry  broccoli, carrots, bell peppers, soy sauce, ga...

Vegetarian recommendations for 'cheese, vegetables':
                    title                                        ingredients  \
3             Greek Salad  cucumber, tomatoes, red onion, feta cheese, ol...   
1      Vegetable Stir Fry  broccoli, carrots, bell peppers, soy sauce, ga...   
2  Chocolate Chip Cookies  flour, butter, sugar, eggs, chocolate chips, v...   

                 dietary_info  
3  vegetarian, contains dairy  
1           vegetarian, vegan  
2  vegetarian, contains 

In [9]:
from IPython.display import display, HTML

def interactive_recommender():
    print("🍳 Recipe Recommendation System 🍳")
    ingredients = input("Enter ingredients you have (comma separated): ")
    dietary = input("Any dietary restrictions? (comma separated, e.g. 'vegetarian, vegan' or 'none'): ")

    if dietary.lower() == 'none':
        dietary = None
    else:
        dietary = [d.strip().lower() for d in dietary.split(',')]

    recommendations = recommend_recipes(ingredients, dietary)

    display(HTML("<h2>Recommended Recipes:</h2>"))
    for idx, row in recommendations.iterrows():
        display(HTML(f"""
        <div style='border: 1px solid #ddd; padding: 10px; margin: 10px; border-radius: 5px;'>
            <h3>{row['title']}</h3>
            <p><strong>Ingredients:</strong> {row['ingredients']}</p>
            <p><strong>Dietary Info:</strong> {row['dietary_info']}</p>
        </div>
        """))

# Uncomment to run interactively in Colab
# interactive_recommender()

In [10]:
# Run the interactive recommender
interactive_recommender()

🍳 Recipe Recommendation System 🍳
Enter ingredients you have (comma separated): pasta
Any dietary restrictions? (comma separated, e.g. 'vegetarian, vegan' or 'none'): vegan
