In [1]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

Extracting Unique Recipes for Content-Based Filtering:

In [2]:
# Read the processed recipe dataset from disk.
df = pd.read_csv('Final_Processed_Dataset.csv')

# Keep only the first row encountered for each RecipeId, then renumber the
# surviving rows from 0 so the index is a clean 0..n-1 range.
unique_df = df.loc[~df.duplicated(subset='RecipeId', keep='first')].reset_index(drop=True)

TF-IDF Vectorization:

In [3]:
# Build a TF-IDF representation of every recipe's ingredient text.
tfidf = TfidfVectorizer()

# Missing ingredient entries are replaced with an empty string before fitting:
# TfidfVectorizer raises ValueError on NaN documents, whereas an empty document
# simply becomes an all-zero row (similarity 0 to every other recipe).
tfidf_matrix = tfidf.fit_transform(unique_df['RecipeIngredientParts'].fillna(''))

Computing Cosine Similarity and Indexing:

In [4]:
# Pairwise similarity between all recipes. TfidfVectorizer L2-normalises each
# row by default (norm='l2'), so the plain dot product computed by
# linear_kernel equals cosine similarity here.
# NOTE(review): this materialises an n x n matrix; watch memory on large datasets.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Map recipe name -> row position in unique_df.
indices = pd.Series(unique_df.index, index=unique_df['Name'])
# Series.drop_duplicates() compares *values* (the row positions, which are
# already unique) and therefore never removed anything. Duplicated names must
# be dropped on the index labels instead, keeping the first occurrence, so that
# indices[name] always yields a single integer rather than a Series.
indices = indices[~indices.index.duplicated(keep='first')]

Recommendation Function:

In [5]:
def get_recommendations(title, cosine_sim=cosine_sim, top_n=5):
    """Return the names of the recipes most similar to ``title``.

    Parameters
    ----------
    title : str
        Recipe name to look up in the module-level ``indices`` mapping.
    cosine_sim : array-like, optional
        Similarity matrix; row ``i`` holds the similarity of recipe ``i`` (by
        row position in ``unique_df``) to every other recipe. Defaults to the
        matrix computed earlier in the notebook.
    top_n : int, optional
        Maximum number of recommendations to return (default 5).

    Returns
    -------
    list of str
        Up to ``top_n`` distinct recipe names, most similar first. The queried
        recipe itself, and any other recipe sharing its name, is never
        included.

    Raises
    ------
    KeyError
        If ``title`` is not a known recipe name.
    """
    if title not in indices.index:
        raise KeyError(f"Recipe {title!r} not found in the recipe index")

    idx = indices[title]
    # A name that occurs more than once in the index yields a Series of
    # positions; use the first occurrence so the row lookup stays 1-D.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    scores = np.asarray(cosine_sim[idx]).ravel()
    # Descending by score; the stable sort keeps ties in ascending row order,
    # matching what sorted(..., reverse=True) on (position, score) pairs gives.
    ranked = np.argsort(-scores, kind='stable')

    names = unique_df['Name']
    # Seeding with the query title prevents recommending a recipe back to itself
    # under the same name.
    seen = {title}
    recommendations = []

    for i in ranked:
        if len(recommendations) >= top_n:
            break
        # Skip the query row explicitly instead of assuming it sorts first:
        # with tied scores another recipe may occupy position 0.
        if i == idx:
            continue
        recipe_title = names.iloc[i]
        if recipe_title in seen:
            continue
        seen.add(recipe_title)
        recommendations.append(recipe_title)

    return recommendations

Printing Recommendations:

In [10]:
# Show the recommendation list for a few sample recipes, one list per line.
for recipe_name in ('Brownie Pudding', 'Brown Bag Apple Salad', 'Chicken Curry'):
    print(get_recommendations(recipe_name))

['Chocolate Pudding Cake', 'Apricot Squares', 'Chocolate Doughnuts', 'Hot Fudge Pudding Cake II', 'Fresh Peach Upside Down Cake']
['Chocolate Coated Orange Peels', 'Yankee Grapefruit Meringue Pie', 'Summer Fruit Bowl', 'Hemingway Special- a Caribbean Inspired Cocktail', 'Chicken Waldorf Salad']
['Chicken Biryani II', 'Exotic and Sweet Moroccan Chicken with Spicy Rice', 'Easy African Sweet Potato Patties', 'Indian Corn Pilaf', 'Basmati Rice with Vegetables']
