In [2]:
# Import Block
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from surprise import SVD
from surprise import Dataset, Reader
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Collaborative Filtering

## Singular Value Decomposition (SVD)


In [16]:
# Load data
df_interactions_train = pd.read_csv("data/interactions_train_processed.csv")

# Load data into Surprise format
# Dataset.load_from_df expects the columns in (user, item, rating) order.
# NOTE(review): rating_scale=(1, 5) assumes the processed file holds no
# ratings outside 1-5 -- confirm against the preprocessing step.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df_interactions_train[['user_id', 'recipe_id', 'rating']], reader)

# Train-test split (seeded so the held-out 20% is the same on every run)
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Train SVD model
# SVD initialises its factors randomly; without a seed the RMSE below
# changes on every Restart & Run All even though the split is fixed.
svd = SVD(random_state=42)
svd.fit(trainset)

# Evaluate the model on the held-out interactions; rmse() prints and returns the score
predictions = svd.test(testset)
rmse(predictions)

In [20]:
# Show every interaction row recorded for recipe 137739
df_interactions_train.loc[df_interactions_train['recipe_id'].eq(137739)]

Unnamed: 0,user_id,recipe_id,date,rating,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
127665,4470,137739,2006-02-18,5.0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7
552747,593927,137739,2010-08-21,5.0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7
609322,178427,137739,2011-12-05,5.0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7


# Content-Based Filtering

## Cosine Similarity


In [None]:
# TF-IDF on ingredients
# The interactions frame holds one row per (user, recipe) rating, so a recipe
# appears once for every rating it received. Collapse to one row per recipe
# first; otherwise a recipe's nearest neighbours are its own duplicate rows.
df_recipes = (
    df_interactions_train
    .drop_duplicates(subset='recipe_id')
    .reset_index(drop=True)  # row label == row position in tfidf_matrix
)

tfidf = TfidfVectorizer(stop_words='english')
# The loaded frame exposes the ingredient text as 'ingredients' (the same
# column the function returns); it has no 'ingredient_names' column.
# NOTE(review): if a tokenised 'ingredient_names' column is built elsewhere,
# swap it in here.
# fillna('') because TfidfVectorizer rejects missing (NaN) documents.
tfidf_matrix = tfidf.fit_transform(df_recipes['ingredients'].fillna(''))

# Function to get recommendations
def content_based_recommendations(recipe_name, top_n=10):
    """Return the recipes whose ingredient text is most similar to a given recipe.

    Similarity is the cosine similarity between TF-IDF vectors of the
    'ingredients' column.

    Parameters
    ----------
    recipe_name : str
        Name of the query recipe. Matching ignores case and surrounding
        whitespace; if several recipes share the name, the first one is used.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows with columns ``name`` and ``ingredients``,
        ordered from most to least similar. The query recipe itself is
        never included.

    Raises
    ------
    IndexError
        If no recipe with the given name exists.
    """
    wanted = recipe_name.strip().casefold()
    # fillna('') so recipes with a missing name simply never match.
    normalised_names = df_recipes['name'].fillna('').str.strip().str.casefold()
    matches = np.flatnonzero(normalised_names.eq(wanted).to_numpy())
    if matches.size == 0:
        # IndexError is what the bare `.index[0]` lookup raised for a missing name.
        raise IndexError(f"No recipe named {recipe_name!r} in the training data")
    idx = int(matches[0])

    scores = cosine_similarity(tfidf_matrix[idx], tfidf_matrix).ravel()
    # Stable sort on the negated scores: descending similarity, ties kept in row order.
    ranked = np.argsort(-scores, kind='stable')
    # Drop the query explicitly: other recipes with identical ingredients also
    # score 1.0, so "skip the first hit" is not guaranteed to skip the query.
    ranked = ranked[ranked != idx][:max(top_n, 0)]
    return df_recipes.iloc[ranked][['name', 'ingredients']]

# Test the function
content_based_recommendations('Chocolate Cake')


In [11]:
# Load the pickled ingredient map (presumably a DataFrame, going by the name -- confirm).
# NOTE: unpickling can execute arbitrary code; only load this file if it comes
# from a trusted source.
df_ingredient_map = pd.read_pickle("data/ingr_map.pkl")
# Bare last expression so the notebook displays the loaded object.
df_ingredient_map