In [None]:
# Import Block
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from surprise import SVD
from surprise import Dataset, Reader
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse, mse, mae, fcp

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

# Collaborative Filtering

## Singular Value Decomposition (SVD)


In [None]:
# One seed shared by the split and the model so that a fresh
# "Restart Kernel -> Run All" reproduces the same trained model.
RANDOM_SEED = 42

# Load the preprocessed user-recipe interactions
df_interactions_train = pd.read_csv("data/interactions_train_processed.csv")

# Wrap the (user, recipe, rating) triples in Surprise's dataset format.
# NOTE(review): rating_scale assumes every rating lies in [1, 5] — confirm
# against the processed file.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df_interactions_train[['user_id', 'recipe_id', 'rating']], reader)

# Hold out 20% of the interactions for evaluation
trainset, testset = train_test_split(data, test_size=0.2, random_state=RANDOM_SEED)

# Train SVD model; random_state seeds the random initialisation of the factors
svd = SVD(n_epochs=50, lr_all=0.005, random_state=RANDOM_SEED)
svd.fit(trainset)

def adjusted_cosine_similarity(matrix):
    """
    Pairwise cosine similarity between the rows of `matrix`, computed after
    subtracting each row's own mean from that row.

    Args:
    - matrix: 2-D array supporting `mean(axis=1, keepdims=True)`.

    Returns:
    - The output of `cosine_similarity` applied to the row-centered matrix.

    Note: with a single-column input every row equals its own mean, so the
    centered matrix is all zeros.
    """
    row_means = matrix.mean(axis=1, keepdims=True)
    centered_rows = matrix - row_means
    return cosine_similarity(centered_rows)

# Predict ratings for the held-out interactions
predictions = svd.test(testset)

# Evaluate the model on the held-out set; Surprise's accuracy helpers print
# the metric and also return it.
test_rmse = rmse(predictions)
test_mae = mae(predictions)

# Rescale scores for better variation.
# The estimates are scaled directly as an (n, 1) column. Row-centering a
# single-column array zeroes every entry, so a pairwise similarity built from
# it carries no information and would need n x n memory.
estimated_ratings = np.array([pred.est for pred in predictions]).reshape(-1, 1)
scaler = MinMaxScaler(feature_range=(1, 5))
rescaled_scores = scaler.fit_transform(estimated_ratings)

# Preview the first rows only rather than dumping the whole array
print(rescaled_scores[:10])

In [None]:
# Get recommendations for a specific user
def recommend_recipes(model, user_id, df, num_recommendations=20):
    """
    Generate recipe recommendations for a given user.

    Every recipe in `df` for which the user has no interaction row is scored
    with `model.predict`, and the highest-scoring recipes are returned.

    Args:
    - model: Fitted model exposing `predict(user_id, recipe_id)` whose result
      has an `est` attribute (e.g. the trained SVD model).
    - user_id: The ID of the user for whom to recommend recipes.
    - df: DataFrame of interactions with `user_id` and `recipe_id` columns.
    - num_recommendations: Maximum number of recommendations to return.

    Returns:
    - DataFrame with columns `recipe_id` and `predicted_rating`, ordered by
      predicted rating from highest to lowest. It is empty when the user has
      already rated every recipe in `df`.
    """
    # Candidate pool, in order of first appearance in df
    all_recipes = df['recipe_id'].unique()

    # A set makes each "already rated?" check O(1) instead of a scan over
    # the user's rated-recipe array for every candidate.
    rated_recipes = set(df.loc[df['user_id'] == user_id, 'recipe_id'])

    # Predict a rating for every recipe the user has not rated yet
    predictions = [
        (recipe_id, model.predict(user_id, recipe_id).est)
        for recipe_id in all_recipes
        if recipe_id not in rated_recipes
    ]

    # sorted() is stable, so equally rated recipes keep first-appearance order
    top_recommendations = sorted(predictions, key=lambda x: x[1], reverse=True)[:num_recommendations]

    # Convert to DataFrame for better visualization
    return pd.DataFrame(top_recommendations, columns=['recipe_id', 'predicted_rating'])

# Demo call: swap in any user ID that exists in the dataset
user_id = 2046
recommendations = recommend_recipes(
    model=svd,
    user_id=user_id,
    df=df_interactions_train,
)


In [None]:
# Interaction rows recorded for user 2046
df_interactions_train.loc[df_interactions_train['user_id'].eq(2046)]

In [None]:
# Distinct recipe name(s) stored under recipe_id 13285
df_interactions_train.loc[df_interactions_train['recipe_id'].eq(13285), 'recipe_name'].drop_duplicates()

# Content-Based Filtering

## TF-IDF Cosine Similarity on Ingredients


In [None]:
# Vectorize the ingredient text with TF-IDF, ignoring English stop words.
# The matrix is fitted on the frame's rows as-is, so row i of tfidf_matrix
# describes row i of df_interactions_train.
ingredient_docs = df_interactions_train['ingredient_names']
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(ingredient_docs)

# Function to get recommendations
def content_based_recommendations(recipe_name, top_n=10):
    """
    Recommend recipes whose ingredients are most similar to a given recipe.

    Relies on the module-level `df_interactions_train` and `tfidf_matrix`,
    which must be row-aligned (row i of the matrix describes row i of the
    frame).

    Args:
    - recipe_name: Exact `recipe_name` value of the query recipe.
    - top_n: Maximum number of distinct recipes to return.

    Returns:
    - DataFrame with `recipe_name` and `ingredient_names`, most similar
      first, without duplicate rows and without the query recipe itself.

    Raises:
    - IndexError: If no row has the given `recipe_name`.
    """
    # Work with positional row numbers so the lookup stays valid for both
    # tfidf_matrix and .iloc even when the frame's index is not 0..n-1.
    name_mask = (df_interactions_train['recipe_name'] == recipe_name).to_numpy()
    matching_rows = np.flatnonzero(name_mask)
    if matching_rows.size == 0:
        raise IndexError(f"recipe_name {recipe_name!r} not found in df_interactions_train")
    idx = matching_rows[0]

    # Similarity of the query row against every row
    cosine_sim = cosine_similarity(tfidf_matrix[idx], tfidf_matrix).ravel()

    # Stable descending sort: rows with equal similarity keep their row order
    ranked_rows = np.argsort(-cosine_sim, kind='stable')
    ranked = df_interactions_train[['recipe_name', 'ingredient_names']].iloc[ranked_rows]

    # Exclude the query by name instead of skipping the first hit: the frame
    # may hold several rows for one recipe, and those tie at the top.
    ranked = ranked[ranked['recipe_name'] != recipe_name]

    # De-duplicate before truncating so up to top_n distinct recipes come back
    return ranked.drop_duplicates(subset=['recipe_name', 'ingredient_names']).head(top_n)

# Test the function
content_based_recommendations('zucchini moussaka')
