In [None]:
import pandas as pd

from collections import defaultdict

import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from scipy.sparse.linalg import svds

import matplotlib.pyplot as plt

import ast


In [None]:

# Load every CSV this notebook works with from the local "datasets/" directory.
# Paths are relative, so the kernel's working directory must contain "datasets/".
# NOTE(review): file names look like the Food.com recipes & interactions dump — confirm provenance.

# "recipes_made" train/test/validation splits
df_made_train = pd.read_csv("datasets/recipes_made_train.csv")
df_made_test = pd.read_csv("datasets/recipes_made_test.csv")
df_made_val = pd.read_csv("datasets/recipes_made_val.csv")
# RAW_* files (df_raw_recipes is the frame filtered and featurized in later cells)
df_raw_interactions = pd.read_csv("datasets/RAW_interactions.csv")
df_raw_recipes = pd.read_csv("datasets/RAW_recipes.csv")
# PP_* files — presumably pre-processed users/recipes; verify against the dataset docs
df_pp_users = pd.read_csv("datasets/PP_users.csv")
df_pp_recipes = pd.read_csv("datasets/PP_recipes.csv")
# Interaction train/test/validation splits (cleaned in the next cell)
df_train = pd.read_csv("datasets/interactions_train.csv")
df_test = pd.read_csv("datasets/interactions_test.csv")
df_val = pd.read_csv("datasets/interactions_validation.csv")
# Ingredient mapping table
df_ingr_map = pd.read_csv("datasets/ingr_map.csv")

In [None]:
# Keep only recipes that take at most one day (1440 minutes), use at most
# 25 ingredients, and have at most 40 steps.
# NOTE(review): this step was previously described as "more than 25 steps /
# more than 40 ingredients", the reverse of the limits enforced here — confirm
# which pairing is intended.
_within_limits = (
    (df_raw_recipes['minutes'] <= 1440)
    & (df_raw_recipes['n_ingredients'] <= 25)
    & (df_raw_recipes['n_steps'] <= 40)
)
df_rr_clean = df_raw_recipes[_within_limits]


def _restrict_to_clean_recipes(interactions):
    """Drop the date/u/i columns and keep rows whose recipe survived cleaning."""
    trimmed = interactions.drop(columns=["date", "u", "i"])
    return trimmed[trimmed['recipe_id'].isin(df_rr_clean['id'])]


# Apply the same column drop + recipe filter to each interaction split
df_train_clean = _restrict_to_clean_recipes(df_train)
df_test_clean = _restrict_to_clean_recipes(df_test)
df_val_clean = _restrict_to_clean_recipes(df_val)


In [None]:

# Take the first eighth of the cleaned training interactions (rows in their existing order)
df_train_sub = df_train_clean.iloc[: len(df_train_clean) // 8]

In [None]:
# Display the training subset (the notebook renders a cell's last expression)
df_train_sub

In [None]:


# Create utility matrix: one row per user, one column per recipe, cell = rating.
# User/recipe pairs with no interaction become 0.
utility_matrix = df_train_sub.pivot(index='user_id', columns='recipe_id', values='rating').fillna(0)

# Perform Singular Value Decomposition.
# svds accepts an ndarray / sparse matrix / LinearOperator of floating dtype, not a
# DataFrame, so hand it a float array while keeping utility_matrix itself as a
# labelled DataFrame for later lookups.
ratings_array = utility_matrix.to_numpy(dtype=float)
# k is the number of latent features; svds requires k < min(n_users, n_recipes),
# so cap the default of 50 for small matrices.
n_latent_features = min(50, min(ratings_array.shape) - 1)
U, sigma, Vt = svds(ratings_array, k=n_latent_features)
# NOTE: svds does not guarantee the ordering of the returned singular values.
sigma = np.diag(sigma)

# Compute user and recipe latent feature matrices
user_features = np.dot(U, sigma)  # User latent feature matrix, shape (n_users, k)
recipe_features_cf = np.dot(sigma, Vt).T  # Recipe latent feature matrix (from CF), shape (n_recipes, k)


In [None]:
# Inspect the column labelled 2046 of the utility matrix (all users' entries for that label)
utility_matrix.loc[:, 2046]

In [None]:
# Normalize content-based features: z-score each numeric recipe attribute.
# fit_transform re-fits the shared scaler on every call, so each column is
# standardized on its own statistics (the scaler ends up fitted on n_ingredients).
scaler = StandardScaler()
df_raw_recipes['min_normalized'] = scaler.fit_transform(df_raw_recipes[['minutes']])
df_raw_recipes['n_steps_normalized'] = scaler.fit_transform(df_raw_recipes[['n_steps']])
df_raw_recipes['n_ingr_normalized'] = scaler.fit_transform(df_raw_recipes[['n_ingredients']])

# Convert nutrition strings into Python lists.
# literal_eval must be applied to the values of the nutrition column only; the
# isinstance guard leaves already-parsed values alone so the cell can be re-run.
df_raw_recipes['nutrition'] = df_raw_recipes['nutrition'].apply(
    lambda raw: ast.literal_eval(raw) if isinstance(raw, str) else raw
)

# Content-based feature table, keyed by the recipe 'id' column.
# NOTE(review): this selects the raw minutes / n_steps / n_ingredients columns, not the
# *_normalized columns computed above — confirm which set is meant to feed the model.
# NOTE(review): 'nutrition' holds one list per row, so it is not directly usable as a
# numeric feature.
recipe_features_cb = df_raw_recipes[['id', 'minutes', 'nutrition', 'n_steps', 'n_ingredients']]



In [None]:


# Align CF features with the recipe IDs: one row per utility-matrix column.
# The index is named 'recipeID' explicitly because reset_index() names the new column
# after the index (which would otherwise inherit the pivot's 'recipe_id' name), and the
# merge below needs a 'recipeID' key.
cf_features_df = pd.DataFrame(
    recipe_features_cf,
    index=pd.Index(utility_matrix.columns, name='recipeID'),
    columns=[f'CF_Feature_{i}' for i in range(recipe_features_cf.shape[1])],
).reset_index()

# Merge CF and CBF features.
# The content-based table keys recipes by 'id'; rename it so both sides share the
# 'recipeID' key. The inner join keeps only recipes present in both tables.
hybrid_features = pd.merge(
    cf_features_df,
    recipe_features_cb.rename(columns={'id': 'recipeID'}),
    on='recipeID',
    how='inner',
).set_index('recipeID')

print(hybrid_features.head())  # Combined CF + CBF feature representation


In [None]:

# Pairwise recipe-to-recipe cosine similarity; rows and columns follow hybrid_features.index.
# cosine_similarity needs a purely numeric matrix, so only numeric columns are used —
# object columns such as the list-valued 'nutrition' are left out.
# NOTE(review): expand 'nutrition' into numeric columns upstream if it should contribute.
similarity_matrix = cosine_similarity(hybrid_features.select_dtypes(include='number'))

def recommend_recipes(user_id, num_recommendations=5):
    """Recommend recipes the user has not rated yet, ranked by similarity-weighted score.

    A candidate recipe's score is the sum, over the recipes the user rated, of
    ``rating * similarity(rated recipe, candidate)``.

    Reads three module-level objects:
    ``utility_matrix`` (DataFrame, users x recipes, 0 meaning "not rated"),
    ``hybrid_features`` (DataFrame indexed by recipe ID) and
    ``similarity_matrix`` (square array whose rows/columns follow
    ``hybrid_features.index``).

    Parameters
    ----------
    user_id : label
        Row label of the user in ``utility_matrix``.
    num_recommendations : int, default 5
        Maximum number of recipes to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``'recipeID'`` and ``'score'``, sorted by score descending,
        excluding recipes the user already rated.

    Raises
    ------
    KeyError
        If ``user_id`` is not a row label of ``utility_matrix``.
    """
    # Get the user's ratings and the IDs of the recipes they actually rated
    user_ratings = utility_matrix.loc[user_id]
    rated_recipe_ids = user_ratings[user_ratings > 0].index

    # The similarity matrix has one row/column per recipe in hybrid_features, which can
    # be a subset of (and ordered differently from) the utility-matrix columns. Lay the
    # rating vector out in that same order, with 0 for recipes the user has no rating for,
    # so the dot product below has matching dimensions.
    aligned_ratings = user_ratings.reindex(hybrid_features.index, fill_value=0).to_numpy(dtype=float)

    # Compute scores for all recipes based on the similarity matrix
    recipe_scores = aligned_ratings @ np.asarray(similarity_matrix)

    # Rank recipes by score and exclude already-rated recipes
    ranked = pd.DataFrame({
        'recipeID': hybrid_features.index,
        'score': recipe_scores
    }).sort_values(by='score', ascending=False)
    unseen = ranked[~ranked['recipeID'].isin(rated_recipe_ids)]

    return unseen.head(num_recommendations)

# Example: fetch the top 3 recommendations for user 12200 and print them
recommendations = recommend_recipes(12200, num_recommendations=3)
print(recommendations)
