In [5]:
FLAG_DATASET_DL = False

## 1. Carga de datos y librerías

In [6]:
# Actualizar pip
%pip install --upgrade pip setuptools wheel
# Dependencias para graficar y manipular datos
%pip install pandas matplotlib tqdm seaborn ipywidgets
# Dependencias para leer archivos Parquet
%pip install pyarrow fastparquet
# Para predicción
%pip install scikit-learn recommenders

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [7]:
# Importar librerías globales

# Manejo de datos y visualización
import os
import pandas as pd
import numpy as np
import ast
import re
import matplotlib.pyplot as plt
from collections import defaultdict
from tqdm.auto import tqdm
import zipfile

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import NearestNeighbors
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix

# Crear directorios
PATH_DATASETS = "datasets"
PATH_DATASETS_FOODCOM = os.path.join(PATH_DATASETS, "foodcom")
os.makedirs(PATH_DATASETS, exist_ok=True)
PATH_FOODCOM_RECIPES = os.path.join(PATH_DATASETS_FOODCOM, "recipes.parquet")
PATH_FOODCOM_REVIEWS = os.path.join(PATH_DATASETS_FOODCOM, "reviews.parquet")

In [8]:
if FLAG_DATASET_DL:
    # Descargar "Recipes and Reviews" de Food.com
    os.system(
        f"curl -L -o {PATH_DATASETS_FOODCOM}.zip https://www.kaggle.com/api/v1/datasets/download/irkaal/foodcom-recipes-and-reviews"
    )
    # os.system(f"unzip -o {PATH_DATASETS_FOODCOM}.zip -d {PATH_DATASETS_FOODCOM}")

    # Cita: Dominio público (https://www.kaggle.com/datasets/irkaal/foodcom-recipes-and-reviews)
    zip_path = PATH_DATASETS_FOODCOM + ".zip"

    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(PATH_DATASETS_FOODCOM)

    # Eliminar el zip
    os.remove(f"{PATH_DATASETS_FOODCOM}.zip")

    print("✅ Archivos descomprimidos en:", PATH_DATASETS_FOODCOM)

**Análisis de Datos**

In [11]:
og_recipes = pd.read_parquet(PATH_FOODCOM_RECIPES)
og_recipes.head()

Unnamed: 0,RecipeId,Name,AuthorId,AuthorName,CookTime,PrepTime,TotalTime,DatePublished,Description,Images,...,SaturatedFatContent,CholesterolContent,SodiumContent,CarbohydrateContent,FiberContent,SugarContent,ProteinContent,RecipeServings,RecipeYield,RecipeInstructions
0,38.0,Low-Fat Berry Blue Frozen Dessert,1533,Dancer,PT24H,PT45M,PT24H45M,1999-08-09 21:46:00+00:00,Make and share this Low-Fat Berry Blue Frozen ...,[https://img.sndimg.com/food/image/upload/w_55...,...,1.3,8.0,29.8,37.1,3.6,30.2,3.2,4.0,,"[Toss 2 cups berries with sugar., Let stand fo..."
1,39.0,Biryani,1567,elly9812,PT25M,PT4H,PT4H25M,1999-08-29 13:12:00+00:00,Make and share this Biryani recipe from Food.com.,[https://img.sndimg.com/food/image/upload/w_55...,...,16.6,372.8,368.4,84.4,9.0,20.4,63.4,6.0,,[Soak saffron in warm milk for 5 minutes and p...
2,40.0,Best Lemonade,1566,Stephen Little,PT5M,PT30M,PT35M,1999-09-05 19:52:00+00:00,This is from one of my first Good House Keepi...,[https://img.sndimg.com/food/image/upload/w_55...,...,0.0,0.0,1.8,81.5,0.4,77.2,0.3,4.0,,"[Into a 1 quart Jar with tight fitting lid, pu..."
3,41.0,Carina's Tofu-Vegetable Kebabs,1586,Cyclopz,PT20M,PT24H,PT24H20M,1999-09-03 14:54:00+00:00,This dish is best prepared a day in advance to...,[https://img.sndimg.com/food/image/upload/w_55...,...,3.8,0.0,1558.6,64.2,17.3,32.1,29.3,2.0,4 kebabs,"[Drain the tofu, carefully squeezing out exces..."
4,42.0,Cabbage Soup,1538,Duckie067,PT30M,PT20M,PT50M,1999-09-19 06:19:00+00:00,Make and share this Cabbage Soup recipe from F...,[https://img.sndimg.com/food/image/upload/w_55...,...,0.1,0.0,959.3,25.1,4.8,17.7,4.3,4.0,,"[Mix everything together and bring to a boil.,..."


In [12]:
recipes = og_recipes.drop(['AuthorName','Description','CookTime', 'PrepTime', 'TotalTime', 'DatePublished',
                           'Images','RecipeIngredientQuantities', 'ReviewCount',
                           "RecipeYield", 'RecipeInstructions'], axis=1)
recipes.fillna(0, inplace=True)

**Flags restrictivas**

In [13]:
def is_x_flag(ingredients, keywords):
    if isinstance(ingredients, str):
        ingredients_str = ingredients.lower()
        for keyword in keywords:
            if keyword.lower() in ingredients_str:
                return 0
    return 1

Vegetariano

In [14]:
animal_keywords = [
    "beef", "pork", "chicken", "turkey", "bacon", " ham", "lamb", "veal", "duck", "goose",
    "sausage", "prosciutto", "salami", "steak", "ribs", "wings", "drumstick", "fillet", "cutlet",
    "tenderloin", "liver", "giblets", "gizzard", "tripe", "mutton", "venison", "rabbit",
    "kangaroo", "bison", "quail", "boar", "snail", "frog", "foie gras", "chorizo", "anchovy",
    "anchovies", "tuna", "salmon", "sardine", "trout", "mackerel", "fish", "seafood", "crab",
    "lobster", "shrimp", "prawn", "octopus", "squid", "clam", "oyster", "scallop", "eel",
    "halibut", "snapper", "herring", "catfish", "tilapia", "muscles"
]

In [15]:
recipes['is_vegetarian'] = recipes['RecipeIngredientParts'].apply(is_x_flag, args=(animal_keywords,))
print(recipes[['RecipeIngredientParts', 'is_vegetarian']].head())

                               RecipeIngredientParts  is_vegetarian
0  [blueberries, granulated sugar, vanilla yogurt...              1
1  [saffron, milk, hot green chili peppers, onion...              1
2  [sugar, lemons, rind of, lemon, zest of, fresh...              1
3  [extra firm tofu, eggplant, zucchini, mushroom...              1
4  [plain tomato juice, cabbage, onion, carrots, ...              1


Libre de lactosa

In [16]:
dairy_keywords = [
    "whole milk", "skim milk", "low-fat milk", "condensed milk", "evaporated milk",
    "powdered milk", "buttermilk", "heavy cream", "whipping cream", "sour cream",
    "half and half", "cheese", "cheddar", "mozzarella", "parmesan", "brie", "feta",
    "gouda", "provolone", "ricotta", "yogurt", "greek yogurt", "kefir", "curd",
    "paneer", "butter", "clarified butter", "ghee", "whey", "casein", "lactose"
]

In [17]:
recipes['is_lactose_free'] = recipes['RecipeIngredientParts'].apply(is_x_flag, args=(dairy_keywords,))
print(recipes[['RecipeIngredientParts', 'is_lactose_free']].head())

                               RecipeIngredientParts  is_lactose_free
0  [blueberries, granulated sugar, vanilla yogurt...                1
1  [saffron, milk, hot green chili peppers, onion...                1
2  [sugar, lemons, rind of, lemon, zest of, fresh...                1
3  [extra firm tofu, eggplant, zucchini, mushroom...                1
4  [plain tomato juice, cabbage, onion, carrots, ...                1


Vegano

In [18]:
non_vegan_keywords = [
    "egg", "albumen", "honey", "bee pollen", "jelly", "propolis", "gelatin",
    "lard", "tallow", "suet", "rennet", "cochineal", "carmine", "shellac",
    "broth", "stock", "isinglass", "glycerin","castoreum"
] + animal_keywords + dairy_keywords

In [19]:
recipes['is_vegan'] = recipes['RecipeIngredientParts'].apply(is_x_flag, args=(non_vegan_keywords,))
print(recipes[['RecipeIngredientParts', 'is_vegan']].head())

                               RecipeIngredientParts  is_vegan
0  [blueberries, granulated sugar, vanilla yogurt...         1
1  [saffron, milk, hot green chili peppers, onion...         1
2  [sugar, lemons, rind of, lemon, zest of, fresh...         1
3  [extra firm tofu, eggplant, zucchini, mushroom...         1
4  [plain tomato juice, cabbage, onion, carrots, ...         1


Libre de gluten

In [20]:
gluten_keywords = [
    "wheat", "white flour", "all-purpose flour", "enriched flour",
    "bread flour", "cake flour", "self-rising flour", "semolina", "spelt",
    "durum", "farro", "couscous", "bulgur", "kamut", "graham flour", "triticale",
    "barley", "malt", "rye", "pumpernickel", "rye flour", "seitan", "gluten",
    "soy sauce", "teriyaki sauce", "hoisin sauce", "beer", "ale", "lager", "stout",
    "bread", "buns", "bagels", "croissant", "crackers", "cookies", "cakes", "pastries",
    "biscuits", "noodles", "pasta", "macaroni", "lasagna", "ravioli", "dumplings",
    "cereal", "granola", "bran"
]

In [21]:
recipes['is_gluten_free'] = recipes['RecipeIngredientParts'].apply(is_x_flag, args=(gluten_keywords,))
print(recipes[['RecipeIngredientParts', 'is_gluten_free']].head())

                               RecipeIngredientParts  is_gluten_free
0  [blueberries, granulated sugar, vanilla yogurt...               1
1  [saffron, milk, hot green chili peppers, onion...               1
2  [sugar, lemons, rind of, lemon, zest of, fresh...               1
3  [extra firm tofu, eggplant, zucchini, mushroom...               1
4  [plain tomato juice, cabbage, onion, carrots, ...               1


Otros

In [22]:
nut_keywords = ["nut", "almond", "cashew", "pecan", "pistachio", "macadamia"]
recipes['is_nut_free'] = recipes['RecipeIngredientParts'].apply(is_x_flag, args=(nut_keywords,))

soy_keywords = ["soy","edamame", "tofu", "tempeh"]
recipes['is_soy_free'] = recipes['RecipeIngredientParts'].apply(is_x_flag, args=(soy_keywords,))

shellfish_keywords = ["shrimp", "prawn", "crab", "lobster", "scallop", "clam", "mussel", "oyster"]
recipes['is_shellfish_free'] = recipes['RecipeIngredientParts'].apply(is_x_flag, args=(shellfish_keywords,))

halal_keywords = ["pork", "bacon", "ham", "gelatin", "lard", "alcohol", "wine", "beer", "rum", "brandy"]
recipes['is_halal'] = recipes['RecipeIngredientParts'].apply(is_x_flag, args=(halal_keywords,))

kosher_keywords = ["pork", "shellfish", "catfish", "rabbit", "gelatin"]
recipes['is_kosher'] = recipes['RecipeIngredientParts'].apply(is_x_flag, args=(kosher_keywords,))

alcohol_keywords = ["alcohol", "ethanol", "wine", "beer", "vodka", "rum", "whiskey", "brandy", "liqueur"]
recipes['is_alcohol_free'] = recipes['RecipeIngredientParts'].apply(is_x_flag, args=(alcohol_keywords,))

high_fodmap_keywords = [
    "garlic", "onion", "wheat", "rye", "legumes", "lentils", "chickpeas", "beans", "apples", "pears",
    "honey", "milk", "yogurt", "cheese", "sorbitol", "mannitol", "xylitol"
]
recipes['is_high_fodmap'] = recipes['RecipeIngredientParts'].apply(is_x_flag, args=(high_fodmap_keywords,))

**Valores nutricionales**

In [23]:
#Valores nutricionales "sanos" y restricciones alimentarias, pueden ser adaptables según el usuario
#https://www-verywellfit-com.translate.goog/what-does-based-on-a-2000-calorie-diet-mean-4099137?_x_tr_sl=en&_x_tr_tl=es&_x_tr_hl=es&_x_tr_pto=sge#:~:text=Based%20on%20the%20DV%20information,or%201200%20calories%20from%20carbohydrates

Daily_values = {"Calories": 2000,
                "FatContent": 78,
                "SaturatedFatContent": 20,
                "CholesterolContent": 300,
                "SodiumContent": 2300,
                "CarbohydrateContent": 275,
                "FiberContent": 28,
                "SugarContent": 40,
                "ProteinContent": 50}

Meal_values = Daily_values.copy()
for value in Meal_values:
    Meal_values[value] = Meal_values[value]/3 #Son 3 comidas al día

In [24]:
recipes_healthy = recipes.copy()

for key, value in Meal_values.items():
    recipes_healthy[key] = recipes_healthy[key]/recipes_healthy["RecipeServings"]
    recipes_healthy = recipes_healthy[recipes_healthy[key] <= value]

recipes_healthy.drop("RecipeServings", axis=1, inplace=True)

In [25]:
recipes_healthy.reset_index(drop=True, inplace=True)
recipes_healthy.head()

Unnamed: 0,RecipeId,Name,AuthorId,RecipeCategory,Keywords,RecipeIngredientParts,AggregatedRating,Calories,FatContent,SaturatedFatContent,...,is_lactose_free,is_vegan,is_gluten_free,is_nut_free,is_soy_free,is_shellfish_free,is_halal,is_kosher,is_alcohol_free,is_high_fodmap
0,38.0,Low-Fat Berry Blue Frozen Dessert,1533,Frozen Desserts,"[Dessert, Low Protein, Low Cholesterol, Health...","[blueberries, granulated sugar, vanilla yogurt...",4.5,42.725,0.625,0.325,...,1,1,1,1,1,1,1,1,1,1
1,39.0,Biryani,1567,Chicken Breast,"[Chicken Thigh & Leg, Chicken, Poultry, Meat, ...","[saffron, milk, hot green chili peppers, onion...",3.0,185.116667,9.8,2.766667,...,1,1,1,1,1,1,1,1,1,1
2,42.0,Cabbage Soup,1538,Vegetable,"[Low Protein, Vegan, Low Cholesterol, Healthy,...","[plain tomato juice, cabbage, onion, carrots, ...",4.5,25.9,0.1,0.025,...,1,1,1,1,1,1,1,1,1,1
3,43.0,Best Blackbottom Pie,34879,Pie,"[Dessert, Weeknight, Stove Top, < 4 Hours]","[graham cracker crumbs, sugar, butter, sugar, ...",1.0,54.7375,2.4125,1.3625,...,1,1,1,1,1,1,1,1,1,1
4,45.0,Buttermilk Pie With Gingersnap Crumb Crust,1580,Pie,"[Dessert, Healthy, Weeknight, Oven, < 4 Hours]","[sugar, margarine, egg, flour, salt, buttermil...",4.0,28.5,0.8875,0.2125,...,1,1,1,1,1,1,1,1,1,1


**KNN**

Recipes

In [26]:
def preprocess_list_column(col):
    return col.str.replace(r'[\[\]"]', '', regex=True).str.replace(',', ' ').str.lower()

recipes_healthy['text'] = preprocess_list_column(recipes_healthy['RecipeIngredientParts'].astype(str)) + " " + \
                 preprocess_list_column(recipes_healthy['Keywords'].astype(str))

In [27]:
numeric_features = [
    "Calories", "FatContent", "SaturatedFatContent", "CarbohydrateContent",
    "CholesterolContent", "SodiumContent", "ProteinContent", "FiberContent",
    'SugarContent', "AggregatedRating", 'is_vegetarian', 'is_lactose_free',
    'is_vegan', 'is_gluten_free', 'is_nut_free', 'is_soy_free', 'is_shellfish_free', 'is_halal',
    'is_kosher', 'is_alcohol_free', 'is_high_fodmap'
]

tfidf = TfidfVectorizer(max_features=300, stop_words="english")
scaler = StandardScaler()

In [28]:
preprocessor = ColumnTransformer(transformers=[
    ('tfidf', tfidf, 'text'),
    ('num', scaler, numeric_features)
])

X = preprocessor.fit_transform(recipes_healthy)

In [29]:
model_knn = NearestNeighbors(metric='cosine', algorithm='auto')
model_knn.fit(X)

0,1,2
,n_neighbors,5
,radius,1.0
,algorithm,'auto'
,leaf_size,30
,metric,'cosine'
,p,2
,metric_params,
,n_jobs,


Reviews

In [30]:
og_reviews = pd.read_parquet(PATH_FOODCOM_REVIEWS)
og_reviews.head()

Unnamed: 0,ReviewId,RecipeId,AuthorId,AuthorName,Rating,Review,DateSubmitted,DateModified
0,2,992,2008,gayg msft,5,better than any you can get at a restaurant!,2000-01-25 21:44:00+00:00,2000-01-25 21:44:00+00:00
1,7,4384,1634,Bill Hilbrich,4,"I cut back on the mayo, and made up the differ...",2001-10-17 16:49:59+00:00,2001-10-17 16:49:59+00:00
2,9,4523,2046,Gay Gilmore ckpt,2,i think i did something wrong because i could ...,2000-02-25 09:00:00+00:00,2000-02-25 09:00:00+00:00
3,13,7435,1773,Malarkey Test,5,easily the best i have ever had. juicy flavor...,2000-03-13 21:15:00+00:00,2000-03-13 21:15:00+00:00
4,14,44,2085,Tony Small,5,An excellent dish.,2000-03-28 12:51:00+00:00,2000-03-28 12:51:00+00:00


In [31]:
reviews = og_reviews[["RecipeId", "AuthorId", "Rating"]].copy()

In [32]:
reviews_good = reviews[reviews["Rating"] >= 5]
reviews_count = reviews_good['AuthorId'].value_counts()
authors_to_keep = reviews_count[reviews_count >= 5].index
reviews = reviews[reviews['AuthorId'].isin(authors_to_keep)].copy()
reviews.reset_index(drop=True, inplace=True)

In [33]:
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()

reviews['user_idx'] = user_encoder.fit_transform(reviews['AuthorId'])
reviews['item_idx'] = item_encoder.fit_transform(reviews['RecipeId'])

sparse_ratings = csr_matrix(
    (reviews['Rating'], (reviews['user_idx'], reviews['item_idx']))
)

In [34]:
model_knn_ratings = NearestNeighbors(metric='cosine', algorithm='auto')
model_knn_ratings.fit(sparse_ratings)

0,1,2
,n_neighbors,5
,radius,1.0
,algorithm,'auto'
,leaf_size,30
,metric,'cosine'
,p,2
,metric_params,
,n_jobs,


In [35]:
def predecir_rating_usuario_sparse(user_id, recipe_id, k=5):
    if user_id not in user_encoder.classes_ or recipe_id not in item_encoder.classes_:
        return np.nan

    user_idx = user_encoder.transform([user_id])[0]
    recipe_idx = item_encoder.transform([recipe_id])[0]

    distances, indices = model_knn_ratings.kneighbors(sparse_ratings[user_idx], n_neighbors=k+1)

    sum_rating = 0
    sum_weights = 0

    for dist, idx in zip(distances[0][1:], indices[0][1:]):
        neighbor_rating = sparse_ratings[idx, recipe_idx]
        if neighbor_rating > 0:
            weight = 1 - dist
            sum_rating += neighbor_rating * weight
            sum_weights += weight

    return sum_rating / sum_weights if sum_weights > 0 else 0

In [36]:
def recomendar_hibrido_puntaje_sparse(user_id, receta_base_id, recipes_df, n=10, alpha=0.5):
    if receta_base_id not in recipes_df['RecipeId'].values:
        return None

    idx_base = recipes_df[recipes_df['RecipeId'] == receta_base_id].index[0]
    distances, indices = model_knn.kneighbors(X[idx_base], n_neighbors=15)

    resultados = []
    for idx, dist in zip(indices[0][1:], distances[0][1:]):
        rec = recipes_df.iloc[idx]
        similarity = 1 - dist
        pred_rating = predecir_rating_usuario_sparse(user_id, rec['RecipeId'])
        if not np.isnan(pred_rating):
            score = alpha * similarity + (1 - alpha) * (pred_rating / 5.0)
            resultados.append((rec['RecipeId'], rec['Name'], similarity, pred_rating, score))

    resultados.sort(key=lambda x: -x[4])
    top = resultados[:n]

    return pd.DataFrame(top, columns=["RecipeId", "Name", "Similarity", "PredictedRating", "HybridScore"])

In [37]:
def get_all_recs(reviews, recipes):
    recommendations_list = []
    total_recommendations = []

    for author_id in reviews['AuthorId'].unique():
        user_reviews = reviews[reviews['AuthorId'] == author_id]
        user_reviews = user_reviews[user_reviews["Rating"] >= 5].head()
        if len(user_reviews) == 0:
            continue

        for recipe_id in user_reviews['RecipeId'].unique():
            recommendations = recomendar_hibrido_puntaje_sparse(user_id=author_id,
                                                                receta_base_id=recipe_id,
                                                                recipes_df=recipes, n=5)
            if recommendations is not None:
                recommendations['AuthorId'] = author_id
                recommendations_list.append(recommendations)

        if recommendations_list:
            all_recommendations = pd.concat(recommendations_list, ignore_index=True)
            top_recommendations = all_recommendations.sort_values(by=['HybridScore'], ascending=[False])
            top_5_per_user = top_recommendations.head()
            total_recommendations.append(top_5_per_user)
            recommendations_list = []

    total_recommendations_df = pd.concat(total_recommendations, ignore_index=True)
    return total_recommendations_df

In [38]:
num_authors = int(len(reviews['AuthorId'].unique()) * 0.1)
sampled_authors = np.random.choice(reviews['AuthorId'].unique(), size=num_authors, replace=False)
sampled_reviews = reviews[reviews['AuthorId'].isin(sampled_authors)].copy()

In [39]:
recs = get_all_recs(sampled_reviews, recipes_healthy)

KeyboardInterrupt: 

In [None]:
recs

Columns: ['RecipeId', 'Name', 'Similarity', 'PredictedRating', 'HybridScore', 'AuthorId']
Shape: (10494, 6)

First 5 rows:
   RecipeId                                               Name  Similarity  \
0     22030          Angel Hair Pasta with Chicken and Veggies    0.981333   
1    216975                      Angel Hair Pasta With Chicken    0.963026   
2     66318                       Creamy Chicken &amp; Spinach    0.956978   
3    116796                     Garlic Chicken Pasta Primavera    0.954323   
4     80971  Tuna Pasta with Artichoke and Semi-Sun-Dried T...    0.942461   

   PredictedRating  HybridScore  AuthorId  
0              0.0     0.490666      1634  
1              0.0     0.481513      1634  
2              0.0     0.478489      1634  
3              0.0     0.477162      1634  
4              0.0     0.471231      1634  

Unique authors: 2099
Total recommendations per user stats:
count    2099.000000
mean        4.999524
std         0.021827
min         4.000000


In [None]:
recs.to_csv('knn_recommendations.csv', index=False)

In [40]:
recs = pd.read_csv('knn_recommendations.csv')

In [53]:
def recall_at_k(recommendations, k=10):
    """
    Calculate recall at k for the recommendations.
    
    Recall@k = (Number of relevant items recommended in top-k) / (Total number of relevant items)
    
    Args:
        recommendations: DataFrame with columns ['RecipeId', 'AuthorId', 'HybridScore', ...]
        k: Number of top recommendations to consider
    
    Returns:
        Average recall@k across all users
    """
    total_recall = 0
    total_users = 0
    
    # Group recommendations by user
    for author_id in recommendations['AuthorId'].unique():
        # Get user's recommendations sorted by score
        user_recs = recommendations[recommendations['AuthorId'] == author_id].copy()
        user_recs = user_recs.sort_values('HybridScore', ascending=False).head(k)
        
        # Get user's test items (items they actually rated highly)
        user_test_items = reviews[
            (reviews['AuthorId'] == author_id) & 
            (reviews['Rating'] >= 4)  # Consider rating >= 4 as relevant
        ]['RecipeId'].unique()
        
        if len(user_test_items) == 0:
            continue  # Skip users with no relevant items
            
        # Count how many recommended items are relevant
        recommended_items = set(user_recs['RecipeId'].values)
        relevant_items = set(user_test_items)
        
        relevant_recommended = len(recommended_items.intersection(relevant_items))
        total_relevant = len(relevant_items)
        
        # Calculate recall for this user
        user_recall = relevant_recommended / total_relevant if total_relevant > 0 else 0
        
        total_recall += user_recall
        total_users += 1
    
    # Return average recall across all users
    return total_recall / total_users if total_users > 0 else 0


def precision_at_k(recommendations, k=10):
    """
    Calculate precision at k for the recommendations.
    
    Precision@k = (Number of relevant items recommended in top-k) / k
    
    Args:
        recommendations: DataFrame with columns ['RecipeId', 'AuthorId', 'HybridScore', ...]
        k: Number of top recommendations to consider
    
    Returns:
        Average precision@k across all users
    """
    total_precision = 0
    total_users = 0
    
    # Group recommendations by user
    for author_id in recommendations['AuthorId'].unique():
        # Get user's recommendations sorted by score
        user_recs = recommendations[recommendations['AuthorId'] == author_id].copy()
        user_recs = user_recs.sort_values('HybridScore', ascending=False).head(k)
        
        # Get user's test items (items they actually rated highly)
        user_test_items = reviews[
            (reviews['AuthorId'] == author_id) & 
            (reviews['Rating'] >= 4)  # Consider rating >= 4 as relevant
        ]['RecipeId'].unique()
        
        if len(user_test_items) == 0:
            continue  # Skip users with no relevant items
            
        # Count how many recommended items are relevant
        recommended_items = set(user_recs['RecipeId'].values)
        relevant_items = set(user_test_items)
        
        relevant_recommended = len(recommended_items.intersection(relevant_items))
        total_recommended = len(recommended_items)
        
        # Calculate precision for this user
        user_precision = relevant_recommended / total_recommended if total_recommended > 0 else 0
        
        total_precision += user_precision
        total_users += 1
    
    # Return average precision across all users
    return total_precision / total_users if total_users > 0 else 0

In [55]:
# Modelo 
recall_base = recall_at_k(recs, k=10)
precision_base = precision_at_k(recs, k=10)
print(f"Precision at 10: {precision_base:.4f}")
print(f"Recall at 10: {recall_base:.4f}")


Precision at 10: 0.0024
Recall at 10: 0.0010
