In [1]:
import ast
import json

import numpy as np
import pandas as pd
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
class CoffeeRecommender:
    """Recipe recommender that filters by feasibility and ranks with one of two models.

    A recipe is *feasible* for a user when its required equipment and required
    products are subsets of what the user owns / has available. Feasible
    recipes are then scored either by cosine similarity between the user's
    taste preferences and the recipe's taste profile (cold-start users), or by
    ratings reconstructed from a truncated SVD of the user x recipe rating
    matrix (users with training history).
    """

    # Rating substituted for interactions whose rating is missing.
    DEFAULT_RATING = 2.5

    def __init__(self, recipes_path, users_path, interactions_path, cold_users_path,
                 n_components=20):
        """Load the data files and fit both scoring models.

        Args:
            recipes_path: CSV of recipes. Columns read by this class:
                ``recipe_id``, ``name``, ``difficulty``,
                ``preparation_time_minutes``, ``required_equipment``,
                ``required_products`` and ``taste_<feature>``.
            users_path: CSV of users. Columns read: ``user_id``,
                ``owned_equipment``, ``available_products`` and
                ``taste_pref_<feature>``.
            interactions_path: CSV with ``user_id``, ``recipe_id`` and
                ``rating`` columns.
            cold_users_path: JSON file with the ids of cold-start users.
            n_components: Upper bound on the number of SVD latent factors;
                capped at the number of recipe columns in the rating matrix.
        """
        self.recipes = pd.read_csv(recipes_path)
        self.users = pd.read_csv(users_path)
        self.interactions = pd.read_csv(interactions_path)
        self.cold_users = self._load_cold_users(cold_users_path)
        self.n_components = n_components

        self._preprocess_features()

        self._build_content_model()
        self._build_collaborative_model()

    @staticmethod
    def _load_cold_users(cold_users_path):
        """Return the set of cold-start user ids stored in ``cold_users_path``.

        pandas is tried first (taking column ``0`` of the parsed frame); if
        that fails for any reason the file is parsed with the ``json`` module
        and the top-level container is turned into a set.
        """
        try:
            return set(pd.read_json(cold_users_path)[0].tolist())
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
            # SystemExit still propagate instead of triggering the fallback.
            with open(cold_users_path, 'r', encoding='utf-8') as f:
                return set(json.load(f))

    @staticmethod
    def _parse_set(value):
        """Parse a Python-literal string into a set; non-strings give an empty set.

        ``set()`` over a dict literal yields its keys, so the same helper
        serves list-like columns and the dict-valued ``required_products``.
        """
        if not isinstance(value, str):
            # Missing cells are read by pandas as float NaN, not str.
            return set()
        return set(ast.literal_eval(value))

    def _preprocess_features(self):
        """Add parsed ``*_set`` columns and index ``self.users`` by ``user_id``."""
        self.recipes['required_equipment_set'] = (
            self.recipes['required_equipment'].apply(self._parse_set))
        self.recipes['required_products_set'] = (
            self.recipes['required_products'].apply(self._parse_set))

        self.users['owned_equipment_set'] = (
            self.users['owned_equipment'].apply(self._parse_set))
        self.users['available_products_set'] = (
            self.users['available_products'].apply(self._parse_set))

        # drop=False keeps ``user_id`` available as a regular column as well.
        self.users = self.users.set_index('user_id', drop=False)

    def _build_content_model(self):
        """Record the taste feature names and cache the recipe taste matrix."""
        self.taste_features = ['bitterness', 'sweetness', 'acidity', 'body']

        self.recipe_taste_matrix = self.recipes[
            ['taste_' + f for f in self.taste_features]].values

    def _build_collaborative_model(self):
        """Fit a truncated SVD on the user x recipe rating matrix.

        Missing ratings are replaced by ``DEFAULT_RATING``; user/recipe pairs
        with no interaction at all are filled with 0 after pivoting.
        """
        # Fill only the rating column: a frame-wide fillna would also turn
        # missing user or recipe ids into the literal key 2.5.
        rated_interactions = self.interactions.assign(
            rating=self.interactions['rating'].fillna(self.DEFAULT_RATING))

        self.rating_matrix = rated_interactions.pivot_table(
            index='user_id', columns='recipe_id', values='rating'
        ).fillna(0)

        # TruncatedSVD (randomized solver) rejects n_components > n_features,
        # so cap the requested rank at the number of recipe columns.
        n_components = max(1, min(self.n_components, self.rating_matrix.shape[1]))
        self.svd = TruncatedSVD(n_components=n_components, random_state=42)
        self.user_factors = self.svd.fit_transform(self.rating_matrix)
        self.item_factors = self.svd.components_

        self.user_id_map = {uid: i for i, uid in enumerate(self.rating_matrix.index)}
        self.recipe_ids_cf = self.rating_matrix.columns.tolist()

    def _feasible_recipes(self, user_row):
        """Return a copy of the recipes whose requirements ``user_row`` satisfies."""
        user_equip = user_row['owned_equipment_set']
        user_products = user_row['available_products_set']

        # Iterating the two set columns directly avoids a row-wise
        # DataFrame.apply on every recommendation request.
        mask = [
            equipment.issubset(user_equip) and products.issubset(user_products)
            for equipment, products in zip(self.recipes['required_equipment_set'],
                                           self.recipes['required_products_set'])
        ]
        mask = pd.Series(mask, index=self.recipes.index, dtype=bool)
        return self.recipes[mask].copy()

    def _taste_scores(self, user_row, feasible_recipes):
        """Cosine similarity between the user's taste prefs and each recipe."""
        pref_cols = ['taste_pref_' + f for f in self.taste_features]
        taste_cols = ['taste_' + f for f in self.taste_features]

        # A row taken from a mixed-type frame has object dtype; cast explicitly.
        user_pref_vec = user_row[pref_cols].to_numpy(dtype=float).reshape(1, -1)
        recipe_taste_vecs = feasible_recipes[taste_cols].to_numpy(dtype=float)
        return cosine_similarity(user_pref_vec, recipe_taste_vecs)[0]

    def get_recommendations(self, user_id, n_recommendations=5):
        """Return the top-scoring feasible recipes for ``user_id``.

        Args:
            user_id: Id looked up in the ``user_id`` index of ``self.users``.
            n_recommendations: Maximum number of rows to return.

        Returns:
            * ``str`` with an error message when the user is unknown;
            * a one-row ``DataFrame`` with a single ``Message`` column when no
              recipe is feasible for the user;
            * otherwise a ``DataFrame`` with columns ``recipe_id``, ``name``,
              ``score`` (rounded to 3 decimals), ``difficulty``,
              ``preparation_time_minutes`` and ``method``, best score first.
        """
        try:
            user_row = self.users.loc[user_id]
        except KeyError:
            return f"Error: User {user_id} not found in database."

        feasible_recipes = self._feasible_recipes(user_row)

        if feasible_recipes.empty:
            # Built directly: DataFrame.append no longer exists in pandas >= 2.0.
            return pd.DataFrame(
                {'Message': ['No recipes match your equipment/ingredients.']})

        # Users without a row in the rating matrix cannot be scored by the SVD.
        is_cold = (user_id in self.cold_users) or (user_id not in self.user_id_map)

        if is_cold:
            method_name = "Taste Match (Cold Start)"
            feasible_recipes['score'] = self._taste_scores(user_row, feasible_recipes)
        else:
            method_name = "Collaborative Filtering (History)"

            u_idx = self.user_id_map[user_id]
            pred_ratings_all = np.dot(self.user_factors[u_idx, :], self.item_factors)
            pred_map = dict(zip(self.recipe_ids_cf, pred_ratings_all))

            # Recipes absent from the training interactions get a score of 0.
            feasible_recipes['score'] = (
                feasible_recipes['recipe_id'].map(pred_map).fillna(0))

        top_n = feasible_recipes.sort_values('score', ascending=False).head(n_recommendations)

        output = top_n[['recipe_id', 'name', 'score', 'difficulty',
                        'preparation_time_minutes']].copy()
        output['method'] = method_name
        output['score'] = output['score'].round(3)

        return output

    def evaluate_ndcg(self, test_interactions, k=5):
        """Mean NDCG@k of ``get_recommendations`` over the users in ``test_interactions``.

        Args:
            test_interactions: DataFrame with ``user_id``, ``recipe_id`` and
                ``rating`` columns; the ratings serve as relevance values.
            k: Number of recommendations requested and scored per user.

        Returns:
            The mean per-user NDCG@k, or ``0.0`` when there are no users.
            Recommended recipes missing from a user's test ratings count as
            relevance 0; users for whom no recommendation frame is produced
            contribute a score of ``0.0``.
        """
        ndcg_scores = []

        # One groupby pass instead of re-filtering the whole frame per user;
        # sort=False keeps first-appearance order and avoids comparing keys.
        for user_id, user_truth in test_interactions.groupby('user_id', sort=False):
            truth_dict = dict(zip(user_truth['recipe_id'], user_truth['rating']))

            recs_df = self.get_recommendations(user_id, n_recommendations=k)

            if isinstance(recs_df, str) or 'Message' in recs_df.columns or recs_df.empty:
                ndcg_scores.append(0.0)
                continue

            actual_relevances = [truth_dict.get(rid, 0) for rid in recs_df['recipe_id']]
            ideal_relevances = sorted(truth_dict.values(), reverse=True)

            ndcg_scores.append(self._ndcg_at_k(actual_relevances, ideal_relevances, k))

        return np.mean(ndcg_scores) if ndcg_scores else 0.0

    @staticmethod
    def _ndcg_at_k(relevances, ideal_relevances, k):
        """Return DCG@k of ``relevances`` divided by DCG@k of ``ideal_relevances``.

        Uses linear gain with a ``log2(rank + 1)`` discount (ranks start at 1).
        Returns ``0.0`` when the ideal DCG is not positive.
        """
        relevances = relevances[:k]
        ideal_relevances = ideal_relevances[:k]

        dcg = sum(rel / np.log2(i + 2) for i, rel in enumerate(relevances))
        idcg = sum(rel / np.log2(i + 2) for i, rel in enumerate(ideal_relevances))

        return dcg / idcg if idcg > 0 else 0.0


In [3]:
# Directory holding the CSV/JSON inputs; a relative path, so it is resolved
# against the kernel's current working directory.
DATA_DIR = '../student_data'

# Loads every file and fits both models up front (see CoffeeRecommender.__init__).
rec_sys = CoffeeRecommender(
    recipes_path=f'{DATA_DIR}/recipes.csv',
    users_path=f'{DATA_DIR}/users.csv',
    interactions_path=f'{DATA_DIR}/interactions_train.csv',
    cold_users_path=f'{DATA_DIR}/cold_users.json'
)

def interact_with_app(user_id, recommender=None):
    """Print a banner followed by the recommendations for ``user_id``.

    Args:
        user_id: Id passed to ``get_recommendations``.
        recommender: Object exposing ``get_recommendations(user_id)``.
            Defaults to the notebook-level ``rec_sys`` when omitted.

    The recommender may return an error string, a frame with a ``Message``
    column, or a regular recommendations frame; each is printed accordingly.
    """
    if recommender is None:
        # Resolved at call time so the default follows the current rec_sys.
        recommender = rec_sys

    print(f"\n--- Recommendations for {user_id} ---")
    recs = recommender.get_recommendations(user_id)
    if isinstance(recs, str):
        print(recs)
    elif 'Message' in recs.columns:
        print(recs.iloc[0]['Message'])
    else:
        print(recs.to_string(index=False))


# Demo run for two sample users; each call prints its own banner and table.
for demo_user_id in ('user_00887', 'user_00192'):
    interact_with_app(demo_user_id)


--- Recommendations for user_00887 ---
                               recipe_id                          name  score   difficulty  preparation_time_minutes                   method
               recipe_affogato_decaf_105                Affogato Decaf  0.982     beginner                         3 Taste Match (Cold Start)
                     recipe_affogato_029                      Affogato  0.977     beginner                         3 Taste Match (Cold Start)
       recipe_affogato_with_cinnamon_074        Affogato with Cinnamon  0.965     beginner                         3 Taste Match (Cold Start)
          recipe_vienna_coffee_small_103           Vienna Coffee Small  0.930 intermediate                         6 Taste Match (Cold Start)
recipe_vienna_coffee_with_extra_shot_089 Vienna Coffee with Extra Shot  0.925 intermediate                         6 Taste Match (Cold Start)

--- Recommendations for user_00192 ---
                           recipe_id                      name  scor

In [4]:
# NDCG@5 on the validation split. Only missing ratings are imputed (2.5);
# a frame-wide fillna would also rewrite missing user/recipe ids to 2.5.
interactions = pd.read_csv('../student_data/interactions_val.csv').fillna({'rating': 2.5})
rec_sys.evaluate_ndcg(test_interactions=interactions)

np.float64(0.5154746497720194)

In [5]:
# NDCG@5 on the cold-user validation split. Only missing ratings are imputed
# (2.5); a frame-wide fillna would also rewrite missing user/recipe ids to 2.5.
interactions = pd.read_csv('../student_data/interactions_val_cold.csv').fillna({'rating': 2.5})
rec_sys.evaluate_ndcg(test_interactions=interactions)

np.float64(0.41526512777497615)