In [2]:
import pandas as pd
from lenskit import Recommender
from lenskit.algorithms.user_knn import UserUser
from lenskit.algorithms.item_knn import ItemItem
from lenskit.algorithms.bias import Bias

import pickle

In [3]:
ratings = pd.read_csv("./dataset/XWines_Slim_150K_ratings.csv")
wines = pd.read_csv("./dataset/XWines_Slim_1K_wines.csv")

  ratings = pd.read_csv("./dataset/XWines_Slim_150K_ratings.csv")


In [4]:
ratings = ratings.rename(columns={'UserID': 'user', 'WineID': 'item', 'Rating': 'rating', 'Date': 'timestamp'})
wines = wines.rename(columns={'WineID': 'item'})

In [5]:
ratings_data = ratings[['user', 'item', 'rating', 'timestamp']]


In [6]:
display(ratings_data.head)

<bound method NDFrame.head of            user    item  rating            timestamp
0       1356810  103471     4.5  2021-11-02 20:52:59
1       1173759  111415     5.0  2015-08-20 17:46:26
2       1164877  111395     5.0  2020-11-13 05:40:26
3       1207665  111433     5.0  2017-05-05 06:44:13
4       1075841  111431     5.0  2016-09-14 20:18:38
...         ...     ...     ...                  ...
149995  1000052  111468     4.5  2021-12-22 21:03:51
149996  1180844  111461     4.0  2017-04-23 21:07:55
149997  1218581  113690     3.5  2019-04-14 17:45:08
149998  1106198  111468     4.5  2021-07-10 07:00:15
149999  1059173  111479     5.0  2019-09-08 15:42:28

[150000 rows x 4 columns]>

In [7]:
# bias_model = Bias(items=True, users=True, damping=0.0)
# recs_bias = Recommender.adapt(bias_model)
# recs_bias.fit(ratings_data)

In [8]:
# with open('simple_bias_slim.pkl', 'wb') as file:
#     pickle.dump(recs_bias, file)

In [9]:
# with open('models/simple_uu_slim.pkl', 'rb') as file:
#     loaded_recs = pickle.load(file)

In [11]:
from Recommender import Recommender
# Create an instance of the Recommender class
recommender = Recommender()
recommender.load_model("user-user")

# Randomly sample 5 rows from ratings_data
test_data = ratings_data[['user']].sample(n=5, random_state=42)

# Test the predict method
predictions_df = recommender.predict(test_data)
print(predictions_df)


INFO:root:Loading model: user-user


KeyError: "['item'] not in index"

In [12]:
recommender = Recommender()
recommender.load_model("user-user")  # or "item-item"
group_recommendations = recommender.group_recommend(user_ids=[1356810, 1180844], n=5)


INFO:root:Loading model: user-user


In [13]:
print(group_recommendations)

     item     score     user
0  182711  4.775110  1356810
1  183379  4.766891  1356810
2  144337  4.766091  1356810
3  175514  4.737082  1356810
4  192584  4.735993  1356810
5  184745  4.634452  1180844
6  112664  4.616522  1180844
7  104821  4.525125  1180844
8  194674  4.500739  1180844
9  112084  4.497325  1180844


In [14]:
from typing import List, Dict

def generate_personalized_explanation(user_id: int, recommended_wine: pd.Series, user_ratings: pd.DataFrame, all_wines: pd.DataFrame) -> Dict[str, str]:
    """
    Generate a personalized explanation for a wine recommendation.

    Args:
        user_id (int): The ID of the user receiving the recommendation.
        recommended_wine (pd.Series): The recommended wine's features.
        user_ratings (pd.DataFrame): The user's rating history.
        all_wines (pd.DataFrame): DataFrame containing all wines and their features.

    Returns:
        Dict[str, str]: A dictionary of personalized explanations for different features.
    """
    liked_wines = all_wines[all_wines['item'].isin(user_ratings[user_ratings['rating'] >= 4]['item'])]
    
    explanations = {}
    
    if len(liked_wines) > 2:
        # Function to check if a feature is present in more than 80% of liked wines
        def is_frequent_feature(feature_name):
            feature_counts = liked_wines[feature_name].value_counts(normalize=True)
            return feature_counts[feature_counts >= 0.8].index.tolist()
        
        # Strategy for string features: Type, Elaborate, Body, Acidity, Country, RegionName, WineryName
        string_features = ['Type', 'Elaborate', 'Body', 'Acidity', 'Country', 'RegionName', 'WineryName']
        for feature in string_features:
            frequent_values = is_frequent_feature(feature)
            if recommended_wine[feature] in frequent_values:
                explanations[feature] = f"You've shown a strong preference for {recommended_wine[feature]} in {feature.lower()}, which this wine matches."
        
        # Strategy for ABV
        avg_abv = liked_wines['ABV'].mean()
        if abs(recommended_wine['ABV'] - avg_abv) <= 1:
            explanations['ABV'] = f"This wine's ABV ({recommended_wine['ABV']}%) is similar to your preferred average of {avg_abv:.1f}%."
        
        # Strategy for Grapes and Harmonize
        def process_list_feature(feature_name):
            user_items = [item for items in liked_wines[feature_name] for item in items]
            recommended_items = recommended_wine[feature_name]
            common_items = set(user_items) & set(recommended_items)
            if common_items:
                if feature_name == 'Grapes':
                    return f"This wine features {', '.join(common_items)} grapes, which are also present among your favourite wines."
                elif feature_name == 'Harmonize':
                    return f"This wine harmonizes well with {', '.join(common_items)}, which you've enjoyed in other wines."

            return None
        
        for feature in ['Grapes', 'Harmonize']:
            explanation = process_list_feature(feature)
            if explanation:
                explanations[feature] = explanation
    
    else:
        explanations['general'] = "We don't have enough information about your preferences yet, but we think you might enjoy this wine based on its overall characteristics."
    
    return explanations

In [15]:
import ast

wines['Grapes'] = wines['Grapes'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
wines['Harmonize'] = wines['Harmonize'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

In [16]:
def test_explanation_generation_for_user(recommender: Recommender, wines: pd.DataFrame, rating_data: pd.DataFrame, user_id: int):
    """
    Test the generate_personalized_explanation function for a specific user.

    Args:
        recommender (Recommender): An instance of the Recommender class.
        wines (pd.DataFrame): DataFrame containing all wines and their features.
        rating_data (pd.DataFrame): DataFrame containing all users' ratings.
        user_id (int): The specific user ID to test.
    """
    print(f"Testing explanations for User ID: {user_id}")

    # Generate recommendations for this user
    n_recommendations = 3
    recommendations = recommender.recommend(user_id, n_recommendations)

    # Get user ratings
    user_ratings = rating_data[rating_data['user'] == user_id]

    # Generate and print explanations for each recommendation
    for _, recommendation in recommendations.iterrows():
        wine_id = recommendation['item']
        recommended_wine = wines[wines['item'] == wine_id].iloc[0]
        
        explanations = generate_personalized_explanation(user_id, recommended_wine, user_ratings, wines)
        
        print(f"\nExplanations for recommended wine (ID: {wine_id}):")
        for feature, explanation in explanations.items():
            if explanation:  # Only print non-empty explanations
                print(f"  {feature}: {explanation}")

In [17]:

recommender.load_model("user-user")

test_data = ratings_data[['user']].sample(n=5, random_state=42)
for user_id in test_data['user']:
    test_explanation_generation_for_user(recommender, wines, ratings_data, user_id)

INFO:root:Loading model: user-user


Testing explanations for User ID: 1234318

Explanations for recommended wine (ID: 112084.0):
  Acidity: You've shown a strong preference for High in acidity, which this wine matches.
  ABV: This wine's ABV (12.5%) is similar to your preferred average of 13.4%.
  Grapes: This wine features Chardonnay, Pinot Noir grapes, which are also present among your favourite wines.
  Harmonize: This wine harmonizes well with Soft Cheese, Rich Fish, Shellfish, Pork, which you've enjoyed in other wines.

Explanations for recommended wine (ID: 111415.0):
  Acidity: You've shown a strong preference for High in acidity, which this wine matches.
  ABV: This wine's ABV (13.0%) is similar to your preferred average of 13.4%.
  Grapes: This wine features Merlot, Cabernet Sauvignon grapes, which are also present among your favourite wines.
  Harmonize: This wine harmonizes well with Lamb, Game Meat, Poultry, Beef, which you've enjoyed in other wines.

Explanations for recommended wine (ID: 113321.0):
  Acidit

In [29]:
import random

def random_explanation(recommended_wine: pd.Series) -> str:
    """
    Generate a random explanation for a wine recommendation.

    Args:
        recommended_wine (pd.Series): The recommended wine's features.

    Returns:
        str: A randomly generated explanation for the wine recommendation.
    """
    explanation_types = [
        lambda: f"We recommend this wine because it's a {recommended_wine['Type'].lower()} wine which you might like.",
        lambda: f"This wine has an ABV of {recommended_wine['ABV']}%, which many people enjoy.",
        lambda: f"Coming from {recommended_wine['Country']}, this wine offers a unique taste experience.",
        lambda: f"The {recommended_wine['Acidity'].lower()} acidity of this wine complements many dishes.",
        lambda: f"This wine features {', '.join(recommended_wine['Grapes'][:2])} grapes, creating a delightful flavor profile.",
        lambda: f"Many people have enjoyed wines from the {recommended_wine['RegionName']} region.",
        lambda: f"The {recommended_wine['Elaborate']} wine has received positive feedback from other users.",
        lambda: f"The {recommended_wine['WineryName']} winery is known for producing high-quality wines.",
        lambda: f"This wine pairs well with {', '.join(recommended_wine['Harmonize'][:2])}, making it versatile for various occasions."
    ]

    # Randomly select 1 to 3 explanations
    num_explanations = random.randint(1, 3)
    selected_explanations = random.sample(explanation_types, num_explanations)
    
    return " ".join(explanation() for explanation in selected_explanations)

In [31]:
def test_random_explanation_generation(recommender: Recommender, wines: pd.DataFrame, rating_data: pd.DataFrame, num_users: int = 5):
    """
    Test the random_explanation function for a set of random users.

    Args:
        recommender (Recommender): An instance of the Recommender class.
        wines (pd.DataFrame): DataFrame containing all wines and their features.
        rating_data (pd.DataFrame): DataFrame containing all users' ratings.
        num_users (int): The number of random users to test.
    """
    print(f"Testing random explanations for {num_users} users")

    # Get random user IDs
    random_users = rating_data['user'].sample(n=num_users, random_state=42)

    for user_id in random_users:
        print(f"\nTesting explanations for User ID: {user_id}")

        # Generate recommendations for this user
        n_recommendations = 3
        recommendations = recommender.recommend(user_id, n_recommendations)

        # Generate and print random explanations for each recommendation
        for _, recommendation in recommendations.iterrows():
            wine_id = recommendation['item']
            recommended_wine = wines[wines['item'] == wine_id].iloc[0]
            
            explanation = random_explanation(recommended_wine)
            
            print(f"\nRandom explanation for recommended wine (ID: {wine_id}):")
            print(f"  {explanation}")


In [32]:
test_random_explanation_generation(recommender, wines, ratings_data, num_users=5)

Testing random explanations for 5 users

Testing explanations for User ID: 1234318

Random explanation for recommended wine (ID: 112084.0):
  We recommend this wine because it's a sparkling wine which you might like.

Random explanation for recommended wine (ID: 111415.0):
  Coming from France, this wine offers a unique taste experience. The high acidity of this wine complements many dishes. The Assemblage/Bordeaux Red Blend wine has received positive feedback from other users.

Random explanation for recommended wine (ID: 113321.0):
  We recommend this wine because it's a sparkling wine which you might like. The high acidity of this wine complements many dishes.

Testing explanations for User ID: 1210668

Random explanation for recommended wine (ID: 186159.0):
  This wine features Syrah/Shiraz grapes, creating a delightful flavor profile.

Random explanation for recommended wine (ID: 112084.0):
  The high acidity of this wine complements many dishes. The Assemblage/Blend wine has rece