In [2]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_score, recall_score, average_precision_score
from surprise import Dataset, Reader, KNNBasic
from surprise.model_selection import train_test_split

In [6]:
class HybridRecommendationModel:
    def __init__(self, customers_file, products_file, ratings_file, n_neighbors=5, metric='euclidean'):
        self.customers_file = customers_file
        self.products_file = products_file
        self.ratings_file = ratings_file
        self.n_neighbors = n_neighbors
        self.metric = metric
        self.label_encoder = LabelEncoder()
        self.knn_model = None
        self.merged_df = None

    def load_data(self):
        customers_df = pd.read_json(self.customers_file)
        products_df = pd.read_json(self.products_file)
        ratings_df = pd.read_json(self.ratings_file)

        customers_df.rename(columns={'Id': 'CustomerID'}, inplace=True)
        products_df.rename(columns={'Id': 'ProductID'}, inplace=True)

        # Merge ratings data with customer and product data
        self.merged_df = ratings_df.merge(customers_df, on='CustomerID')
        self.merged_df = self.merged_df.merge(products_df, on='ProductID')

        # Encode categorical features like 'age' and 'region'
        self.merged_df['Age'] = self.label_encoder.fit_transform(self.merged_df['Age'])
        self.merged_df['Region'] = self.label_encoder.fit_transform(self.merged_df['Region'])
        self.merged_df['Category'] = self.label_encoder.fit_transform(self.merged_df['Category'])

    def train_content_based_model(self):
        X = self.merged_df[['Category', 'Age', 'Region']]
        self.knn_model = NearestNeighbors(n_neighbors=self.n_neighbors, metric=self.metric, algorithm='brute')
        self.knn_model.fit(X)

    def hybrid_recommendations(self, user_id, num_recommendations=5):
        if self.knn_model is None:
            raise Exception("Content-based model has not been trained. Call train_content_based_model() first.")
        
        # Use collaborative filtering to get initial recommendations (replace this with actual collaborative filtering)
        collaborative_filtering_recommendations = self.get_collaborative_filtering_recommendations(user_id, num_recommendations)

        # For each collaborative filtering recommendation, get content-based recommendations
        hybrid_recommendations = []
        for product_id in collaborative_filtering_recommendations:
            product_features = self.get_product_features(product_id)
            content_based_recommendations = self.get_content_based_recommendations(product_features, num_recommendations)
            hybrid_recommendations.extend(content_based_recommendations)

        # Remove duplicates and keep the top N recommendations
        unique_hybrid_recommendations = list(set(hybrid_recommendations))
        return unique_hybrid_recommendations[:num_recommendations]

    def evaluate_recommendations(self, user_id, true_product_ids, num_recommendations=5):
        hybrid_recommendations = self.hybrid_recommendations(user_id, num_recommendations)

        # Evaluate recommendations
        hits = [product_id in hybrid_recommendations for product_id in true_product_ids]
        precision = sum(hits) / num_recommendations
        recall = sum(hits) / len(true_product_ids)
        mrr = self.calculate_mrr(hits)
        ndcg = self.calculate_ndcg(hybrid_recommendations, true_product_ids)

        return precision, recall, mrr, ndcg

    def calculate_mrr(self, hits):
        for i, hit in enumerate(hits):
            if hit:
                return 1 / (i + 1)
        return 0

    def calculate_ndcg(self, recommended_products, true_product_ids):
        dcg = 0
        idcg = sum(1 / np.log2(i + 2) for i in range(len(true_product_ids)))
        for i, product_id in enumerate(recommended_products):
            if product_id in true_product_ids:
                dcg += 1 / np.log2(i + 2)
        return dcg / idcg

    def get_collaborative_filtering_recommendations(self, user_id, num_recommendations):
        # Implement collaborative filtering recommendation generation here
        # You can use methods such as matrix factorization or user-item collaborative filtering to generate recommendations
        # Return a list of recommended product IDs
        # Load data for Surprise
        reader = Reader(rating_scale=(1, 5))
        data = Dataset.load_from_df(self.merged_df[['CustomerID', 'ProductID','Category', 'Age', 'Region', 'Rate']], reader)

        # Split data into train and test sets
        trainset, testset = train_test_split(data, test_size=0.2)

        # Build and train the KNNBasic collaborative filtering model
        sim_options = {'name': 'cosine', 'user_based': True}
        self.cf_model = KNNBasic(sim_options=sim_options)
        self.cf_model.fit(trainset)

    def get_content_based_recommendations(self, product_features, num_recommendations):
        if self.knn_model is None:
            raise Exception("Content-based model has not been trained. Call train_content_based_model() first.")
        
        distances, indices = self.knn_model.kneighbors([product_features], n_neighbors=num_recommendations)
        recommended_products = []
        for index in indices[0]:
            recommended_products.append(self.merged_df.iloc[index]['ProductID'])
        return recommended_products

    def get_product_features(self, product_id):
        # Retrieve product features (category, age, region) based on product_id
        product_data = self.merged_df[self.merged_df['ProductID'] == product_id]
        product_features = product_data[['Category', 'Age', 'Region']].values[0]
        return product_features


In [7]:
if __name__ == "__main__":
    # Example usage of the HybridRecommendationModel class
    customers_file = 'input/dataset/customers.json'
    products_file = 'input/dataset/products.json'
    ratings_file = 'input/dataset/ratings.json'

    hybrid_model = HybridRecommendationModel(customers_file, products_file, ratings_file)
    hybrid_model.load_data()
    hybrid_model.train_content_based_model()
    # Implement training of collaborative filtering model here (not shown in this example)

    # Example: Evaluate hybrid recommendations for a specific user
    user_id = 1  # Replace with the actual user ID
    true_product_ids = [2, 5, 8, 10]  # Replace with the actual true product IDs for the user
    num_recommendations = 5

    precision, recall, mrr, ndcg = hybrid_model.evaluate_recommendations(user_id, true_product_ids, num_recommendations)

    # Display evaluation results
    print("Precision:", precision)
    print("Recall:", recall)
    print("Mean Reciprocal Rank (MRR):", mrr)
    print("Normalized Discounted Cumulative Gain (NDCG):", ndcg)

ValueError: too many values to unpack (expected 3)