In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, recall_score, f1_score, confusion_matrix
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
import json


In [2]:
class CollaborativeFilteringRecommendation:
    """Predict a rating with a k-nearest-neighbours classifier.

    The classifier is fitted on label-encoded customer/product attributes
    (``FEATURE_COLUMNS``) and predicts the ``Rate`` column of the ratings data.

    Intended call order::

        load_data(...) -> preprocess_data() -> train_model() -> evaluate_model()

    Attributes set along the way:
        customer_data, product_data, rating_data: parsed JSON payloads
            (set by ``load_data``).
        label_encoders: ``{column: fitted LabelEncoder}`` (set by
            ``preprocess_data``).
        train_data, test_data: the 80/20 split of the merged frame
            (set by ``preprocess_data``).
        is_trained: ``True`` once ``train_model`` has fitted the classifier.
    """

    # Columns fed to the classifier. Kept in one place so training and
    # evaluation always use exactly the same features, in the same order.
    FEATURE_COLUMNS = ('Age', 'Region', 'Category', 'genre')
    # Column holding the label the classifier learns to predict.
    TARGET_COLUMN = 'Rate'

    def __init__(self, k_neighbors=5):
        """Create an untrained recommender.

        Args:
            k_neighbors: number of neighbours the KNN classifier votes over.
        """
        self.k_neighbors = k_neighbors
        self.knn = KNeighborsClassifier(n_neighbors=k_neighbors, metric='cosine')
        self.is_trained = False
        # Filled by preprocess_data(): one fitted encoder per feature column.
        self.label_encoders = {}

    @staticmethod
    def _read_json(path):
        """Parse the JSON document stored at ``path`` and return it."""
        # Binary mode lets the json module detect UTF-8/16/32 (and a UTF-8 BOM)
        # itself instead of relying on the platform's default text encoding.
        with open(path, 'rb') as f:
            return json.load(f)

    def load_data(self, customer_file, product_file, rating_file):
        """Read the three JSON inputs into memory.

        Args:
            customer_file: path to the customers JSON file.
            product_file: path to the products JSON file.
            rating_file: path to the ratings JSON file.

        Raises:
            OSError: if a file cannot be opened.
            json.JSONDecodeError: if a file does not contain valid JSON.
        """
        self.customer_data = self._read_json(customer_file)
        self.product_data = self._read_json(product_file)
        self.rating_data = self._read_json(rating_file)

    def preprocess_data(self):
        """Merge the loaded tables, encode the features and split train/test.

        Requires ``load_data`` to have been called. Ratings are inner-joined
        with customers on ``CustomerID`` and with products on ``ProductID``
        (both source tables expose that key as ``Id``). Every column in
        ``FEATURE_COLUMNS`` is replaced by integer codes, and the fitted
        encoder for each column is kept in ``self.label_encoders``. The result
        is split 80/20 with a fixed seed into ``self.train_data`` and
        ``self.test_data``.
        """
        customers_df = pd.DataFrame(self.customer_data).rename(columns={'Id': 'CustomerID'})
        products_df = pd.DataFrame(self.product_data).rename(columns={'Id': 'ProductID'})
        ratings_df = pd.DataFrame(self.rating_data)

        user_item_ratings = pd.merge(ratings_df, customers_df, on='CustomerID')
        user_item_ratings = pd.merge(user_item_ratings, products_df, on='ProductID')

        # Use a separate encoder per column and keep it: a single shared
        # encoder would only remember the mapping of the last column fitted.
        self.label_encoders = {}
        for column in self.FEATURE_COLUMNS:
            encoder = LabelEncoder()
            user_item_ratings[column] = encoder.fit_transform(user_item_ratings[column])
            self.label_encoders[column] = encoder

        self.train_data, self.test_data = train_test_split(
            user_item_ratings, test_size=0.2, random_state=42
        )

    def train_model(self):
        """Fit the KNN classifier on the training split.

        Requires ``preprocess_data`` to have been called. Sets
        ``self.is_trained`` to ``True`` on success.
        """
        X_train = self.train_data[list(self.FEATURE_COLUMNS)]
        y_train = self.train_data[self.TARGET_COLUMN]
        self.knn.fit(X_train, y_train)
        self.is_trained = True

    def evaluate_model(self):
        """Print a classification report and confusion matrix for the test split.

        If the model has not been trained yet, a hint is printed and the
        method returns without evaluating anything.
        """
        if not self.is_trained:
            print("Model is not trained. Call train_model() first.")
            return

        X_test = self.test_data[list(self.FEATURE_COLUMNS)]
        y_true = self.test_data[self.TARGET_COLUMN]
        y_pred = self.knn.predict(X_test)

        print("Classification Report:")
        print(classification_report(y_true, y_pred))

        print("Confusion Matrix:")
        print(confusion_matrix(y_true, y_pred))

In [3]:
if __name__ == "__main__":
    # End-to-end run: build the recommender, read the three JSON datasets,
    # then preprocess, fit, and print the evaluation for the test split.
    recommendation_system = CollaborativeFilteringRecommendation(k_neighbors=5)

    recommendation_system.load_data(
        customer_file='input/dataset/customers.json',
        product_file='input/dataset/products.json',
        rating_file='input/dataset/ratings.json',
    )

    recommendation_system.preprocess_data()
    recommendation_system.train_model()
    recommendation_system.evaluate_model()

        CustomerID  ProductID  Rate  \
0            41024        446     3   
1            27084        446     5   
2            17024        446     5   
3             5906        446     4   
4            39165        446     4   
...            ...        ...   ...   
699995       13834        682     3   
699996       14582        682     5   
699997        6641        682     2   
699998       12437        682     5   
699999       10832        682     5   

                                              Commentaire  \
0                  Lose indeed oil chair face throughout.   
1                                 Page newspaper mission.   
2                                      Fine oil grow her.   
3                          Never future laugh decade for.   
4                  Team blood position party note budget.   
...                                                   ...   
699995                     Certainly about try lose city.   
699996                  Color model former 