In [None]:

import numpy as np
import pandas as pd

from gensim.models import Word2Vec
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from typing import List, Dict, Tuple
import random
import pickle
import pprint
import spacy

from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy


# Descriptor of the dataset this notebook works on. The two file paths are
# joined with "../" below, i.e. they are resolved one directory above the
# notebook's working directory.
product_data = dict(
    data_context="books",
    product_filepath="data/products_books_v1_10_10.csv",
    transactions_filepath="data/transactions_books_v1_10_10.csv",
    features=["product_title", "product_image", "product_soup", "product_images"],
    version="1.0",
    unique_name="_books_v1_10_10",
)

product_csv_path = "../" + product_data["product_filepath"]
transaction_csv_path = "../" + product_data["transactions_filepath"]

print("looking at", product_csv_path)

# Product catalogue and user/product transactions used throughout the notebook.
productdf = pd.read_csv(product_csv_path)
transactiondf = pd.read_csv(transaction_csv_path)


print(len(transactiondf))
# Only the last expression of a cell is rendered, so of the two previews
# below just the transactions one is shown.
productdf.head()
transactiondf.head()


In [None]:

class RecommendationAbstract():
    """
    Base class for recommendation strategies over a product catalogue.

    The catalogue is a DataFrame that must provide an ``id`` column (used to
    build the ``id_to_products`` lookup) and, for the keyword-based fallback
    recommendations implemented here, a ``product_title`` column.

    Subclasses override the class-level metadata and the ``train`` /
    ``recommend_from_single`` / ``recommend_from_past`` methods.
    """
    strategy_name: str = "REQUIRES IMPLEMENTATION"
    version: str = "REQUIRES IMPLEMENTATION"
    details: str = "REQUIRES IMPLEMENTATION"
    link: str = "REQUIRES IMPLEMENTATION"
    # NOTE(review): annotated as bool but initialised with a (truthy)
    # placeholder string; subclasses are expected to override both flags.
    supports_single_recommendation: bool = "REQUIRES IMPLEMENTATION"
    supports_past_recommendation: bool = "REQUIRES IMPLEMENTATION"

    def __init__(self, products, product_data):
        """
        Store the catalogue and build the id -> product record lookup.

        :param products: DataFrame of products; must contain an ``id`` column.
        :param product_data: dataset descriptor, stored as-is.
        """
        self.products = products
        self.product_data = product_data
        self.model = None
        # One dict per catalogue row, keyed by product id. If an id occurs
        # more than once, the last row wins.
        self.id_to_products = {
            product['id']: product
            for product in self.products.to_dict(orient='records')
        }

    def loadModel(self, model_code):
        """
        Load the model
        """
        self.model = model_code

    def train(self, verbose=False, transactions_train=None, users_train=None):
        """
        Train the model. Placeholder: the base class does nothing here.
        """
        # ... do training
        # self.model = trained_model

    def get_random_recommendation(self, n=1):
        """
        Return up to ``n`` randomly chosen products as a list of dicts.

        ``n`` is capped at the catalogue size so that asking for more
        products than exist returns the whole catalogue (in random order)
        instead of raising.
        """
        sample_size = min(n, len(self.products))
        random_rows = self.products.sample(sample_size)
        return random_rows.to_dict(orient='records')

    def saveModel(self, model_code):
        """
        Save the model. Placeholder: the base class does nothing here.
        """
        # ... saves the model

    def id_to_productDetail(self, product_id: str) -> Dict[str, str]:
        """
        Return the product record for ``product_id``, or ``None`` if the id
        is not in the catalogue.
        """
        return self.id_to_products.get(product_id)

    def ids_to_products(self, ids: List[str]) -> List[Dict[str, str]]:
        """
        Return the product records for a list of product ids.

        Unknown ids yield ``None`` at the corresponding position.
        """
        return [self.id_to_productDetail(id) for id in ids]

    def like(self, keyword: str) -> List[Dict[str, str]]:
        """
        Return the product records whose title contains ``keyword``
        (case-sensitive substring match).

        Products whose title is not a string (e.g. missing values) are
        skipped.
        """
        # Iterate the product records, not the DataFrame itself: iterating a
        # DataFrame yields its column names, not its rows.
        matches = []
        for product in self.id_to_products.values():
            title = product.get('product_title')
            if isinstance(title, str) and keyword in title:
                matches.append(product)
        return matches

    def recommend_from_single(self, product_id: str, n=5) -> List[Dict[str, str]]:
        """
        Return up to ``n`` products sharing a title keyword with the product.

        Every whitespace-separated word of the product's title is looked up
        with ``like``; the combined matches are shuffled and truncated. A
        product matching several keywords appears several times in the pool
        (making it more likely to be kept), and the queried product itself is
        part of the pool. Returns an empty list when the product id is
        unknown or its title is not a string.
        """
        target = self.id_to_productDetail(product_id)
        if target is None:
            return []
        target_name = target.get('product_title')
        if not isinstance(target_name, str):
            return []

        recommendations = []
        # split() without argument drops empty tokens; an empty keyword would
        # be a substring of every title and match the whole catalogue.
        for keyword in target_name.split():
            recommendations.extend(self.like(keyword))

        random.shuffle(recommendations)
        return recommendations[:n]

    def recommend_from_past(self, user_transactions, n=10) -> List[Dict[str, str]]:
        """
        Return up to ``n`` recommendations based on past user transactions.

        :param user_transactions: iterable of mappings with a ``product_id``
            key; each one is passed to ``recommend_from_single``.
        """
        rec = []
        for transaction in user_transactions:
            rec.extend(self.recommend_from_single(transaction['product_id']))
        random.shuffle(rec)
        return rec[:n]

In [None]:

from surprise import KNNWithZScore

class KNNWithZScoreRecommender(RecommendationAbstract):
    """
    Item-based collaborative-filtering recommender built on Surprise's
    ``KNNWithZScore`` with a ``pearson_baseline`` similarity.

    The fitted model is pickled to ``models/<slug_name><unique_name>.pik``.
    """
    # NOTE(review): the display name and slug say "KNN With Means" although
    # the model is KNNWithZScore. Left unchanged because slug_name determines
    # the pickle path; confirm it does not collide with another recommender
    # before renaming.
    strategy_name: str = "KNN With Means"
    slug_name: str = "knn_with_means"
    version: str = "v1"
    details: str = "REQUIRES IMPLEMENTATION"
    link: str = "REQUIRES IMPLEMENTATION"
    supports_single_recommendation: bool = True
    supports_past_recommendation: bool = True

    def __init__(self, products: pd.DataFrame, product_data: dict):
        """
        :param products: product catalogue; must contain an ``id`` column.
        :param product_data: dataset descriptor; ``unique_name`` is used to
            build the model filename.
        """
        # The base class stores products / product_data, resets the model and
        # builds the id -> product lookup.
        super().__init__(products, product_data)

        # Get the product ids and store them.
        self.product_ids = self.products['id'].unique()
        # Transactions of the last (state-saving) training run; required for
        # retraining with an extra ad-hoc user.
        self.all_transactions_df = None

    def train(self, transactions, auto_save=True, dont_save_self_state=False):
        """
        Fit a KNNWithZScore model on the given transactions.

        :param transactions: DataFrame with ``user_id``, ``product_id`` and
            ``rate`` columns (ratings are read on a 1-5 scale).
        :param auto_save: pickle the model after training.
        :param dont_save_self_state: when True, return the fitted model
            without touching ``self.model`` / ``self.all_transactions_df``
            and without saving.
        :return: the fitted model.
        """
        sim_options = {"name": "pearson_baseline", "user_based": False}
        model = KNNWithZScore(sim_options=sim_options)

        reader = Reader(rating_scale=(1, 5))
        data = Dataset.load_from_df(transactions[['user_id', 'product_id', 'rate']], reader)

        model.fit(data.build_full_trainset())

        if dont_save_self_state:
            return model

        self.model = model
        self.all_transactions_df = transactions
        # self.accuracy = accuracy.rmse(model.test(data.build_full_trainset().build_testset()), verbose=True)

        if auto_save:
            self.save()

        return model

    def get_filename(self):
        """Return the relative path of the pickled model file."""
        return "models/" + self.slug_name + self.product_data["unique_name"] + ".pik"

    def save(self):
        """Pickle ``self.model`` to ``get_filename()``, creating the folder if needed."""
        import os  # local import: only needed to create the target folder

        filename = self.get_filename()
        directory = os.path.dirname(filename)
        if directory:
            os.makedirs(directory, exist_ok=True)
        # The context manager closes the file even if pickling fails.
        with open(filename, 'wb') as model_file:
            pickle.dump(self.model, model_file)

    def load(self):
        """
        Load a previously pickled model from ``get_filename()``.

        Only the model is restored; ``self.all_transactions_df`` stays as it
        was (``None`` on a fresh instance).
        """
        filename = self.get_filename()
        with open(filename, 'rb') as model_file:
            # SECURITY: unpickling can execute arbitrary code; only load
            # model files produced by this project.
            self.model = pickle.load(model_file)

    def recommend_from_single(self, product_id: str, n=5) -> List[Tuple[dict, float]]:
        """
        Return up to ``n`` nearest-neighbour products of ``product_id``.

        Each entry is a ``(product_record, 1.0)`` tuple; the second element
        is a constant placeholder score. Returns an empty list when the
        product was not part of the training data.
        """
        trainset = self.model.trainset
        try:
            product_inner_id = trainset.to_inner_iid(product_id)
        except ValueError:
            # Surprise raises ValueError for items unknown to the trainset.
            return []

        recommendation_list = []
        for neighbor_inner_id in self.model.get_neighbors(product_inner_id, k=n):
            # get_neighbors returns Surprise *inner* ids; they must be mapped
            # back to raw product ids and are not row positions in products.
            neighbor_raw_id = trainset.to_raw_iid(neighbor_inner_id)
            product = self.id_to_products.get(neighbor_raw_id)
            if product is not None:
                recommendation_list.append((product, 1.0))

        return recommendation_list[:n]

    def collaborativestore_predict_population(self, transactions: List[str], n=5):
        """
        Recommend for an ad-hoc user described by a list of product ids.

        The products are added to the stored transactions as ratings of 5 by
        a freshly generated user id, a temporary model is trained on the
        combined data (without touching ``self.model``) and its top ``n``
        predictions for that user are returned.

        :raises RuntimeError: if no training transactions are stored (e.g.
            the model was restored with ``load()`` instead of ``train()``).
        """
        if self.all_transactions_df is None:
            raise RuntimeError(
                "No stored transactions: call train() before requesting "
                "recommendations that require retraining."
            )

        # Generate a user id that does not clash with an existing user.
        existing_users = set(self.all_transactions_df['user_id'])
        random_user_id = "user" + str(random.randint(0, 1000000))
        while random_user_id in existing_users:
            random_user_id = "user" + str(random.randint(0, 1000000))

        # The rows must carry the generated id: predictions are requested for
        # that exact user afterwards.
        new_transactions_df = pd.DataFrame([
            {'user_id': random_user_id, 'product_id': transaction, 'rate': 5}
            for transaction in transactions
        ])

        all_transactions_df: pd.DataFrame = pd.concat(
            [self.all_transactions_df, new_transactions_df], ignore_index=True
        )

        model = self.train(all_transactions_df, dont_save_self_state=True)

        return self.predict_recommendations(random_user_id, transactions, model, n)

    def predict_recommendations(self, user_id: str, transactions: List[str], model, n=5):
        """
        Return the ``n`` catalogue products with the highest estimated rating
        for ``user_id``, skipping products listed in ``transactions``.

        :return: list of product records (no scores).
        """
        already_seen = set(transactions)
        predictions = [
            (book_id, model.predict(user_id, book_id).est)
            for book_id in self.product_ids
            if book_id not in already_seen
        ]

        # Highest estimated rating first.
        predictions.sort(key=lambda x: x[1], reverse=True)
        return [self.id_to_products[book_id] for book_id, _ in predictions[:n]]

    def recommend_from_past(self, transactions: List[str], n=10):
        """
        Recommend products from a list of previously bought product ids.

        With more than two transactions (and stored training data) a
        temporary model is retrained including the user and a list of
        product records is returned. Otherwise the neighbours of every
        transaction are collected with ``recommend_from_single`` and returned
        as ``(product_record, times_seen)`` tuples, most frequently seen
        first, truncated to ``n``.

        NOTE(review): the two paths return differently shaped items (plain
        records vs. tuples); callers must handle both.
        """
        # Retraining needs the stored transactions; fall back to neighbour
        # aggregation when they are unavailable (e.g. after load()).
        if len(transactions) > 2 and self.all_transactions_df is not None:
            return self.collaborativestore_predict_population(
                transactions, n=n
            )

        recs_seen_times = {}
        products_dictionary = {}
        for transaction in transactions:
            for product, _confidence in self.recommend_from_single(transaction):
                rec_id = product['id']
                products_dictionary[rec_id] = product
                recs_seen_times[rec_id] = recs_seen_times.get(rec_id, 0) + 1

        recs = [
            (products_dictionary[rec_id], seen_times)
            for rec_id, seen_times in recs_seen_times.items()
        ]
        # Products recommended by several transactions come first.
        recs.sort(key=lambda x: x[1], reverse=True)
        return recs[:n]
    

In [None]:

engineRec = KNNWithZScoreRecommender(productdf, product_data)
engineRec.train(transactions=transactiondf, auto_save=True)
# engineRec.load()

# Draw the demo product only among catalogue entries that occur in the
# transactions: the model is fitted on the transactions, so a product that
# never appears there is unknown to it and cannot be used as a query.
rated_products = productdf[productdf['id'].isin(transactiondf['product_id'])]
randomProduct = rated_products.sample(1).to_dict(orient='records')[0]
pprint.pprint(randomProduct)

print('======== RECOMENDATIONS SINGLE CASE =========== ')
rec = engineRec.recommend_from_single(randomProduct['id'])
pprint.pprint(rec)
