In [1]:

import pandas as pd


from typing import List, Dict, Tuple
import random
import pickle
import pprint

from surprise import KNNBasic
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

product_data = {
    "data_context": "books",
    "product_filepath": "data/products_books_v1_10_10.csv",
    "transactions_filepath": "data/transactions_books_v1_10_10.csv",
    "features": ["product_title", "product_image", "product_soup", "product_images"],
    "version": "1.0",
    "unique_name": "_books_v1_10_10",
}

print("looking at", "../" + product_data["product_filepath"])

productdf =  pd.read_csv("../" + product_data["product_filepath"])
transactiondf = pd.read_csv("../" + product_data["transactions_filepath"])


print(len(transactiondf))
productdf.head()
transactiondf.head()


looking at ../data/products_books_v1_10_10.csv
381082


Unnamed: 0,id,user_id,product_id,rate
0,e84cbad6-c5a7-49b1-9eb2-558a648998d6,276847,446364193,0
1,e84cbad6-c5a7-49b1-9eb2-558a648998d6,276847,3379015180,0
2,e84cbad6-c5a7-49b1-9eb2-558a648998d6,276847,3404148576,4
3,e84cbad6-c5a7-49b1-9eb2-558a648998d6,276847,3423071516,5
4,e84cbad6-c5a7-49b1-9eb2-558a648998d6,276847,3442413508,5


In [2]:

class RecommendationAbstract():
    strategy_name: str = "REQUIRES IMPLEMENTATION"
    version: str = "REQUIRES IMPLEMENTATION"
    details: str = "REQUIRES IMPLEMENTATION"
    link: str = "REQUIRES IMPLEMENTATION"
    supports_single_recommendation: bool = "REQUIRES IMPLEMENTATION"
    supports_past_recommendation: bool = "REQUIRES IMPLEMENTATION"

    def __init__(self, products, product_data):
        self.products = products
        self.product_data = product_data
        self.model = None
        # populate id_to_products
        self.id_to_products = {}
        for product in self.products.to_dict(orient='records'):
            self.id_to_products[product['id']] = product

    def loadModel(self, model_code):
        """
        Load the model
        """
        self.model = model_code

    def train(self, verbose=False, transactions_train=None, users_train=None):
        """
        Train the model
        """
        # ... do training
        # self.model = trained_model
        
    def get_random_recommendation(self, n=1):
        """
        Get random recommendations
        """
        # Select n random rows from the DataFrame
        random_rows = self.products.sample(n)
        # Convert the selected rows to a list of dictionaries
        random_recommendations = random_rows.to_dict(orient='records')
        return random_recommendations



    def saveModel(self, model_code):
        """
        Save the model
        """
        # ... saves the model

    def id_to_productDetail(self, product_id: str) -> Dict[str, str]:
        """
        Return product details based on product id.
        """
        return self.id_to_products.get(product_id)

    def ids_to_products(self, ids: List[str]) -> List[Dict[str, str]]:
        """
        Return product details for a list of product ids.
        """
        return [self.id_to_productDetail(id) for id in ids]

    def like(self, keyword: str) -> List[str]:
        """
        Return a list of products that contain the given keyword in their title.
        """
        return [product for product in self.products if keyword in product['product_title']]

    def recommend_from_single(self, product_id: str, n=5) -> List[str]:
        """
        Return recommendations based on a single product.
        """
        target_name = self.id_to_productDetail(product_id)['product_title']
        keywords = target_name.split(" ")
        recommendations = []
        for keyword in keywords:
            recommendations.extend(self.like(keyword))
        
        random.shuffle(recommendations)
        return recommendations[:n]

    def recommend_from_past(self, user_transactions, n=10) -> List[str]:
        """
        Return recommendations based on past user transactions.
        """
        rec = []
        for transaction in user_transactions:
            rec.extend(self.recommend_from_single(transaction['product_id']))
        random.shuffle(rec)
        return rec[:n]

In [11]:

class SVDMatrixRecommender(RecommendationAbstract):
    strategy_name: str = "SVD Basic"
    slug_name: str = "svd_matrix"
    version: str = "v1"
    details: str = "REQUIRES IMPLEMENTATION"
    link: str = "REQUIRES IMPLEMENTATION"
    supports_single_recommendation: bool = True
    supports_past_recommendation: bool = True
    
    def __init__(self, products: pd.DataFrame, product_data: dict, transactions = None):
        super().__init__(products, product_data)
        self.products = products
        self.model = None
        
        # Get the product ids and store them.
        self.product_ids = self.products['id'].unique()
        self.all_transactions_df = transactions
        
    def train(self, transactions, auto_save=True, dont_save_self_state=False) :
        
        sim_options = {"name": "pearson_baseline", "user_based": False}
        model = KNNBasic(sim_options=sim_options)
        
        reader = Reader(rating_scale=(1, 5))
        
        data = Dataset.load_from_df(transactions[['user_id', 'product_id', 'rate']], reader)
        
        model.fit(data.build_full_trainset())
        
        if dont_save_self_state:
            return model
        
        self.model = model
        self.all_transactions_df = transactions
        # self.accuracy = accuracy.rmse(model.test(data.build_full_trainset().build_testset()), verbose=True)
        
        if auto_save:
            self.save()
            
        return model
        
        
    def get_filename(self):
        return "models/" + self.slug_name + self.product_data["unique_name"] + ".pik"
    
    def save(self):
        # Store self.pt
        filename = self.get_filename()
        model_file = open(filename, 'wb')
        pickle.dump(self.model, model_file)
        model_file.close()
        
    def load(self, auto_create=True):
        
        filename = self.get_filename()
        model_file = open(filename, 'rb')
        self.model = pickle.load(model_file)
        model_file.close()
        # try:
        # except:
        #     self.save()
            

    def recommend_from_single(self, product_id: str, n=5) -> List[Tuple[dict, float]]:
        """
        
        # To optimize things, SVD takes a Similitude type recommender. Which posses the method (receive product neighbors.)
        """
        recommendation_list: List[tuple[dict, float]] = []
        product_inner_id = self.model.trainset.to_inner_iid(product_id)
        neighbors = self.model.get_neighbors(product_inner_id, k=n*2)
        
        # for each neighbor, try to predict and prioritize given a user in all_transactions_that shared that book as well.
        for neighbor_book_inner_id in neighbors:
            # get user_id that top rated the product sort the relevant_transactions
            if neighbor_book_inner_id == product_inner_id:
                continue
            
            product_serie = self.products.iloc[neighbor_book_inner_id]
            neighbor_book_id = product_serie['id']
            relevant_transactions = self.all_transactions_df[self.all_transactions_df['product_id'] == neighbor_book_id]
            relevant_transactions = relevant_transactions.sort_values(by='rate', ascending=False)
            # remove where  product_id product_id
            if len(relevant_transactions) == 0:
                continue
            
            user_id = relevant_transactions.iloc[0]['user_id']
            
            pred = self.model.predict(user_id, neighbor_book_id)
            recommendation_list.append((self.id_to_products[neighbor_book_id], pred.est))
        
        # sort recommendations
        recommendation_list.sort(key=lambda x: x[1], reverse=True)
        return recommendation_list[:n]

    def collaborativestore_predict_population(self, transactions: List[str], n=5):
        """
        Adds the transactions to the use history to be considered when training the model. Doesnt not save the model with this transactions,
        proceeds to use the models to create recommendations. This is pattern was added for KNN and Matrix Factorizations
        """
        # Add transactions to the self.transactions_df as a new user
        transaction_rows = []
        random_user_id = "user" + str(random.randint(0, 1000000))
        for transaction in transactions:
            transaction_rows.append({'user_id': 'user_id', 'product_id': transaction, 'rate': 5})
        
        # Convert to a DataFrame
        new_transactions_df = pd.DataFrame(transaction_rows)

        # Append using concat
        all_transactions_df: pd.Dataframe = pd.concat([self.all_transactions_df, new_transactions_df], ignore_index=True)
        
        model = self.train(all_transactions_df, dont_save_self_state=True)
        
        return self.predict_recommendations(random_user_id, transactions, model, n)
    
    def predict_recommendations(self, user_id: str, transactions: List[str], model, n=5):
        books_to_predict = [book_id for book_id in self.product_ids if book_id not in transactions]
        predictions = []
        
        for book_id in books_to_predict:
            pred = model.predict(user_id, book_id)
            predictions.append((book_id, pred.est))
        
        pred_products = []
        # sort predictions
        predictions.sort(key=lambda x: x[1], reverse=True)
        for book_id, confidence in predictions[:n]:
            product = self.id_to_products[book_id]
            pred_products.append(product)
            
        return pred_products
        

    def recommend_from_past(self, transactions: List[str], n=10):
        """
        Calls for each transaction the recommend_from_single method.
        Gives Priority if seen multiple recommendations.
        Shuffle and returns :n
        """
        recs = set()
        recs_seen_times = {}
        products_dictionary = {}
        
        # Deprecated.
        # if(len(transactions) > 2):
        #     return self.collaborativestore_predict_population(
        #         transactions, n=n
        #     )
        
        for transaction in transactions:
            recs = self.recommend_from_single(transaction)
            for rec_id, confidence in recs:
                
                if rec_id in recs:
                    recs_seen_times[rec_id['id']] += 1
                else:
                    products_dictionary[rec_id['id']] = rec_id
                    recs_seen_times[rec_id['id']] = 1
        
        for rec_id in recs_seen_times:
            recs.append((products_dictionary[rec_id], recs_seen_times[rec_id]))
            
        recs = list(recs)
        
        recs.sort(key=lambda x: x[1], reverse=True)
        return recs
    

In [12]:

engineRec = SVDMatrixRecommender(productdf, product_data, transactions=transactiondf)
# engineRec.train(transactions=transactiondf, auto_save=True)
engineRec.load()
  
  

randomProduct = engineRec.get_random_recommendation()[0]
pprint.pprint(randomProduct)
SEARCH_TEST = randomProduct['id']
# SEARCH_TEST = engineRec.recommend_from_single(randomProduct['id'])
# 

# pprint.pprint(engineRec.id_to_productDetail(SEARCH_TEST))
# pprint.pprint(SEARCH_TEST)
print('======== RECOMENDATIONS SINGLE CASE =========== ')

# SEARCH_TEST = '1572971835'
rec = engineRec.recommend_from_single(SEARCH_TEST)
print('---------')
pprint.pprint(rec)


{'count': 35,
 'id': '0380788624',
 'product_id': '0380788624',
 'product_image': 'http://images.amazon.com/images/P/0380788624.01.MZZZZZZZ.jpg',
 'product_price': nan,
 'product_soup': 'Cryptonomicon Neal Stephenson Perennial',
 'product_tags': nan,
 'product_title': 'Cryptonomicon'}


AttributeError: 'NoneType' object has no attribute 'trainset'

In [25]:
  
  

# randomProduct = engineRec.get_random_recommendation()[0]
# pprint.pprint(randomProduct)

# rec = engineRec.recommend_from_single(randomProduct['id'])
# pprint.pprint(rec)
# 

SEARCH_TEST = '0590353403'
print('======== RECOMENDATIONS SINGLE CASE =========== ')

# pprint.pprint(engineRec.product_ids[SEARCH_TEST])
rec = engineRec.recommend_from_single(SEARCH_TEST)
pprint.pprint(engineRec.id_to_productDetail(SEARCH_TEST))
print('---------')
pprint.pprint(rec)


{'count': 122,
 'id': '0590353403',
 'product_id': '0590353403',
 'product_image': 'http://images.amazon.com/images/P/0590353403.01.MZZZZZZZ.jpg',
 'product_price': nan,
 'product_soup': "Harry Potter and the Sorcerer's Stone (Book 1) J. K. Rowling "
                 'Scholastic',
 'product_tags': nan,
 'product_title': "Harry Potter and the Sorcerer's Stone (Book 1)"}
---------
[({'count': 40,
   'id': '0786884142',
   'product_id': '0786884142',
   'product_image': 'http://images.amazon.com/images/P/0786884142.01.MZZZZZZZ.jpg',
   'product_price': nan,
   'product_soup': 'A Monk Swimming : A Memoir Malachy McCourt Hyperion',
   'product_tags': nan,
   'product_title': 'A Monk Swimming : A Memoir'},
  5),
 ({'count': 266,
   'id': '0446606812',
   'product_id': '0446606812',
   'product_image': 'http://images.amazon.com/images/P/0446606812.01.MZZZZZZZ.jpg',
   'product_price': nan,
   'product_soup': 'Message in a Bottle Nicholas Sparks Warner Vision',
   'product_tags': nan,
   'prod

In [26]:

# ... Repetition.
print("=============  RECOMENDATIONS RECOMMENDATIONS  ============")
tansactions = ['0590353403', '0439139597']

"""
Harry Potter and the Sorcerer's Stone (Book 1)
"Harry Potter and the Goblet of Fire (Book 4)"
"""

rec = engineRec.recommend_from_past(tansactions)
pprint.pprint(rec)


[({'count': 40,
   'id': '0786884142',
   'product_id': '0786884142',
   'product_image': 'http://images.amazon.com/images/P/0786884142.01.MZZZZZZZ.jpg',
   'product_price': nan,
   'product_soup': 'A Monk Swimming : A Memoir Malachy McCourt Hyperion',
   'product_tags': nan,
   'product_title': 'A Monk Swimming : A Memoir'},
  5),
 ({'count': 266,
   'id': '0446606812',
   'product_id': '0446606812',
   'product_image': 'http://images.amazon.com/images/P/0446606812.01.MZZZZZZZ.jpg',
   'product_price': nan,
   'product_soup': 'Message in a Bottle Nicholas Sparks Warner Vision',
   'product_tags': nan,
   'product_title': 'Message in a Bottle'},
  5),
 ({'count': 58,
   'id': '0061091790',
   'product_id': '0061091790',
   'product_image': 'http://images.amazon.com/images/P/0061091790.01.MZZZZZZZ.jpg',
   'product_price': nan,
   'product_soup': 'After All These Years Susan Isaacs HarperTorch',
   'product_tags': nan,
   'product_title': 'After All These Years'},
  5),
 ({'count': 39,
