In [1]:
# imports
import pandas as pd
import numpy as np
from IPython.core.debugger import set_trace
import matplotlib.pyplot as plt
import subprocess

# Export the notebook as a Python script to the ../python-code folder

In [None]:
# Convert this notebook to a plain Python script (markdown cells and input
# prompts excluded) and write it to ../python-code.
# The jupyter subcommand is "nbconvert"; the previous spelling "nbcbonvert"
# is not a valid subcommand, so the export never ran.
subprocess.run("jupyter nbconvert --output-dir='../python-code' --to python Naive_Bayes.ipynb --TemplateExporter.exclude_markdown=True --TemplateExporter.exclude_input_prompt=True", shell=True)

In [None]:
class Naive_Bayes_CF():
    """
    Class representing a Naive Bayes classifier implementation for the collaborative filtering setting of recommender systems.
    """
    def __init__(self, rmh, is_task_conviction:bool=True):
        super().__init__()
        self.rmh_ = rmh
        if is_task_conviction:
            self.possibles_classes = set([0,1])
        else:
            self.possibles_classes = set([i for i in range(7)])
    
    def build_lookups(self) -> None:
        """
        Map users and items to numerical values for further indexing.
        """
        self.userid_lookup_ = {username: i for i, username in enumerate(self.rmh_.final_rating_matrix_w_usernames["username"])}
        self.itemid_lookup_ = {item: i-1 for i, item in enumerate(list(self.rmh_.final_rating_matrix_w_usernames.columns))}
        # Reverse the two calculated mappings for bidirectional lookup
        self.username_lookup = {user_id: username for username, user_id in self.userid_lookup_.items()}
        self.itemname_lookup = {item_id: itemname for itemname, item_id in self.itemid_lookup_.items()}
    
    def compute_prior_prob(self) -> None:
        """
        Compute the prior probability for every item/rating combination.
        """
        # Use rating matrix without username column as it is not relevant for the prior probability computation
        temp_rating_matrix = self.rmh_.final_rating_matrix_w_usernames.drop("username", axis=1).values
        items_without_username = set([self.itemid_lookup_[item] for item in self.itemid_lookup_.keys() if item != "username"])
        # Build dictionary to hold the prior probabilities for all item/class combinations
        self.prior_prob_for_item = {item_id: {} for item_id in items_without_username}
        for item_id in items_without_username:
            for c in self.possibles_classes:
                # Calculate the number of users that rated the item with class c
                class_count = len(temp_rating_matrix[temp_rating_matrix[:,item_id] == c])
                # Calculate all the users that gave a rating for the item
                rated_count = np.sum(~np.isnan(temp_rating_matrix[:,item_id]))
                self.prior_prob_for_item[item_id][c] = class_count / rated_count
    