In [30]:
import pandas as pd
import numpy as np
from scipy import sparse
from tqdm import tqdm
import json

# Constants
# Neighborhood size; passed as the `k` argument when the CollaborativeFiltering
# models are constructed further down in the notebook.
k_neighbors = 19
# Length of the recommendation list; in the cells shown here it is referenced
# only by the commented-out evaluate_recommendations(...) calls.
n_recommendations = 10

In [31]:
class CollaborativeFiltering:
    """
    Collaborative Filtering class for User-User or Item-Item predictions.

    The instance keeps the interaction matrix it is given and, once `fit`
    has run, exposes the pairwise similarity matrix as `self.S`.
    """
    def __init__(self, Y_data: np.ndarray, n_entities: int, n_others: int, experienced, k: int, sim_func = np.dot, mode='none') -> None:
        """
        Initialize the collaborative filtering model.

        Parameters:
        - Y_data: interaction matrix; `fit` compares its rows in 'user' mode
          and its columns in 'item' mode
        - n_entities: number of unique targets (the things being compared)
        - n_others: number of unique others
        - experienced: stored unchanged on the instance; the notebook passes
          one list per entity (movies watched per user / users per movie)
        - k: number of nearest neighbors to consider for predictions
        - sim_func: similarity function, called as sim_func(A, B) with two
          sparse matrices; the default np.dot means a plain matrix product
        - mode: 'user' for User-User CF, 'item' for Item-Item CF

        Raises:
        - ValueError: if mode is neither 'user' nor 'item'. Note that the
          default value 'none' is rejected, so mode must always be given.
        """
        if mode not in ('user', 'item'):
            raise ValueError("Mode must be 'user' or 'item'")
        self.Ybar = Y_data
        self.n_entities = n_entities  # Number of unique targets
        self.n_others = n_others  # Number of unique others
        # Historical name for the same value, kept so existing code that reads
        # `n_items` keeps working (in 'item' mode this is a user count).
        self.n_items = n_others
        self.experienced = experienced
        self.k = k
        self.sim_func = sim_func
        self.mode = mode

    def fit(self) -> None:
        """
        Compute the similarity matrix and store it in `self.S`.

        Side effect: `self.Ybar` is replaced by its CSR sparse form.
        """

        self.Ybar = sparse.coo_matrix(self.Ybar).tocsr() # Create a sparse matrix
        if self.mode == 'user':
            print("Computing User-User CF Similarity Matrix...")
            self.S = self._similarity(self.Ybar, self.Ybar.T)
        else:  # mode == 'item'
            print("Computing Item-Item CF Similarity Matrix...")
            self.S = self._similarity(self.Ybar.T, self.Ybar)

    def _similarity(self, left, right):
        """Apply the configured similarity function to two sparse matrices."""
        if self.sim_func is np.dot:
            # SciPy documents np.dot as not sparse-aware; the `@` operator is
            # the supported way to get the same matrix product.
            return left @ right
        return self.sim_func(left, right)

In [32]:
def read_json(file):
    """
    Load and return the parsed contents of a JSON file.

    Parameters:
    - file: path of the JSON file to read

    The file is opened explicitly as UTF-8 so that non-ASCII text decodes the
    same way on every platform instead of depending on the locale default.
    """
    with open(file, encoding="utf-8") as f:
        return json.load(f)

# Reading data file:
dataset = read_json("ml_100k.json")

# Getting users and movies lists.
# Each record is read as: elem[0] = ' | '-separated titles the user watched,
# elem[1] = the answer used for evaluation.
desire = list() # movies each user would watch
movies_watched = list()
movies_id = dict() # title -> column index, in order of first appearance
for elem in dataset:
    desire.append(elem[1])
    watched = elem[0].split(' | ')
    movies_watched.append(watched)  # split once and reuse in the second pass
    for movie in watched:
        if movie not in movies_id:
            movies_id[movie] = len(movies_id)
movies_set = set(movies_id)
n_movies = len(movies_id)
n_users = len(movies_watched)

# We turn the dataset to user-item matrix (to apply the book's code)
ui_matrix = np.zeros((n_users, n_movies), dtype=int)

users_liked = list([] for _ in range(n_movies))
for index, watched in enumerate(movies_watched):
    for movie in watched:
        # Named movie_idx rather than `id` so the builtin id() is not shadowed
        # for the rest of the notebook.
        movie_idx = movies_id[movie]
        # Guard against a title repeated within one user's list, which would
        # otherwise record the same user twice for that movie.
        if ui_matrix[index, movie_idx] == 0:
            ui_matrix[index, movie_idx] = 1
            users_liked[movie_idx].append(index)

print("Number of different movies: ", n_movies)
print("Number of different users: ", n_users)
print("Answers for evaluation:", desire)
print("User-Item Matrix: ", ui_matrix)

Number of different movies:  1493
Number of different users:  943
Answers for evaluation: ['Starship Troopers', 'Primary Colors', 'Rosencrantz and Guildenstern Are Dead', 'Bean', 'Dark City', 'The Bridge on the River Kwai', 'Men in Black', 'Mimic', '2 Days in the Valley', 'The Lion King', "It's a Wonderful Life", 'Striptease', 'Escape from L.A.', "William Shakespeare's Romeo and Juliet", 'The Professional', 'Johnny Mnemonic', 'Under Siege 2: Dark Territory', 'Lost Highway', 'Speed', 'The Fifth Element', 'Congo', 'Body Parts', 'The Replacement Killers', 'Air Force One', 'The Aristocats', 'Price Above Rubies, A', 'Sleepless in Seattle', 'Rumble in the Bronx', 'The Apostle', 'Twelve Monkeys', 'Toy Story', 'Foreign Correspondent', 'Volcano', 'Jackie Brown', "Jackie Chan's First Strike", 'Fear', 'The Craft', 'Screamers', 'Willy Wonka and the Chocolate Factory', 'Forbidden Planet', 'The Crying Game', 'The River Wild', 'Independence Day (ID4)', 'Scream 2', 'Stand by Me', 'The Full Monty', 'Fr

In [33]:
# User-User CF: one entity per user, with the movies each user watched.
cf_user = CollaborativeFiltering(
    Y_data=ui_matrix,
    n_entities=n_users,
    n_others=n_movies,
    experienced=movies_watched,
    k=k_neighbors,
    mode='user',
)
cf_user.fit()
# Evaluation currently disabled:
# cf_user.evaluate_recommendations(desire, n_recommendations)

# Item-Item CF: one entity per movie, with the users recorded for each movie.
cf_item = CollaborativeFiltering(
    Y_data=ui_matrix,
    n_entities=n_movies,
    n_others=n_users,
    experienced=users_liked,
    k=k_neighbors,
    mode='item',
)
cf_item.fit()
# Evaluation currently disabled:
# cf_item.evaluate_recommendations(desire, n_recommendations)

Computing User-User CF Similarity Matrix...
Computing Item-Item CF Similarity Matrix...
