#Maximal Marginal Relevance

In [2]:
import numpy as np
import pandas as pd
from sklearn.decomposition import TruncatedSVD
from sklearn.impute import SimpleImputer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# Download do dataset
!pip install wget
import wget
!python3 -m wget http://files.grouplens.org/datasets/movielens/ml-100k.zip
!unzip ml-100k.zip

Collecting wget
  Downloading wget-3.2.zip (10 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: wget
  Building wheel for wget (setup.py) ... [?25l[?25hdone
  Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9655 sha256=723b5737448db89470661e21eca3fccab252490222ab65078370c7fffc0b0f96
  Stored in directory: /root/.cache/pip/wheels/01/46/3b/e29ffbe4ebe614ff224bad40fc6a5773a67a163251585a13a9
Successfully built wget
Installing collected packages: wget
Successfully installed wget-3.2

Saved under ml-100k.zip
Archive:  ml-100k.zip
   creating: ml-100k/
  inflating: ml-100k/allbut.pl       
  inflating: ml-100k/mku.sh          
  inflating: ml-100k/README          
  inflating: ml-100k/u.data          
  inflating: ml-100k/u.genre         
  inflating: ml-100k/u.info          
  inflating: ml-100k/u.item          
  inflating: ml-100k/u.occupation    
  inflating: ml-100k/u.user          
  inflating: ml-100k/u1.base         
  

In [4]:
# Carrega o dataset
ratings = pd.read_csv(
    "ml-100k/u.data",
    sep="\t",
    names=["user_id", "item_id", "rating", "timestamp"]
)

num_users = ratings.user_id.max()
num_items = ratings.item_id.max()

# Constrói a matriz usuário-item
R = np.zeros((num_users, num_items))

for row in ratings.itertuples():
    R[row.user_id - 1, row.item_id - 1] = row.rating

user_means = np.where(R.sum(axis=1) != 0, R.sum(axis=1) / (R != 0).sum(axis=1), 0)

# Centraliza as entradas e preenche valores faltantes para o SVD
R_centered = (R - user_means.reshape(-1, 1)) * (R != 0)

imp = SimpleImputer(strategy="mean")
R_dense = imp.fit_transform(R_centered)

In [5]:
# Fatoração SVD e predições
n_factors = 40
svd = TruncatedSVD(n_components=n_factors, random_state=42)
U = svd.fit_transform(R_dense)
Sigma = svd.singular_values_
Vt = svd.components_

R_pred = U @ np.diag(Sigma) @ Vt
R_pred += user_means.reshape(-1, 1)

In [6]:
# Cálculo da similaridade item-item
Sigma_half = np.sqrt(np.diag(Sigma))
item_vectors = (Sigma_half @ Vt).T

# Normalização
item_vectors /= np.linalg.norm(item_vectors, axis=1, keepdims=True)

# Matriz de similaridade
item_sim = cosine_similarity(item_vectors)

In [7]:
# Top N do SVD
def topn_basic(user_idx, top_n=10):
    relevance = R_pred[user_idx]
    rated_mask = R[user_idx] > 0
    candidates = np.where(~rated_mask)[0]

    idx = candidates[np.argsort(relevance[candidates])[::-1]]
    return idx[:top_n]

In [8]:
# Implementação do MMR
def mmr_ranking(user_idx, lambda_mm=0.7, top_n=10):
    relevance = R_pred[user_idx]
    rated_mask = R[user_idx] > 0

    candidates = np.where(~rated_mask)[0]
    selected = []

    for _ in range(top_n):
        scores = []

        for c in candidates:
            if selected:
                diversity = np.mean([item_sim[c, s] for s in selected])
            else:
                diversity = 0

            mmr_score = lambda_mm * relevance[c] - (1 - lambda_mm) * diversity
            scores.append((mmr_score, c))

        _, best_item = max(scores)
        selected.append(best_item)
        candidates = candidates[candidates != best_item]

    return selected

In [9]:
# Métrica de diversidade, mais alto = mais diverso
def intra_list_diversity(recommended_items):
    sims = []
    for i in range(len(recommended_items)):
        for j in range(i+1, len(recommended_items)):
            sims.append(item_sim[recommended_items[i], recommended_items[j]])

    if not sims:
        return 0

    return 1 - np.mean(sims)

In [10]:
# Teste com usuário
def test(test_user):
    basic = topn_basic(test_user, top_n=10)
    mmr07 = mmr_ranking(test_user, lambda_mm=0.7, top_n=10)
    mmr03 = mmr_ranking(test_user, lambda_mm=0.3, top_n=10)
    mmr00 = mmr_ranking(test_user, lambda_mm=0.0, top_n=10)

    print("=== BASIC RECOMMENDATIONS ===")
    print(basic)
    print("ILD:", intra_list_diversity(basic))

    print("\n=== MMR λ=0.7 ===")
    print(mmr07)
    print("ILD:", intra_list_diversity(mmr07))

    print("\n=== MMR λ=0.3 ===")
    print(mmr03)
    print("ILD:", intra_list_diversity(mmr03))

    print("\n=== MMR λ=0.0 (max diversity) ===")
    print(mmr00)
    print("ILD:", intra_list_diversity(mmr00))

test(210)

=== BASIC RECOMMENDATIONS ===
[186 190 181 202   0 178 179 434  12 653]
ILD: 0.8195285159240431

=== MMR λ=0.7 ===
[np.int64(186), np.int64(190), np.int64(181), np.int64(202), np.int64(0), np.int64(178), np.int64(179), np.int64(434), np.int64(12), np.int64(317)]
ILD: 0.8404422759813547

=== MMR λ=0.3 ===
[np.int64(186), np.int64(190), np.int64(202), np.int64(0), np.int64(181), np.int64(178), np.int64(12), np.int64(193), np.int64(179), np.int64(434)]
ILD: 0.8384072531924236

=== MMR λ=0.0 (max diversity) ===
[np.int64(1681), np.int64(47), np.int64(844), np.int64(412), np.int64(134), np.int64(1079), np.int64(1525), np.int64(122), np.int64(1252), np.int64(191)]
ILD: 1.096911088424103
