In [28]:
#imports
import pandas as pd
import numpy as np

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from collections import Counter

# DATA 612 Project 4 | Accuracy and Beyond 

### Choose a different dataset (MovieLens) to work with from your previous projects

This dataset (ml-latest-small) describes 5-star rating and free-text tagging activity from [MovieLens](http://movielens.org), a movie recommendation service. It contains 100836 ratings and 3683 tag applications across 9742 movies. These data were created by 610 users between March 29, 1996 and September 24, 2018. This dataset was generated on September 26, 2018.

In [4]:
original_movies_df = pd.read_csv('movies.csv')
original_ratings_df = pd.read_csv('ratings.csv')

In [5]:
original_movies_df

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [6]:
original_ratings_df

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [10]:
# Convert to user-item matrix
user_item_matrix = original_ratings_df.pivot(index='userId', columns='movieId', values='rating')

# Optional: Fill missing values (e.g., with 0 or NaN)
user_item_matrix = user_item_matrix.fillna(0)
user_item_matrix

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,0.0,0.0,0.0,0.0,0.0,2.5,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,2.5,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### 1. Compare the accuracy of at least two recommender system algorithms against your offline data. Choose two recommenders, then pick a metric. you can look at RMSE and MEA, but pick something recommender specific like novelty, diversity, serendipity. 

### Recommender 1: User-Based Collaborative Filtering (UBCF)

#### User-Based Collaborative Filtering (UBCF) recommender system using cosine similarity between users. To recommend items to a target user by:

- Finding similar users (via cosine similarity)

- Aggregating their ratings for items the target user hasn't seen

- Predicting how much the target user would like those unseen items

- Returning the top recommendation

In [64]:
# Compute cosine similarity between users
user_similarity = cosine_similarity(user_item_matrix)
user_similarity_df = pd.DataFrame(user_similarity, index=user_item_matrix.index, columns=user_item_matrix.index)

# Predict ratings for a target user
def predict_ratings_user_based(target_user_id):
    similar_users = user_similarity_df[target_user_id].sort_values(ascending=False)[:]
    sim_scores = similar_users.values
    sim_users = similar_users.index

    target_user_ratings = user_item_matrix.loc[target_user_id]
    predicted_ratings = {}

    for item in user_item_matrix.columns:
        if target_user_ratings[item] == 0:  # Only predict for unseen items
            numerator = 0
            denominator = 0
            for i, sim_user in enumerate(sim_users):
                rating = user_item_matrix.loc[sim_user, item]
                if rating > 0:
                    numerator += sim_scores[i] * rating
                    denominator += sim_scores[i]
            if denominator > 0:
                predicted_ratings[item] = numerator / denominator

    # Return top 5 recommendations
    return sorted(predicted_ratings.items(), key=lambda x: x[1], reverse=True)[:]


In [65]:
def train_ubcf(ratings_df):
    """
    Prepares UBCF data structures.

    Returns:
        user_item_matrix: user-item rating matrix
        similarity_df: cosine similarity between users (DataFrame)
    """
    user_item_matrix = ratings_df.pivot(index='userId', columns='movieId', values='rating').fillna(0)
    user_similarity = cosine_similarity(user_item_matrix)
    similarity_df = pd.DataFrame(user_similarity,
                                 index=user_item_matrix.index,
                                 columns=user_item_matrix.index)
    return user_item_matrix, similarity_df

    

In [66]:
def recommend_ubcf(user_id, user_item_matrix, similarity_df, ratings_df, k=5, top_n=10):
    """
    Recommend top-N items for a user using UBCF.

    Args:
        user_id (int): Target user
        user_item_matrix (DataFrame): user-item matrix from train_ubcf
        similarity_df (DataFrame): cosine similarity between users
        ratings_df (DataFrame): raw ratings data
        k (int): Number of neighbors
        top_n (int): Number of recommendations to return

    Returns:
        List of (item_id, predicted_rating)
    """
    if user_id not in user_item_matrix.index:
        return []

    # Get k most similar users
    similar_users = similarity_df[user_id].drop(user_id).nlargest(k)
    sim_scores = similar_users.values
    sim_user_ids = similar_users.index

    # Items user has already rated
    seen_items = set(ratings_df[ratings_df['userId'] == user_id]['movieId'])

    predictions = {}
    for item in user_item_matrix.columns:
        if item in seen_items:
            continue

        numerator, denominator = 0.0, 0.0
        for i, sim_user in enumerate(sim_user_ids):
            rating = user_item_matrix.loc[sim_user, item]
            if rating > 0:
                numerator += sim_scores[i] * rating
                denominator += sim_scores[i]
        if denominator > 0:
            predictions[item] = numerator / denominator

    return sorted(predictions.items(), key=lambda x: x[1], reverse=True)[:top_n]

In [67]:
user_item_matrix, similarity_df = train_ubcf(original_ratings_df)

# Use it for a user
user_id = 5
recs = recommend_ubcf(user_id, user_item_matrix, similarity_df, original_ratings_df, k=10, top_n=10)
print("Recommendations for user 5:", recs)

Recommendations for user 5: [(25, np.float64(5.0)), (151, np.float64(5.0)), (362, np.float64(5.0)), (780, np.float64(5.0)), (168, np.float64(4.999999999999999)), (218, np.float64(4.999999999999999)), (222, np.float64(4.999999999999999)), (249, np.float64(4.999999999999999)), (280, np.float64(4.999999999999999)), (293, np.float64(4.999999999999999))]


#### Recommender 2: FunkSVD (Matrix Factorization using Stochastic Gradient Descent (SGD))

FunkSVD learns latent features for users and items based on historical ratings. It uses stochastic gradient descent (SGD) to learn these features by minimizing the error between actual and predicted ratings.

In [23]:
def funk_svd(ratings_df, K=20, epochs=20, alpha=0.005, beta=0.02, verbose=True):
    """
    ratings_df: DataFrame with user_id, item_id, rating
    K: number of latent factors
    alpha: learning rate
    beta: regularization
    """
    num_users = ratings_df['userId'].max()
    num_items = ratings_df['movieId'].max()

    # Initialize latent factor matrices
    P = np.random.normal(scale=1./K, size=(num_users, K))  # user latent features
    Q = np.random.normal(scale=1./K, size=(num_items, K))  # item latent features

    ratings = ratings_df[['userId', 'movieId', 'rating']].values

    for epoch in range(epochs):
        np.random.shuffle(ratings)
        total_loss = 0

        for user, item, rating in ratings:
            u = int(user) - 1
            i = int(item) - 1

            pred = np.dot(P[u], Q[i])
            err = int(rating) - pred

            # Update latent features
            P[u] += alpha * (err * Q[i] - beta * P[u])
            Q[i] += alpha * (err * P[u] - beta * Q[i])

            total_loss += err ** 2

        if verbose:
            rmse = np.sqrt(total_loss / len(ratings))
            print(f"Epoch {epoch+1}/{epochs}, RMSE: {rmse:.4f}")

    return P, Q

In [24]:
P, Q = funk_svd(original_ratings_df, K=20, epochs=25, alpha=0.005, beta=0.02)

Epoch 1/25, RMSE: 3.5228
Epoch 2/25, RMSE: 3.4287
Epoch 3/25, RMSE: 2.5364
Epoch 4/25, RMSE: 1.7990
Epoch 5/25, RMSE: 1.4830
Epoch 6/25, RMSE: 1.3116
Epoch 7/25, RMSE: 1.2029
Epoch 8/25, RMSE: 1.1273
Epoch 9/25, RMSE: 1.0715
Epoch 10/25, RMSE: 1.0281
Epoch 11/25, RMSE: 0.9924
Epoch 12/25, RMSE: 0.9628
Epoch 13/25, RMSE: 0.9372
Epoch 14/25, RMSE: 0.9148
Epoch 15/25, RMSE: 0.8942
Epoch 16/25, RMSE: 0.8762
Epoch 17/25, RMSE: 0.8594
Epoch 18/25, RMSE: 0.8444
Epoch 19/25, RMSE: 0.8301
Epoch 20/25, RMSE: 0.8166
Epoch 21/25, RMSE: 0.8040
Epoch 22/25, RMSE: 0.7917
Epoch 23/25, RMSE: 0.7799
Epoch 24/25, RMSE: 0.7686
Epoch 25/25, RMSE: 0.7576


In [27]:
def predict(user_id, item_id, P, Q):
    return np.dot(P[user_id - 1], Q[item_id - 1])

# Example
print(f"Predicted rating for user 5 on movie 50: {predict(10, 25, P, Q):.2f}")

Predicted rating for user 5 on movie 50: 2.58


In [78]:
def predict_all_items_for_user(user_id, P, Q, seen_items=None, top_n=10):
    """
    Predict all item ratings for a user using FunkSVD.

    Args:
        user_id (int): The ID of the user (1-based)
        P (np.ndarray): User latent matrix
        Q (np.ndarray): Item latent matrix
        seen_items (list or set): Item IDs the user has already rated (1-based)
        top_n (int): Return top-N predictions

    Returns:
        List of tuples (item_id, predicted_rating), sorted by rating descending
    """
    u = user_id - 1  # Convert to 0-based index
    user_vector = P[u]

    # Predict scores for all items
    scores = np.dot(Q, user_vector)  # Q is items x K, user_vector is (K,)
    
    item_ids = np.arange(1, Q.shape[0] + 1)  # item IDs are 1-based

    # Optionally filter out already-seen items
    if seen_items:
        seen_items = set(seen_items)
        predictions = [(item_id, score) for item_id, score in zip(item_ids, scores) if item_id not in seen_items]
    else:
        predictions = list(zip(item_ids, scores))

    # Return top-N sorted
    predictions.sort(key=lambda x: x[1], reverse=True)
    return predictions[:top_n]

### Metric	Measures
**Novelty** is a measure of recommending less popular items. <br> **Diversity** is how different the recommended items are from each other.<br> **Serendipity** is recommending unexpected items that are still relevant.

In [39]:
def get_item_popularity(df):
    item_counts = Counter(df['movieId'])
    return dict(item_counts)

#### Novelty
**Novelty score** = average popularity of recommended items <br>
**Higher score** = less novel <br>
**Lower score** = more novel

In [40]:
def novelty(recommended_items, item_popularity):
    """
    recommended_items: list of item IDs
    item_popularity: dict of item_id -> count
    """
    popularity_scores = [item_popularity.get(item, 0) for item in recommended_items]
    return np.mean(popularity_scores)

#### Diversity (Pairwise dissimilarity between items)
Assume no content-based info, we’ll use Jaccard on user sets (collaborative-only diversity):

In [41]:
def jaccard_similarity(set1, set2):
    return len(set1 & set2) / len(set1 | set2) if set1 | set2 else 0

def diversity(recommended_items, user_item_dict):
    """
    recommended_items: list of item IDs
    user_item_dict: dict of item_id -> set of user_ids who interacted
    """
    pairs = [(i, j) for idx, i in enumerate(recommended_items) 
                     for j in recommended_items[idx + 1:]]
    diversities = []
    for i, j in pairs:
        users_i = user_item_dict.get(i, set())
        users_j = user_item_dict.get(j, set())
        sim = jaccard_similarity(users_i, users_j)
        diversities.append(1 - sim)
    return np.mean(diversities) if diversities else 0

#### Serendipity
An item is serendipitous if:<br>
It's relevant (user actually liked it), and <br> 
It’s unexpected (not in popular items)

In [42]:
def serendipity(user_id, recommendations, relevant_items, popular_items):
    count = 0
    for item in recommendations:
        if item in relevant_items and item not in popular_items:
            count += 1
    return count / len(recommendations) if recommendations else 0

#### Evaluation 

In [139]:
def evaluate_recommender(model_name, recommend_fn, df, P=None, Q=None):
    item_popularity = get_item_popularity(df)
    user_item_dict = df.groupby('movieId')['userId'].apply(set).to_dict()

    top_popular_items = set([item for item, count in Counter(df['movieId']).most_common(50)])

    users = df['userId'].unique()[:100]  # Test on a small subset
    novelty_scores = []
    diversity_scores = []
    serendipity_scores = []

    for user_id in users:
        seen_items = df[df['userId'] == user_id]['movieId'].tolist()
        relevant_items = set([item for item in seen_items if df[(df['userId'] == user_id) & (df['movieId'] == item)]['rating'].mean() >= 4])

        # Get recommendations
        if model_name == 'FunkSVD':
            recs = predict_all_items_for_user(user_id, P, Q, seen_items, top_n=10)
            rec_items = [item for item, _ in recs]
        else:  # UBCF
            rec_items = [item for item, _ in recommend_fn(user_id)]

        # Evaluate
        novelty_scores.append(novelty(rec_items, item_popularity))
        diversity_scores.append(diversity(rec_items, user_item_dict))
        serendipity_scores.append(serendipity(user_id, rec_items, relevant_items, top_popular_items))

    return {
        'novelty': np.mean(novelty_scores),
        'diversity': np.mean(diversity_scores),
        'serendipity': np.mean(serendipity_scores),
    }

In [74]:
def ubcf_recommend_wrapper(user_id):
    return recommend_ubcf(user_id, user_item_matrix, similarity_df, original_ratings_df, k=10, top_n=10)

In [None]:
ubcf_metrics = evaluate_recommender('UBCF', recommend_fn=ubcf_recommend_wrapper, df=original_ratings_df)

In [79]:
funk_metrics = evaluate_recommender('FunkSVD', recommend_fn=None, df=original_ratings_df, P=P, Q=Q)

In [77]:
print("UBCF Metrics:", ubcf_metrics)


UBCF Metrics: {'novelty': np.float64(57.873999999999995), 'diversity': np.float64(0.8659564624739196), 'serendipity': np.float64(0.0)}


In [80]:
print("FunkSVD Metrics:", funk_metrics)

FunkSVD Metrics: {'novelty': np.float64(58.09299999999999), 'diversity': np.float64(0.9211369206904918), 'serendipity': np.float64(0.0)}


### Comparing Metrics:

Our FunkSVD model has slightly higher **novelty** score, meaning more popular items are being recommended, but slightly higher **diversity** meaning there is more dissimilarity among recommended items.

Our UBCF model has slightly lower **novelty** score, meaning slightly more obscure items are being recommended, but it has slightly lower **diversity** meaning there is more similarity among the recommended items.

Both models have the same extremly low **serendipity**. I think this is because both are tending to recommend popular movies, we're only returning the top 10 recommendations and all those 10 are all seen or popular.

I would prefer the FunkSVD model, for more varied content in my top 10 list, at the cost of the movies being slightly more well known. 

### 2. Implement support for at least one business or user experience goal such as increased serendipity, novelty, or diversity.

### Increasing Serendipity
We'll rerank a candidate recommendation list (e.g., from FunkSVD or UBCF) by boosting items that are:
- Unpopular (low global popularity)
- Predicted to be relevant (predicted rating is high)

In [148]:
def rerank_for_serendipity(user_id, recommendations, ratings_df, popularity_weight=0.6):
    """
    Rerank items to increase serendipity (favoring less popular but relevant items).

    Args:
        user_id (int): The user to recommend to
        recommendations: List of (item_id, predicted_rating)
        ratings_df: Full ratings DataFrame
        popularity_weight (float): [0,1] controls tradeoff. Higher = more serendipity

    Returns:
        List of (item_id, adjusted_score)
    """
    # Compute item popularity
    item_counts = Counter(ratings_df['movieId'])
    max_pop = max(item_counts.values())
    
    reranked = []
    for item_id, predicted_rating in recommendations:
        popularity = item_counts.get(item_id, 0) / max_pop  # normalize to [0,1]
        unexpectedness = 1 - popularity                     # higher = less popular
        adjusted_score = (1 - popularity_weight) * predicted_rating + popularity_weight * unexpectedness
        reranked.append((item_id, adjusted_score))

    return sorted(reranked, key=lambda x: x[1], reverse=True)

In [155]:
def evaluate_recommender_serendipity(model_name, recommend_fn, df, P=None, Q=None):
    item_popularity = get_item_popularity(df)
    user_item_dict = df.groupby('movieId')['userId'].apply(set).to_dict()

    top_popular_items = set([item for item, count in Counter(df['movieId']).most_common(5)])

    users = df['userId'].unique()[0:100]  # Test on a small subset
    novelty_scores = []
    diversity_scores = []
    serendipity_scores = []

    for user_id in users:
        seen_items = df[df['userId'] == user_id]['movieId'].tolist()
        relevant_items = set([item for item in seen_items if df[(df['userId'] == user_id) & (df['movieId'] == item)]['rating'].mean() >= 3])

        # Get recommendations
        if model_name == 'FunkSVD':
            recs = predict_all_items_for_user(user_id, P, Q, seen_items, top_n=50)
            reranked_recs = rerank_for_serendipity(user_id, recs, df, popularity_weight=0.6)
            rec_items = [item for item, _ in reranked_recs[:10]]
        else:  # UBCF
            rec_items = [item for item, _ in ubcf_with_serendipity(user_id)]


        # Evaluate
        novelty_scores.append(novelty(rec_items, item_popularity))
        diversity_scores.append(diversity(rec_items, user_item_dict))
        serendipity_scores.append(serendipity(user_id, rec_items, relevant_items, top_popular_items))

    return {
        'novelty': np.mean(novelty_scores),
        'diversity': np.mean(diversity_scores),
        'serendipity': np.mean(serendipity_scores),
    }

In [156]:
def ubcf_with_serendipity(user_id):
    # Step 1: Get original recommendations from UBCF
    recs = recommend_ubcf(user_id, user_item_matrix, similarity_df, original_ratings_df, k=10, top_n=50)

    # Step 2: Apply serendipity reranking
    reranked_recs = rerank_for_serendipity(user_id, recs, original_ratings_df, popularity_weight=0.6)

    # Step 3: Return top-N
    return reranked_recs[:10]

In [157]:
ubcf_metrics_serendipity = evaluate_recommender_serendipity('UBCF', recommend_fn=ubcf_with_serendipity, df=original_ratings_df)

In [151]:
funk_metrics_serendipity = evaluate_recommender_serendipity('FunkSVD', recommend_fn=None, df=original_ratings_df, P=P, Q=Q) 

In [158]:
print("UBCF Metrics:", ubcf_metrics_serendipity)

UBCF Metrics: {'novelty': np.float64(16.692), 'diversity': np.float64(0.885276672185159), 'serendipity': np.float64(0.0)}


In [159]:
print("FunkSVD Metrics:", funk_metrics_serendipity)

FunkSVD Metrics: {'novelty': np.float64(5.1979999999999995), 'diversity': np.float64(0.968821842523899), 'serendipity': np.float64(0.0)}


### I made some changes to try to increase serendipity:
-  used rerank_for_serendipity to try to boost items that were less popular but still relevant
-  changed relevant items rating mean cut off to 3, so its not so strict as to what the interested items should be for users
-  changed both UBCF and FunkSVD to look at the top 50 for each user, and then after cut to top 10 using the serendipitiously reranked list
-  changed the popular list to just look at the 5 most common movies
    
Serendiptity still has stayed at 0 after these changes. Im unsure what isn't working as intended.

Possible problems is that all the relevant items are popular? This is unlikely as the popular list is only the 5 most popular movies. 

### 3. Compare and report on any change in accuracy before and after you’ve made the change in #2.

### Looking at Novelty and Diversity in this "Serendipitious Model":
- **Novelty** has massively dropped after these changes meaning obscure or less popular items are being recommended, **Diversity** has slightly increased for both suggesting there are now more variations among  the recommended items
- It's possible with these changes, we are now just trending towards suggesting bad movies, as we strive for less popular movies, we have to pull from the bottom of the barrel. Its also possible that the user experience will feel more random, and less personalized to a user's tastes. This is a indie maximalist approach to movie recommendation, probably most suitable for a buzzfeed type "10 Movies You've Never Heard Of!!!" list
- I think I might prefer the UBCF model this time as its suggesting slightly more popular movies, while still keeping a good amount of diversity on the list

In [161]:
print("UBCF Metrics:", ubcf_metrics)
print("UBCF serendipity Metrics:", ubcf_metrics_serendipity)
print("FunkSVD Metrics:", funk_metrics)
print("FunkSVD serendipity Metrics:", funk_metrics_serendipity)

UBCF Metrics: {'novelty': np.float64(57.873999999999995), 'diversity': np.float64(0.8659564624739196), 'serendipity': np.float64(0.0)}
UBCF serendipity Metrics: {'novelty': np.float64(16.692), 'diversity': np.float64(0.885276672185159), 'serendipity': np.float64(0.0)}
FunkSVD Metrics: {'novelty': np.float64(58.09299999999999), 'diversity': np.float64(0.9211369206904918), 'serendipity': np.float64(0.0)}
FunkSVD serendipity Metrics: {'novelty': np.float64(5.1979999999999995), 'diversity': np.float64(0.968821842523899), 'serendipity': np.float64(0.0)}


### 4. Conclusion

I looked at the MovieLends 100k dataset. I implemented both a User-Based Collaborative Filtering (UBCF) and FunkSVD recommender system. I did away with traditional accuracy meteics like RMSE this time, favoring serendipity, novelty, and diversity as methods of measuring out user's satification with our recommendations. 

The FunkSVD model has slightly higher novelty score, meaning more popular items are being recommended, but slightly higher diversity meaning there is more dissimilarity among recommended items. Our UBCF model has slightly lower novelty score, meaning slightly more obscure items are being recommended, but it has slightly lower diversity meaning there is more similarity among the recommended items. I would prefer the FunkSVD model, for more varied content in my top 10 list, at the cost of the movies being slightly more well known.

We tried to increase serendiptity by adding a popularity_weight hoping to boost boosting items that are unpopular (low global popularity) but still predicted to be relevant (predicted rating is high). Novelty has massively dropped after these changes meaning obscure or less popular items are being recommended, Diversity has slightly increased for both suggesting there are now more variations among the recommended items. Unfortuately, its possible with these changes, we are now just trending towards suggesting bad movies, as we strive for less popular movies, we have to pull from the bottom of the barrel.



### Online Evaluation Experiments

##### A/B Testing with Recommender Variants
- Compare our two recommenders (UBCF vs. FunkSVD) in live usage
- Randomly assigne users to "buckets" (Group A gets UBCF, Group B gets FunkSVD)

##### Recommendation Conversion Rate / Watch Rate 
- Did the user actually watch the item after we recommended it? We can calculate Watch rate

##### Serendipity Perception
- Track how much variety or surprise users actually experience and enjoy
- See how often users engage with our non-popular recommendations
- Ask them survey-based feedback: “Was this recommendation surprising and good?”

### Design online evaluation environment

##### 1. Frontend Logging System
- Log every recommendation shown to each user
- Log interactions: clicks, skips, watch duration, etc.

##### 2. A/B Testing Framework
- Randomly assign users to experimental groups and ensure users stick with their assigned group:
- Group A: FunkSVD
- Group B: Standard UBCF
- Group C: Serendipitious models

##### 3. Backend Pipeline
- Store logs in a database or data warehouse
- Periodically calculate online metrics (watch rate, CTR, etc.)